diff --git a/.gitignore b/.gitignore index c2232377e..06c7d3dc3 100644 --- a/.gitignore +++ b/.gitignore @@ -8,6 +8,7 @@ images/ .idea/ .vscode/ .empty/ +.DS_Store # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] diff --git a/describe/README.md b/describe/README.md index f765a9666..621dbd1a3 100644 --- a/describe/README.md +++ b/describe/README.md @@ -1 +1,26 @@ -# describe \ No newline at end of file +# Describe + +Get the table's summary statistics and summary plots + +The function requires the following parameters: + +```markdown + +:param context: the function context +:param table: MLRun input pointing to pandas dataframe (csv/parquet file path) +:param label_column: ground truth column label +:param class_labels: label for each class in tables and plots +:param plot_hist: (True) set this to False for large tables +:param plots_dest: destination folder of summary plots (relative to artifact_path) +:param update_dataset: when the table is a registered dataset, update the charts in-place + +``` + +The function will output the following artifacts per column within the data frame (based on data types): + +1. histogram chart +2. violin chart +3. imbalance chart +4. correlation-matrix chart +5. correlation-matrix csv +6. 
imbalance-weights-vec csv \ No newline at end of file diff --git a/describe/describe.ipynb b/describe/describe.ipynb index d0718b12b..ac56d712e 100644 --- a/describe/describe.ipynb +++ b/describe/describe.ipynb @@ -20,13 +20,13 @@ "output_type": "stream", "text": [ "%nuclio: setting kind to 'job'\n", - "%nuclio: setting spec.image to 'mlrun/ml-models'\n" + "%nuclio: setting spec.image to 'mlrun/mlrun'\n" ] } ], "source": [ "%nuclio config kind = \"job\"\n", - "%nuclio config spec.image = \"mlrun/ml-models\"" + "%nuclio config spec.image = \"mlrun/mlrun\"" ] }, { @@ -61,7 +61,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ @@ -161,7 +161,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ @@ -172,59 +172,75 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### mlconfig" + "### MLconfig" ] }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 7, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "> 2020-11-24 15:34:46,837 [warning] warning!, server (0.5.4-rc1) and client (0.5.4) ver dont match\n" + ] + }, + { + "data": { + "text/plain": [ + "'/User/functions-udpate/describe'" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "from mlrun import mlconf\n", - "import os\n", - "mlconf.dbpath = mlconf.dbpath or 'http://mlrun-api:8080'\n", - "mlconf.artifact_path = mlconf.artifact_path or os.path.abspath('./')" + "import mlrun\n", + "mlrun.set_environment(api_path = 'http://mlrun-api:8080',\n", + " artifact_path = os.path.abspath('./'))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### save" + "### Save" ] }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "> 
2020-07-23 07:46:39,543 [info] function spec saved to path: function.yaml\n" + "> 2020-11-24 15:35:06,452 [info] function spec saved to path: function.yaml\n" ] }, { "data": { "text/plain": [ - "" + "" ] }, - "execution_count": 18, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "from mlrun import code_to_function \n", "# create job function object from notebook code\n", - "fn = code_to_function(\"describe\", handler=\"summarize\",\n", - " description=\"describe and visualizes dataset stats\",\n", - " categories=[\"analysis\"],\n", - " labels = {\"author\": \"yjb\"},\n", - " code_output='.')\n", + "fn = mlrun.code_to_function(\"describe\", handler=\"summarize\",\n", + " description=\"describe and visualizes dataset stats\",\n", + " categories=[\"analysis\"],\n", + " labels = {\"author\": \"yjb\"},\n", + " code_output='.')\n", "\n", "fn.export()" ] @@ -233,7 +249,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## tests" + "## Tests" ] }, { @@ -244,7 +260,7 @@ { "data": { "text/plain": [ - "" + "" ] }, "execution_count": 9, @@ -253,8 +269,7 @@ } ], "source": [ - "from mlrun.platforms import auto_mount\n", - "fn.apply(auto_mount())" + "fn.apply(mlrun.platforms.auto_mount())" ] }, { @@ -263,48 +278,34 @@ "metadata": {}, "outputs": [], "source": [ - "from mlrun import NewTask, run_local\n", + "DATA_URL = 'https://s3.wasabisys.com/iguazio/data/iris/iris_dataset.csv'\n", "\n", - "#DATA_URL = \"https://iguazio-sample-data.s3.amazonaws.com/datasets/classifier-data.csv\"\n", - "DATA_URL = 'https://iguazio-sample-data.s3.amazonaws.com/datasets/iris_dataset.csv'" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [], - "source": [ - "task = NewTask(\n", - " name=\"tasks-describe\", \n", - " handler=summarize, \n", - " inputs={\"table\": DATA_URL}, params={'update_dataset': True, 'label_column': 'label'})" + "task = mlrun.NewTask(name=\"tasks-describe\", \n", + " handler=summarize, 
\n", + " inputs={\"table\": DATA_URL}, \n", + " params={'update_dataset': True, \n", + " 'label_column': 'label'})" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### run locally" + "### Run Locally" ] }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 11, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "> 2020-07-22 09:00:32,582 [debug] Validating field against patterns: {'field_name': 'run.metadata.name', 'field_value': 'tasks-describe', 'pattern': ['^.{0,63}$', '^(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])?$']}\n", - "> 2020-07-22 09:00:32,598 [info] starting run tasks-describe uid=f30656601819462c892a9365dd175f72 -> http://mlrun-api:8080\n", - "> 2020-07-22 09:00:37,475 [debug] log artifact histograms at /User/functions/describe/plots/hist.html, size: 140127, db: N\n", - "> 2020-07-22 09:00:38,377 [debug] log artifact violin at /User/functions/describe/plots/violin.html, size: 54096, db: N\n", - "> 2020-07-22 09:00:38,680 [debug] log artifact imbalance at /User/functions/describe/plots/imbalance.html, size: 10045, db: Y\n", - "> 2020-07-22 09:00:38,697 [debug] log artifact imbalance-weights-vec at /User/functions/describe/plots/imbalance-weights-vec.csv, size: 65, db: N\n", - "> 2020-07-22 09:00:38,702 [debug] log artifact correlation-matrix at /User/functions/describe/plots/correlation-matrix.csv, size: 324, db: N\n", - "> 2020-07-22 09:00:38,877 [debug] log artifact correlation at /User/functions/describe/plots/corr.html, size: 12052, db: N\n" + "> 2020-11-24 15:35:06,489 [warning] warning!, server (0.5.4-rc1) and client (0.5.4) ver dont match\n", + "> 2020-11-24 15:35:06,489 [info] starting run tasks-describe uid=38d5c276628e46ff8634942b3585f636 DB=http://mlrun-api:8080\n", + "> 2020-11-24 15:35:06,538 [warning] warning!, server (0.5.4-rc1) and client (0.5.4) ver dont match\n" ] }, { @@ -312,13 +313,13 @@ "text/html": [ "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
projectuiditerstartstatenamelabelsinputsparametersresultsartifacts
default0Oct 28 13:54:47runningdescribe-spark-describe_spark
v3io_user=admin
kind=job
owner=admin
dataset
\n", + "
\n", + "
\n", + "
\n", + " Title\n", + " ×\n", + "
\n", + " \n", + "
\n", + "
\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "to track results use .show() or .logs() or in CLI: \n", + "!mlrun get run a9cc8a2b48ce42d180e490043091da52 --project default , !mlrun logs a9cc8a2b48ce42d180e490043091da52 --project default\n", + "> 2020-10-28 13:54:48,285 [info] run executed, status=running\n" + ] + } + ], + "source": [ + "run_res = fn.run(inputs={\"dataset\": \"iris_dataset.csv\"},\n", + " artifact_path=artifact_path, watch=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "> 2020-10-28 13:55:36,021 [warning] warning!, server (0.5.3-rc1) and client (0.5.2) ver dont match\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
projectuiditerstartstatenamelabelsinputsparametersresultsartifacts
default0Oct 28 13:54:55completeddescribe-spark-describe_spark
v3io_user=admin
kind=job
owner=admin
host=describe-spark-describe-spark-pxzxc
dataset
n=150
nvar=5
total_missing=0.0
memsize=0.0 YiB
recordsize=0.0 YiB
NUM=5
DATE=0
CONST=0
CAT=0
UNIQUE=0
CORR=0
REJECTED=0
summary_stats
\n", + "
\n", + "
\n", + "
\n", + " Title\n", + " ×\n", + "
\n", + " \n", + "
\n", + "
\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "run_res.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python [conda env:root] *", + "language": "python", + "name": "conda-root-py" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.6" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/describe_spark/function.yaml b/describe_spark/function.yaml new file mode 100644 index 000000000..98075a3cf --- /dev/null +++ b/describe_spark/function.yaml @@ -0,0 +1,332 @@ +kind: job +metadata: + name: describe-spark + tag: '' + hash: 242cd594bd1c4be61f4fe6a2ff5a8d2902d5b8ca + project: default +spec: + command: '' + args: [] + image: iguazio/shell:3.0_b5565_20201026062233_wsdf + env: + - name: V3IO_API + value: '' + - name: V3IO_USERNAME + value: '' + - name: V3IO_ACCESS_KEY + value: '' + - name: CURRENT_NODE_IP + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: status.hostIP + - name: IGZ_DATA_CONFIG_FILE + value: /igz/java/conf/v3io.conf + default_handler: describe_spark + entry_points: + describe: + name: describe + doc: '' + parameters: + - name: df + default: '' + - name: bins + default: '' + - name: corr_reject + default: '' + - name: config + default: '' + outputs: + - default: '' + lineno: 38 + pretty_name: + name: pretty_name + doc: '' + parameters: + - name: x + default: '' + outputs: + - default: '' + lineno: 51 + corr_matrix: + name: corr_matrix + doc: '' + parameters: + - name: df + default: '' + - name: columns + default: null + outputs: + - default: '' + lineno: 58 + separate: + name: separate + doc: '' + parameters: + - name: l + default: '' + - name: 
n + default: '' + outputs: + - default: '' + lineno: 63 + create_hist_data: + name: create_hist_data + doc: '' + parameters: + - name: df + default: '' + - name: column + default: '' + - name: minim + default: '' + - name: maxim + default: '' + - name: bins + default: 10 + outputs: + - default: '' + lineno: 80 + create_all_conditions: + name: create_all_conditions + doc: 'Recursive function that exploits the + + ability to call the Spark SQL Column method + + .when() in a recursive way.' + parameters: + - name: current_col + default: '' + - name: column + default: '' + - name: left_edges + default: '' + - name: count + default: 1 + outputs: + - default: '' + lineno: 82 + describe_integer_1d: + name: describe_integer_1d + doc: '' + parameters: + - name: df + default: '' + - name: column + default: '' + - name: current_result + default: '' + - name: nrows + default: '' + outputs: + - default: '' + lineno: 134 + describe_float_1d: + name: describe_float_1d + doc: '' + parameters: + - name: df + default: '' + - name: column + default: '' + - name: current_result + default: '' + - name: nrows + default: '' + outputs: + - default: '' + lineno: 170 + describe_date_1d: + name: describe_date_1d + doc: '' + parameters: + - name: df + default: '' + - name: column + default: '' + outputs: + - default: '' + lineno: 204 + guess_json_type: + name: guess_json_type + doc: '' + parameters: + - name: string_value + default: '' + outputs: + - default: '' + lineno: 221 + describe_categorical_1d: + name: describe_categorical_1d + doc: '' + parameters: + - name: df + default: '' + - name: column + default: '' + outputs: + - default: '' + lineno: 229 + describe_constant_1d: + name: describe_constant_1d + doc: '' + parameters: + - name: df + default: '' + - name: column + default: '' + outputs: + - default: '' + lineno: 267 + describe_unique_1d: + name: describe_unique_1d + doc: '' + parameters: + - name: df + default: '' + - name: column + default: '' + outputs: + - default: '' + lineno: 
274 + describe_1d: + name: describe_1d + doc: '' + parameters: + - name: df + default: '' + - name: column + default: '' + - name: nrows + default: '' + - name: lookup_config + default: null + outputs: + - default: '' + lineno: 281 + gradient_format: + name: gradient_format + doc: '' + parameters: + - name: value + default: '' + - name: limit1 + default: '' + - name: limit2 + default: '' + - name: c1 + default: '' + - name: c2 + default: '' + outputs: + - default: '' + lineno: 396 + LerpColour: + name: LerpColour + doc: '' + parameters: + - name: c1 + default: '' + - name: c2 + default: '' + - name: t + default: '' + outputs: + - default: '' + lineno: 397 + fmt_color: + name: fmt_color + doc: '' + parameters: + - name: text + default: '' + - name: color + default: '' + outputs: + - default: '' + lineno: 403 + fmt_class: + name: fmt_class + doc: '' + parameters: + - name: text + default: '' + - name: cls + default: '' + outputs: + - default: '' + lineno: 407 + fmt_bytesize: + name: fmt_bytesize + doc: '' + parameters: + - name: num + default: '' + - name: suffix + default: B + outputs: + - default: '' + lineno: 411 + fmt_percent: + name: fmt_percent + doc: '' + parameters: + - name: v + default: '' + outputs: + - default: '' + lineno: 421 + fmt_varname: + name: fmt_varname + doc: '' + parameters: + - name: v + default: '' + outputs: + - default: '' + lineno: 424 + fmt_row_severity: + name: fmt_row_severity + doc: '' + parameters: + - name: v + default: '' + outputs: + - default: '' + lineno: 441 + fmt_skewness: + name: fmt_skewness + doc: '' + parameters: + - name: v + default: '' + outputs: + - default: '' + lineno: 447 + describe_spark: + name: describe_spark + doc: '' + parameters: + - name: context + type: MLClientCtx + default: '' + - name: dataset + type: DataItem + default: '' + - name: artifact_path + default: '' + - name: bins + type: int + default: 30 + - name: describe_extended + type: bool + default: true + outputs: + - default: '' + lineno: 463 + 
description: '' + image_pull_policy: IfNotPresent + build: + functionSourceCode:  + commands: [] + code_origin: https://github.com/Idan707/functions.git#a68e6f7607e56573f329abc5b510bb24612d886e:.ipynb diff --git a/describe_spark/read_csv_spark.ipynb b/describe_spark/read_csv_spark.ipynb new file mode 100644 index 000000000..dd27375ec --- /dev/null +++ b/describe_spark/read_csv_spark.ipynb @@ -0,0 +1,421 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Import and Config" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "# nuclio: ignore\n", + "import nuclio" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "%nuclio: setting kind to 'job'\n", + "%nuclio: setting spec.image to 'iguazio/shell:3.0_b5565_20201026062233_wsdf'\n" + ] + } + ], + "source": [ + "%nuclio config kind = \"job\"\n", + "%nuclio config spec.image = \"iguazio/shell:3.0_b5565_20201026062233_wsdf\" # docker image available on idan707/spark_shell " + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "#!/usr/local/bin/python\n", + "\n", + "import mlrun\n", + "from mlrun.platforms.iguazio import mount_v3io, mount_v3iod\n", + "from mlrun.datastore import DataItem\n", + "from mlrun.execution import MLClientCtx\n", + "\n", + "import os\n", + "#import spark_df_profiling\n", + "from subprocess import run\n", + "\n", + "from pyspark.sql import SparkSession\n", + "import pyspark.sql.functions as f\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Build Simple Read CSV Function using Spark" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "#!/usr/local/bin/python\n", + "\n", + "run([\"/bin/bash\", \"/etc/config/v3io/v3io-spark-operator.sh\"])\n", + "\n", 
+ "def describe_spark(context: MLClientCtx, \n", + " dataset: DataItem, \n", + " artifact_path):\n", + " \n", + " # get file location\n", + " location = dataset.local()\n", + " \n", + " # build spark session\n", + " spark = SparkSession.builder.appName(\"Spark job\").getOrCreate()\n", + " \n", + " # read csv\n", + " df = spark.read.csv(location, header=True, inferSchema= True)\n", + " \n", + " # show\n", + " df.show(5)\n", + " \n", + " # sample for logging\n", + " df_to_log = df.sample(False, 0.1).toPandas()\n", + " \n", + " # log final report\n", + " context.log_dataset(\"df_sample\", \n", + " df=df_to_log,\n", + " format=\"csv\", index=False,\n", + " artifact_path=context.artifact_subpath('data'))\n", + " \n", + " spark.stop()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "# nuclio: end-code" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Save and Config" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "fn = mlrun.code_to_function(handler=\"describe_spark\")" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "fn.apply(mount_v3io())\n", + "fn.apply(mount_v3iod(namespace=\"default-tenant\", v3io_config_configmap=\"spark-operator-v3io-config\"))\n", + "fn.spec.image_pull_policy = \"IfNotPresent\"" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "> 2020-10-28 11:26:16,525 [warning] warning!, server (0.5.3-rc1) and client (0.5.2) ver dont match\n" + ] + } + ], + "source": [ + "artifact_path = mlrun.set_environment(api_path = 'http://mlrun-api:8080',\n", + " artifact_path = os.path.abspath('./'))" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + 
"text": [ + "> 2020-10-28 11:26:16,536 [warning] warning!, server (0.5.3-rc1) and client (0.5.2) ver dont match\n", + "> 2020-10-28 11:26:16,536 [info] starting run test-describe_spark uid=c90bf87b5c9641ca9bc17940e068ab38 -> http://mlrun-api:8080\n", + "> 2020-10-28 11:26:16,680 [info] Job is running in the background, pod: test-describe-spark-xlzj7\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
projectuiditerstartstatenamelabelsinputsparametersresultsartifacts
default0Oct 28 11:26:16runningtest-describe_spark
v3io_user=admin
kind=job
owner=admin
dataset
\n", + "
\n", + "
\n", + "
\n", + " Title\n", + " ×\n", + "
\n", + " \n", + "
\n", + "
\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "to track results use .show() or .logs() or in CLI: \n", + "!mlrun get run c90bf87b5c9641ca9bc17940e068ab38 --project default , !mlrun logs c90bf87b5c9641ca9bc17940e068ab38 --project default\n", + "> 2020-10-28 11:26:16,762 [info] run executed, status=running\n" + ] + }, + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "fn.run(inputs={\"dataset\": \"iris_dataset.csv\"},\n", + " artifact_path=artifact_path)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python [conda env:root] *", + "language": "python", + "name": "conda-root-py" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.6" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/model_server/README.md b/model_server/README.md index a5ccc81b9..1b382cb9a 100644 --- a/model_server/README.md +++ b/model_server/README.md @@ -1,6 +1,11 @@ # serving models -**`xgboost/xgb-serving.ipynb`** deploy an xgboost server model
+Model Serving provides a solution to host machine learning / deep learning (ML/DL) models as REST endpoints that are updated automatically, enabling data science teams to own the end-to-end lifecycle of a real-time machine learning model from training to production. -**`model_server.ipynb`** deploy any classifier model that has been pickled (cloudpickle)
- For demonstrations, see **[lightgbm-project](https://github.com/yjb-ds/lightgbm-project)**, **[demo-sklearn-project](https://github.com/yjb-ds/demo-sklearn-project)**, and **[demo-xgb-project](https://github.com/yjb-ds/demo-xgb-project/tree/functions)** \ No newline at end of file +**`model_server.ipynb`** deploys any classifier model that has been pickled (cloudpickle). + +For more demonstrations, see: + +1. **[lightgbm-project](https://github.com/yjb-ds/lightgbm-project)** +2. **[demo-sklearn-project](https://github.com/yjb-ds/demo-sklearn-project)** +3. **[demo-xgb-project](https://github.com/yjb-ds/demo-xgb-project/tree/functions)** \ No newline at end of file diff --git a/model_server/function.yaml b/model_server/function.yaml index 1c7739f92..8de42cf67 100644 --- a/model_server/function.yaml +++ b/model_server/function.yaml @@ -1,7 +1,7 @@ kind: remote metadata: name: model-server - hash: 8f9b901041ee2f8781fe86beb1c9486193a1f9ee + hash: a1dc5a391186ead91ff5cef97ba9ab277adb0ceb project: default labels: author: yaronh @@ -14,12 +14,11 @@ spec: args: [] image: '' description: generic sklearn model server + min_replicas: 1 max_replicas: 4 env: - name: MODEL_CLASS value: ClassifierModel - - name: ENABLE_EXPLAINER - value: 'False' config: spec.triggers.http: kind: http @@ -29,23 +28,23 @@ spec: annotations: {} base_spec: apiVersion: nuclio.io/v1 - kind: nuclio:serving + kind: serving metadata: annotations: - nuclio.io/generated_by: function generated from 30-08-2020 + nuclio.io/generated_by: function generated from 29-11-2020 by admin labels: {} name: model-server spec: build: - baseImage: mlrun/mlrun commands: - - python -m pip install numpy cloudpickle v3io sklearn - functionSourceCode: 
IyBHZW5lcmF0ZWQgYnkgbnVjbGlvLmV4cG9ydC5OdWNsaW9FeHBvcnRlcgoKaW1wb3J0IG9zCmZyb20gY2xvdWRwaWNrbGUgaW1wb3J0IGxvYWQKaW1wb3J0IG51bXB5IGFzIG5wCmZyb20gdHlwaW5nIGltcG9ydCBMaXN0CmZyb20gZGF0ZXRpbWUgaW1wb3J0IGRhdGV0aW1lCmltcG9ydCBtbHJ1bgoKY2xhc3MgQ2xhc3NpZmllck1vZGVsKG1scnVuLnJ1bnRpbWVzLk1MTW9kZWxTZXJ2ZXIpOgogICAgZGVmIGxvYWQoc2VsZik6CiAgICAgICAgIiIiTG9hZCBtb2RlbCBmcm9tIHN0b3JhZ2UuIiIiCiAgICAgICAgbW9kZWxfZmlsZSwgZXh0cmFfZGF0YSA9IHNlbGYuZ2V0X21vZGVsKCcucGtsJykKICAgICAgICBzZWxmLm1vZGVsID0gbG9hZChvcGVuKG1vZGVsX2ZpbGUsICdyYicpKQoKICAgIGRlZiBwcmVkaWN0KHNlbGYsIGJvZHk6IGRpY3QpIC0+IExpc3Q6CiAgICAgICAgIiIiR2VuZXJhdGUgbW9kZWwgcHJlZGljdGlvbnMgZnJvbSBzYW1wbGUuCiAgICAgICAgCiAgICAgICAgOnBhcmFtIGJvZHkgOiBBIGRpY3Qgb2Ygb2JzZXJ2YXRpb25zLCBlYWNoIG9mIHdoaWNoIGlzIGFuIDEtZGltZW5zaW9uYWwgZmVhdHVyZSB2ZWN0b3IuCiAgICAgICAgICAgIAogICAgICAgIFJldHVybnMgbW9kZWwgcHJlZGljdGlvbnMgYXMgYSBgTGlzdGAsIG9uZSBmb3IgZWFjaCByb3cgaW4gdGhlIGBib2R5YCBpbnB1dCBgTGlzdGAuCiAgICAgICAgIiIiCiAgICAgICAgdHJ5OgogICAgICAgICAgICBmZWF0cyA9IG5wLmFzYXJyYXkoYm9keVsnaW5zdGFuY2VzJ10pCiAgICAgICAgICAgIHJlc3VsdDogbnAubmRhcnJheSA9IHNlbGYubW9kZWwucHJlZGljdChmZWF0cykKICAgICAgICAgICAgcmVzcCA9IHJlc3VsdC50b2xpc3QoKQogICAgICAgIGV4Y2VwdCBFeGNlcHRpb24gYXMgZToKICAgICAgICAgICAgcmFpc2UgRXhjZXB0aW9uKGYiRmFpbGVkIHRvIHByZWRpY3Qge2V9IikKICAgICAgICAKICAgICAgICByZXR1cm4gcmVzcAoKCmZyb20gbWxydW4ucnVudGltZXMgaW1wb3J0IG51Y2xpb19pbml0X2hvb2sKZGVmIGluaXRfY29udGV4dChjb250ZXh0KToKICAgIG51Y2xpb19pbml0X2hvb2soY29udGV4dCwgZ2xvYmFscygpLCAnc2VydmluZycpCgpkZWYgaGFuZGxlcihjb250ZXh0LCBldmVudCk6CiAgICByZXR1cm4gY29udGV4dC5tbHJ1bl9oYW5kbGVyKGNvbnRleHQsIGV2ZW50KQo= + - python -m pip install numpy cloudpickle v3io sklearn mlrun + functionSourceCode: 
IyBHZW5lcmF0ZWQgYnkgbnVjbGlvLmV4cG9ydC5OdWNsaW9FeHBvcnRlcgoKaW1wb3J0IG1scnVuCgpmcm9tIGNsb3VkcGlja2xlIGltcG9ydCBsb2FkCmZyb20gdHlwaW5nIGltcG9ydCBMaXN0CmZyb20gZGF0ZXRpbWUgaW1wb3J0IGRhdGV0aW1lCmZyb20gc2tsZWFybi5kYXRhc2V0cyBpbXBvcnQgbG9hZF9pcmlzCgppbXBvcnQgd2FybmluZ3MKd2FybmluZ3MuZmlsdGVyd2FybmluZ3MoJ2lnbm9yZScpCgppbXBvcnQgb3MKaW1wb3J0IG51bXB5IGFzIG5wCgpjbGFzcyBDbGFzc2lmaWVyTW9kZWwobWxydW4ucnVudGltZXMuTUxNb2RlbFNlcnZlcik6CiAgICBkZWYgbG9hZChzZWxmKToKICAgICAgICAiIiJMb2FkIG1vZGVsIGZyb20gc3RvcmFnZS4iIiIKICAgICAgICBtb2RlbF9maWxlLCBleHRyYV9kYXRhID0gc2VsZi5nZXRfbW9kZWwoJy5wa2wnKQogICAgICAgIHNlbGYubW9kZWwgPSBsb2FkKG9wZW4obW9kZWxfZmlsZSwgJ3JiJykpCgogICAgZGVmIHByZWRpY3Qoc2VsZiwgYm9keTogZGljdCkgLT4gTGlzdDoKICAgICAgICAiIiJHZW5lcmF0ZSBtb2RlbCBwcmVkaWN0aW9ucyBmcm9tIHNhbXBsZS4KICAgICAgICAKICAgICAgICA6cGFyYW0gYm9keSA6IEEgZGljdCBvZiBvYnNlcnZhdGlvbnMsIGVhY2ggb2Ygd2hpY2ggaXMgYW4gMS1kaW1lbnNpb25hbCBmZWF0dXJlIHZlY3Rvci4KICAgICAgICAgICAgCiAgICAgICAgUmV0dXJucyBtb2RlbCBwcmVkaWN0aW9ucyBhcyBhIGBMaXN0YCwgb25lIGZvciBlYWNoIHJvdyBpbiB0aGUgYGJvZHlgIGlucHV0IGBMaXN0YC4KICAgICAgICAiIiIKICAgICAgICB0cnk6CiAgICAgICAgICAgIGZlYXRzID0gbnAuYXNhcnJheShib2R5WydpbnN0YW5jZXMnXSkKICAgICAgICAgICAgcmVzdWx0OiBucC5uZGFycmF5ID0gc2VsZi5tb2RlbC5wcmVkaWN0KGZlYXRzKQogICAgICAgICAgICByZXNwID0gcmVzdWx0LnRvbGlzdCgpCiAgICAgICAgZXhjZXB0IEV4Y2VwdGlvbiBhcyBlOgogICAgICAgICAgICByYWlzZSBFeGNlcHRpb24oZiJGYWlsZWQgdG8gcHJlZGljdCB7ZX0iKQogICAgICAgIAogICAgICAgIHJldHVybiByZXNwCgoKZnJvbSBtbHJ1bi5ydW50aW1lcyBpbXBvcnQgbnVjbGlvX2luaXRfaG9vawpkZWYgaW5pdF9jb250ZXh0KGNvbnRleHQpOgogICAgbnVjbGlvX2luaXRfaG9vayhjb250ZXh0LCBnbG9iYWxzKCksICdzZXJ2aW5nJykKCmRlZiBoYW5kbGVyKGNvbnRleHQsIGV2ZW50KToKICAgIHJldHVybiBjb250ZXh0Lm1scnVuX2hhbmRsZXIoY29udGV4dCwgZXZlbnQpCg== noBaseImagesPull: true env: - name: MODEL_CLASS value: ClassifierModel handler: model_server:handler + image: mlrun/ml-models runtime: python:3.6 volumes: [] source: '' diff --git a/model_server/model_server.ipynb b/model_server/model_server.ipynb index c79e3b591..750fe3a99 100644 --- 
a/model_server/model_server.ipynb +++ b/model_server/model_server.ipynb @@ -9,43 +9,32 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ - "# nuclio: ignore\n", - "import nuclio" + "import mlrun" ] }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "%nuclio: setting kind to 'nuclio:serving'\n", + "%nuclio: setting kind to 'serving'\n", "%nuclio: setting 'MODEL_CLASS' environment variable\n", - "%nuclio: setting spec.build.baseImage to 'mlrun/mlrun'\n" + "%nuclio: setting spec.image to 'mlrun/mlrun'\n" ] } ], "source": [ - "%nuclio config kind=\"nuclio:serving\"\n", + "%nuclio config kind=\"serving\"\n", "%nuclio env MODEL_CLASS=ClassifierModel\n", - "%nuclio config spec.build.baseImage = \"mlrun/mlrun\"" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ - "%%nuclio cmd -c\n", - "python -m pip install numpy cloudpickle v3io sklearn" + "%nuclio config spec.image = \"mlrun/mlrun\"" ] }, { @@ -54,20 +43,17 @@ "metadata": {}, "outputs": [], "source": [ - "import os\n", "from cloudpickle import load\n", - "import numpy as np\n", "from typing import List\n", "from datetime import datetime\n", - "import mlrun" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [], - "source": [ + "from sklearn.datasets import load_iris\n", + "\n", + "import warnings\n", + "warnings.filterwarnings('ignore')\n", + "\n", + "import os\n", + "import numpy as np\n", + "\n", "class ClassifierModel(mlrun.runtimes.MLModelServer):\n", " def load(self):\n", " \"\"\"Load model from storage.\"\"\"\n", @@ -93,7 +79,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ @@ -118,73 +104,47 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### test locally" + 
"### Test locally" ] }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ - "import cloudpickle as cp\n", - "models_path = '/User/ml/demos/sklearn-pipe/models'\n", + "model = 'https://s3.wasabisys.com/iguazio/models/iris/model.pkl'\n", "\n", - "from sklearn.datasets import load_iris\n", "iris = load_iris()\n", "\n", "x = iris['data'].tolist()\n", "y = iris['target']\n", "\n", - "for model in os.listdir(models_path):\n", - " if model.endswith(\".pkl\"):\n", - " \n", - " my_server = ClassifierModel('classifier', model_dir=os.path.join(models_path, model))\n", - " my_server.load()\n", - "\n", - " a = my_server.predict({\"instances\": x})\n", + "my_server = ClassifierModel('classifier', model_dir=model)\n", + "my_server.load()\n", "\n", - " assert len(a)==150" + "a = my_server.predict({\"instances\": x})\n", + "assert len(a)==150" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "## document and save" + "## Document and save" ] }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 7, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "> 2020-08-10 12:54:58,765 [info] function spec saved to path: function.yaml\n" - ] - }, - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - "from mlrun import new_model_server\n", - "fn = new_model_server('model-server', model_class='ClassifierModel')\n", + "fn = mlrun.new_model_server('model-server', model_class='ClassifierModel')\n", "fn.spec.description = \"generic sklearn model server\"\n", "fn.metadata.categories = ['serving', 'ml']\n", "fn.metadata.labels = {'author': 'yaronh', 'framework': 'sklearn'}\n", - "#print(fn.to_yaml())\n", - "fn.export()" + "#fn.export()" ] }, { @@ -196,26 +156,33 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 8, "metadata": {}, 
"outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "> 2020-08-10 12:55:08,975 [info] deploy started\n", - "[nuclio] 2020-08-10 12:55:10,073 (info) Build complete\n", - "[nuclio] 2020-08-10 12:55:16,173 (info) Function deploy complete\n", - "[nuclio] 2020-08-10 12:55:16,181 done updating sk-project-sklearn-server, function address: 34.202.248.16:30045\n" + "> 2020-12-06 11:26:41,000 [info] Starting remote function deploy\n", + "2020-12-06 11:26:41 (info) Deploying function\n", + "2020-12-06 11:26:41 (info) Building\n", + "2020-12-06 11:26:41 (info) Staging files and preparing base images\n", + "2020-12-06 11:26:41 (info) Building processor image\n", + "2020-12-06 11:28:28 (info) Build complete\n", + "2020-12-06 11:28:34 (info) Function deploy complete\n", + "> 2020-12-06 11:28:35,076 [info] function deployed, address=default-tenant.app.yh210.iguazio-cd2.com:31804\n" ] } ], "source": [ - "from mlrun import mount_v3io\n", - "fn.apply(mount_v3io())\n", - "fn.set_envs({'SERVING_MODEL_iris_dataset_v1': models_path,\n", - " 'INFERENCE_STREAM': 'users/admin/tststream'})\n", - "#fn.verbose = True\n", + "import mlrun\n", + "user_name = os.getenv(\"V3IO_USER_NAME\")\n", + "artifact_path = mlrun.set_environment(api_path = 'http://mlrun-api:8080',\n", + " artifact_path = os.path.abspath('./'))\n", + "fn.apply(mlrun.mount_v3io())\n", + "fn.set_envs({'SERVING_MODEL_iris_dataset_v1': model,\n", + " 'INFERENCE_STREAM': 'users/{}/tststream'.format(user_name)})\n", + "\n", "address = fn.deploy(project='sk-project')" ] }, @@ -228,7 +195,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 9, "metadata": {}, "outputs": [ { @@ -269,7 +236,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.8" + "version": "3.7.6" } }, "nbformat": 4, diff --git a/model_server_tester/README.md b/model_server_tester/README.md new file mode 100644 index 000000000..d04fe7d9e --- /dev/null +++ 
b/model_server_tester/README.md @@ -0,0 +1,14 @@ +# Live Model Server Testing + +Test your model server via HTTP calls + +```markdown + +:param table: csv/parquet table with test data +:param addr: function address/url +:param label_column: name of the label column in table +:param model: tested model name +:param match_err: raise error on validation (require proper test set) +:param rows: number of rows to use from test set + +``` diff --git a/model_server_tester/function.yaml b/model_server_tester/function.yaml index 483ec9759..24563f817 100644 --- a/model_server_tester/function.yaml +++ b/model_server_tester/function.yaml @@ -2,8 +2,8 @@ kind: job metadata: name: model-server-tester tag: '' - hash: 934f5a336bffc3204b47174b6c5d367a1e0e6267 - project: '' + hash: 23c722aee8394de7116a7665c69c815851c5cdbf + project: default labels: author: yaronh categories: @@ -21,12 +21,15 @@ spec: doc: Test a model server parameters: - name: context + default: '' - name: table type: DataItem doc: csv/parquet table with test data + default: '' - name: addr type: str doc: function address/url + default: '' - name: label_column type: str doc: name of the label column in table @@ -34,17 +37,20 @@ spec: - name: model type: str doc: 'tested model name ' + default: '' - name: match_err type: bool doc: raise error on validation (require proper test set) + default: false - name: rows type: int doc: number of rows to use from test set default: 20 - outputs: [] - lineno: 12 + outputs: + - default: '' + lineno: 14 description: test model servers build: - functionSourceCode: 
IyBHZW5lcmF0ZWQgYnkgbnVjbGlvLmV4cG9ydC5OdWNsaW9FeHBvcnRlciBvbiAyMDIwLTA1LTA4IDIxOjM1CgppbXBvcnQgb3MKaW1wb3J0IHBhbmRhcyBhcyBwZAppbXBvcnQgcmVxdWVzdHMKaW1wb3J0IGpzb24KaW1wb3J0IG51bXB5IGFzIG5wCmZyb20gZGF0ZXRpbWUgaW1wb3J0IGRhdGV0aW1lCmZyb20gbWxydW4uZGF0YXN0b3JlIGltcG9ydCBEYXRhSXRlbQpmcm9tIG1scnVuLmFydGlmYWN0cyBpbXBvcnQgZ2V0X21vZGVsLCBDaGFydEFydGlmYWN0CgpkZWYgbW9kZWxfc2VydmVyX3Rlc3Rlcihjb250ZXh0LAogICAgICAgICAgICAgICAgICAgICAgICB0YWJsZTogRGF0YUl0ZW0sCiAgICAgICAgICAgICAgICAgICAgICAgIGFkZHI6IHN0ciwgCiAgICAgICAgICAgICAgICAgICAgICAgIGxhYmVsX2NvbHVtbjogc3RyID0gImxhYmVsIiwKICAgICAgICAgICAgICAgICAgICAgICAgbW9kZWw6IHN0ciA9ICcnLAogICAgICAgICAgICAgICAgICAgICAgICBtYXRjaF9lcnI6IGJvb2wgPSBGYWxzZSwKICAgICAgICAgICAgICAgICAgICAgICAgcm93czogaW50ID0gMjApOgogICAgIiIiIFRlc3QgYSBtb2RlbCBzZXJ2ZXIgCiAgICAKICAgIDpwYXJhbSB0YWJsZTogICAgICAgICBjc3YvcGFycXVldCB0YWJsZSB3aXRoIHRlc3QgZGF0YQogICAgOnBhcmFtIGFkZHI6ICAgICAgICAgIGZ1bmN0aW9uIGFkZHJlc3MvdXJsCiAgICA6cGFyYW0gbGFiZWxfY29sdW1uOiAgbmFtZSBvZiB0aGUgbGFiZWwgY29sdW1uIGluIHRhYmxlCiAgICA6cGFyYW0gbW9kZWw6ICAgICAgICAgdGVzdGVkIG1vZGVsIG5hbWUgCiAgICA6cGFyYW0gbWF0Y2hfZXJyOiAgICAgcmFpc2UgZXJyb3Igb24gdmFsaWRhdGlvbiAocmVxdWlyZSBwcm9wZXIgdGVzdCBzZXQpCiAgICA6cGFyYW0gcm93czogICAgICAgICAgbnVtYmVyIG9mIHJvd3MgdG8gdXNlIGZyb20gdGVzdCBzZXQKICAgICIiIgogICAgICAgIAogICAgdGFibGUgPSB0YWJsZS5hc19kZigpCgogICAgeV9saXN0ID0gdGFibGUucG9wKGxhYmVsX2NvbHVtbikudmFsdWVzLnRvbGlzdCgpCiAgICBjb250ZXh0LmxvZ2dlci5pbmZvKGYndGVzdGluZyB3aXRoIGRhdGFzZXQgYWdhaW5zdCB7YWRkcn0sIG1vZGVsOiB7bW9kZWx9JykKICAgIGlmIHJvd3MgYW5kIHJvd3MgPCB0YWJsZS5zaGFwZVswXToKICAgICAgICB0YWJsZSA9IHRhYmxlLnNhbXBsZShyb3dzKQogICAgCiAgICBjb3VudCA9IGVycl9jb3VudCA9IG1hdGNoID0gMAogICAgdGltZXMgPSBbXQogICAgZm9yIHgsIHkgaW4gemlwKHRhYmxlLnZhbHVlcywgeV9saXN0KToKICAgICAgICBjb3VudCArPSAxCiAgICAgICAgZXZlbnRfZGF0YSA9IGpzb24uZHVtcHMoeyJpbnN0YW5jZXMiOlt4LnRvbGlzdCgpXX0pCiAgICAgICAgaGFkX2VyciA9IEZhbHNlCiAgICAgICAgdHJ5OgogICAgICAgICAgICBzdGFydCA9IGRhdGV0aW1lLm5vdygpCiAgICAgICAgICAgIHJlc3AgPSByZXF1ZXN0cy5wdXQoZid7YWRkcn0ve21vZGVsfS9wcmVkaWN0Jywg
anNvbj1ldmVudF9kYXRhKQogICAgICAgICAgICBpZiBub3QgcmVzcC5vazoKICAgICAgICAgICAgICAgIGNvbnRleHQubG9nZ2VyLmVycm9yKGYnYmFkIGZ1bmN0aW9uIHJlc3AhIVxue3Jlc3AudGV4dH0nKQogICAgICAgICAgICAgICAgZXJyX2NvdW50ICs9IDEKICAgICAgICAgICAgICAgIGNvbnRpbnVlCiAgICAgICAgICAgIHRpbWVzLmFwcGVuZCgoZGF0ZXRpbWUubm93KCktc3RhcnQpLm1pY3Jvc2Vjb25kcykKICAgICAgICAgICAgICAgIAogICAgICAgIGV4Y2VwdCBPU0Vycm9yIGFzIGVycjoKICAgICAgICAgICAgY29udGV4dC5sb2dnZXIuZXJyb3IoZidlcnJvciBpbiByZXF1ZXN0LCBkYXRhOntldmVudF9kYXRhfSwgZXJyb3I6IHtlcnJ9JykKICAgICAgICAgICAgZXJyX2NvdW50ICs9IDEKICAgICAgICAgICAgY29udGludWUKICAgICAgICAgICAgCiAgICAgICAgeV9yZXNwID0gcmVzcC5qc29uKClbMF0KICAgICAgICBpZiB5ID09IHlfcmVzcDoKICAgICAgICAgICAgbWF0Y2ggKz0gMQogICAgICAgIAogICAgY29udGV4dC5sb2dfcmVzdWx0KCd0b3RhbF90ZXN0cycsIGNvdW50KQogICAgY29udGV4dC5sb2dfcmVzdWx0KCdlcnJvcnMnLCBlcnJfY291bnQpCiAgICBjb250ZXh0LmxvZ19yZXN1bHQoJ21hdGNoJywgbWF0Y2gpCiAgICBpZiBjb3VudCAtIGVycl9jb3VudCA+IDA6CiAgICAgICAgdGltZXNfYXJyID0gbnAuYXJyYXkodGltZXMpCiAgICAgICAgY29udGV4dC5sb2dfcmVzdWx0KCdhdmdfbGF0ZW5jeScsIGludChucC5tZWFuKHRpbWVzX2FycikpKQogICAgICAgIGNvbnRleHQubG9nX3Jlc3VsdCgnbWluX2xhdGVuY3knLCBpbnQobnAuYW1pbih0aW1lc19hcnIpKSkKICAgICAgICBjb250ZXh0LmxvZ19yZXN1bHQoJ21heF9sYXRlbmN5JywgaW50KG5wLmFtYXgodGltZXNfYXJyKSkpCiAgICAgICAgCiAgICAgICAgY2hhcnQgPSBDaGFydEFydGlmYWN0KCdsYXRlbmN5JywgaGVhZGVyPVsnVGVzdCcsICdMYXRlbmN5IChtaWNyb3NlYyknXSkKICAgICAgICBmb3IgaSBpbiByYW5nZShsZW4odGltZXMpKToKICAgICAgICAgICAgY2hhcnQuYWRkX3JvdyhbaSsxLCBpbnQodGltZXNbaV0pXSkKICAgICAgICBjb250ZXh0LmxvZ19hcnRpZmFjdChjaGFydCkKCiAgICBjb250ZXh0LmxvZ2dlci5pbmZvKGYncnVuIHtjb3VudH0gdGVzdHMsIHtlcnJfY291bnR9IGVycm9ycyBhbmQge21hdGNofSBtYXRjaCBleHBlY3RlZCB2YWx1ZScpCiAgICAKICAgIGlmIGVycl9jb3VudDoKICAgICAgICByYWlzZSBWYWx1ZUVycm9yKGYnZmFpbGVkIG9uIHtlcnJfY291bnR9IHRlc3RzIG9mIHtjb3VudH0nKQogICAgCiAgICBpZiBtYXRjaF9lcnIgYW5kIG1hdGNoICE9IGNvdW50OgogICAgICAgIHJhaXNlIFZhbHVlRXJyb3IoZidvbmx5IHttYXRjaH0gcmVzdWx0cyBtYXRjaCBvdXQgb2Yge2NvdW50fScpCgo= + functionSourceCode: 
IyBHZW5lcmF0ZWQgYnkgbnVjbGlvLmV4cG9ydC5OdWNsaW9FeHBvcnRlcgoKaW1wb3J0IG9zCmltcG9ydCBwYW5kYXMgYXMgcGQKaW1wb3J0IHJlcXVlc3RzCmltcG9ydCBqc29uCmltcG9ydCBudW1weSBhcyBucAppbXBvcnQgbWxydW4KCmZyb20gZGF0ZXRpbWUgaW1wb3J0IGRhdGV0aW1lCmZyb20gbWxydW4uZGF0YXN0b3JlIGltcG9ydCBEYXRhSXRlbQpmcm9tIG1scnVuLmFydGlmYWN0cyBpbXBvcnQgZ2V0X21vZGVsLCBDaGFydEFydGlmYWN0CgpkZWYgbW9kZWxfc2VydmVyX3Rlc3Rlcihjb250ZXh0LAogICAgICAgICAgICAgICAgICAgICAgICB0YWJsZTogRGF0YUl0ZW0sCiAgICAgICAgICAgICAgICAgICAgICAgIGFkZHI6IHN0ciwgCiAgICAgICAgICAgICAgICAgICAgICAgIGxhYmVsX2NvbHVtbjogc3RyID0gImxhYmVsIiwKICAgICAgICAgICAgICAgICAgICAgICAgbW9kZWw6IHN0ciA9ICcnLAogICAgICAgICAgICAgICAgICAgICAgICBtYXRjaF9lcnI6IGJvb2wgPSBGYWxzZSwKICAgICAgICAgICAgICAgICAgICAgICAgcm93czogaW50ID0gMjApOgogICAgIiIiIFRlc3QgYSBtb2RlbCBzZXJ2ZXIgCiAgICAKICAgIDpwYXJhbSB0YWJsZTogICAgICAgICBjc3YvcGFycXVldCB0YWJsZSB3aXRoIHRlc3QgZGF0YQogICAgOnBhcmFtIGFkZHI6ICAgICAgICAgIGZ1bmN0aW9uIGFkZHJlc3MvdXJsCiAgICA6cGFyYW0gbGFiZWxfY29sdW1uOiAgbmFtZSBvZiB0aGUgbGFiZWwgY29sdW1uIGluIHRhYmxlCiAgICA6cGFyYW0gbW9kZWw6ICAgICAgICAgdGVzdGVkIG1vZGVsIG5hbWUgCiAgICA6cGFyYW0gbWF0Y2hfZXJyOiAgICAgcmFpc2UgZXJyb3Igb24gdmFsaWRhdGlvbiAocmVxdWlyZSBwcm9wZXIgdGVzdCBzZXQpCiAgICA6cGFyYW0gcm93czogICAgICAgICAgbnVtYmVyIG9mIHJvd3MgdG8gdXNlIGZyb20gdGVzdCBzZXQKICAgICIiIgogICAgICAgIAogICAgdGFibGUgPSB0YWJsZS5hc19kZigpCgogICAgeV9saXN0ID0gdGFibGUucG9wKGxhYmVsX2NvbHVtbikudmFsdWVzLnRvbGlzdCgpCiAgICBjb250ZXh0LmxvZ2dlci5pbmZvKGYndGVzdGluZyB3aXRoIGRhdGFzZXQgYWdhaW5zdCB7YWRkcn0sIG1vZGVsOiB7bW9kZWx9JykKICAgIGlmIHJvd3MgYW5kIHJvd3MgPCB0YWJsZS5zaGFwZVswXToKICAgICAgICB0YWJsZSA9IHRhYmxlLnNhbXBsZShyb3dzKQogICAgCiAgICBjb3VudCA9IGVycl9jb3VudCA9IG1hdGNoID0gMAogICAgdGltZXMgPSBbXQogICAgZm9yIHgsIHkgaW4gemlwKHRhYmxlLnZhbHVlcywgeV9saXN0KToKICAgICAgICBjb3VudCArPSAxCiAgICAgICAgZXZlbnRfZGF0YSA9IGpzb24uZHVtcHMoeyJpbnN0YW5jZXMiOlt4LnRvbGlzdCgpXX0pCiAgICAgICAgaGFkX2VyciA9IEZhbHNlCiAgICAgICAgdHJ5OgogICAgICAgICAgICBzdGFydCA9IGRhdGV0aW1lLm5vdygpCiAgICAgICAgICAgIHJlc3AgPSByZXF1ZXN0cy5wdXQoZid7YWRkcn0ve21vZGVsfS9wcmVkaWN0JywganNvbj1l
dmVudF9kYXRhKQogICAgICAgICAgICBpZiBub3QgcmVzcC5vazoKICAgICAgICAgICAgICAgIGNvbnRleHQubG9nZ2VyLmVycm9yKGYnYmFkIGZ1bmN0aW9uIHJlc3AhIVxue3Jlc3AudGV4dH0nKQogICAgICAgICAgICAgICAgZXJyX2NvdW50ICs9IDEKICAgICAgICAgICAgICAgIGNvbnRpbnVlCiAgICAgICAgICAgIHRpbWVzLmFwcGVuZCgoZGF0ZXRpbWUubm93KCktc3RhcnQpLm1pY3Jvc2Vjb25kcykKICAgICAgICAgICAgICAgIAogICAgICAgIGV4Y2VwdCBPU0Vycm9yIGFzIGVycjoKICAgICAgICAgICAgY29udGV4dC5sb2dnZXIuZXJyb3IoZidlcnJvciBpbiByZXF1ZXN0LCBkYXRhOntldmVudF9kYXRhfSwgZXJyb3I6IHtlcnJ9JykKICAgICAgICAgICAgZXJyX2NvdW50ICs9IDEKICAgICAgICAgICAgY29udGludWUKICAgICAgICAgICAgCiAgICAgICAgeV9yZXNwID0gcmVzcC5qc29uKClbMF0KICAgICAgICBpZiB5ID09IHlfcmVzcDoKICAgICAgICAgICAgbWF0Y2ggKz0gMQogICAgICAgIAogICAgY29udGV4dC5sb2dfcmVzdWx0KCd0b3RhbF90ZXN0cycsIGNvdW50KQogICAgY29udGV4dC5sb2dfcmVzdWx0KCdlcnJvcnMnLCBlcnJfY291bnQpCiAgICBjb250ZXh0LmxvZ19yZXN1bHQoJ21hdGNoJywgbWF0Y2gpCiAgICBpZiBjb3VudCAtIGVycl9jb3VudCA+IDA6CiAgICAgICAgdGltZXNfYXJyID0gbnAuYXJyYXkodGltZXMpCiAgICAgICAgY29udGV4dC5sb2dfcmVzdWx0KCdhdmdfbGF0ZW5jeScsIGludChucC5tZWFuKHRpbWVzX2FycikpKQogICAgICAgIGNvbnRleHQubG9nX3Jlc3VsdCgnbWluX2xhdGVuY3knLCBpbnQobnAuYW1pbih0aW1lc19hcnIpKSkKICAgICAgICBjb250ZXh0LmxvZ19yZXN1bHQoJ21heF9sYXRlbmN5JywgaW50KG5wLmFtYXgodGltZXNfYXJyKSkpCiAgICAgICAgCiAgICAgICAgY2hhcnQgPSBDaGFydEFydGlmYWN0KCdsYXRlbmN5JywgaGVhZGVyPVsnVGVzdCcsICdMYXRlbmN5IChtaWNyb3NlYyknXSkKICAgICAgICBmb3IgaSBpbiByYW5nZShsZW4odGltZXMpKToKICAgICAgICAgICAgY2hhcnQuYWRkX3JvdyhbaSsxLCBpbnQodGltZXNbaV0pXSkKICAgICAgICBjb250ZXh0LmxvZ19hcnRpZmFjdChjaGFydCkKCiAgICBjb250ZXh0LmxvZ2dlci5pbmZvKGYncnVuIHtjb3VudH0gdGVzdHMsIHtlcnJfY291bnR9IGVycm9ycyBhbmQge21hdGNofSBtYXRjaCBleHBlY3RlZCB2YWx1ZScpCiAgICAKICAgIGlmIGVycl9jb3VudDoKICAgICAgICByYWlzZSBWYWx1ZUVycm9yKGYnZmFpbGVkIG9uIHtlcnJfY291bnR9IHRlc3RzIG9mIHtjb3VudH0nKQogICAgCiAgICBpZiBtYXRjaF9lcnIgYW5kIG1hdGNoICE9IGNvdW50OgogICAgICAgIHJhaXNlIFZhbHVlRXJyb3IoZidvbmx5IHttYXRjaH0gcmVzdWx0cyBtYXRjaCBvdXQgb2Yge2NvdW50fScpCgo= commands: [] - code_origin: 
https://github.com/mlrun/functions.git#544df038d917cad745e946cb64378582151527ee:model_server_tester.ipynb + code_origin: https://github.com/Idan707/functions.git#7175ca7249cf11e0e163b21cffacd692032ba0a5:model_server_tester.ipynb diff --git a/model_server_tester/model_server_tester.ipynb b/model_server_tester/model_server_tester.ipynb index a34ef3193..a2d284104 100644 --- a/model_server_tester/model_server_tester.ipynb +++ b/model_server_tester/model_server_tester.ipynb @@ -11,7 +11,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ @@ -21,7 +21,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 5, "metadata": {}, "outputs": [ { @@ -47,6 +47,7 @@ "import requests\n", "import json\n", "import numpy as np\n", + "\n", "from datetime import datetime\n", "from mlrun.datastore import DataItem\n", "from mlrun.artifacts import get_model, ChartArtifact\n", @@ -132,49 +133,105 @@ "# marks the end of a code section" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Deploy model server for testing" + ] + }, { "cell_type": "code", "execution_count": 5, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "> 2020-10-28 16:43:54,679 [warning] warning!, server (0.5.3-rc1) and client (0.5.2) ver dont match\n", + "> 2020-10-28 16:43:55,002 [info] deploy started\n", + "[nuclio] 2020-10-28 16:45:17,274 (info) Build complete\n", + "[nuclio] 2020-10-28 16:45:22,363 done updating default-model-server, function address: default-tenant.app.dsteam.iguazio-cd1.com:30150\n", + "> 2020-10-28 16:45:22,369 [warning] warning!, server (0.5.3-rc1) and client (0.5.2) ver dont match\n" + ] + } + ], "source": [ - "from os import path\n", - "from mlrun import run_local, NewTask, mlconf, import_function, mount_v3io\n", - "mlconf.dbpath = mlconf.dbpath or 'http://mlrun-api:8080'\n", + "import mlrun\n", + "project_name = 
'sk-project'\n", + "MODEL_PATH = 'https://s3.wasabisys.com/iguazio/models/iris/model.pkl'\n", "\n", - "# specify artifacts target location\n", - "artifact_path = mlconf.artifact_path or path.abspath('./')\n", - "project_name = 'sk-project'" + "artifact_path = mlrun.set_environment(api_path = 'http://mlrun-api:8080',\n", + " artifact_path = os.path.abspath('./'))\n", + "\n", + "# import model server function from hub\n", + "fn = mlrun.import_function('hub://model_server')\n", + "fn.add_model(\"mymodel\", MODEL_PATH)\n", + "address = fn.deploy()" ] }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "[mlrun] 2020-04-06 23:26:00,746 starting run model_server_tester uid=b293663098374087b0bdd4d1e24bca86 -> http://10.196.88.27:80\n", - "[mlrun] 2020-04-06 23:26:00,915 testing with dataset against http://13.58.191.176:30115, model: iris_dataset_v1\n", - "[mlrun] 2020-04-06 23:26:01,137 run 10 tests, 0 errors and 1 match expected value\n", - "\n" + "> 2020-10-28 16:45:22,418 [info] deploy started\n", + "[nuclio] 2020-10-28 16:45:25,272 (info) Build complete\n", + "[nuclio] 2020-10-28 16:45:34,868 done updating default-model-server, function address: default-tenant.app.dsteam.iguazio-cd1.com:30150\n" + ] + } + ], + "source": [ + "user_name = os.getenv('V3IO_USERNAME')\n", + "\n", + "fn.apply(mlrun.mount_v3io())\n", + "fn.set_envs({'SERVING_MODEL_iris_dataset_v1': MODEL_PATH,\n", + " 'INFERENCE_STREAM': 'users/{}/tststream'.format(user_name)})\n", + "\n", + "address = fn.deploy()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Run model server tester locally" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "> 2020-10-28 16:45:34,916 [warning] warning!, server (0.5.3-rc1) and client (0.5.2) ver dont match\n", + "> 2020-10-28 
16:45:34,916 [info] starting run model_server_tester uid=c84fdd4dfacd447dbe417d709f5983f0 -> http://mlrun-api:8080\n", + "> 2020-10-28 16:45:34,972 [warning] warning!, server (0.5.3-rc1) and client (0.5.2) ver dont match\n", + "> 2020-10-28 16:45:35,264 [info] testing with dataset against http://default-tenant.app.dsteam.iguazio-cd1.com:30150, model: mymodel\n", + "> 2020-10-28 16:45:35,967 [info] run 20 tests, 0 errors and 6 match expected value\n" ] }, { "data": { "text/html": [ - "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
projectuiditerstartstatenamelabelsinputsparametersresultsartifacts
sk-project0Oct 28 16:45:43runningmodel_server_tester
v3io_user=admin
kind=job
owner=admin
table
addr=http://default-tenant.app.dsteam.iguazio-cd1.com:30150
model=mymodel
\n", + "
\n", + "
\n", + "
\n", + " Title\n", + " ×\n", + "
\n", + " \n", + "
\n", + "
\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "to track results use .show() or .logs() or in CLI: \n", + "!mlrun get run 73cb7da1bfeb4b50afb29bce40a9c861 --project sk-project , !mlrun logs 73cb7da1bfeb4b50afb29bce40a9c861 --project sk-project\n", + "> 2020-10-28 16:45:43,790 [info] run executed, status=running\n" + ] + }, + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "test_func.run(mlrun.NewTask(name='model_server_tester', \n", + " handler=model_server_tester, \n", + " params={'addr': address, 'model': 'mymodel'},\n", + " inputs={'table': DATA_PATH},\n", + " project=project_name, \n", + " artifact_path=os.path.join(artifact_path, 'data')))" + ] + }, { "cell_type": "code", "execution_count": null, @@ -426,9 +744,9 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python [conda env:root] *", "language": "python", - "name": "python3" + "name": "conda-root-py" }, "language_info": { "codemirror_mode": { @@ -440,7 +758,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.8" + "version": "3.7.6" } }, "nbformat": 4, diff --git a/sklearn_classifier/README.md b/sklearn_classifier/README.md index 6aa7c6d4b..4fdee7ee2 100644 --- a/sklearn_classifier/README.md +++ b/sklearn_classifier/README.md @@ -1,5 +1,24 @@ -# training functions +# **Training Functions** -1. **`sklearn-classify`**
-train any sklearn classifier model - \ No newline at end of file +## `sklearn-classifier` + +Run any scikit-learn compatible classifier or list of classifiers + +### steps + +1. **generate a scikit-learn model configuration** using the `model_pkg_class` parameter + * input a package and class name, for example, `sklearn.linear_model.LogisticRegression` + * mlrun will find the class and instantiate a copy using default parameters + * You can modify both the model class instantiator and the fit methods (other functions could be similarly modified) +2. **get a sample of data** from a data source + * select all rows using -1 + * select a random sample of rows using a negative integer + * select consecutive rows using a positive integer +3. **split the data** into train, validation, and test sets + * the test set is saved as an artifact and never seen again until testing + * WIP: this will be parametrized to produce cross-validator splits (one way of performing CV) +4. **train the model** +5. **pickle / serialize the model** + * models can be pickled or saved as json +6. 
**evaluate the model** + * a custom evaluator can be provided, see function doc for details diff --git a/sklearn_classifier/function.yaml b/sklearn_classifier/function.yaml index 15dba88b3..6f521afab 100644 --- a/sklearn_classifier/function.yaml +++ b/sklearn_classifier/function.yaml @@ -2,8 +2,8 @@ kind: job metadata: name: sklearn-classifier tag: '' - hash: aa99bb25dd46d0b0a183541030e7850fa2b71873 - project: '' + hash: 9ad5f21bff75a52254d34af4b68bcc1afd8a8bc3 + project: default labels: author: yjb framework: sklearn @@ -28,13 +28,16 @@ spec: - name: context type: MLClientCtx doc: the function context + default: '' - name: model_pkg_class type: str doc: the model to train, e.g, "sklearn.neural_networks.MLPClassifier", or json model config + default: '' - name: dataset type: DataItem doc: ("data") name of raw data file + default: '' - name: label_column type: str doc: ground-truth (y) labels @@ -42,11 +45,12 @@ spec: - name: encode_cols type: List[str] doc: dictionary of names and prefixes for columns that are to hot be encoded. + default: [] - name: sample type: int doc: Selects the first n rows, or select a sample starting from the first. If negative <-1, select a random sample - default: <_ast.USub object at 0x7f768a309e10> + default: <_ast.USub object at 0x7f62bdf73410> - name: test_size type: float doc: (0.05) test set size @@ -55,16 +59,18 @@ spec: type: float doc: (0.75) Once the test set has been removed the training set gets this proportion. 
- default: 0.75 + default: 0.7 - name: test_set_key type: str doc: key of held out data in artifact store default: test_set - name: model_evaluator doc: (None) a custom model evaluator can be specified + default: null - name: models_dest type: str doc: ("") models subfolder on artifact path + default: '' - name: plots_dest type: str doc: plot subfolder on artifact path @@ -75,14 +81,16 @@ spec: default: parquet - name: model_pkg_file type: str + default: '' - name: random_state type: int doc: (1) sklearn rng seed default: 1 - outputs: [] - lineno: 28 + outputs: + - default: '' + lineno: 29 description: train any classifier using scikit-learn's API build: - functionSourceCode: IyBHZW5lcmF0ZWQgYnkgbnVjbGlvLmV4cG9ydC5OdWNsaW9FeHBvcnRlcgoKaW1wb3J0IHdhcm5pbmdzCndhcm5pbmdzLnNpbXBsZWZpbHRlcihhY3Rpb249Imlnbm9yZSIsIGNhdGVnb3J5PUZ1dHVyZVdhcm5pbmcpCgppbXBvcnQganNvbgppbXBvcnQgb3MKCmZyb20gY2xvdWRwaWNrbGUgaW1wb3J0IGR1bXBzLCBsb2FkLCBkdW1wCgpmcm9tIHNrbGVhcm4gaW1wb3J0IG1ldHJpY3MKaW1wb3J0IHBhbmRhcyBhcyBwZAppbXBvcnQgbnVtcHkgYXMgbnAKaW1wb3J0IG1hdHBsb3RsaWIucHlwbG90IGFzIHBsdAoKZnJvbSBza2xlYXJuLnByZXByb2Nlc3NpbmcgaW1wb3J0IGxhYmVsX2JpbmFyaXplCmZyb20gc2tsZWFybi5tb2RlbF9zZWxlY3Rpb24gaW1wb3J0IHRyYWluX3Rlc3Rfc3BsaXQKZnJvbSBza2xlYXJuIGltcG9ydCBtZXRyaWNzCgpmcm9tIHR5cGluZyBpbXBvcnQgTGlzdApmcm9tIG1scnVuLmV4ZWN1dGlvbiBpbXBvcnQgTUxDbGllbnRDdHgKZnJvbSBtbHJ1bi5kYXRhc3RvcmUgaW1wb3J0IERhdGFJdGVtCmZyb20gbWxydW4uYXJ0aWZhY3RzIGltcG9ydCBQbG90QXJ0aWZhY3QKCmZyb20gbWxydW4ubWx1dGlscyBpbXBvcnQgKGdldF9zYW1wbGUsIGdldF9zcGxpdHMsCiAgICAgICAgICAgICAgICAgICAgIGdlbl9za2xlYXJuX21vZGVsLCBjcmVhdGVfY2xhc3MsIGV2YWxfbW9kZWxfdjIpCgpkZWYgdHJhaW5fbW9kZWwoCiAgICBjb250ZXh0OiBNTENsaWVudEN0eCwKICAgIG1vZGVsX3BrZ19jbGFzczogc3RyLAogICAgZGF0YXNldDogRGF0YUl0ZW0sCiAgICBsYWJlbF9jb2x1bW46IHN0ciA9ICJsYWJlbHMiLAogICAgZW5jb2RlX2NvbHM6IExpc3Rbc3RyXSA9IFtdLAogICAgc2FtcGxlOiBpbnQgPSAtMSwKICAgIHRlc3Rfc2l6ZTogZmxvYXQgPSAwLjMwLAogICAgdHJhaW5fdmFsX3NwbGl0OiBmbG9hdCA9IDAuNzUsCiAgICB0ZXN0X3NldF9rZXk6IHN0ciA9ICJ0ZXN0X3NldCIsCiAgICBtb2RlbF9ldmFsdWF0b3I
gPSBOb25lLAogICAgbW9kZWxzX2Rlc3Q6IHN0ciA9ICIiLAogICAgcGxvdHNfZGVzdDogc3RyID0gInBsb3RzIiwKICAgIGZpbGVfZXh0OiBzdHIgPSAicGFycXVldCIsCiAgICBtb2RlbF9wa2dfZmlsZTogc3RyID0gIiIsCiAgICByYW5kb21fc3RhdGU6IGludCA9IDEsCikgLT4gTm9uZToKICAgICIiInRyYWluIGEgY2xhc3NpZmllcgogICAgCiAgICBBbiBvcHRpb25hbCBjdXRvbSBtb2RlbCBldmFsdWF0b3IgY2FuIGJlIHN1cHBsaWVkIHRoYXQgc2hvdWxkIGhhdmUgdGhlIHNpZ25hdHVyZToKICAgIGBteV9jdXN0b21fZXZhbHVhdG9yKGNvbnRleHQsIHh2YWxpZCwgeXZhbGlkLCBtb2RlbClgIGFuZCByZXR1cm4gYSBkaWN0aW9uYXJ5IG9mIAogICAgc2NhbGFyICJyZXN1bHRzIiwgYSAicGxvdHMiIGtleXMgd2l0aCBhIGxpc3Qgb2YgUGxvdEFydGlmYWN0cywgYW5kIAogICAgYW5kICJ0YWJsZXMiIGtleSBjb250YWluaW5nIGEgcmV0dXJuZWQgbGlzdCBvZiBUYWJsZUFydGlmYWN0cy4KICAgIAogICAgOnBhcmFtIGNvbnRleHQ6ICAgICAgICAgICB0aGUgZnVuY3Rpb24gY29udGV4dAogICAgOnBhcmFtIG1vZGVsX3BrZ19jbGFzczogICB0aGUgbW9kZWwgdG8gdHJhaW4sIGUuZywgInNrbGVhcm4ubmV1cmFsX25ldHdvcmtzLk1MUENsYXNzaWZpZXIiLCAKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgb3IganNvbiBtb2RlbCBjb25maWcKICAgIDpwYXJhbSBkYXRhc2V0OiAgICAgICAgICAgKCJkYXRhIikgbmFtZSBvZiByYXcgZGF0YSBmaWxlCiAgICA6cGFyYW0gbGFiZWxfY29sdW1uOiAgICAgIGdyb3VuZC10cnV0aCAoeSkgbGFiZWxzCiAgICA6cGFyYW0gZW5jb2RlX2NvbHM6ICAgICAgIGRpY3Rpb25hcnkgb2YgbmFtZXMgYW5kIHByZWZpeGVzIGZvciBjb2x1bW5zIHRoYXQgYXJlCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIHRvIGhvdCBiZSBlbmNvZGVkLgogICAgOnBhcmFtIHNhbXBsZTogICAgICAgICAgICBTZWxlY3RzIHRoZSBmaXJzdCBuIHJvd3MsIG9yIHNlbGVjdCBhIHNhbXBsZQogICAgICAgICAgICAgICAgICAgICAgICAgICAgICBzdGFydGluZyBmcm9tIHRoZSBmaXJzdC4gSWYgbmVnYXRpdmUgPC0xLCBzZWxlY3QKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgYSByYW5kb20gc2FtcGxlCiAgICA6cGFyYW0gdGVzdF9zaXplOiAgICAgICAgICgwLjA1KSB0ZXN0IHNldCBzaXplCiAgICA6cGFyYW0gdHJhaW5fdmFsX3NwbGl0OiAgICgwLjc1KSBPbmNlIHRoZSB0ZXN0IHNldCBoYXMgYmVlbiByZW1vdmVkIHRoZQogICAgICAgICAgICAgICAgICAgICAgICAgICAgICB0cmFpbmluZyBzZXQgZ2V0cyB0aGlzIHByb3BvcnRpb24uCiAgICA6cGFyYW0gdGVzdF9zZXRfa2V5OiAgICAgIGtleSBvZiBoZWxkIG91dCBkYXRhIGluIGFydGlmYWN0IHN0b3JlCiAgICA6cGFyYW0gbW9kZWxfZXZhbHVhdG9yOiAgIChOb25lKSBhIGN1c3RvbSBtb2RlbCBldmFsdWF0b3IgY2FuIGJlIHNwZWNpZmllZAo
gICAgOnBhcmFtIG1vZGVsc19kZXN0OiAgICAgICAoIiIpIG1vZGVscyBzdWJmb2xkZXIgb24gYXJ0aWZhY3QgcGF0aAogICAgOnBhcmFtIHBsb3RzX2Rlc3Q6ICAgICAgICBwbG90IHN1YmZvbGRlciBvbiBhcnRpZmFjdCBwYXRoCiAgICA6cGFyYW0gZmlsZV9leHQ6ICAgICAgICAgICgicGFycXVldCIpIGZvcm1hdCBmb3IgdGVzdF9zZXRfa2V5IGhvbGQgb3V0IGRhdGEKICAgIDpwYXJhbSByYW5kb21fc3RhdGU6ICAgICAgKDEpIHNrbGVhcm4gcm5nIHNlZWQKCiAgICAiIiIKICAgIG1vZGVsc19kZXN0ID0gbW9kZWxzX2Rlc3Qgb3IgIm1vZGVsIgogICAgCiAgICByYXcsIGxhYmVscywgaGVhZGVyID0gZ2V0X3NhbXBsZShkYXRhc2V0LCBzYW1wbGUsIGxhYmVsX2NvbHVtbikKICAgIAogICAgaWYgZW5jb2RlX2NvbHM6CiAgICAgICAgcmF3ID0gcGQuZ2V0X2R1bW1pZXMocmF3LCAKICAgICAgICAgICAgICAgICAgICAgICAgICAgICBjb2x1bW5zPWxpc3QoZW5jb2RlX2NvbHMua2V5cygpKSwgCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgcHJlZml4PWxpc3QoZW5jb2RlX2NvbHMudmFsdWVzKCkpLCAKICAgICAgICAgICAgICAgICAgICAgICAgICAgICBkcm9wX2ZpcnN0PVRydWUpCiAgICAKICAgICh4dHJhaW4sIHl0cmFpbiksICh4dmFsaWQsIHl2YWxpZCksICh4dGVzdCwgeXRlc3QpID0gICAgICAgICBnZXRfc3BsaXRzKHJhdywgbGFiZWxzLCAzLCB0ZXN0X3NpemUsIDEtdHJhaW5fdmFsX3NwbGl0LCByYW5kb21fc3RhdGUpCiAgICAKICAgIGNvbnRleHQubG9nX2RhdGFzZXQodGVzdF9zZXRfa2V5LCAKICAgICAgICAgICAgICAgICAgICAgICAgZGY9cGQuY29uY2F0KFt4dGVzdCwgeXRlc3QudG9fZnJhbWUoKV0sIGF4aXM9MSksCiAgICAgICAgICAgICAgICAgICAgICAgIGZvcm1hdD1maWxlX2V4dCwgaW5kZXg9RmFsc2UsIAogICAgICAgICAgICAgICAgICAgICAgICBsYWJlbHM9eyJkYXRhLXR5cGUiOiAiaGVsZC1vdXQifSwKICAgICAgICAgICAgICAgICAgICAgICAgYXJ0aWZhY3RfcGF0aD1jb250ZXh0LmFydGlmYWN0X3N1YnBhdGgoJ2RhdGEnKSkKCiAgICBtb2RlbF9jb25maWcgPSBnZW5fc2tsZWFybl9tb2RlbChtb2RlbF9wa2dfY2xhc3MsCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICBjb250ZXh0LnBhcmFtZXRlcnMuaXRlbXMoKSkKCiAgICBtb2RlbF9jb25maWdbIkZJVCJdLnVwZGF0ZSh7IlgiOiB4dHJhaW4sCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgInkiOiB5dHJhaW4udmFsdWVzfSkKICAgIAogICAgQ2xhc3NpZmllckNsYXNzID0gY3JlYXRlX2NsYXNzKG1vZGVsX2NvbmZpZ1siTUVUQSJdWyJjbGFzcyJdKQogICAgCiAgICBtb2RlbCA9IENsYXNzaWZpZXJDbGFzcygqKm1vZGVsX2NvbmZpZ1siQ0xBU1MiXSkKICAgIAogICAgbW9kZWwuZml0KCoqbW9kZWxfY29uZmlnWyJGSVQiXSkKICAgIAogICAgYXJ0aWZhY3RfcGF0aCA9IGNvbnRleHQuYXJ0aWZhY3Rfc3VicGF
0aChtb2RlbHNfZGVzdCkKICAgIHBsb3RzX3BhdGggPSBjb250ZXh0LmFydGlmYWN0X3N1YnBhdGgobW9kZWxzX2Rlc3QsIHBsb3RzX2Rlc3QpCiAgICBpZiBtb2RlbF9ldmFsdWF0b3I6CiAgICAgICAgZXZhbF9tZXRyaWNzID0gbW9kZWxfZXZhbHVhdG9yKGNvbnRleHQsIHh2YWxpZCwgeXZhbGlkLCBtb2RlbCwKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgcGxvdHNfYXJ0aWZhY3RfcGF0aD1wbG90c19wYXRoKQogICAgZWxzZToKICAgICAgICBldmFsX21ldHJpY3MgPSBldmFsX21vZGVsX3YyKGNvbnRleHQsIHh2YWxpZCwgeXZhbGlkLCBtb2RlbCwKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIHBsb3RzX2FydGlmYWN0X3BhdGg9cGxvdHNfcGF0aCkKICAgICAgICAKICAgIGNvbnRleHQuc2V0X2xhYmVsKCdjbGFzcycsIG1vZGVsX3BrZ19jbGFzcykKICAgIGNvbnRleHQubG9nX21vZGVsKCJtb2RlbCIsIGJvZHk9ZHVtcHMobW9kZWwpLAogICAgICAgICAgICAgICAgICAgICAgYXJ0aWZhY3RfcGF0aD1hcnRpZmFjdF9wYXRoLAogICAgICAgICAgICAgICAgICAgICAgZXh0cmFfZGF0YT1ldmFsX21ldHJpY3MsIAogICAgICAgICAgICAgICAgICAgICAgbW9kZWxfZmlsZT0ibW9kZWwucGtsIiwKICAgICAgICAgICAgICAgICAgICAgIG1ldHJpY3M9Y29udGV4dC5yZXN1bHRzLAogICAgICAgICAgICAgICAgICAgICAgbGFiZWxzPXsiY2xhc3MiOiBtb2RlbF9wa2dfY2xhc3N9KQoK + functionSourceCode: 
IyBHZW5lcmF0ZWQgYnkgbnVjbGlvLmV4cG9ydC5OdWNsaW9FeHBvcnRlcgoKaW1wb3J0IHdhcm5pbmdzCndhcm5pbmdzLnNpbXBsZWZpbHRlcihhY3Rpb249Imlnbm9yZSIsIGNhdGVnb3J5PUZ1dHVyZVdhcm5pbmcpCgppbXBvcnQganNvbgppbXBvcnQgb3MKCmZyb20gY2xvdWRwaWNrbGUgaW1wb3J0IGR1bXBzLCBsb2FkLCBkdW1wCgpmcm9tIHNrbGVhcm4gaW1wb3J0IG1ldHJpY3MKaW1wb3J0IHBhbmRhcyBhcyBwZAppbXBvcnQgbnVtcHkgYXMgbnAKaW1wb3J0IG1hdHBsb3RsaWIucHlwbG90IGFzIHBsdAoKZnJvbSBza2xlYXJuLnByZXByb2Nlc3NpbmcgaW1wb3J0IGxhYmVsX2JpbmFyaXplCmZyb20gc2tsZWFybi5tb2RlbF9zZWxlY3Rpb24gaW1wb3J0IHRyYWluX3Rlc3Rfc3BsaXQKZnJvbSBza2xlYXJuIGltcG9ydCBtZXRyaWNzCgpmcm9tIHR5cGluZyBpbXBvcnQgTGlzdApmcm9tIG1scnVuLmV4ZWN1dGlvbiBpbXBvcnQgTUxDbGllbnRDdHgKZnJvbSBtbHJ1bi5kYXRhc3RvcmUgaW1wb3J0IERhdGFJdGVtCmZyb20gbWxydW4uYXJ0aWZhY3RzIGltcG9ydCBQbG90QXJ0aWZhY3QKZnJvbSBtbHJ1bi5tbHV0aWxzIGltcG9ydCAoZ2V0X3NhbXBsZSwgZ2V0X3NwbGl0cywKICAgICAgICAgICAgICAgICAgICAgZ2VuX3NrbGVhcm5fbW9kZWwsIGNyZWF0ZV9jbGFzcywgZXZhbF9tb2RlbF92MikKCmltcG9ydCBtbHJ1bgoKZGVmIHRyYWluX21vZGVsKAogICAgY29udGV4dDogTUxDbGllbnRDdHgsCiAgICBtb2RlbF9wa2dfY2xhc3M6IHN0ciwKICAgIGRhdGFzZXQ6IERhdGFJdGVtLAogICAgbGFiZWxfY29sdW1uOiBzdHIgPSAibGFiZWxzIiwKICAgIGVuY29kZV9jb2xzOiBMaXN0W3N0cl0gPSBbXSwKICAgIHNhbXBsZTogaW50ID0gLTEsCiAgICB0ZXN0X3NpemU6IGZsb2F0ID0gMC4zMCwKICAgIHRyYWluX3ZhbF9zcGxpdDogZmxvYXQgPSAwLjcwLAogICAgdGVzdF9zZXRfa2V5OiBzdHIgPSAidGVzdF9zZXQiLAogICAgbW9kZWxfZXZhbHVhdG9yID0gTm9uZSwKICAgIG1vZGVsc19kZXN0OiBzdHIgPSAiIiwKICAgIHBsb3RzX2Rlc3Q6IHN0ciA9ICJwbG90cyIsCiAgICBmaWxlX2V4dDogc3RyID0gInBhcnF1ZXQiLAogICAgbW9kZWxfcGtnX2ZpbGU6IHN0ciA9ICIiLAogICAgcmFuZG9tX3N0YXRlOiBpbnQgPSAxLAopIC0+IE5vbmU6CiAgICAiIiJ0cmFpbiBhIGNsYXNzaWZpZXIKICAgIAogICAgQW4gb3B0aW9uYWwgY3V0b20gbW9kZWwgZXZhbHVhdG9yIGNhbiBiZSBzdXBwbGllZCB0aGF0IHNob3VsZCBoYXZlIHRoZSBzaWduYXR1cmU6CiAgICBgbXlfY3VzdG9tX2V2YWx1YXRvcihjb250ZXh0LCB4dmFsaWQsIHl2YWxpZCwgbW9kZWwpYCBhbmQgcmV0dXJuIGEgZGljdGlvbmFyeSBvZiAKICAgIHNjYWxhciAicmVzdWx0cyIsIGEgInBsb3RzIiBrZXlzIHdpdGggYSBsaXN0IG9mIFBsb3RBcnRpZmFjdHMsIGFuZCAKICAgIGFuZCAidGFibGVzIiBrZXkgY29udGFpbmluZyBhIHJldHVybmVkIGxpc3Qgb2YgVGFibGVBcnRp
ZmFjdHMuCiAgICAKICAgIDpwYXJhbSBjb250ZXh0OiAgICAgICAgICAgdGhlIGZ1bmN0aW9uIGNvbnRleHQKICAgIDpwYXJhbSBtb2RlbF9wa2dfY2xhc3M6ICAgdGhlIG1vZGVsIHRvIHRyYWluLCBlLmcsICJza2xlYXJuLm5ldXJhbF9uZXR3b3Jrcy5NTFBDbGFzc2lmaWVyIiwgCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIG9yIGpzb24gbW9kZWwgY29uZmlnCiAgICA6cGFyYW0gZGF0YXNldDogICAgICAgICAgICgiZGF0YSIpIG5hbWUgb2YgcmF3IGRhdGEgZmlsZQogICAgOnBhcmFtIGxhYmVsX2NvbHVtbjogICAgICBncm91bmQtdHJ1dGggKHkpIGxhYmVscwogICAgOnBhcmFtIGVuY29kZV9jb2xzOiAgICAgICBkaWN0aW9uYXJ5IG9mIG5hbWVzIGFuZCBwcmVmaXhlcyBmb3IgY29sdW1ucyB0aGF0IGFyZQogICAgICAgICAgICAgICAgICAgICAgICAgICAgICB0byBob3QgYmUgZW5jb2RlZC4KICAgIDpwYXJhbSBzYW1wbGU6ICAgICAgICAgICAgU2VsZWN0cyB0aGUgZmlyc3QgbiByb3dzLCBvciBzZWxlY3QgYSBzYW1wbGUKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgc3RhcnRpbmcgZnJvbSB0aGUgZmlyc3QuIElmIG5lZ2F0aXZlIDwtMSwgc2VsZWN0CiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIGEgcmFuZG9tIHNhbXBsZQogICAgOnBhcmFtIHRlc3Rfc2l6ZTogICAgICAgICAoMC4wNSkgdGVzdCBzZXQgc2l6ZQogICAgOnBhcmFtIHRyYWluX3ZhbF9zcGxpdDogICAoMC43NSkgT25jZSB0aGUgdGVzdCBzZXQgaGFzIGJlZW4gcmVtb3ZlZCB0aGUKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgdHJhaW5pbmcgc2V0IGdldHMgdGhpcyBwcm9wb3J0aW9uLgogICAgOnBhcmFtIHRlc3Rfc2V0X2tleTogICAgICBrZXkgb2YgaGVsZCBvdXQgZGF0YSBpbiBhcnRpZmFjdCBzdG9yZQogICAgOnBhcmFtIG1vZGVsX2V2YWx1YXRvcjogICAoTm9uZSkgYSBjdXN0b20gbW9kZWwgZXZhbHVhdG9yIGNhbiBiZSBzcGVjaWZpZWQKICAgIDpwYXJhbSBtb2RlbHNfZGVzdDogICAgICAgKCIiKSBtb2RlbHMgc3ViZm9sZGVyIG9uIGFydGlmYWN0IHBhdGgKICAgIDpwYXJhbSBwbG90c19kZXN0OiAgICAgICAgcGxvdCBzdWJmb2xkZXIgb24gYXJ0aWZhY3QgcGF0aAogICAgOnBhcmFtIGZpbGVfZXh0OiAgICAgICAgICAoInBhcnF1ZXQiKSBmb3JtYXQgZm9yIHRlc3Rfc2V0X2tleSBob2xkIG91dCBkYXRhCiAgICA6cGFyYW0gcmFuZG9tX3N0YXRlOiAgICAgICgxKSBza2xlYXJuIHJuZyBzZWVkCgogICAgIiIiCiAgICBtb2RlbHNfZGVzdCA9IG1vZGVsc19kZXN0IG9yICJtb2RlbCIKICAgIAogICAgcmF3LCBsYWJlbHMsIGhlYWRlciA9IGdldF9zYW1wbGUoZGF0YXNldCwgc2FtcGxlLCBsYWJlbF9jb2x1bW4pCiAgICAKICAgIGlmIGVuY29kZV9jb2xzOgogICAgICAgIHJhdyA9IHBkLmdldF9kdW1taWVzKHJhdywgCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgY29sdW1ucz1saXN0KGVuY29kZV9jb2xzLmtl
eXMoKSksIAogICAgICAgICAgICAgICAgICAgICAgICAgICAgIHByZWZpeD1saXN0KGVuY29kZV9jb2xzLnZhbHVlcygpKSwgCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgZHJvcF9maXJzdD1UcnVlKQogICAgCiAgICAoeHRyYWluLCB5dHJhaW4pLCAoeHZhbGlkLCB5dmFsaWQpLCAoeHRlc3QsIHl0ZXN0KSA9ICAgICAgICAgZ2V0X3NwbGl0cyhyYXcsIGxhYmVscywgMywgdGVzdF9zaXplLCAxLXRyYWluX3ZhbF9zcGxpdCwgcmFuZG9tX3N0YXRlKQogICAgCiAgICBjb250ZXh0LmxvZ19kYXRhc2V0KHRlc3Rfc2V0X2tleSwgCiAgICAgICAgICAgICAgICAgICAgICAgIGRmPXBkLmNvbmNhdChbeHRlc3QsIHl0ZXN0LnRvX2ZyYW1lKCldLCBheGlzPTEpLAogICAgICAgICAgICAgICAgICAgICAgICBmb3JtYXQ9ZmlsZV9leHQsIGluZGV4PUZhbHNlLCAKICAgICAgICAgICAgICAgICAgICAgICAgbGFiZWxzPXsiZGF0YS10eXBlIjogImhlbGQtb3V0In0sCiAgICAgICAgICAgICAgICAgICAgICAgIGFydGlmYWN0X3BhdGg9Y29udGV4dC5hcnRpZmFjdF9zdWJwYXRoKCdkYXRhJykpCgogICAgbW9kZWxfY29uZmlnID0gZ2VuX3NrbGVhcm5fbW9kZWwobW9kZWxfcGtnX2NsYXNzLAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgY29udGV4dC5wYXJhbWV0ZXJzLml0ZW1zKCkpCgogICAgbW9kZWxfY29uZmlnWyJGSVQiXS51cGRhdGUoeyJYIjogeHRyYWluLAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICJ5IjogeXRyYWluLnZhbHVlc30pCiAgICAKICAgIENsYXNzaWZpZXJDbGFzcyA9IGNyZWF0ZV9jbGFzcyhtb2RlbF9jb25maWdbIk1FVEEiXVsiY2xhc3MiXSkKICAgIAogICAgbW9kZWwgPSBDbGFzc2lmaWVyQ2xhc3MoKiptb2RlbF9jb25maWdbIkNMQVNTIl0pCiAgICAKICAgIG1vZGVsLmZpdCgqKm1vZGVsX2NvbmZpZ1siRklUIl0pCiAgICAKICAgIGFydGlmYWN0X3BhdGggPSBjb250ZXh0LmFydGlmYWN0X3N1YnBhdGgobW9kZWxzX2Rlc3QpCiAgICBwbG90c19wYXRoID0gY29udGV4dC5hcnRpZmFjdF9zdWJwYXRoKG1vZGVsc19kZXN0LCBwbG90c19kZXN0KQogICAgaWYgbW9kZWxfZXZhbHVhdG9yOgogICAgICAgIGV2YWxfbWV0cmljcyA9IG1vZGVsX2V2YWx1YXRvcihjb250ZXh0LCB4dmFsaWQsIHl2YWxpZCwgbW9kZWwsCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIHBsb3RzX2FydGlmYWN0X3BhdGg9cGxvdHNfcGF0aCkKICAgIGVsc2U6CiAgICAgICAgZXZhbF9tZXRyaWNzID0gZXZhbF9tb2RlbF92Mihjb250ZXh0LCB4dmFsaWQsIHl2YWxpZCwgbW9kZWwsCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICBwbG90c19hcnRpZmFjdF9wYXRoPXBsb3RzX3BhdGgpCiAgICAgICAgCiAgICBjb250ZXh0LnNldF9sYWJlbCgnY2xhc3MnLCBtb2RlbF9wa2dfY2xhc3MpCiAgICBjb250ZXh0LmxvZ19tb2RlbCgibW9kZWwiLCBib2R5PWR1bXBzKG1vZGVs
KSwKICAgICAgICAgICAgICAgICAgICAgIGFydGlmYWN0X3BhdGg9YXJ0aWZhY3RfcGF0aCwKICAgICAgICAgICAgICAgICAgICAgIGV4dHJhX2RhdGE9ZXZhbF9tZXRyaWNzLCAKICAgICAgICAgICAgICAgICAgICAgIG1vZGVsX2ZpbGU9Im1vZGVsLnBrbCIsCiAgICAgICAgICAgICAgICAgICAgICBtZXRyaWNzPWNvbnRleHQucmVzdWx0cywKICAgICAgICAgICAgICAgICAgICAgIGxhYmVscz17ImNsYXNzIjogbW9kZWxfcGtnX2NsYXNzfSkKCg== commands: [] - code_origin: sklearn_classifier.ipynb + code_origin: https://github.com/Idan707/functions.git#156b4145b7fa1fdada432f00f0081ca5ccdf1b35:sklearn_classifier.ipynb diff --git a/sklearn_classifier/sample-configs/BayesianGaussianMixture.json b/sklearn_classifier/sample-configs/BayesianGaussianMixture.json deleted file mode 100644 index 37a953f72..000000000 --- a/sklearn_classifier/sample-configs/BayesianGaussianMixture.json +++ /dev/null @@ -1 +0,0 @@ -{"CLASS_PARAMS": {"self": 1, "n_components": "full", "covariance_type": 0.001, "tol": 1e-06, "reg_covar": 100, "max_iter": 1, "n_init": "kmeans", "init_params": "dirichlet_process", "weight_concentration_prior_type": null, "weight_concentration_prior": null, "mean_precision_prior": null, "mean_prior": null, "degrees_of_freedom_prior": null, "covariance_prior": null, "random_state": false, "warm_start": 0, "verbose": 10}} \ No newline at end of file diff --git a/sklearn_classifier/sample-configs/LGBMClassifier.json b/sklearn_classifier/sample-configs/LGBMClassifier.json deleted file mode 100644 index b8299edc3..000000000 --- a/sklearn_classifier/sample-configs/LGBMClassifier.json +++ /dev/null @@ -1,27 +0,0 @@ -{ - "CLASS" : { - "boosting_type" : "gbdt", - "num_leaves" : 300, - "max_depth" : 50, - "learning_rate" : 0.1, - "n_estimators" : 300, - "objective" : "binary", - "scale_pos_weight" : 1, - "min_split_gain" : 0.0, - "min_child_samples" : 20, - "subsample" : 1, - "colsample_bytree" : 1, - "reg_alpha" : 0, - "reg_lambda" : 1, - "n_jobs" : 16, - "silent" : true, - "importance_type" : "split", - "random_state" : 1}, - "FIT" : { - "verbose" : false - }, - "META" : { - 
"class" : "lightgbm.sklearn.LGBMClassifier", - "version" : "2.3.1" - } -} diff --git a/sklearn_classifier/sample-configs/LogisticRegression.json b/sklearn_classifier/sample-configs/LogisticRegression.json deleted file mode 100644 index f6b21c9fd..000000000 --- a/sklearn_classifier/sample-configs/LogisticRegression.json +++ /dev/null @@ -1 +0,0 @@ -{"CLASS": {"penalty": "l2", "dual": false, "tol": 0.0001, "C": 1.0, "fit_intercept": true, "intercept_scaling": 1, "class_weight": null, "random_state": null, "solver": "warn", "max_iter": 100, "multi_class": "warn", "verbose": 0, "warm_start": false, "n_jobs": null, "l1_ratio": null}, "FIT": {"X": null, "y": null, "sample_weight": null}, "META": {"sklearn_version": "0.21.3", "classifier": "sklearn.linear_model.logistic.LogisticRegression"}} \ No newline at end of file diff --git a/sklearn_classifier/sample-configs/XGBClassifier.json b/sklearn_classifier/sample-configs/XGBClassifier.json deleted file mode 100644 index 8dcf84b82..000000000 --- a/sklearn_classifier/sample-configs/XGBClassifier.json +++ /dev/null @@ -1,27 +0,0 @@ -{ - "CLASS" : { - "num_class" : 3, - "max_depth" : 50, - "learning_rate" : 0.1, - "verbosity" : 1, - "objective" : "multi:softmax", - "booster" : "gbtree", - "tree_method" : "hist", - "n_jobs" : 16, - "random_state" : 1, - "n_estimators" : 200, - "gamma" : null, - "min_child_weight" : 1, - "max_delta_step" : 0, - "subsample" : 1, - "reg_alpha" : 0, - "reg_lambda" : 1, - "scale_pos_weight" : 1, - "random_state" : 1}, - "FIT" : { - "verbose" : false}, - "META" : { - "class": "xgboost.sklearn.XGBClassifier", - "version" : "1.0.2" - } -} \ No newline at end of file diff --git a/sklearn_classifier/sklearn-classifier.py b/sklearn_classifier/sklearn-classifier.py index a787f4c5a..dc0b58de6 100644 --- a/sklearn_classifier/sklearn-classifier.py +++ b/sklearn_classifier/sklearn-classifier.py @@ -21,10 +21,11 @@ from mlrun.execution import MLClientCtx from mlrun.datastore import DataItem from mlrun.artifacts 
import PlotArtifact - from mlrun.mlutils import (get_sample, get_splits, gen_sklearn_model, create_class, eval_model_v2) +import mlrun + def train_model( context: MLClientCtx, model_pkg_class: str, @@ -33,7 +34,7 @@ def train_model( encode_cols: List[str] = [], sample: int = -1, test_size: float = 0.30, - train_val_split: float = 0.75, + train_val_split: float = 0.70, test_set_key: str = "test_set", model_evaluator = None, models_dest: str = "", diff --git a/sklearn_classifier/sklearn_classifier.ipynb b/sklearn_classifier/sklearn_classifier.ipynb index 0b75e92f2..933c347c1 100644 --- a/sklearn_classifier/sklearn_classifier.ipynb +++ b/sklearn_classifier/sklearn_classifier.ipynb @@ -4,7 +4,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# generic scikit-learn classifier\n", + "# Generic scikit-learn classifier\n", "\n", "run any scikit-learn compatible classifier or list of classifiers" ] @@ -13,7 +13,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## steps\n", + "## Steps\n", "1. 
**generate a scikit-learn model configuration** using the `model_pkg_class` parameter\n", " * input a package and class name, for example, `sklearn.linear_model.LogisticRegression` \n", " * mlrun will find the class and instantiate a copy using default parameters \n", @@ -44,9 +44,19 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "%nuclio: setting kind to 'job'\n", + "%nuclio: setting spec.image to 'mlrun/ml-models'\n", + "%nuclio: setting spec.maxReplicas to 1\n" + ] + } + ], "source": [ "%nuclio config kind = \"job\"\n", "%nuclio config spec.image = \"mlrun/ml-models\"" @@ -54,7 +64,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -79,10 +89,8 @@ "from mlrun.execution import MLClientCtx\n", "from mlrun.datastore import DataItem\n", "from mlrun.artifacts import PlotArtifact\n", - "\n", "from mlrun.mlutils import (get_sample, get_splits,\n", " gen_sklearn_model, create_class, eval_model_v2)\n", - "#from models import eval_class_model, log_model\n", "\n", "def train_model(\n", " context: MLClientCtx,\n", @@ -92,7 +100,7 @@ " encode_cols: List[str] = [],\n", " sample: int = -1,\n", " test_size: float = 0.30,\n", - " train_val_split: float = 0.75,\n", + " train_val_split: float = 0.70,\n", " test_set_key: str = \"test_set\",\n", " model_evaluator = None,\n", " models_dest: str = \"\",\n", @@ -191,7 +199,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 4, "metadata": { "pycharm": { "name": "#%%\n" @@ -210,14 +218,14 @@ } }, "source": [ - "### sklearn trainer setup\n", + "### Sklearn trainer setup\n", "\n", "the following task paramaters are common to all runs" ] }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 5, "metadata": { "pycharm": { "name": "#%%\n" @@ -230,7 +238,7 @@ " \"params\" : {\n", " 
\"sample\" : -1,\n", " \"test_size\" : 0.30,\n", - " \"train_val_split\" : 0.75,\n", + " \"train_val_split\" : 0.70,\n", " \"random_state\" : 1,\n", " \"n_jobs\" : -1,\n", " \"plots_dest\" : \"plots-p\",\n", @@ -251,7 +259,7 @@ } }, "source": [ - "### set model parameters and run locally\n", + "### Set model parameters and run locally\n", "\n", "* loop over a list of candidate models, update the task and run a local trainer for that model \n", "* optionally customize some parameters for each model\n", @@ -262,53 +270,41 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ - "from mlrun import mlconf\n", - "\n", - "DATA_REPO = \"https://raw.githubusercontent.com/yjb-ds/testdata/master/\" \n", - "\n", - "# choose a binary or multiclass dataset\n", - "#DATA_PATH = \"sklearn_classfier/iris_dataset.csv\" # MULTICLASS\n", - "DATA_PATH = \"data/clf-k4-m24-n10k-imb.csv\" # MULTICLASS\n", - "\n", - "DATA_URL = f\"{DATA_REPO}/{DATA_PATH}\"" + "DATA_URL = 'https://s3.wasabisys.com/iguazio/data/iris/iris_dataset.csv'" ] }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "[mlrun] 2020-06-05 20:18:32,985 starting run sklearn_ensemble_RandomForestClassifier uid=ceaa30f9ef3f4f83bf50b0ce965e6d2f -> http://10.199.227.162:8080\n", - "[mlrun] 2020-06-05 20:18:34,831 log artifact test_set at /User/ml2/sklearn.ensemble.RandomForestClassifier/data/test_set.parquet, size: 701200, db: Y\n", - "[mlrun] 2020-06-05 20:18:35,955 log artifact confusion-matrix at /User/ml2/sklearn.ensemble.RandomForestClassifier/model/plots/confusion-matrix.html, size: 21829, db: N\n", - "[mlrun] 2020-06-05 20:18:36,216 log artifact feature-importances at /User/ml2/sklearn.ensemble.RandomForestClassifier/model/plots/feature-importances.html, size: 13789, db: N\n", - "[mlrun] 2020-06-05 20:18:36,324 log artifact precision-recall-multiclass 
at /User/ml2/sklearn.ensemble.RandomForestClassifier/model/plots/precision-recall-multiclass.html, size: 50133, db: N\n", - "[mlrun] 2020-06-05 20:18:36,456 log artifact roc-multiclass at /User/ml2/sklearn.ensemble.RandomForestClassifier/model/plots/roc-multiclass.html, size: 26213, db: N\n", - "[mlrun] 2020-06-05 20:18:36,528 log artifact model at /User/ml2/sklearn.ensemble.RandomForestClassifier/model/, size: 549340, db: Y\n", - "\n" + "> 2020-10-28 15:07:07,185 [warning] warning!, server (0.5.3-rc1) and client (0.5.2) ver dont match\n", + "> 2020-10-28 15:07:07,192 [warning] warning!, server (0.5.3-rc1) and client (0.5.2) ver dont match\n", + "> 2020-10-28 15:07:07,192 [info] starting run sklearn_ensemble_RandomForestClassifier uid=7828bb4ac6c142f1bb30e7e2d289d5fc -> http://mlrun-api:8080\n", + "> 2020-10-28 15:07:07,228 [warning] warning!, server (0.5.3-rc1) and client (0.5.2) ver dont match\n" ] }, { "data": { "text/html": [ - "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
projectuiditerstartstatenamelabelsinputsparametersresultsartifacts
default0Oct 28 15:07:28runningsklearn_ensemble_RandomForestClassifier
v3io_user=admin
kind=job
owner=admin
dataset
model_pkg_class=sklearn.ensemble.RandomForestClassifier
label_column=label
CLASS_max_depth=5
\n", + "
\n", + "
\n", + "
\n", + " Title\n", + " ×\n", + "
\n", + " \n", + "
\n", + "
\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "to track results use .show() or .logs() or in CLI: \n", + "!mlrun get run e10b0a9b81cb42b48116ba59a2851375 --project default , !mlrun logs e10b0a9b81cb42b48116ba59a2851375 --project default\n", + "> 2020-10-28 15:07:28,964 [info] run executed, status=running\n", + "> 2020-10-28 15:07:28,965 [info] starting run sklearn_linear_model_LogisticRegression uid=ccf66a211ed44789ae62bd9a71439543 -> http://mlrun-api:8080\n", + "> 2020-10-28 15:07:29,103 [info] Job is running in the background, pod: sklearn-linear-model-logisticregression-gdcrk\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
projectuiditerstartstatenamelabelsinputsparametersresultsartifacts
default0Oct 28 15:07:29runningsklearn_linear_model_LogisticRegression
v3io_user=admin
kind=job
owner=admin
dataset
model_pkg_class=sklearn.linear_model.LogisticRegression
label_column=label
CLASS_solver=liblinear
\n", + "
\n", + "
\n", + "
\n", + " Title\n", + " ×\n", + "
\n", + " \n", + "
\n", + "
\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "to track results use .show() or .logs() or in CLI: \n", + "!mlrun get run ccf66a211ed44789ae62bd9a71439543 --project default , !mlrun logs ccf66a211ed44789ae62bd9a71439543 --project default\n", + "> 2020-10-28 15:07:29,164 [info] run executed, status=running\n", + "> 2020-10-28 15:07:29,165 [info] starting run sklearn_ensemble_AdaBoostClassifier uid=4cbdeb89e6784f8b826102ed7a3bc24b -> http://mlrun-api:8080\n", + "> 2020-10-28 15:07:29,414 [info] Job is running in the background, pod: sklearn-ensemble-adaboostclassifier-h6svl\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
projectuiditerstartstatenamelabelsinputsparametersresultsartifacts
default0Oct 28 15:07:29runningsklearn_ensemble_AdaBoostClassifier
v3io_user=admin
kind=job
owner=admin
dataset
model_pkg_class=sklearn.ensemble.AdaBoostClassifier
label_column=label
CLASS_n_estimators=200
CLASS_learning_rate=0.01
\n", + "
\n", + "
\n", + "
\n", + " Title\n", + " ×\n", + "
\n", + " \n", + "
\n", + "
\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "to track results use .show() or .logs() or in CLI: \n", + "!mlrun get run 4cbdeb89e6784f8b826102ed7a3bc24b --project default , !mlrun logs 4cbdeb89e6784f8b826102ed7a3bc24b --project default\n", + "> 2020-10-28 15:07:29,496 [info] run executed, status=running\n" + ] + } + ], + "source": [ + "outputs = []\n", + "for model in models:\n", + " task_copy = task_params.copy()\n", + " task_copy.update(\n", + " {\n", + " \"params\":{ \"model_pkg_class\" : model,\n", + " \"label_column\" : \"label\"}\n", + " }\n", + " )\n", + " \n", + " # customize specific model parameters\n", + " if \"RandomForestClassifier\" in model:\n", + " task_copy[\"params\"].update({\"CLASS_max_depth\" : 5})\n", + "\n", + " if \"LogisticRegression\" in model:\n", + " task_copy[\"params\"].update({\"CLASS_solver\" : \"liblinear\"})\n", + " \n", + " if \"AdaBoostClassifier\" in model:\n", + " task_copy[\"params\"].update({\"CLASS_n_estimators\" : 200,\n", + " \"CLASS_learning_rate\" : 0.01\n", + " })\n", + " \n", + " name = model.replace('.', '_')\n", + " output = fn.run(mlrun.NewTask(**task_copy),\n", + " handler=train_model,\n", + " name=name,\n", + " inputs={\"dataset\" : DATA_URL}, \n", + " artifact_path=os.path.join(artifact_path, model))\n", + " \n", + " outputs.append({name: output.outputs})" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { @@ -1130,9 +1847,9 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.8" + "version": "3.7.6" } }, "nbformat": 4, "nbformat_minor": 4 -} \ No newline at end of file +} diff --git a/sklearn_classifier_dask/README.md b/sklearn_classifier_dask/README.md new file mode 100644 index 000000000..fde0b0808 --- /dev/null +++ b/sklearn_classifier_dask/README.md @@ -0,0 +1,49 @@ 
+# **Training Functions** + +## `sklearn-classifier with Dask` + +Run any scikit-learn compatible classifier or list of classifiers with Dask + +### steps + +1. **Generate a scikit-learn model configuration** using the `model_pkg_class` parameter + * input a package and class name, for example, `sklearn.linear_model.LogisticRegression` + * mlrun will find the class and instantiate a copy using default parameters + * You can modify both the model class and the fit methods +2. **Get a sample of data** from a data source + * select a random sample of rows using a negative integer + * select consecutive rows using a positive integer +3. **Split the data** into train, validation, and test sets + * the test set is saved as an artifact and never seen again until testing +4. **Train the model** +5. **pickle / serialize the model** + * models can be pickled or saved as json +6. **Evaluate the model** + * a custom evaluator can be provided, see function doc for details + + +Train a sklearn classifier with Dask + + :param context: Function context. + :param dataset: Raw data file. + :param model_pkg_class: Model to train, e.g., "sklearn.ensemble.RandomForestClassifier", + or json model config. + :param label_column: (label) Ground-truth y labels. + :param train_validation_size: (0.75) Train validation set proportion out of the full dataset. + :param sample: (1.0) Select sample from dataset (n-rows/% of total), randomize rows by default. + :param models_dest: (models) Models subfolder on artifact path. + :param test_set_key: (test_set) Mlrun db key of held out data in artifact store. + :param plots_dest: (plots) Plot subfolder on artifact path. + :param dask_key: (dask key) Key of dataframe in dask client "datasets" attribute. + :param dask_persist: (False) Should the data be persisted (through the `client.persist`) + :param scheduler_key: (scheduler) Dask scheduler configuration, json also logged as an artifact. 
+ :param file_ext: (parquet) format for test_set_key hold out data + :param random_state: (42) sklearn seed + + +### TODO + +1. Add cross validation methods +2. Improve dask efficiency by calling dask data frame (not from pandas) +3. Log dataset artifact as dask data frame +4. Add values imputer (instead of drop na) diff --git a/sklearn_classifier_dask/function.yaml b/sklearn_classifier_dask/function.yaml new file mode 100644 index 000000000..64afd17d9 --- /dev/null +++ b/sklearn_classifier_dask/function.yaml @@ -0,0 +1,23 @@ +kind: dask +metadata: + name: dask_init + hash: b4b3af2251b76b5aea339535e1e9d5d71f83aae1 + project: default + categories: [] +spec: + command: '' + image: mlrun/ml-models + env: [] + resources: + limits: + memory: 8G + build: + commands: [] + description: '' + replicas: 5 + remote: true + service_type: NodePort + nthreads: 6 + min_replicas: 0 + max_replicas: 16 + scheduler_timeout: 60 minutes diff --git a/sklearn_classifier_dask/sklearn-classifier-dask.py b/sklearn_classifier_dask/sklearn-classifier-dask.py new file mode 100644 index 000000000..8a9f6b232 --- /dev/null +++ b/sklearn_classifier_dask/sklearn-classifier-dask.py @@ -0,0 +1,197 @@ +# Generated by nuclio.export.NuclioExporter + +import warnings +warnings.filterwarnings('ignore') + +import os +import joblib +import numpy as np +import pandas as pd +import sklearn +from cloudpickle import dumps, load, dump +from typing import List, Optional + +from dask.distributed import Client +from dask import dataframe as dd +from dask import array as da +from dask.delayed import delayed +from dask_ml import model_selection +from dask_ml import metrics +from dask_ml.preprocessing import StandardScaler, LabelEncoder + +from mlrun.execution import MLClientCtx +from mlrun.datastore import DataItem +from mlrun.artifacts import PlotArtifact +from mlrun.mlutils import (gen_sklearn_model, create_class) + +import matplotlib.pyplot as plt +from yellowbrick.classifier import ROCAUC, ClassificationReport, 
ConfusionMatrix +from yellowbrick.model_selection import FeatureImportances + +def train_model(context: MLClientCtx, + dataset: DataItem, + model_pkg_class: str, + label_column: str = "label", + train_validation_size: float = 0.75, + sample: float = 1.0, + models_dest: str = "models", + test_set_key: str = "test_set", + plots_dest: str = "plots", + dask_key: str = "dask_key", + dask_persist: bool = False, + scheduler_key: str = '', + file_ext: str = "parquet", + random_state: int = 42) -> None: + + """ + Train a sklearn classifier with Dask + + :param context: Function context. + :param dataset: Raw data file. + :param model_pkg_class: Model to train, e.g, "sklearn.ensemble.RandomForestClassifier", + or json model config. + :param label_column: (label) Ground-truth y labels. + :param train_validation_size: (0.75) Train validation set proportion out of the full dataset. + :param sample: (1.0) Select sample from dataset (n-rows/% of total), randomzie rows as default. + :param models_dest: (models) Models subfolder on artifact path. + :param test_set_key: (test_set) Mlrun db key of held out data in artifact store. + :param plots_dest: (plots) Plot subfolder on artifact path. + :param dask_key: (dask key) Key of dataframe in dask client "datasets" attribute. + :param dask_persist: (False) Should the data be persisted (through the `client.persist`) + :param scheduler_key: (scheduler) Dask scheduler configuration, json also logged as an artifact. 
+ :param file_ext: (parquet) format for test_set_key hold out data + :param random_state: (42) sklearn seed + """ + + if scheduler_key: + client = Client(scheduler_key) + + else: + client = Client() + + context.logger.info("Read Data") + df = dataset.as_df(df_module=dd) + + context.logger.info("Prep Data") + numerics = ['int16', 'int32', 'int64', 'float16', 'float32', 'float64'] + df = df.select_dtypes(include=numerics) + + if df.isna().any().any().compute() == True: + raise Exception('NAs valus found') + + df_header = df.columns + + df = df.sample(frac=sample).reset_index(drop=True) + encoder = LabelEncoder() + encoder = encoder.fit(df[label_column]) + X = df.drop(label_column, axis=1).to_dask_array(lengths=True) + y = encoder.transform(df[label_column]) + + classes = df[label_column].drop_duplicates() # no unique values in dask + classes = [str(i) for i in classes] + + context.logger.info("Split and Train") + X_train, X_test, y_train, y_test = model_selection.train_test_split(X, y, train_size=train_validation_size, + random_state=random_state) + + scaler = StandardScaler() + scaler = scaler.fit(X_train) + X_train_transformed = scaler.transform(X_train) + X_test_transformed = scaler.transform(X_test) + + model_config = gen_sklearn_model(model_pkg_class, + context.parameters.items()) + + model_config["FIT"].update({"X": X_train_transformed, + "y": y_train}) + + ClassifierClass = create_class(model_config["META"]["class"]) + + model = ClassifierClass(**model_config["CLASS"]) + + with joblib.parallel_backend("dask"): + + model = model.fit(**model_config["FIT"]) + + artifact_path = context.artifact_subpath(models_dest) + + plots_path = context.artifact_subpath(models_dest, plots_dest) + + context.logger.info("Evaluate") + extra_data_dict = {} + for report in (ROCAUC, ClassificationReport, ConfusionMatrix): + + report_name = str(report.__name__) + plt.cla() + plt.clf() + plt.close() + + viz = report(model, classes=classes, per_class=True, is_fitted=True) + 
viz.fit(X_train_transformed, y_train) # Fit the training data to the visualizer + viz.score(X_test_transformed, y_test.compute()) # Evaluate the model on the test data + + plot = context.log_artifact(PlotArtifact(report_name, + body=viz.fig, + title=report_name), + db_key=False) + extra_data_dict[str(report)] = plot + + if report_name == 'ROCAUC': + context.log_results({"micro": viz.roc_auc.get("micro"), + "macro": viz.roc_auc.get("macro")}) + + elif report_name == 'ClassificationReport': + for score_name in viz.scores_: + for score_class in viz.scores_[score_name]: + + context.log_results({score_name + "-" + score_class : + viz.scores_[score_name].get(score_class)}) + + + viz = FeatureImportances(model, classes=classes, per_class=True, + is_fitted=True, labels=df_header.delete(df_header.get_loc(label_column))) + viz.fit(X_train_transformed, y_train) + viz.score(X_test_transformed, y_test) + + plot = context.log_artifact(PlotArtifact("FeatureImportances", body=viz.fig, + title="FeatureImportances"), db_key=False) + extra_data_dict[str("FeatureImportances")] = plot + + plt.cla() + plt.clf() + plt.close() + + context.logger.info("Log artifacts") + artifact_path = context.artifact_subpath(models_dest) + + plots_path = context.artifact_subpath(models_dest, plots_dest) + + context.set_label('class', model_pkg_class) + + context.log_model("model", body=dumps(model), + artifact_path=artifact_path, + model_file="model.pkl", + extra_data=extra_data_dict, + metrics=context.results, + labels={"class": model_pkg_class}) + + context.log_artifact("standard_scaler", body=dumps(scaler), + artifact_path=artifact_path, + model_file="scaler.gz", + label="standard_scaler") + + context.log_artifact("label_encoder", body=dumps(encoder), + artifact_path=artifact_path, + model_file="encoder.gz", + label="label_encoder") + + df_to_save = delayed(np.column_stack)((X_test, y_test)).compute() + context.log_dataset(test_set_key, + df=pd.DataFrame(df_to_save, + columns=df_header), # improve log 
dataset ability + format=file_ext, index=False, + labels={"data-type": "held-out"}, + artifact_path=context.artifact_subpath('data')) + + context.logger.info("Done!") + diff --git a/sklearn_classifier_dask/sklearn_classifier_dask.ipynb b/sklearn_classifier_dask/sklearn_classifier_dask.ipynb new file mode 100644 index 000000000..a334248b5 --- /dev/null +++ b/sklearn_classifier_dask/sklearn_classifier_dask.ipynb @@ -0,0 +1,1261 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Generic Scikit-Learn Classifier With Dask\n", + "\n", + "Run any scikit-learn compatible classifier or list of classifiers" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "# nuclio: ignore\n", + "import nuclio" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "%nuclio: setting kind to 'job'\n", + "%nuclio: setting spec.image to 'mlrun/ml-models'\n" + ] + } + ], + "source": [ + "%nuclio config kind = \"job\"\n", + "%nuclio config spec.image = \"mlrun/ml-models\"" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "import warnings\n", + "warnings.filterwarnings('ignore')\n", + "\n", + "import os\n", + "import joblib\n", + "import numpy as np\n", + "import pandas as pd\n", + "import sklearn\n", + "from cloudpickle import dumps, load, dump\n", + "from typing import List, Optional\n", + "\n", + "from dask.distributed import Client\n", + "from dask import dataframe as dd\n", + "from dask import array as da\n", + "from dask.delayed import delayed\n", + "from dask_ml import model_selection\n", + "from dask_ml import metrics\n", + "from dask_ml.preprocessing import StandardScaler, LabelEncoder\n", + "\n", + "from mlrun.execution import MLClientCtx\n", + "from mlrun.datastore import DataItem\n", + "from mlrun.artifacts import 
PlotArtifact\n", + "from mlrun.mlutils import (gen_sklearn_model, create_class)\n", + "\n", + "import matplotlib.pyplot as plt\n", + "from yellowbrick.classifier import ROCAUC, ClassificationReport, ConfusionMatrix\n", + "from yellowbrick.model_selection import FeatureImportances" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "def train_model(context: MLClientCtx,\n", + " dataset: DataItem,\n", + " model_pkg_class: str,\n", + " label_column: str = \"label\",\n", + " train_validation_size: float = 0.75,\n", + " sample: float = 1.0,\n", + " models_dest: str = \"models\",\n", + " test_set_key: str = \"test_set\",\n", + " plots_dest: str = \"plots\",\n", + " dask_key: str = \"dask_key\",\n", + " dask_persist: bool = False,\n", + " scheduler_key: str = '',\n", + " file_ext: str = \"parquet\",\n", + " random_state: int = 42) -> None:\n", + " \n", + " \"\"\"\n", + " Train a sklearn classifier with Dask\n", + " \n", + " :param context: Function context.\n", + " :param dataset: Raw data file.\n", + " :param model_pkg_class: Model to train, e.g, \"sklearn.ensemble.RandomForestClassifier\", \n", + " or json model config.\n", + " :param label_column: (label) Ground-truth y labels.\n", + " :param train_validation_size: (0.75) Train validation set proportion out of the full dataset.\n", + " :param sample: (1.0) Select sample from dataset (n-rows/% of total), randomzie rows as default.\n", + " :param models_dest: (models) Models subfolder on artifact path.\n", + " :param test_set_key: (test_set) Mlrun db key of held out data in artifact store.\n", + " :param plots_dest: (plots) Plot subfolder on artifact path.\n", + " :param dask_key: (dask key) Key of dataframe in dask client \"datasets\" attribute.\n", + " :param dask_persist: (False) Should the data be persisted (through the `client.persist`)\n", + " :param scheduler_key: (scheduler) Dask scheduler configuration, json also logged as an artifact.\n", + " :param 
file_ext: (parquet) format for test_set_key hold out data\n", + " :param random_state: (42) sklearn seed\n", + " \"\"\"\n", + " \n", + " # set up dask client \n", + " if scheduler_key:\n", + " client = Client(scheduler_key)\n", + " \n", + " else:\n", + " client = Client()\n", + "\n", + " context.logger.info(\"Read Data\")\n", + " # read data with dask and mlrun\n", + " df = dataset.as_df(df_module=dd) \n", + "\n", + " # take only numrical cols\n", + " context.logger.info(\"Prep Data\")\n", + " numerics = ['int16', 'int32', 'int64', 'float16', 'float32', 'float64']\n", + " df = df.select_dtypes(include=numerics)\n", + " \n", + " # dropna\n", + " if df.isna().any().any().compute() == True:\n", + " raise Exception('NAs valus found')\n", + " \n", + " # save cols names\n", + " df_header = df.columns\n", + " \n", + " df = df.sample(frac=sample).reset_index(drop=True)\n", + " encoder = LabelEncoder()\n", + " encoder = encoder.fit(df[label_column])\n", + " X = df.drop(label_column, axis=1).to_dask_array(lengths=True)\n", + " y = encoder.transform(df[label_column])\n", + "\n", + " classes = df[label_column].drop_duplicates() # no unique values in dask\n", + " classes = [str(i) for i in classes]\n", + "\n", + " context.logger.info(\"Split and Train\")\n", + " X_train, X_test, y_train, y_test = model_selection.train_test_split(X, y, train_size=train_validation_size,\n", + " random_state=random_state)\n", + " \n", + " scaler = StandardScaler()\n", + " scaler = scaler.fit(X_train)\n", + " X_train_transformed = scaler.transform(X_train)\n", + " X_test_transformed = scaler.transform(X_test)\n", + " \n", + " model_config = gen_sklearn_model(model_pkg_class,\n", + " context.parameters.items())\n", + "\n", + " model_config[\"FIT\"].update({\"X\": X_train_transformed,\n", + " \"y\": y_train})\n", + " \n", + " ClassifierClass = create_class(model_config[\"META\"][\"class\"])\n", + " \n", + " model = ClassifierClass(**model_config[\"CLASS\"])\n", + " \n", + " # load and fit model\n", + 
" with joblib.parallel_backend(\"dask\"):\n", + " \n", + " # initialize classifier from sklearn\n", + " model = model.fit(**model_config[\"FIT\"])\n", + "\n", + " # log artifacts\n", + " artifact_path = context.artifact_subpath(models_dest)\n", + " \n", + " # log plots\n", + " plots_path = context.artifact_subpath(models_dest, plots_dest)\n", + "\n", + " # create reports\n", + " context.logger.info(\"Evaluate\")\n", + " extra_data_dict = {}\n", + " for report in (ROCAUC, ClassificationReport, ConfusionMatrix):\n", + " \n", + " report_name = str(report.__name__)\n", + " # clear output\n", + " plt.cla()\n", + " plt.clf()\n", + " plt.close()\n", + " \n", + " # genrate report\n", + " viz = report(model, classes=classes, per_class=True, is_fitted=True)\n", + " viz.fit(X_train_transformed, y_train) # Fit the training data to the visualizer\n", + " viz.score(X_test_transformed, y_test.compute()) # Evaluate the model on the test data\n", + " \n", + " # log reports\n", + " plot = context.log_artifact(PlotArtifact(report_name, \n", + " body=viz.fig,\n", + " title=report_name), \n", + " db_key=False)\n", + " extra_data_dict[str(report)] = plot\n", + " \n", + " # log results\n", + " if report_name == 'ROCAUC':\n", + " context.log_results({\"micro\": viz.roc_auc.get(\"micro\"),\n", + " \"macro\": viz.roc_auc.get(\"macro\")})\n", + " \n", + " elif report_name == 'ClassificationReport':\n", + " for score_name in viz.scores_:\n", + " for score_class in viz.scores_[score_name]:\n", + " \n", + " context.log_results({score_name + \"-\" + score_class : \n", + " viz.scores_[score_name].get(score_class)})\n", + " \n", + " #viz.show()\n", + " \n", + " # get feature importance\n", + " viz = FeatureImportances(model, classes=classes, per_class=True, \n", + " is_fitted=True, labels=df_header.delete(df_header.get_loc(label_column)))\n", + " viz.fit(X_train_transformed, y_train) \n", + " viz.score(X_test_transformed, y_test)\n", + " #viz.show()\n", + " \n", + " plot = 
context.log_artifact(PlotArtifact(\"FeatureImportances\", body=viz.fig, \n", + " title=\"FeatureImportances\"), db_key=False)\n", + " extra_data_dict[str(\"FeatureImportances\")] = plot\n", + " \n", + " # clear final output\n", + " plt.cla()\n", + " plt.clf()\n", + " plt.close()\n", + "\n", + " # log artifacts\n", + " context.logger.info(\"Log artifacts\")\n", + " artifact_path = context.artifact_subpath(models_dest)\n", + " \n", + " # log plots\n", + " plots_path = context.artifact_subpath(models_dest, plots_dest)\n", + " \n", + " # set label\n", + " context.set_label('class', model_pkg_class)\n", + " \n", + " # log models\n", + " context.log_model(\"model\", body=dumps(model),\n", + " artifact_path=artifact_path,\n", + " model_file=\"model.pkl\",\n", + " extra_data=extra_data_dict,\n", + " metrics=context.results,\n", + " labels={\"class\": model_pkg_class})\n", + " \n", + " # log scalers\n", + " context.log_artifact(\"standard_scaler\", body=dumps(scaler),\n", + " artifact_path=artifact_path,\n", + " model_file=\"scaler.gz\",\n", + " label=\"standard_scaler\")\n", + " \n", + " # log encoder\n", + " context.log_artifact(\"label_encoder\", body=dumps(encoder),\n", + " artifact_path=artifact_path,\n", + " model_file=\"encoder.gz\",\n", + " label=\"label_encoder\")\n", + " \n", + " # set aside some test data\n", + " df_to_save = delayed(np.column_stack)((X_test, y_test)).compute()\n", + " context.log_dataset(test_set_key, \n", + " df=pd.DataFrame(df_to_save, \n", + " columns=df_header), # improve log dataset ability\n", + " format=file_ext, index=False, \n", + " labels={\"data-type\": \"held-out\"},\n", + " artifact_path=context.artifact_subpath('data'))\n", + " \n", + " context.logger.info(\"Done!\")" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "# nuclio: end-code" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Save and Config" + ] + }, + { + "cell_type": "code", + 
"execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "import mlrun\n", + "skf = mlrun.code_to_function('sklearn-classifier-dask', kind='job', code_output=\".\") .apply(mlrun.mount_v3io())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Set Environment" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "> 2020-11-23 14:59:39,336 [warning] warning!, server (0.5.4-rc1) and client (0.5.4) ver dont match\n" + ] + } + ], + "source": [ + "artifact_path = mlrun.set_environment(api_path = 'http://mlrun-api:8080',\n", + " artifact_path = os.path.abspath('./'))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Init Dask" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### init a dask cluster and set dask specs" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "> 2020-11-23 14:59:40,443 [info] using in-cluster config.\n" + ] + } + ], + "source": [ + "dsf = mlrun.new_function('dask_init', kind='dask', image='mlrun/ml-models')" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "> 2020-11-23 14:59:41,313 [info] function spec saved to path: function.yaml\n" + ] + }, + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dsf.spec.remote = True\n", + "dsf.spec.replicas = 5\n", + "dsf.spec.service_type = 'NodePort'\n", + "dsf.with_limits(mem=\"8G\")\n", + "dsf.spec.nthreads = 6\n", + "dsf.export(\"function.yaml\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### mount v3io in for file system access" + ] + }, + { + "cell_type": 
"code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dsf.apply(mlrun.mount_v3io())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### init dask client \n", + "copy the scheduler address to **DASK_CLIENT** param in the following cell, this will make the function use the dask cluster." + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "> 2020-11-23 15:00:15,924 [info] trying dask client at: tcp://mlrun-dask-init-4fdf1dc3-5.default-tenant:8786\n", + "> 2020-11-23 15:00:15,932 [info] using remote dask scheduler (mlrun-dask-init-4fdf1dc3-5) at: tcp://mlrun-dask-init-4fdf1dc3-5.default-tenant:8786\n" + ] + }, + { + "data": { + "text/html": [ + "dashboard link: default-tenant.app.dsteam.iguazio-cd1.com:32122" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "
\n", + "

Client

\n", + "\n", + "
\n", + "

Cluster

\n", + "
    \n", + "
  • Workers: 4
  • \n", + "
  • Cores: 24
  • \n", + "
  • Memory: 32.00 GB
  • \n", + "
\n", + "
" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dsf.client" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Set Parameters" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "DATA_URL = '/User/iris.csv'\n", + "DASK_CLIENT = 'tcp://mlrun-dask-init-4fdf1dc3-5.default-tenant:8786'\n", + "\n", + "task_params = {\n", + " \"params\" : {\n", + " \"sample\" : 1,\n", + " \"train_val_split\" : 0.75,\n", + " \"random_state\" : 42,\n", + " \"n_jobs\" : -1,\n", + " \"plots_dest\" : \"plots-p\",\n", + " \"models_dest\" : 'sklearn-clfmodel'}}\n", + "\n", + "\n", + "models = [\n", + " \"sklearn.ensemble.RandomForestClassifier\",\n", + " \"sklearn.ensemble.AdaBoostClassifier\",\n", + " \"sklearn.linear_model.LogisticRegression\"\n", + "]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Test and Run" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "> 2020-11-23 15:00:23,779 [warning] warning!, server (0.5.4-rc1) and client (0.5.4) ver dont match\n", + "> 2020-11-23 15:00:23,780 [info] starting run sklearn_ensemble_RandomForestClassifier uid=a78d70155eb54280a04f1d6c5b42f673 DB=http://mlrun-api:8080\n", + "> 2020-11-23 15:00:23,941 [info] Job is running in the background, pod: sklearn-ensemble-randomforestclassifier-cd6bk\n", + "> 2020-11-23 15:00:29,218 [info] Read Data\n", + "> 2020-11-23 15:00:29,236 [info] Prep Data\n", + "> 2020-11-23 15:00:29,665 [info] Split and Train\n", + "> 2020-11-23 15:00:32,016 [info] Evaluate\n", + "> 2020-11-23 15:00:33,768 [info] Log artifacts\n", + "> 2020-11-23 15:00:34,595 [info] Done!\n", + "> 2020-11-23 15:00:34,660 [info] run executed, status=completed\n", + "final state: completed\n" + ] + }, + { + "data": { + "text/html": 
[ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
projectuiditerstartstatenamelabelsinputsparametersresultsartifacts
default0Nov 23 15:00:29completedsklearn_ensemble_RandomForestClassifier
v3io_user=admin
kind=job
owner=admin
host=sklearn-ensemble-randomforestclassifier-cd6bk
class=sklearn.ensemble.RandomForestClassifier
dataset
model_pkg_class=sklearn.ensemble.RandomForestClassifier
label_column=label
scheduler_key=tcp://mlrun-dask-init-4fdf1dc3-5.default-tenant:8786
CLASS_max_depth=5
micro=0.9941135734072022
macro=0.9942943331178625
precision-1=1.0
precision-2=0.8888888888888888
precision-0=0.9166666666666666
recall-1=1.0
recall-2=0.9411764705882353
recall-0=0.8461538461538461
f1-1=1.0
f1-2=0.9142857142857143
f1-0=0.8799999999999999
ROCAUC
ClassificationReport
ConfusionMatrix
FeatureImportances
model
standard_scaler
label_encoder
test_set
\n", + "
\n", + "
\n", + "
\n", + " Title\n", + " ×\n", + "
\n", + " \n", + "
\n", + "
\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "to track results use .show() or .logs() or in CLI: \n", + "!mlrun get run a78d70155eb54280a04f1d6c5b42f673 --project default , !mlrun logs a78d70155eb54280a04f1d6c5b42f673 --project default\n", + "> 2020-11-23 15:00:43,168 [info] run executed, status=completed\n", + "> 2020-11-23 15:00:43,169 [info] starting run sklearn_ensemble_AdaBoostClassifier uid=efac1f79e21f46259c423268d334ae6d DB=http://mlrun-api:8080\n", + "> 2020-11-23 15:00:43,335 [info] Job is running in the background, pod: sklearn-ensemble-adaboostclassifier-fd887\n", + "> 2020-11-23 15:00:48,569 [info] Read Data\n", + "> 2020-11-23 15:00:48,588 [info] Prep Data\n", + "> 2020-11-23 15:00:48,796 [info] Split and Train\n", + "> 2020-11-23 15:00:49,220 [info] Evaluate\n", + "> 2020-11-23 15:00:51,094 [info] Log artifacts\n", + "> 2020-11-23 15:00:51,533 [info] Done!\n", + "> 2020-11-23 15:00:51,581 [info] run executed, status=completed\n", + "final state: completed\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
projectuiditerstartstatenamelabelsinputsparametersresultsartifacts
default0Nov 23 15:00:48completedsklearn_ensemble_AdaBoostClassifier
v3io_user=admin
kind=job
owner=admin
host=sklearn-ensemble-adaboostclassifier-fd887
class=sklearn.ensemble.AdaBoostClassifier
dataset
model_pkg_class=sklearn.ensemble.AdaBoostClassifier
label_column=label
scheduler_key=tcp://mlrun-dask-init-4fdf1dc3-5.default-tenant:8786
CLASS_n_estimators=200
CLASS_learning_rate=0.01
micro=0.9581024930747923
macro=0.9808974358974359
precision-0=1.0
precision-2=0.8
precision-1=0.9375
recall-0=1.0
recall-2=0.9230769230769231
recall-1=0.8333333333333334
f1-0=1.0
f1-2=0.8571428571428571
f1-1=0.8823529411764706
ROCAUC
ClassificationReport
ConfusionMatrix
FeatureImportances
model
standard_scaler
label_encoder
test_set
\n", + "
\n", + "
\n", + "
\n", + " Title\n", + " ×\n", + "
\n", + " \n", + "
\n", + "
\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "to track results use .show() or .logs() or in CLI: \n", + "!mlrun get run efac1f79e21f46259c423268d334ae6d --project default , !mlrun logs efac1f79e21f46259c423268d334ae6d --project default\n", + "> 2020-11-23 15:00:52,501 [info] run executed, status=completed\n", + "> 2020-11-23 15:00:52,502 [info] starting run sklearn_linear_model_LogisticRegression uid=e314ab6e5e8546afbf851765e47506b0 DB=http://mlrun-api:8080\n", + "> 2020-11-23 15:00:52,675 [info] Job is running in the background, pod: sklearn-linear-model-logisticregression-drxn4\n", + "> 2020-11-23 15:00:58,029 [info] Read Data\n", + "> 2020-11-23 15:00:58,045 [info] Prep Data\n", + "> 2020-11-23 15:00:58,232 [info] Split and Train\n", + "> 2020-11-23 15:00:58,420 [info] Evaluate\n", + "> 2020-11-23 15:01:00,116 [info] Log artifacts\n", + "> 2020-11-23 15:01:00,439 [info] Done!\n", + "> 2020-11-23 15:01:00,489 [info] run executed, status=completed\n", + "final state: completed\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
projectuiditerstartstatenamelabelsinputsparametersresultsartifacts
default0Nov 23 15:00:57completedsklearn_linear_model_LogisticRegression
v3io_user=admin
kind=job
owner=admin
host=sklearn-linear-model-logisticregression-drxn4
class=sklearn.linear_model.LogisticRegression
dataset
model_pkg_class=sklearn.linear_model.LogisticRegression
label_column=label
scheduler_key=tcp://mlrun-dask-init-4fdf1dc3-5.default-tenant:8786
CLASS_solver=liblinear
micro=0.9854570637119113
macro=0.9832142857142858
precision-1=1.0
precision-0=0.8461538461538461
precision-2=0.8571428571428571
recall-1=1.0
recall-0=0.8461538461538461
recall-2=0.8571428571428571
f1-1=1.0
f1-0=0.8461538461538461
f1-2=0.8571428571428571
ROCAUC
ClassificationReport
ConfusionMatrix
FeatureImportances
model
standard_scaler
label_encoder
test_set
\n", + "
\n", + "
\n", + "
\n", + " Title\n", + " ×\n", + "
\n", + " \n", + "
\n", + "
\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "to track results use .show() or .logs() or in CLI: \n", + "!mlrun get run e314ab6e5e8546afbf851765e47506b0 --project default , !mlrun logs e314ab6e5e8546afbf851765e47506b0 --project default\n", + "> 2020-11-23 15:01:01,914 [info] run executed, status=completed\n" + ] + } + ], + "source": [ + "outputs = []\n", + "for model in models:\n", + " task_copy = task_params.copy()\n", + " task_copy.update(\n", + " {\n", + " \"params\":{ \"model_pkg_class\" : model,\n", + " \"label_column\" : \"label\",\n", + " \"scheduler_key\": DASK_CLIENT}\n", + " }\n", + " )\n", + " \n", + " # customize specific model parameters\n", + " if \"RandomForestClassifier\" in model:\n", + " task_copy[\"params\"].update({\"CLASS_max_depth\" : 5})\n", + "\n", + " if \"LogisticRegression\" in model:\n", + " task_copy[\"params\"].update({\"CLASS_solver\" : \"liblinear\"})\n", + " \n", + " if \"AdaBoostClassifier\" in model:\n", + " task_copy[\"params\"].update({\"CLASS_n_estimators\" : 200,\n", + " \"CLASS_learning_rate\" : 0.01\n", + " })\n", + " \n", + " name = model.replace('.', '_')\n", + " output = skf.run(mlrun.NewTask(**task_copy),\n", + " handler=train_model,\n", + " name=name,\n", + " inputs={\"dataset\" : DATA_URL},\n", + " artifact_path=os.path.join(artifact_path, model))\n", + " \n", + " outputs.append({name: output.outputs})" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.6" + } + }, + "nbformat": 4, 
+ "nbformat_minor": 4 +} diff --git a/test_classifier/README.md b/test_classifier/README.md index e69de29bb..6680e33bb 100644 --- a/test_classifier/README.md +++ b/test_classifier/README.md @@ -0,0 +1,22 @@ +# **Testing Functions** + +## `sklearn-classifier` + +Test one or more classifier models against a held-out dataset. +Using held-out test features, evaluates the performance of the estimated model. +Can be part of a kubeflow pipeline as a test step that is run post EDA and +training/validation cycles. + +```markdown + +:param context: the function context +:param models_path: artifact models representing a file or a folder +:param test_set: test features and labels +:param label_column: column name for ground truth labels +:param score_method: for multiclass classification +:param plots_dest: dir for test plots +:param model_evaluator: NOT IMPLEMENTED: specific method to generate eval, passed in as string + or available in this folder +:param predictions_column: column name for the predictions column on the resulted artifact +:param model_update: (True) update the model; when running standalone there is no need to update +``` \ No newline at end of file diff --git a/test_classifier/function.yaml b/test_classifier/function.yaml index 632bd284d..494dcdbdd 100644 --- a/test_classifier/function.yaml +++ b/test_classifier/function.yaml @@ -2,8 +2,8 @@ kind: job metadata: name: test-classifier tag: '' - hash: b3a28d41d4e9142cd7426ed970aa46237bb40728 - project: '' + hash: 7ede87ea7a064bd1d4b4771a9ebb517f08ba2cca + project: default labels: author: yjb framework: sklearn @@ -26,15 +26,19 @@ spec: parameters: - name: context doc: the function context + default: '' - name: models_path type: DataItem doc: artifact models representing a file or a folder + default: '' - name: test_set type: DataItem doc: test features and labels + default: '' - name: label_column type: str doc: column name for ground truth labels + default: '' - name: score_method type: str doc: for multiclass 
classification @@ -42,9 +46,11 @@ spec: - name: plots_dest type: str doc: dir for test plots + default: '' - name: model_evaluator doc: 'NOT IMPLEMENTED: specific method to generate eval, passed in as string or available in this folder' + default: null - name: default_model type: str default: model.pkl @@ -52,10 +58,14 @@ spec: type: str doc: column name for the predictions column on the resulted artifact default: yscore - outputs: [] - lineno: 14 + - name: model_update + doc: (True) update model, when running as stand alone no need in update + default: true + outputs: + - default: '' + lineno: 16 description: test a classifier using held-out or new data build: - functionSourceCode: IyBHZW5lcmF0ZWQgYnkgbnVjbGlvLmV4cG9ydC5OdWNsaW9FeHBvcnRlcgoKaW1wb3J0IHdhcm5pbmdzCndhcm5pbmdzLnNpbXBsZWZpbHRlcihhY3Rpb249Imlnbm9yZSIsIGNhdGVnb3J5PUZ1dHVyZVdhcm5pbmcpCgppbXBvcnQgb3MKaW1wb3J0IHBhbmRhcyBhcyBwZApmcm9tIG1scnVuLmRhdGFzdG9yZSBpbXBvcnQgRGF0YUl0ZW0KZnJvbSBtbHJ1bi5hcnRpZmFjdHMgaW1wb3J0IGdldF9tb2RlbCwgdXBkYXRlX21vZGVsCmZyb20gbWxydW4ubWx1dGlscyBpbXBvcnQgZXZhbF9tb2RlbF92Mgpmcm9tIGNsb3VkcGlja2xlIGltcG9ydCBsb2FkCmZyb20gdXJsbGliLnJlcXVlc3QgaW1wb3J0IHVybG9wZW4KCmRlZiB0ZXN0X2NsYXNzaWZpZXIoCiAgICBjb250ZXh0LAogICAgbW9kZWxzX3BhdGg6IERhdGFJdGVtLCAKICAgIHRlc3Rfc2V0OiBEYXRhSXRlbSwKICAgIGxhYmVsX2NvbHVtbjogc3RyLAogICAgc2NvcmVfbWV0aG9kOiBzdHIgPSAnbWljcm8nLAogICAgcGxvdHNfZGVzdDogc3RyID0gIiIsCiAgICBtb2RlbF9ldmFsdWF0b3IgPSBOb25lLAogICAgZGVmYXVsdF9tb2RlbDogc3RyID0gIm1vZGVsLnBrbCIsCiAgICBwcmVkaWN0aW9uc19jb2x1bW46IHN0ciA9ICd5c2NvcmUnCikgLT4gTm9uZToKICAgICIiIlRlc3Qgb25lIG9yIG1vcmUgY2xhc3NpZmllciBtb2RlbHMgYWdhaW5zdCBoZWxkLW91dCBkYXRhc2V0CiAgICAKICAgIFVzaW5nIGhlbGQtb3V0IHRlc3QgZmVhdHVyZXMsIGV2YWx1YXRlcyB0aGUgcGVmb3JtYW5jZSBvZiB0aGUgZXN0aW1hdGVkIG1vZGVsCiAgICAKICAgIENhbiBiZSBwYXJ0IG9mIGEga3ViZWZsb3cgcGlwZWxpbmUgYXMgYSB0ZXN0IHN0ZXAgdGhhdCBpcyBydW4gcG9zdCBFREEgYW5kIAogICAgdHJhaW5pbmcvdmFsaWRhdGlvbiBjeWNsZXMKICAgIAogICAgOnBhcmFtIGNvbnRleHQ6ICAgICAgICAgICAgdGhlIGZ1bmN0aW9uIGNvbnRleHQKICAgIDpwYXJhbSBtb2RlbHNfcGF0
aDogICAgICAgIGFydGlmYWN0IG1vZGVscyByZXByZXNlbnRpbmcgYSBmaWxlIG9yIGEgZm9sZGVyCiAgICA6cGFyYW0gdGVzdF9zZXQ6ICAgICAgICAgICB0ZXN0IGZlYXR1cmVzIGFuZCBsYWJlbHMKICAgIDpwYXJhbSBsYWJlbF9jb2x1bW46ICAgICAgIGNvbHVtbiBuYW1lIGZvciBncm91bmQgdHJ1dGggbGFiZWxzCiAgICA6cGFyYW0gc2NvcmVfbWV0aG9kOiAgICAgICBmb3IgbXVsdGljbGFzcyBjbGFzc2lmaWNhdGlvbgogICAgOnBhcmFtIHBsb3RzX2Rlc3Q6ICAgICAgICAgZGlyIGZvciB0ZXN0IHBsb3RzCiAgICA6cGFyYW0gbW9kZWxfZXZhbHVhdG9yOiAgICBOT1QgSU1QTEVNRU5URUQ6IHNwZWNpZmljIG1ldGhvZCB0byBnZW5lcmF0ZSBldmFsLCBwYXNzZWQgaW4gYXMgc3RyaW5nCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICBvciBhdmFpbGFibGUgaW4gdGhpcyBmb2xkZXIKICAgIDpwYXJhbSBwcmVkaWN0aW9uc19jb2x1bW46IGNvbHVtbiBuYW1lIGZvciB0aGUgcHJlZGljdGlvbnMgY29sdW1uIG9uIHRoZSByZXN1bHRlZCBhcnRpZmFjdAogICAgIiIiCiAgICB4dGVzdCA9IHRlc3Rfc2V0LmFzX2RmKCkKICAgIHl0ZXN0ID0geHRlc3QucG9wKGxhYmVsX2NvbHVtbikKICAgIAogICAgdHJ5OgogICAgICAgIG1vZGVsX2ZpbGUsIG1vZGVsX29iaiwgXyA9IGdldF9tb2RlbChtb2RlbHNfcGF0aCwgc3VmZml4PScucGtsJykKICAgICAgICBtb2RlbF9vYmogPSBsb2FkKG9wZW4obW9kZWxfZmlsZSwgInJiIikpCiAgICBleGNlcHQgRXhjZXB0aW9uIGFzIGE6CiAgICAgICAgcmFpc2UgRXhjZXB0aW9uKCJtb2RlbCBsb2NhdGlvbiBsaWtlbHkgc3BlY2lmaWVkIikKICAgIAogICAgZXh0cmFfZGF0YSA9IGV2YWxfbW9kZWxfdjIoY29udGV4dCwgeHRlc3QsIHl0ZXN0LnZhbHVlcywgbW9kZWxfb2JqKQogICAgaWYgbW9kZWxfb2JqOgogICAgICAgIHVwZGF0ZV9tb2RlbChtb2RlbHNfcGF0aCwgZXh0cmFfZGF0YT1leHRyYV9kYXRhLCAKICAgICAgICAgICAgICAgICAgICAgbWV0cmljcz1jb250ZXh0LnJlc3VsdHMsIGtleV9wcmVmaXg9J3ZhbGlkYXRpb24tJykKICAgIAogICAgeV9oYXQgPSBtb2RlbF9vYmoucHJlZGljdCh4dGVzdCkKICAgIGlmIHlfaGF0Lm5kaW0gPT0gMSBvciB5X2hhdC5zaGFwZVsxXSA9PSAxOgogICAgICAgIHNjb3JlX25hbWVzID0gW3ByZWRpY3Rpb25zX2NvbHVtbl0KICAgIGVsc2U6CiAgICAgICAgc2NvcmVfbmFtZXMgPSBbZiJ7cHJlZGljdGlvbnNfY29sdW1ufV8iICsgc3RyKHgpIGZvciB4IGluIHJhbmdlKHlfaGF0LnNoYXBlWzFdKV0KCiAgICBkZiA9IHBkLmNvbmNhdChbeHRlc3QsIHl0ZXN0LCBwZC5EYXRhRnJhbWUoeV9oYXQsIGNvbHVtbnM9c2NvcmVfbmFtZXMpXSwgYXhpcz0xKQogICAgY29udGV4dC5sb2dfZGF0YXNldCgidGVzdF9zZXRfcHJlZHMiLCBkZj1kZiwgZm9ybWF0PSJwYXJxdWV0IiwgaW5kZXg9RmFsc2UpCgo= + functionSourceCode: 
IyBHZW5lcmF0ZWQgYnkgbnVjbGlvLmV4cG9ydC5OdWNsaW9FeHBvcnRlcgoKaW1wb3J0IHdhcm5pbmdzCndhcm5pbmdzLnNpbXBsZWZpbHRlcihhY3Rpb249Imlnbm9yZSIsIGNhdGVnb3J5PUZ1dHVyZVdhcm5pbmcpCgppbXBvcnQgb3MKaW1wb3J0IHBhbmRhcyBhcyBwZAppbXBvcnQgbWxydW4KCmZyb20gbWxydW4uZGF0YXN0b3JlIGltcG9ydCBEYXRhSXRlbQpmcm9tIG1scnVuLmFydGlmYWN0cyBpbXBvcnQgZ2V0X21vZGVsLCB1cGRhdGVfbW9kZWwKZnJvbSBtbHJ1bi5tbHV0aWxzIGltcG9ydCBldmFsX21vZGVsX3YyCmZyb20gY2xvdWRwaWNrbGUgaW1wb3J0IGxvYWQKZnJvbSB1cmxsaWIucmVxdWVzdCBpbXBvcnQgdXJsb3BlbgoKZGVmIHRlc3RfY2xhc3NpZmllcigKICAgIGNvbnRleHQsCiAgICBtb2RlbHNfcGF0aDogRGF0YUl0ZW0sIAogICAgdGVzdF9zZXQ6IERhdGFJdGVtLAogICAgbGFiZWxfY29sdW1uOiBzdHIsCiAgICBzY29yZV9tZXRob2Q6IHN0ciA9ICdtaWNybycsCiAgICBwbG90c19kZXN0OiBzdHIgPSAiIiwKICAgIG1vZGVsX2V2YWx1YXRvciA9IE5vbmUsCiAgICBkZWZhdWx0X21vZGVsOiBzdHIgPSAibW9kZWwucGtsIiwKICAgIHByZWRpY3Rpb25zX2NvbHVtbjogc3RyID0gJ3lzY29yZScsCiAgICBtb2RlbF91cGRhdGUgPSBUcnVlCikgLT4gTm9uZToKICAgICIiIlRlc3Qgb25lIG9yIG1vcmUgY2xhc3NpZmllciBtb2RlbHMgYWdhaW5zdCBoZWxkLW91dCBkYXRhc2V0CiAgICAKICAgIFVzaW5nIGhlbGQtb3V0IHRlc3QgZmVhdHVyZXMsIGV2YWx1YXRlcyB0aGUgcGVmb3JtYW5jZSBvZiB0aGUgZXN0aW1hdGVkIG1vZGVsCiAgICAKICAgIENhbiBiZSBwYXJ0IG9mIGEga3ViZWZsb3cgcGlwZWxpbmUgYXMgYSB0ZXN0IHN0ZXAgdGhhdCBpcyBydW4gcG9zdCBFREEgYW5kIAogICAgdHJhaW5pbmcvdmFsaWRhdGlvbiBjeWNsZXMKICAgIAogICAgOnBhcmFtIGNvbnRleHQ6ICAgICAgICAgICAgdGhlIGZ1bmN0aW9uIGNvbnRleHQKICAgIDpwYXJhbSBtb2RlbHNfcGF0aDogICAgICAgIGFydGlmYWN0IG1vZGVscyByZXByZXNlbnRpbmcgYSBmaWxlIG9yIGEgZm9sZGVyCiAgICA6cGFyYW0gdGVzdF9zZXQ6ICAgICAgICAgICB0ZXN0IGZlYXR1cmVzIGFuZCBsYWJlbHMKICAgIDpwYXJhbSBsYWJlbF9jb2x1bW46ICAgICAgIGNvbHVtbiBuYW1lIGZvciBncm91bmQgdHJ1dGggbGFiZWxzCiAgICA6cGFyYW0gc2NvcmVfbWV0aG9kOiAgICAgICBmb3IgbXVsdGljbGFzcyBjbGFzc2lmaWNhdGlvbgogICAgOnBhcmFtIHBsb3RzX2Rlc3Q6ICAgICAgICAgZGlyIGZvciB0ZXN0IHBsb3RzCiAgICA6cGFyYW0gbW9kZWxfZXZhbHVhdG9yOiAgICBOT1QgSU1QTEVNRU5URUQ6IHNwZWNpZmljIG1ldGhvZCB0byBnZW5lcmF0ZSBldmFsLCBwYXNzZWQgaW4gYXMgc3RyaW5nCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICBvciBhdmFpbGFibGUgaW4gdGhpcyBmb2xkZXIKICAgIDpwYXJhbSBwcmVkaWN0aW9uc19jb2x1
bW46IGNvbHVtbiBuYW1lIGZvciB0aGUgcHJlZGljdGlvbnMgY29sdW1uIG9uIHRoZSByZXN1bHRlZCBhcnRpZmFjdAogICAgOnBhcmFtIG1vZGVsX3VwZGF0ZTogICAgICAgKFRydWUpIHVwZGF0ZSBtb2RlbCwgd2hlbiBydW5uaW5nIGFzIHN0YW5kIGFsb25lIG5vIG5lZWQgaW4gdXBkYXRlCiAgICAiIiIKICAgIHh0ZXN0ID0gdGVzdF9zZXQuYXNfZGYoKQogICAgeXRlc3QgPSB4dGVzdC5wb3AobGFiZWxfY29sdW1uKQogICAgCiAgICB0cnk6CiAgICAgICAgbW9kZWxfZmlsZSwgbW9kZWxfb2JqLCBfID0gZ2V0X21vZGVsKG1vZGVsc19wYXRoLCBzdWZmaXg9Jy5wa2wnKQogICAgICAgIG1vZGVsX29iaiA9IGxvYWQob3Blbihtb2RlbF9maWxlLCAicmIiKSkKICAgIGV4Y2VwdCBFeGNlcHRpb24gYXMgYToKICAgICAgICByYWlzZSBFeGNlcHRpb24oIm1vZGVsIGxvY2F0aW9uIGxpa2VseSBzcGVjaWZpZWQiKQogICAgCiAgICBleHRyYV9kYXRhID0gZXZhbF9tb2RlbF92Mihjb250ZXh0LCB4dGVzdCwgeXRlc3QudmFsdWVzLCBtb2RlbF9vYmopCiAgICBpZiBtb2RlbF9vYmogYW5kIG1vZGVsX3VwZGF0ZSA9PSBUcnVlOgogICAgICAgIHVwZGF0ZV9tb2RlbChtb2RlbHNfcGF0aCwgZXh0cmFfZGF0YT1leHRyYV9kYXRhLCAKICAgICAgICAgICAgICAgICAgICAgbWV0cmljcz1jb250ZXh0LnJlc3VsdHMsIGtleV9wcmVmaXg9J3ZhbGlkYXRpb24tJykKICAgIAogICAgeV9oYXQgPSBtb2RlbF9vYmoucHJlZGljdCh4dGVzdCkKICAgIGlmIHlfaGF0Lm5kaW0gPT0gMSBvciB5X2hhdC5zaGFwZVsxXSA9PSAxOgogICAgICAgIHNjb3JlX25hbWVzID0gW3ByZWRpY3Rpb25zX2NvbHVtbl0KICAgIGVsc2U6CiAgICAgICAgc2NvcmVfbmFtZXMgPSBbZiJ7cHJlZGljdGlvbnNfY29sdW1ufV8iICsgc3RyKHgpIGZvciB4IGluIHJhbmdlKHlfaGF0LnNoYXBlWzFdKV0KCiAgICBkZiA9IHBkLmNvbmNhdChbeHRlc3QsIHl0ZXN0LCBwZC5EYXRhRnJhbWUoeV9oYXQsIGNvbHVtbnM9c2NvcmVfbmFtZXMpXSwgYXhpcz0xKQogICAgY29udGV4dC5sb2dfZGF0YXNldCgidGVzdF9zZXRfcHJlZHMiLCBkZj1kZiwgZm9ybWF0PSJwYXJxdWV0IiwgaW5kZXg9RmFsc2UpCgo= commands: [] - code_origin: https://github.com/mlrun/functions#c60a3607cf4805a927738969a8e4730c01e803d6:test_classifier.ipynb + code_origin: https://github.com/Idan707/functions.git#877277c6378d0bd61e1938e6c6c9bb9e51810fcb:test_classifier.ipynb diff --git a/test_classifier/test-classifier.py b/test_classifier/test-classifier.py index 511d29967..75af1012e 100644 --- a/test_classifier/test-classifier.py +++ b/test_classifier/test-classifier.py @@ -5,6 +5,8 @@ import os import pandas as pd +import mlrun + from 
mlrun.datastore import DataItem from mlrun.artifacts import get_model, update_model from mlrun.mlutils import eval_model_v2 @@ -20,7 +22,8 @@ def test_classifier( plots_dest: str = "", model_evaluator = None, default_model: str = "model.pkl", - predictions_column: str = 'yscore' + predictions_column: str = 'yscore', + model_update = True ) -> None: """Test one or more classifier models against held-out dataset @@ -38,6 +41,7 @@ def test_classifier( :param model_evaluator: NOT IMPLEMENTED: specific method to generate eval, passed in as string or available in this folder :param predictions_column: column name for the predictions column on the resulted artifact + :param model_update: (True) update model, when running as stand alone no need in update """ xtest = test_set.as_df() ytest = xtest.pop(label_column) @@ -49,7 +53,7 @@ def test_classifier( raise Exception("model location likely specified") extra_data = eval_model_v2(context, xtest, ytest.values, model_obj) - if model_obj: + if model_obj and model_update == True: update_model(models_path, extra_data=extra_data, metrics=context.results, key_prefix='validation-') diff --git a/test_classifier/test_classifier.ipynb b/test_classifier/test_classifier.ipynb index 6832924be..bc2711406 100644 --- a/test_classifier/test_classifier.ipynb +++ b/test_classifier/test_classifier.ipynb @@ -36,17 +36,11 @@ "outputs": [], "source": [ "import warnings\n", - "warnings.simplefilter(action=\"ignore\", category=FutureWarning)" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [], - "source": [ + "warnings.filterwarnings('ignore')\n", + "\n", "import os\n", "import pandas as pd\n", + "\n", "from mlrun.datastore import DataItem\n", "from mlrun.artifacts import get_model, update_model\n", "from mlrun.mlutils import eval_model_v2\n", @@ -62,7 +56,8 @@ " plots_dest: str = \"\",\n", " model_evaluator = None,\n", " default_model: str = \"model.pkl\",\n", - " predictions_column: str = 'yscore'\n", + 
" predictions_column: str = 'yscore',\n", + " model_update = True\n", ") -> None:\n", " \"\"\"Test one or more classifier models against held-out dataset\n", " \n", @@ -80,6 +75,7 @@ " :param model_evaluator: NOT IMPLEMENTED: specific method to generate eval, passed in as string\n", " or available in this folder\n", " :param predictions_column: column name for the predictions column on the resulted artifact\n", + " :param model_update: (True) update model, when running as stand alone no need in update\n", " \"\"\"\n", " xtest = test_set.as_df()\n", " ytest = xtest.pop(label_column)\n", @@ -91,7 +87,7 @@ " raise Exception(\"model location likely specified\")\n", " \n", " extra_data = eval_model_v2(context, xtest, ytest.values, model_obj)\n", - " if model_obj:\n", + " if model_obj and model_update == True:\n", " update_model(models_path, extra_data=extra_data, \n", " metrics=context.results, key_prefix='validation-')\n", " \n", @@ -110,7 +106,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ @@ -125,24 +121,30 @@ } }, "source": [ - "### mlconfig" + "### MLconfig" ] }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 5, "metadata": { "pycharm": { "name": "#%%\n" } }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "> 2020-10-28 15:23:07,168 [warning] warning!, server (0.5.3-rc1) and client (0.5.2) ver dont match\n" + ] + } + ], "source": [ - "from mlrun import mlconf\n", - "import os\n", - "\n", - "mlconf.dbpath = mlconf.dbpath or 'http://mlrun-api:8080'\n", - "artifact_path = mlconf.artifact_path or os.path.abspath('./')" + "import mlrun\n", + "artifact_path = mlrun.set_environment(api_path = 'http://mlrun-api:8080',\n", + " artifact_path = os.path.abspath('./'))" ] }, { @@ -153,12 +155,12 @@ } }, "source": [ - "### save" + "### Save" ] }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 6, "metadata": { 
"pycharm": { "name": "#%%\n" @@ -169,28 +171,28 @@ "name": "stdout", "output_type": "stream", "text": [ - "[mlrun] 2020-07-13 16:17:02,696 function spec saved to path: function.yaml\n" + "> 2020-10-28 15:23:15,013 [info] function spec saved to path: function.yaml\n" ] }, { "data": { "text/plain": [ - "" + "" ] }, - "execution_count": 12, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "from mlrun import code_to_function \n", "# create job function object from notebook code\n", - "fn = code_to_function(\"test_classifier\", handler=\"test_classifier\",\n", - " description=\"test a classifier using held-out or new data\",\n", - " categories=[\"ml\", \"test\"],\n", - " labels = {\"author\": \"yjb\", \"framework\": \"sklearn\"},\n", - " code_output='.')\n", + "fn = mlrun.code_to_function(\"test_classifier\", \n", + " handler=\"test_classifier\",\n", + " description=\"test a classifier using held-out or new data\",\n", + " categories=[\"ml\", \"test\"],\n", + " labels = {\"author\": \"yjb\", \"framework\": \"sklearn\"},\n", + " code_output='.')\n", "fn.export()" ] }, @@ -202,12 +204,12 @@ } }, "source": [ - "## tests" + "## Tests" ] }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 7, "metadata": { "pycharm": { "name": "#%%\n" @@ -217,29 +219,16 @@ { "data": { "text/plain": [ - "" + "" ] }, - "execution_count": 8, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "from mlrun import mount_v3io\n", - "fn.apply(mount_v3io())" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [], - "source": [ - "task_params = {\n", - " \"name\" : \"tasks test classifier\",\n", - " \"params\": {\n", - " \"label_column\" : \"labels\"}}" + "fn.apply(mlrun.platforms.auto_mount())" ] }, { @@ -250,7 +239,7 @@ } }, "source": [ - "### run locally" + "### Run Locally" ] }, { @@ -259,9 +248,8 @@ "metadata": {}, "outputs": [], "source": [ - "TEST_REPO = 
\"https://raw.githubusercontent.com/yjb-ds/testdata/master\"\n", - "DATA_PATH = \"/User/ml2/test_set.parquet\"\n", - "MODELS_PATH = \"/User/artifacts/multi-models/sklearn.linear_model.LogisticRegression/model.pkl\"" + "DATA_PATH = 'https://s3.wasabisys.com/iguazio/data/iris/iris_dataset.csv'\n", + "MODEL_PATH = 'https://s3.wasabisys.com/iguazio/models/iris/model.pkl'" ] }, { @@ -277,28 +265,24 @@ "name": "stdout", "output_type": "stream", "text": [ - "[mlrun] 2020-06-05 23:36:35,094 starting run tasks test classifier uid=9958837817a048dea70fe0b6780484c2 -> http://10.199.227.162:8080\n", - "[mlrun] 2020-06-05 23:36:36,015 log artifact confusion-matrix at /User/ml2/test/plots/confusion-matrix.html, size: 20273, db: N\n", - "[mlrun] 2020-06-05 23:36:36,273 log artifact feature-importances at /User/ml2/test/plots/feature-importances.html, size: 11857, db: N\n", - "[mlrun] 2020-06-05 23:36:36,393 log artifact precision-recall-multiclass at /User/ml2/test/plots/precision-recall-multiclass.html, size: 55889, db: N\n", - "[mlrun] 2020-06-05 23:36:36,523 log artifact roc-multiclass at /User/ml2/test/plots/roc-multiclass.html, size: 34633, db: N\n", - "[mlrun] 2020-06-05 23:36:36,928 log artifact test_set_preds at /User/ml2/test/test_set_preds.parquet, size: 702584, db: Y\n", - "\n" + "> 2020-10-28 15:23:15,049 [warning] warning!, server (0.5.3-rc1) and client (0.5.2) ver dont match\n", + "> 2020-10-28 15:23:15,049 [info] starting run mlrun-ffeba8-test_classifier uid=40514aecc0d64b8ebf1f2efb32198484 -> http://mlrun-api:8080\n", + "> 2020-10-28 15:23:15,093 [warning] warning!, server (0.5.3-rc1) and client (0.5.2) ver dont match\n" ] }, { "data": { "text/html": [ - "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
projectuiditerstartstatenamelabelsinputsparametersresultsartifacts
default0Oct 28 15:23:19runningtest-classifier-test_classifier
v3io_user=admin
kind=job
owner=admin
test_set
models_path
label_column=label
model_update=False
\n", + "
\n", + "
\n", + "
\n", + " Title\n", + " ×\n", + "
\n", + " \n", + "
\n", + "
\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "to track results use .show() or .logs() or in CLI: \n", + "!mlrun get run 11372c9755964a5bb76599f4821a3bd1 --project default , !mlrun logs 11372c9755964a5bb76599f4821a3bd1 --project default\n", + "> 2020-10-28 15:23:19,488 [info] run executed, status=running\n" + ] + }, + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "fn.run(mlrun.NewTask(params= {'label_column':'label',\n", + " 'model_update': False}), #Change to True when you have a old model metadata to update\n", + " handler=test_classifier,\n", + " inputs={\"test_set\": DATA_PATH,\n", + " \"models_path\": MODEL_PATH})" + ] } ], "metadata": { @@ -566,7 +789,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.8" + "version": "3.7.6" } }, "nbformat": 4, diff --git a/tf2_serving/tf2_serving.ipynb b/tf2_serving/tf2_serving.ipynb index 126f17502..5142fd379 100644 --- a/tf2_serving/tf2_serving.ipynb +++ b/tf2_serving/tf2_serving.ipynb @@ -558,7 +558,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.8" + "version": "3.7.6" } }, "nbformat": 4, diff --git a/v2_model_server/function.yaml b/v2_model_server/function.yaml index a7149b32b..441b5b656 100644 --- a/v2_model_server/function.yaml +++ b/v2_model_server/function.yaml @@ -2,7 +2,7 @@ kind: serving metadata: name: v2-model-server tag: '' - hash: 22485e4f7ad229768915af4ef48b058d6af4476e + hash: 06e79919a7cdc95ceeaf430fb10fc935a932e2f0 project: default labels: author: yaronh @@ -23,7 +23,7 @@ spec: default: '' outputs: - default: '' - lineno: 10 + lineno: 14 predict: name: predict doc: Generate model predictions from sample. 
@@ -36,7 +36,7 @@ spec: outputs: - default: '' type: List - lineno: 15 + lineno: 19 init_context: name: init_context doc: '' @@ -45,7 +45,7 @@ spec: default: '' outputs: - default: '' - lineno: 23 + lineno: 27 handler: name: handler doc: '' @@ -56,7 +56,7 @@ spec: default: '' outputs: - default: '' - lineno: 26 + lineno: 30 description: generic sklearn model server min_replicas: 1 max_replicas: 4 @@ -66,15 +66,14 @@ spec: kind: Function metadata: annotations: - nuclio.io/generated_by: function generated from 12-10-2020 by admin + nuclio.io/generated_by: function generated from 06-12-2020 by admin labels: {} name: v2-model-server spec: build: - baseImage: mlrun/mlrun - commands: - - python -m pip install numpy cloudpickle v3io sklearn - functionSourceCode: IyBHZW5lcmF0ZWQgYnkgbnVjbGlvLmV4cG9ydC5OdWNsaW9FeHBvcnRlcgoKaW1wb3J0IG1scnVuCgpmcm9tIGNsb3VkcGlja2xlIGltcG9ydCBsb2FkCmltcG9ydCBudW1weSBhcyBucApmcm9tIHR5cGluZyBpbXBvcnQgTGlzdAoKY2xhc3MgQ2xhc3NpZmllck1vZGVsKG1scnVuLnNlcnZpbmcuVjJNb2RlbFNlcnZlcik6CiAgICBkZWYgbG9hZChzZWxmKToKICAgICAgICAiIiJsb2FkIGFuZCBpbml0aWFsaXplIHRoZSBtb2RlbCBhbmQvb3Igb3RoZXIgZWxlbWVudHMiIiIKICAgICAgICBtb2RlbF9maWxlLCBleHRyYV9kYXRhID0gc2VsZi5nZXRfbW9kZWwoJy5wa2wnKQogICAgICAgIHNlbGYubW9kZWwgPSBsb2FkKG9wZW4obW9kZWxfZmlsZSwgJ3JiJykpCgogICAgZGVmIHByZWRpY3Qoc2VsZiwgYm9keTogZGljdCkgLT4gTGlzdDoKICAgICAgICAiIiJHZW5lcmF0ZSBtb2RlbCBwcmVkaWN0aW9ucyBmcm9tIHNhbXBsZS4iIiIKICAgICAgICBmZWF0cyA9IG5wLmFzYXJyYXkoYm9keVsnaW5wdXRzJ10pCiAgICAgICAgcmVzdWx0OiBucC5uZGFycmF5ID0gc2VsZi5tb2RlbC5wcmVkaWN0KGZlYXRzKQogICAgICAgIHJldHVybiByZXN1bHQudG9saXN0KCkKCgpmcm9tIG1scnVuLnJ1bnRpbWVzIGltcG9ydCBudWNsaW9faW5pdF9ob29rCmRlZiBpbml0X2NvbnRleHQoY29udGV4dCk6CiAgICBudWNsaW9faW5pdF9ob29rKGNvbnRleHQsIGdsb2JhbHMoKSwgJ3NlcnZpbmdfdjInKQoKZGVmIGhhbmRsZXIoY29udGV4dCwgZXZlbnQpOgogICAgcmV0dXJuIGNvbnRleHQubWxydW5faGFuZGxlcihjb250ZXh0LCBldmVudCkK + baseImage: mlrun/ml-models + commands: [] + functionSourceCode: 
IyBHZW5lcmF0ZWQgYnkgbnVjbGlvLmV4cG9ydC5OdWNsaW9FeHBvcnRlcgoKaW1wb3J0IG1scnVuCgpmcm9tIGNsb3VkcGlja2xlIGltcG9ydCBsb2FkCmZyb20gdHlwaW5nIGltcG9ydCBMaXN0CmZyb20gc2tsZWFybi5kYXRhc2V0cyBpbXBvcnQgbG9hZF9pcmlzCmltcG9ydCBudW1weSBhcyBucAoKaW1wb3J0IHdhcm5pbmdzIAp3YXJuaW5ncy5maWx0ZXJ3YXJuaW5ncygnaWdub3JlJykKCmNsYXNzIENsYXNzaWZpZXJNb2RlbChtbHJ1bi5zZXJ2aW5nLlYyTW9kZWxTZXJ2ZXIpOgogICAgZGVmIGxvYWQoc2VsZik6CiAgICAgICAgIiIibG9hZCBhbmQgaW5pdGlhbGl6ZSB0aGUgbW9kZWwgYW5kL29yIG90aGVyIGVsZW1lbnRzIiIiCiAgICAgICAgbW9kZWxfZmlsZSwgZXh0cmFfZGF0YSA9IHNlbGYuZ2V0X21vZGVsKCcucGtsJykKICAgICAgICBzZWxmLm1vZGVsID0gbG9hZChvcGVuKG1vZGVsX2ZpbGUsICdyYicpKQoKICAgIGRlZiBwcmVkaWN0KHNlbGYsIGJvZHk6IGRpY3QpIC0+IExpc3Q6CiAgICAgICAgIiIiR2VuZXJhdGUgbW9kZWwgcHJlZGljdGlvbnMgZnJvbSBzYW1wbGUuIiIiCiAgICAgICAgZmVhdHMgPSBucC5hc2FycmF5KGJvZHlbJ2lucHV0cyddKQogICAgICAgIHJlc3VsdDogbnAubmRhcnJheSA9IHNlbGYubW9kZWwucHJlZGljdChmZWF0cykKICAgICAgICByZXR1cm4gcmVzdWx0LnRvbGlzdCgpCgoKZnJvbSBtbHJ1bi5ydW50aW1lcyBpbXBvcnQgbnVjbGlvX2luaXRfaG9vawpkZWYgaW5pdF9jb250ZXh0KGNvbnRleHQpOgogICAgbnVjbGlvX2luaXRfaG9vayhjb250ZXh0LCBnbG9iYWxzKCksICdzZXJ2aW5nX3YyJykKCmRlZiBoYW5kbGVyKGNvbnRleHQsIGV2ZW50KToKICAgIHJldHVybiBjb250ZXh0Lm1scnVuX2hhbmRsZXIoY29udGV4dCwgZXZlbnQpCg== noBaseImagesPull: true env: [] handler: v2_model_server:handler @@ -82,4 +81,6 @@ spec: volumes: [] source: '' function_kind: serving_v2 + graph: + kind: router default_class: ClassifierModel diff --git a/v2_model_server/v2-model-server.py b/v2_model_server/v2-model-server.py index 2fb44b742..c5bd59db3 100644 --- a/v2_model_server/v2-model-server.py +++ b/v2_model_server/v2-model-server.py @@ -3,8 +3,12 @@ import mlrun from cloudpickle import load -import numpy as np from typing import List +from sklearn.datasets import load_iris +import numpy as np + +import warnings +warnings.filterwarnings('ignore') class ClassifierModel(mlrun.serving.V2ModelServer): def load(self): diff --git a/v2_model_server/v2_model_server.ipynb b/v2_model_server/v2_model_server.ipynb index 
0bd7fcbe3..da88b89a4 100644 --- a/v2_model_server/v2_model_server.ipynb +++ b/v2_model_server/v2_model_server.ipynb @@ -11,15 +11,7 @@ "cell_type": "code", "execution_count": 1, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "> 2020-10-12 14:52:57,535 [warning] Failed resolving version info. Ignoring and using defaults\n" - ] - } - ], + "outputs": [], "source": [ "import mlrun" ] @@ -47,17 +39,7 @@ ], "source": [ "%nuclio config kind=\"serving\"\n", - "%nuclio config spec.build.baseImage = \"mlrun/mlrun\"" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ - "%%nuclio cmd -c\n", - "python -m pip install numpy cloudpickle v3io sklearn" + "%nuclio config spec.build.baseImage=\"mlrun/mlrun\"" ] }, { @@ -69,18 +51,22 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "from cloudpickle import load\n", + "from typing import List\n", + "from sklearn.datasets import load_iris\n", "import numpy as np\n", - "from typing import List" + "\n", + "import warnings \n", + "warnings.filterwarnings('ignore')" ] }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ @@ -99,7 +85,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ @@ -113,22 +99,6 @@ "# Convert to function object" ] }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The sklearn-project generated one or more models that will be deployed in the server project `sklearn-servers`" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [], - "source": [ - "models_path = 'https://s3.wasabisys.com/iguazio/models/iris/model.pkl'" - ] - }, { "cell_type": "markdown", "metadata": {}, @@ -138,32 +108,35 @@ }, { "cell_type": "code", - "execution_count": 16, + 
"execution_count": 6, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "> 2020-10-12 14:47:46,331 [info] function spec saved to path: function.yaml\n" + "> 2020-12-06 11:49:27,049 [info] function spec saved to path: function.yaml\n" ] }, { "data": { "text/plain": [ - "" + "" ] }, - "execution_count": 16, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "fn = mlrun.code_to_function('v2-model-server', description=\"generic sklearn model server\",\n", - " categories=['serving', 'ml'],\n", - " labels={'author': 'yaronh', 'framework': 'sklearn'},\n", - " code_output='.')\n", + "import mlrun\n", + "fn = mlrun.code_to_function('v2-model-server', \n", + " description=\"generic sklearn model server\",\n", + " categories=['serving', 'ml'],\n", + " labels={'author': 'yaronh', 'framework': 'sklearn'},\n", + " code_output='.')\n", + "\n", "fn.spec.default_class = 'ClassifierModel'\n", "#print(fn.to_yaml())\n", "fn.export()" @@ -178,10 +151,12 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 7, "metadata": {}, "outputs": [], "source": [ + "models_path = 'https://s3.wasabisys.com/iguazio/models/iris/model.pkl'\n", + "mlrun.mlconf.dbpath = mlrun.mlconf.dbpath or 'http://mlrun-api:8080'\n", "fn.add_model('mymodel', model_path=models_path)\n", "#fn.verbose = True" ] @@ -195,9 +170,20 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'routes': }\n", + "{'model_path': 'https://s3.wasabisys.com/iguazio/models/iris/model.pkl'}\n", + "> 2020-12-06 11:49:27,287 [info] model mymodel was loaded\n", + "> 2020-12-06 11:49:27,288 [info] Loaded ['mymodel']\n" + ] + } + ], "source": [ "# create an emulator (mock server) from the function configuration)\n", "server = fn.to_mock_server(globals())" @@ -212,35 +198,26 @@ }, { "cell_type": "code", - 
"execution_count": 12, + "execution_count": 9, "metadata": {}, "outputs": [], "source": [ - "from sklearn.datasets import load_iris\n", "iris = load_iris()\n", "x = iris['data'].tolist()" ] }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 10, "metadata": {}, "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "> 2020-10-12 14:44:19,297 [debug] router run model mymodel, op=infer\n", - "> 2020-10-12 14:44:19,297 [debug] router run model mymodel, op=infer\n" - ] - }, { "data": { "text/plain": [ "dict_keys(['id', 'model_name', 'outputs'])" ] }, - "execution_count": 13, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } @@ -259,33 +236,56 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "fn.apply(mlrun.mount_v3io())" + ] + }, + { + "cell_type": "code", + "execution_count": 12, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "> 2020-10-12 14:44:20,662 [info] deploy started\n", - "[nuclio] 2020-10-12 14:44:21,772 (info) Build complete\n", - "[nuclio] 2020-10-12 14:44:29,852 (info) Function deploy complete\n", - "[nuclio] 2020-10-12 14:44:29,859 done updating v2-srv-v2-model-server, function address: 3.128.234.166:30830\n" + "> 2020-12-06 11:49:27,353 [info] Starting remote function deploy\n", + "2020-12-06 11:49:27 (info) Deploying function\n", + "2020-12-06 11:49:27 (info) Building\n", + "2020-12-06 11:49:27 (info) Staging files and preparing base images\n", + "2020-12-06 11:49:27 (info) Building processor image\n", + "2020-12-06 11:53:30 (info) Build complete\n", + "2020-12-06 11:53:36 (info) Function deploy complete\n", + "> 2020-12-06 11:53:36,887 [info] function deployed, address=default-tenant.app.yh210.iguazio-cd2.com:31544\n" ] }, { "data": { 
"text/plain": [ - "'http://3.128.234.166:30830'" + "'http://default-tenant.app.yh210.iguazio-cd2.com:31544'" ] }, - "execution_count": 14, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "fn.apply(mlrun.mount_v3io())\n", - "fn.deploy(project='v2-srv')" + "fn.deploy()" ] }, { @@ -297,18 +297,18 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "{'id': 'bd688fa9-38cc-4a5d-95e8-1445bdd1520a',\n", + "{'id': '85877b5c-28a5-40dd-98d2-9c4e234ada57',\n", " 'model_name': 'mymodel',\n", " 'outputs': [0, 2]}" ] }, - "execution_count": 15, + "execution_count": 13, "metadata": {}, "output_type": "execute_result" } @@ -328,9 +328,9 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python [conda env:root] *", "language": "python", - "name": "python3" + "name": "conda-root-py" }, "language_info": { "codemirror_mode": { diff --git a/v2_model_tester/function.yaml b/v2_model_tester/function.yaml index 5a7b9d7be..7bbb6b1ad 100644 --- a/v2_model_tester/function.yaml +++ b/v2_model_tester/function.yaml @@ -2,7 +2,7 @@ kind: job metadata: name: v2-model-tester tag: '' - hash: bbffca7f8decc17c1de599015984a548bd60702e + hash: 989fa7ebf36b83e40fbb657b708a894cbbda3a81 project: default labels: author: yaronh @@ -48,9 +48,9 @@ spec: default: 20 outputs: - default: '' - lineno: 12 + lineno: 13 description: test v2 model servers build: - functionSourceCode: 
IyBHZW5lcmF0ZWQgYnkgbnVjbGlvLmV4cG9ydC5OdWNsaW9FeHBvcnRlcgoKaW1wb3J0IG9zCmltcG9ydCBwYW5kYXMgYXMgcGQKaW1wb3J0IHJlcXVlc3RzCmltcG9ydCBqc29uCmltcG9ydCBudW1weSBhcyBucApmcm9tIGRhdGV0aW1lIGltcG9ydCBkYXRldGltZQpmcm9tIG1scnVuLmRhdGFzdG9yZSBpbXBvcnQgRGF0YUl0ZW0KZnJvbSBtbHJ1bi5hcnRpZmFjdHMgaW1wb3J0IENoYXJ0QXJ0aWZhY3QKCmRlZiBtb2RlbF9zZXJ2ZXJfdGVzdGVyKGNvbnRleHQsCiAgICAgICAgICAgICAgICAgICAgICAgIHRhYmxlOiBEYXRhSXRlbSwKICAgICAgICAgICAgICAgICAgICAgICAgYWRkcjogc3RyLCAKICAgICAgICAgICAgICAgICAgICAgICAgbGFiZWxfY29sdW1uOiBzdHIgPSAibGFiZWwiLAogICAgICAgICAgICAgICAgICAgICAgICBtb2RlbDogc3RyID0gJycsCiAgICAgICAgICAgICAgICAgICAgICAgIG1hdGNoX2VycjogYm9vbCA9IEZhbHNlLAogICAgICAgICAgICAgICAgICAgICAgICByb3dzOiBpbnQgPSAyMCk6CiAgICAiIiIgVGVzdCBhIG1vZGVsIHNlcnZlciAKICAgIAogICAgOnBhcmFtIHRhYmxlOiAgICAgICAgIGNzdi9wYXJxdWV0IHRhYmxlIHdpdGggdGVzdCBkYXRhCiAgICA6cGFyYW0gYWRkcjogICAgICAgICAgZnVuY3Rpb24gYWRkcmVzcy91cmwKICAgIDpwYXJhbSBsYWJlbF9jb2x1bW46ICBuYW1lIG9mIHRoZSBsYWJlbCBjb2x1bW4gaW4gdGFibGUKICAgIDpwYXJhbSBtb2RlbDogICAgICAgICB0ZXN0ZWQgbW9kZWwgbmFtZSAKICAgIDpwYXJhbSBtYXRjaF9lcnI6ICAgICByYWlzZSBlcnJvciBvbiB2YWxpZGF0aW9uIChyZXF1aXJlIHByb3BlciB0ZXN0IHNldCkKICAgIDpwYXJhbSByb3dzOiAgICAgICAgICBudW1iZXIgb2Ygcm93cyB0byB1c2UgZnJvbSB0ZXN0IHNldAogICAgIiIiCiAgICAgICAgCiAgICB0YWJsZSA9IHRhYmxlLmFzX2RmKCkKCiAgICB5X2xpc3QgPSB0YWJsZS5wb3AobGFiZWxfY29sdW1uKS52YWx1ZXMudG9saXN0KCkKICAgIGNvbnRleHQubG9nZ2VyLmluZm8oZid0ZXN0aW5nIHdpdGggZGF0YXNldCBhZ2FpbnN0IHthZGRyfSwgbW9kZWw6IHttb2RlbH0nKQogICAgaWYgcm93cyBhbmQgcm93cyA8IHRhYmxlLnNoYXBlWzBdOgogICAgICAgIHRhYmxlID0gdGFibGUuc2FtcGxlKHJvd3MpCiAgICAKICAgIGNvdW50ID0gZXJyX2NvdW50ID0gbWF0Y2ggPSAwCiAgICB0aW1lcyA9IFtdCiAgICBmb3IgeCwgeSBpbiB6aXAodGFibGUudmFsdWVzLCB5X2xpc3QpOgogICAgICAgIGNvdW50ICs9IDEKICAgICAgICBldmVudF9kYXRhID0ganNvbi5kdW1wcyh7ImlucHV0cyI6W3gudG9saXN0KCldfSkKICAgICAgICBoYWRfZXJyID0gRmFsc2UKICAgICAgICB0cnk6CiAgICAgICAgICAgIHN0YXJ0ID0gZGF0ZXRpbWUubm93KCkKICAgICAgICAgICAgcmVzcCA9IHJlcXVlc3RzLnB1dChmJ3thZGRyfS92Mi9tb2RlbHMve21vZGVsfS9pbmZlcicsIGpzb249ZXZlbnRfZGF0YSkKICAgICAgICAg
ICAgaWYgbm90IHJlc3Aub2s6CiAgICAgICAgICAgICAgICBjb250ZXh0LmxvZ2dlci5lcnJvcihmJ2JhZCBmdW5jdGlvbiByZXNwISFcbntyZXNwLnRleHR9JykKICAgICAgICAgICAgICAgIGVycl9jb3VudCArPSAxCiAgICAgICAgICAgICAgICBjb250aW51ZQogICAgICAgICAgICB0aW1lcy5hcHBlbmQoKGRhdGV0aW1lLm5vdygpLXN0YXJ0KS5taWNyb3NlY29uZHMpCiAgICAgICAgICAgICAgICAKICAgICAgICBleGNlcHQgT1NFcnJvciBhcyBlcnI6CiAgICAgICAgICAgIGNvbnRleHQubG9nZ2VyLmVycm9yKGYnZXJyb3IgaW4gcmVxdWVzdCwgZGF0YTp7ZXZlbnRfZGF0YX0sIGVycm9yOiB7ZXJyfScpCiAgICAgICAgICAgIGVycl9jb3VudCArPSAxCiAgICAgICAgICAgIGNvbnRpbnVlCiAgICAgICAgCiAgICAgICAgcmVzcF9kYXRhID0gcmVzcC5qc29uKCkKICAgICAgICBwcmludChyZXNwX2RhdGEpCiAgICAgICAgeV9yZXNwID0gcmVzcF9kYXRhWydvdXRwdXRzJ11bMF0KICAgICAgICBpZiB5ID09IHlfcmVzcDoKICAgICAgICAgICAgbWF0Y2ggKz0gMQogICAgICAgIAogICAgY29udGV4dC5sb2dfcmVzdWx0KCd0b3RhbF90ZXN0cycsIGNvdW50KQogICAgY29udGV4dC5sb2dfcmVzdWx0KCdlcnJvcnMnLCBlcnJfY291bnQpCiAgICBjb250ZXh0LmxvZ19yZXN1bHQoJ21hdGNoJywgbWF0Y2gpCiAgICBpZiBjb3VudCAtIGVycl9jb3VudCA+IDA6CiAgICAgICAgdGltZXNfYXJyID0gbnAuYXJyYXkodGltZXMpCiAgICAgICAgY29udGV4dC5sb2dfcmVzdWx0KCdhdmdfbGF0ZW5jeScsIGludChucC5tZWFuKHRpbWVzX2FycikpKQogICAgICAgIGNvbnRleHQubG9nX3Jlc3VsdCgnbWluX2xhdGVuY3knLCBpbnQobnAuYW1pbih0aW1lc19hcnIpKSkKICAgICAgICBjb250ZXh0LmxvZ19yZXN1bHQoJ21heF9sYXRlbmN5JywgaW50KG5wLmFtYXgodGltZXNfYXJyKSkpCiAgICAgICAgCiAgICAgICAgY2hhcnQgPSBDaGFydEFydGlmYWN0KCdsYXRlbmN5JywgaGVhZGVyPVsnVGVzdCcsICdMYXRlbmN5IChtaWNyb3NlYyknXSkKICAgICAgICBmb3IgaSBpbiByYW5nZShsZW4odGltZXMpKToKICAgICAgICAgICAgY2hhcnQuYWRkX3JvdyhbaSsxLCBpbnQodGltZXNbaV0pXSkKICAgICAgICBjb250ZXh0LmxvZ19hcnRpZmFjdChjaGFydCkKCiAgICBjb250ZXh0LmxvZ2dlci5pbmZvKGYncnVuIHtjb3VudH0gdGVzdHMsIHtlcnJfY291bnR9IGVycm9ycyBhbmQge21hdGNofSBtYXRjaCBleHBlY3RlZCB2YWx1ZScpCiAgICAKICAgIGlmIGVycl9jb3VudDoKICAgICAgICByYWlzZSBWYWx1ZUVycm9yKGYnZmFpbGVkIG9uIHtlcnJfY291bnR9IHRlc3RzIG9mIHtjb3VudH0nKQogICAgCiAgICBpZiBtYXRjaF9lcnIgYW5kIG1hdGNoICE9IGNvdW50OgogICAgICAgIHJhaXNlIFZhbHVlRXJyb3IoZidvbmx5IHttYXRjaH0gcmVzdWx0cyBtYXRjaCBvdXQgb2Yge2NvdW50fScpCgo= + functionSourceCode: 
IyBHZW5lcmF0ZWQgYnkgbnVjbGlvLmV4cG9ydC5OdWNsaW9FeHBvcnRlcgoKaW1wb3J0IG9zCmltcG9ydCBwYW5kYXMgYXMgcGQKaW1wb3J0IHJlcXVlc3RzCmltcG9ydCBqc29uCmltcG9ydCBudW1weSBhcyBucApmcm9tIGRhdGV0aW1lIGltcG9ydCBkYXRldGltZQpmcm9tIG1scnVuLmRhdGFzdG9yZSBpbXBvcnQgRGF0YUl0ZW0KZnJvbSBtbHJ1bi5hcnRpZmFjdHMgaW1wb3J0IENoYXJ0QXJ0aWZhY3QKaW1wb3J0IG1scnVuCgpkZWYgbW9kZWxfc2VydmVyX3Rlc3Rlcihjb250ZXh0LAogICAgICAgICAgICAgICAgICAgICAgICB0YWJsZTogRGF0YUl0ZW0sCiAgICAgICAgICAgICAgICAgICAgICAgIGFkZHI6IHN0ciwgCiAgICAgICAgICAgICAgICAgICAgICAgIGxhYmVsX2NvbHVtbjogc3RyID0gImxhYmVsIiwKICAgICAgICAgICAgICAgICAgICAgICAgbW9kZWw6IHN0ciA9ICcnLAogICAgICAgICAgICAgICAgICAgICAgICBtYXRjaF9lcnI6IGJvb2wgPSBGYWxzZSwKICAgICAgICAgICAgICAgICAgICAgICAgcm93czogaW50ID0gMjApOgogICAgIiIiIFRlc3QgYSBtb2RlbCBzZXJ2ZXIgCiAgICAKICAgIDpwYXJhbSB0YWJsZTogICAgICAgICBjc3YvcGFycXVldCB0YWJsZSB3aXRoIHRlc3QgZGF0YQogICAgOnBhcmFtIGFkZHI6ICAgICAgICAgIGZ1bmN0aW9uIGFkZHJlc3MvdXJsCiAgICA6cGFyYW0gbGFiZWxfY29sdW1uOiAgbmFtZSBvZiB0aGUgbGFiZWwgY29sdW1uIGluIHRhYmxlCiAgICA6cGFyYW0gbW9kZWw6ICAgICAgICAgdGVzdGVkIG1vZGVsIG5hbWUgCiAgICA6cGFyYW0gbWF0Y2hfZXJyOiAgICAgcmFpc2UgZXJyb3Igb24gdmFsaWRhdGlvbiAocmVxdWlyZSBwcm9wZXIgdGVzdCBzZXQpCiAgICA6cGFyYW0gcm93czogICAgICAgICAgbnVtYmVyIG9mIHJvd3MgdG8gdXNlIGZyb20gdGVzdCBzZXQKICAgICIiIgogICAgICAgIAogICAgdGFibGUgPSB0YWJsZS5hc19kZigpCgogICAgeV9saXN0ID0gdGFibGUucG9wKGxhYmVsX2NvbHVtbikudmFsdWVzLnRvbGlzdCgpCiAgICBjb250ZXh0LmxvZ2dlci5pbmZvKGYndGVzdGluZyB3aXRoIGRhdGFzZXQgYWdhaW5zdCB7YWRkcn0sIG1vZGVsOiB7bW9kZWx9JykKICAgIGlmIHJvd3MgYW5kIHJvd3MgPCB0YWJsZS5zaGFwZVswXToKICAgICAgICB0YWJsZSA9IHRhYmxlLnNhbXBsZShyb3dzKQogICAgCiAgICBjb3VudCA9IGVycl9jb3VudCA9IG1hdGNoID0gMAogICAgdGltZXMgPSBbXQogICAgZm9yIHgsIHkgaW4gemlwKHRhYmxlLnZhbHVlcywgeV9saXN0KToKICAgICAgICBjb3VudCArPSAxCiAgICAgICAgZXZlbnRfZGF0YSA9IGpzb24uZHVtcHMoeyJpbnB1dHMiOlt4LnRvbGlzdCgpXX0pCiAgICAgICAgaGFkX2VyciA9IEZhbHNlCiAgICAgICAgdHJ5OgogICAgICAgICAgICBzdGFydCA9IGRhdGV0aW1lLm5vdygpCiAgICAgICAgICAgIHJlc3AgPSByZXF1ZXN0cy5wdXQoZid7YWRkcn0vdjIvbW9kZWxzL3ttb2RlbH0vaW5mZXInLCBqc29uPWV2ZW50X2Rh
dGEpCiAgICAgICAgICAgIGlmIG5vdCByZXNwLm9rOgogICAgICAgICAgICAgICAgY29udGV4dC5sb2dnZXIuZXJyb3IoZidiYWQgZnVuY3Rpb24gcmVzcCEhXG57cmVzcC50ZXh0fScpCiAgICAgICAgICAgICAgICBlcnJfY291bnQgKz0gMQogICAgICAgICAgICAgICAgY29udGludWUKICAgICAgICAgICAgdGltZXMuYXBwZW5kKChkYXRldGltZS5ub3coKS1zdGFydCkubWljcm9zZWNvbmRzKQogICAgICAgICAgICAgICAgCiAgICAgICAgZXhjZXB0IE9TRXJyb3IgYXMgZXJyOgogICAgICAgICAgICBjb250ZXh0LmxvZ2dlci5lcnJvcihmJ2Vycm9yIGluIHJlcXVlc3QsIGRhdGE6e2V2ZW50X2RhdGF9LCBlcnJvcjoge2Vycn0nKQogICAgICAgICAgICBlcnJfY291bnQgKz0gMQogICAgICAgICAgICBjb250aW51ZQogICAgICAgIAogICAgICAgIHJlc3BfZGF0YSA9IHJlc3AuanNvbigpCiAgICAgICAgcHJpbnQocmVzcF9kYXRhKQogICAgICAgIHlfcmVzcCA9IHJlc3BfZGF0YVsnb3V0cHV0cyddWzBdCiAgICAgICAgaWYgeSA9PSB5X3Jlc3A6CiAgICAgICAgICAgIG1hdGNoICs9IDEKICAgICAgICAKICAgIGNvbnRleHQubG9nX3Jlc3VsdCgndG90YWxfdGVzdHMnLCBjb3VudCkKICAgIGNvbnRleHQubG9nX3Jlc3VsdCgnZXJyb3JzJywgZXJyX2NvdW50KQogICAgY29udGV4dC5sb2dfcmVzdWx0KCdtYXRjaCcsIG1hdGNoKQogICAgaWYgY291bnQgLSBlcnJfY291bnQgPiAwOgogICAgICAgIHRpbWVzX2FyciA9IG5wLmFycmF5KHRpbWVzKQogICAgICAgIGNvbnRleHQubG9nX3Jlc3VsdCgnYXZnX2xhdGVuY3knLCBpbnQobnAubWVhbih0aW1lc19hcnIpKSkKICAgICAgICBjb250ZXh0LmxvZ19yZXN1bHQoJ21pbl9sYXRlbmN5JywgaW50KG5wLmFtaW4odGltZXNfYXJyKSkpCiAgICAgICAgY29udGV4dC5sb2dfcmVzdWx0KCdtYXhfbGF0ZW5jeScsIGludChucC5hbWF4KHRpbWVzX2FycikpKQogICAgICAgIAogICAgICAgIGNoYXJ0ID0gQ2hhcnRBcnRpZmFjdCgnbGF0ZW5jeScsIGhlYWRlcj1bJ1Rlc3QnLCAnTGF0ZW5jeSAobWljcm9zZWMpJ10pCiAgICAgICAgZm9yIGkgaW4gcmFuZ2UobGVuKHRpbWVzKSk6CiAgICAgICAgICAgIGNoYXJ0LmFkZF9yb3coW2krMSwgaW50KHRpbWVzW2ldKV0pCiAgICAgICAgY29udGV4dC5sb2dfYXJ0aWZhY3QoY2hhcnQpCgogICAgY29udGV4dC5sb2dnZXIuaW5mbyhmJ3J1biB7Y291bnR9IHRlc3RzLCB7ZXJyX2NvdW50fSBlcnJvcnMgYW5kIHttYXRjaH0gbWF0Y2ggZXhwZWN0ZWQgdmFsdWUnKQogICAgCiAgICBpZiBlcnJfY291bnQ6CiAgICAgICAgcmFpc2UgVmFsdWVFcnJvcihmJ2ZhaWxlZCBvbiB7ZXJyX2NvdW50fSB0ZXN0cyBvZiB7Y291bnR9JykKICAgIAogICAgaWYgbWF0Y2hfZXJyIGFuZCBtYXRjaCAhPSBjb3VudDoKICAgICAgICByYWlzZSBWYWx1ZUVycm9yKGYnb25seSB7bWF0Y2h9IHJlc3VsdHMgbWF0Y2ggb3V0IG9mIHtjb3VudH0nKQoK commands: [] - 
code_origin: https://github.com/mlrun/functions.git#0afac753c28f1c4126b841ebea14219700bc9635:v2_model_tester.ipynb + code_origin: https://github.com/Idan707/functions.git#a0e559d5ebff00e1c9b41307200258b507a8201b:v2_model_tester.ipynb diff --git a/v2_model_tester/v2_model_tester.ipynb b/v2_model_tester/v2_model_tester.ipynb index 992970197..ee14e4d73 100644 --- a/v2_model_tester/v2_model_tester.ipynb +++ b/v2_model_tester/v2_model_tester.ipynb @@ -28,7 +28,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "%nuclio: setting spec.image to 'mlrun/mlrun'\n" + "%nuclio: setting spec.image to 'mlrun/mlrun'\n", + "%nuclio: setting spec.maxReplicas to 1\n" ] } ], @@ -45,7 +46,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "> 2020-10-12 14:57:23,715 [warning] Failed resolving version info. Ignoring and using defaults\n" + "> 2020-10-28 17:05:57,889 [warning] Failed resolving version info. Ignoring and using defaults\n" ] } ], @@ -142,53 +143,86 @@ "# marks the end of a code section" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Deploy model server for testing" + ] + }, { "cell_type": "code", "execution_count": 5, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "> 2020-10-28 17:05:57,914 [warning] warning!, server (0.5.3-rc1) and client (unstable) ver dont match\n", + "> 2020-10-28 17:05:58,324 [info] deploy started\n", + "[nuclio] 2020-10-28 17:06:08,637 (info) Build complete\n", + "[nuclio] 2020-10-28 17:06:11,672 done updating default-v2-model-server, function address: default-tenant.app.dsteam.iguazio-cd1.com:30984\n", + "> 2020-10-28 17:06:11,679 [warning] warning!, server (0.5.3-rc1) and client (unstable) ver dont match\n" + ] + } + ], "source": [ - "from os import path\n", - "from mlrun import run_local, NewTask, mlconf, import_function, mount_v3io\n", - "mlconf.dbpath = mlconf.dbpath or 'http://mlrun-api:8080'\n", + "import mlrun\n", + "\n", + 
"project_name = 'sk-project'\n", + "DATA_PATH = 'https://s3.wasabisys.com/iguazio/data/iris/iris_dataset.csv'\n", + "MODEL_PATH = 'https://s3.wasabisys.com/iguazio/models/iris/model.pkl'\n", "\n", - "# specify artifacts target location\n", - "artifact_path = mlconf.artifact_path or path.abspath('./')\n", - "project_name = 'sk-project'" + "artifact_path = mlrun.set_environment(api_path = 'http://mlrun-api:8080',\n", + " artifact_path = os.path.abspath('./'))\n", + "\n", + "fn = mlrun.import_function('hub://v2_model_server')\n", + "fn.add_model('mymodel', model_path=MODEL_PATH)\n", + "address = fn.deploy()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Run model server tester locally" ] }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "> 2020-10-12 15:02:30,095 [info] starting run model_server_tester uid=e99145f5d1c2477397cda3bae2724743 -> http://mlrun-api:8080\n", - "> 2020-10-12 15:02:30,305 [info] testing with dataset against http://3.128.234.166:30913, model: iris_dataset_v1\n", - "{'id': '3ea79e20-c4ea-445f-8f78-64e052a92cfd', 'model_name': 'iris_dataset_v1', 'outputs': [0]}\n", - "{'id': 'd2e58ccf-8c79-4e83-9ac1-78b53faf85e4', 'model_name': 'iris_dataset_v1', 'outputs': [0]}\n", - "{'id': 'd6f221c3-b7ec-46ff-b5ef-e26e08338ef1', 'model_name': 'iris_dataset_v1', 'outputs': [1]}\n", - "{'id': '8a3f1460-8192-499e-8ba5-75f28a558125', 'model_name': 'iris_dataset_v1', 'outputs': [0]}\n", - "{'id': '9f15a3e0-b53e-4689-bba6-b489c79f24b6', 'model_name': 'iris_dataset_v1', 'outputs': [0]}\n", - "{'id': 'abdd1480-cdad-429d-b466-73c957aa0f90', 'model_name': 'iris_dataset_v1', 'outputs': [2]}\n", - "{'id': 'b4fb0135-e455-46db-93e5-c4d7cb95e8ab', 'model_name': 'iris_dataset_v1', 'outputs': [2]}\n", - "{'id': 'ac111d8e-79ab-4699-a2a3-b266c2921d82', 'model_name': 'iris_dataset_v1', 'outputs': [2]}\n", - "{'id': 
'f881fbab-7575-44a1-987b-7470544ccf55', 'model_name': 'iris_dataset_v1', 'outputs': [0]}\n", - "{'id': '592bf961-65e5-429a-8e92-64bc1220b724', 'model_name': 'iris_dataset_v1', 'outputs': [0]}\n", - "{'id': 'a3627ceb-6651-4a67-a145-64c7594b10b4', 'model_name': 'iris_dataset_v1', 'outputs': [1]}\n", - "{'id': '82d42332-263a-4754-8da2-7d8fcce933f7', 'model_name': 'iris_dataset_v1', 'outputs': [1]}\n", - "{'id': '4ff18f8d-ffd0-488b-99b2-a0293952a342', 'model_name': 'iris_dataset_v1', 'outputs': [2]}\n", - "{'id': '4f9262d1-ea82-4cf8-bfb0-9ee9312f8a39', 'model_name': 'iris_dataset_v1', 'outputs': [0]}\n", - "{'id': '1b1d356b-296e-4382-9fe5-17789a950131', 'model_name': 'iris_dataset_v1', 'outputs': [1]}\n", - "{'id': 'a5d9df09-d4ff-4dc8-835f-7c9eb0e789bb', 'model_name': 'iris_dataset_v1', 'outputs': [0]}\n", - "{'id': '987fc45c-788f-456b-822d-d8a2e4391d84', 'model_name': 'iris_dataset_v1', 'outputs': [2]}\n", - "{'id': '765390a6-afdf-48c4-aa52-12ec50dd4fe2', 'model_name': 'iris_dataset_v1', 'outputs': [2]}\n", - "{'id': '79df5728-f025-4d21-bf7c-a5ea46390e83', 'model_name': 'iris_dataset_v1', 'outputs': [2]}\n", - "{'id': '6ea455e4-f5be-4afb-9c01-08bcadcc54d4', 'model_name': 'iris_dataset_v1', 'outputs': [1]}\n", - "> 2020-10-12 15:02:30,710 [info] run 20 tests, 0 errors and 8 match expected value\n" + "> 2020-10-28 17:06:11,735 [warning] warning!, server (0.5.3-rc1) and client (unstable) ver dont match\n", + "> 2020-10-28 17:06:11,736 [info] starting run model_server_tester uid=cdab1ab0ee78491aa7112199edd13eee -> http://mlrun-api:8080\n", + "> 2020-10-28 17:06:11,774 [warning] warning!, server (0.5.3-rc1) and client (unstable) ver dont match\n", + "> 2020-10-28 17:06:11,926 [info] testing with dataset against http://default-tenant.app.dsteam.iguazio-cd1.com:30984, model: mymodel\n", + "{'id': 'ff2b8b28-a577-41f7-9903-10d30b5e5ce1', 'model_name': 'mymodel', 'outputs': [1]}\n", + "{'id': 'de0ebcde-584b-47a1-a3a2-58b71d5dfd6e', 'model_name': 'mymodel', 'outputs': [2]}\n", + 
"{'id': 'b5b48278-1923-4b22-afa0-125d168cf8f2', 'model_name': 'mymodel', 'outputs': [0]}\n", + "{'id': 'adc5fa66-d375-4284-956d-258cfccf7ffd', 'model_name': 'mymodel', 'outputs': [0]}\n", + "{'id': '34d7aec0-8c00-4a46-beff-3d3f230f1eb6', 'model_name': 'mymodel', 'outputs': [1]}\n", + "{'id': '3b54ba8d-2dd9-4f1b-b047-d7dd0795d700', 'model_name': 'mymodel', 'outputs': [2]}\n", + "{'id': 'c1dc99d8-1ee8-49e1-bb29-6d358c6fe4df', 'model_name': 'mymodel', 'outputs': [2]}\n", + "{'id': 'cfb68667-c73e-4099-bc86-0621ee5f5b74', 'model_name': 'mymodel', 'outputs': [0]}\n", + "{'id': '4a6ce9d1-5554-4b86-81d4-27f9c2f3d1f4', 'model_name': 'mymodel', 'outputs': [2]}\n", + "{'id': 'e9b92268-1e8a-4027-b22c-1796548c85d3', 'model_name': 'mymodel', 'outputs': [2]}\n", + "{'id': '24856808-39f9-479f-9351-e9a53606b774', 'model_name': 'mymodel', 'outputs': [1]}\n", + "{'id': '4d02d624-123c-492e-a2c5-31fb5666b641', 'model_name': 'mymodel', 'outputs': [0]}\n", + "{'id': '73f24c9c-8b91-49e1-b489-9ce130987923', 'model_name': 'mymodel', 'outputs': [0]}\n", + "{'id': '9814eb33-0dd1-4a77-a88f-bbff7f4d0b6d', 'model_name': 'mymodel', 'outputs': [2]}\n", + "{'id': '125107eb-021f-4bd0-bf49-1ec9eb9c6271', 'model_name': 'mymodel', 'outputs': [1]}\n", + "{'id': 'a1110605-7f83-4944-aec1-f5c0ebe6df11', 'model_name': 'mymodel', 'outputs': [2]}\n", + "{'id': '54683000-a57b-4359-861a-12a07c721e80', 'model_name': 'mymodel', 'outputs': [1]}\n", + "{'id': 'f0966c56-3184-4ad6-897e-05ad3142078c', 'model_name': 'mymodel', 'outputs': [0]}\n", + "{'id': 'b12cf6f0-bf3c-4047-a214-6f7a87fe4bf8', 'model_name': 'mymodel', 'outputs': [1]}\n", + "{'id': 'f8f17071-833f-425f-843b-f83cddbc74a5', 'model_name': 'mymodel', 'outputs': [1]}\n", + "> 2020-10-28 17:06:12,640 [info] run 20 tests, 0 errors and 6 match expected value\n" ] }, { @@ -362,26 +396,26 @@ " \n", " \n", " sk-project\n", - " \n", + " \n", " 0\n", - " Oct 12 15:02:30\n", + " Oct 28 17:06:11\n", " completed\n", " model_server_tester\n", - "
v3io_user=admin
kind=handler
owner=admin
host=jupyter-58d8fdb6fc-nmqbq
\n", + "
v3io_user=admin
kind=handler
owner=admin
host=jupyter-d87678b84-n4lcf
\n", "
table
\n", - "
addr=http://3.128.234.166:30913
model=iris_dataset_v1
\n", - "
total_tests=20
errors=0
match=8
avg_latency=17454
min_latency=12366
max_latency=105585
\n", - "
latency
\n", + "
addr=http://default-tenant.app.dsteam.iguazio-cd1.com:30984
model=mymodel
\n", + "
total_tests=20
errors=0
match=6
avg_latency=32244
min_latency=24955
max_latency=116585
\n", + "
latency
\n", " \n", " \n", "\n", "\n", - "
\n", + "
\n", "
\n", - " Title\n", - " ×\n", + " Title\n", + " ×\n", "
\n", - " \n", + " \n", "
\n", "
\n" ], @@ -397,19 +431,24 @@ "output_type": "stream", "text": [ "to track results use .show() or .logs() or in CLI: \n", - "!mlrun get run e99145f5d1c2477397cda3bae2724743 --project sk-project , !mlrun logs e99145f5d1c2477397cda3bae2724743 --project sk-project\n", - "> 2020-10-12 15:02:30,772 [info] run executed, status=completed\n" + "!mlrun get run cdab1ab0ee78491aa7112199edd13eee --project sk-project , !mlrun logs cdab1ab0ee78491aa7112199edd13eee --project sk-project\n", + "> 2020-10-28 17:06:12,724 [info] run executed, status=completed\n" ] } ], "source": [ - "# run the function locally (parameters must be set !!)\n", - "addr = 'http://3.128.234.166:30913'\n", - "data_path = 'https://s3.wasabisys.com/iguazio/data/iris/iris_dataset.csv'\n", - "gen = run_local(name='model_server_tester', handler=model_server_tester, \n", - " params={'addr': addr, 'model': 'iris_dataset_v1'},\n", - " inputs={'table': data_path},\n", - " project=project_name, artifact_path=path.join(artifact_path, 'data')) " + "gen = mlrun.run_local(name='model_server_tester', handler=model_server_tester, \n", + " params={'addr': address, 'model': 'mymodel'},\n", + " inputs={'table': DATA_PATH},\n", + " project=project_name, \n", + " artifact_path=os.path.join(artifact_path, 'data')) " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Save" ] }, { @@ -421,13 +460,13 @@ "name": "stdout", "output_type": "stream", "text": [ - "> 2020-10-07 09:53:23,062 [info] function spec saved to path: function.yaml\n" + "> 2020-10-28 17:06:21,163 [info] function spec saved to path: function.yaml\n" ] }, { "data": { "text/plain": [ - "" + "" ] }, "execution_count": 7, @@ -436,29 +475,298 @@ } ], "source": [ - "from mlrun import code_to_function\n", - "test_func = code_to_function(name='v2_model_tester', kind='job', handler=\"model_server_tester\",\n", - " description=\"test v2 model servers\",\n", - " categories=[\"ml\", \"test\"],\n", - " labels={\"author\": \"yaronh\"},\n", - " 
code_output='.')\n", + "test_func = mlrun.code_to_function(name='v2_model_tester', \n", + " kind='job', \n", + " handler=\"model_server_tester\",\n", + " description=\"test v2 model servers\",\n", + " categories=[\"ml\", \"test\"],\n", + " labels={\"author\": \"yaronh\"},\n", + " code_output='.')\n", "\n", "test_func.export('function.yaml')" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Run remotely" + ] + }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "> 2020-10-28 17:06:21,179 [warning] warning!, server (0.5.3-rc1) and client (unstable) ver dont match\n", + "> 2020-10-28 17:06:21,180 [info] starting run model_server_tester uid=daaed7fcf79242e2b9e0c3a5148c64d1 -> http://mlrun-api:8080\n", + "> 2020-10-28 17:06:21,307 [info] Job is running in the background, pod: model-server-tester-k2s98\n", + "> 2020-10-28 17:06:24,160 [info] testing with dataset against http://default-tenant.app.dsteam.iguazio-cd1.com:30984, model: mymodel\n", + "{'id': 'b918f1e7-e376-490a-91a9-d603a8d05c48', 'model_name': 'mymodel', 'outputs': [1]}\n", + "{'id': 'f73b94f2-0191-497b-9a0d-b6e31f53753d', 'model_name': 'mymodel', 'outputs': [2]}\n", + "{'id': '113a1873-f82e-4c1e-8eb0-e8f93035c841', 'model_name': 'mymodel', 'outputs': [0]}\n", + "{'id': 'bf9ccb23-0d77-4f12-b00b-13ed6770c062', 'model_name': 'mymodel', 'outputs': [0]}\n", + "{'id': 'acab9cdd-5c10-415d-9658-002b38c8a350', 'model_name': 'mymodel', 'outputs': [2]}\n", + "{'id': 'a8f39660-e9c5-4d64-bdde-d0427fbadf0d', 'model_name': 'mymodel', 'outputs': [0]}\n", + "{'id': '6d76b01b-aa4a-47de-b0e1-79075981abca', 'model_name': 'mymodel', 'outputs': [2]}\n", + "{'id': '1e2ba61f-da83-42ae-a5eb-0b0be0a1a89b', 'model_name': 'mymodel', 'outputs': [1]}\n", + "{'id': 'f1c7a6a7-e706-44fb-b838-f7ceeac2862f', 'model_name': 'mymodel', 'outputs': [2]}\n", + "{'id': 
'a4889639-520c-48b6-bf68-8f4b09442429', 'model_name': 'mymodel', 'outputs': [2]}\n", + "{'id': '0b1968ff-0a1c-4c6c-8aed-74916d3e3496', 'model_name': 'mymodel', 'outputs': [0]}\n", + "{'id': '51ba3eee-c377-4cd7-8373-52b77d5f03a4', 'model_name': 'mymodel', 'outputs': [2]}\n", + "{'id': 'd7033e1e-fc77-47e0-bc96-6d3964ab0911', 'model_name': 'mymodel', 'outputs': [1]}\n", + "{'id': '42125174-8eb0-4619-b253-f62c0cac6eb8', 'model_name': 'mymodel', 'outputs': [1]}\n", + "{'id': '84584afd-5d35-4e92-bb0b-b72745e89c42', 'model_name': 'mymodel', 'outputs': [1]}\n", + "{'id': '0be7b84c-5c75-4b5b-b524-5c158ab1cfe7', 'model_name': 'mymodel', 'outputs': [2]}\n", + "{'id': '8b350f47-5d1b-4e69-af8f-245beb87782c', 'model_name': 'mymodel', 'outputs': [0]}\n", + "{'id': 'e0a59fde-7fd9-43ec-8794-d90174b12b65', 'model_name': 'mymodel', 'outputs': [1]}\n", + "{'id': '5f787830-a716-42fe-b57d-049299f77cc0', 'model_name': 'mymodel', 'outputs': [1]}\n", + "{'id': '58603b5b-5c06-4889-924d-5073f64fde06', 'model_name': 'mymodel', 'outputs': [2]}\n", + "> 2020-10-28 17:06:24,742 [info] run 20 tests, 0 errors and 5 match expected value\n", + "> 2020-10-28 17:06:24,766 [info] run executed, status=completed\n", + "final state: completed\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
projectuiditerstartstatenamelabelsinputsparametersresultsartifacts
sk-project0Oct 28 17:06:24completedmodel_server_tester
v3io_user=admin
kind=job
owner=admin
host=model-server-tester-k2s98
table
addr=http://default-tenant.app.dsteam.iguazio-cd1.com:30984
model=mymodel
total_tests=20
errors=0
match=5
avg_latency=26616
min_latency=25060
max_latency=31181
latency
\n", + "
\n", + "
\n", + "
\n", + " Title\n", + " ×\n", + "
\n", + " \n", + "
\n", + "
\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "to track results use .show() or .logs() or in CLI: \n", + "!mlrun get run daaed7fcf79242e2b9e0c3a5148c64d1 --project sk-project , !mlrun logs daaed7fcf79242e2b9e0c3a5148c64d1 --project sk-project\n", + "> 2020-10-28 17:06:27,426 [info] run executed, status=completed\n" + ] + }, + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "test_func.run(mlrun.NewTask(name='model_server_tester', \n", + " handler=model_server_tester, \n", + " params={'addr': address, 'model': 'mymodel'},\n", + " inputs={'table': DATA_PATH},\n", + " project=project_name, \n", + " artifact_path=os.path.join(artifact_path, 'data')))" + ] } ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python [conda env:root] *", "language": "python", - "name": "python3" + "name": "conda-root-py" }, "language_info": { "codemirror_mode": {