Skip to content

Commit 13b353a

Browse files
ZeratuuLL and Lifeeng Wei authored Apr 1, 2022
Lifengwei/update nlp notebooks (#1090)
* remove some packages * add back a package * update notebooks, mainly change multilabel data file * data file * fix typos, codes in notebooks * update experiment name * fix typo, remove comments Co-authored-by: Lifeeng Wei <lifengwei@microsoft.com>
1 parent c41594c commit 13b353a

11 files changed

+51882
-51885
lines changed
 

‎python-sdk/tutorials/automl-with-azureml/automl-nlp-multiclass/automl-nlp-text-classification-multiclass.ipynb

+28-37
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
"cell_type": "markdown",
1414
"metadata": {},
1515
"source": [
16-
"![Impressions](https://PixelServer20190423114238.azurewebsites.net/api/impressions/MachineLearningNotebooks/how-to-use-azureml/automated-machine-learning/classification-text-dnn/auto-ml-classification-text-dnn.png)"
16+
"![Impressions](https://PixelServer20190423114238.azurewebsites.net/api/impressions/MachineLearningNotebooks/how-to-use-azureml/automated-machine-learning/experimental/automl-nlp-multiclass/automl-nlp-text-classification-multiclass.png)"
1717
]
1818
},
1919
{
@@ -68,6 +68,7 @@
6868
"source": [
6969
"import logging\n",
7070
"import os\n",
71+
"import tempfile\n",
7172
"\n",
7273
"import pandas as pd\n",
7374
"\n",
@@ -77,9 +78,13 @@
7778
"from azureml.core.dataset import Dataset\n",
7879
"from azureml.core.compute import AmlCompute\n",
7980
"from azureml.core.compute import ComputeTarget\n",
81+
"from azureml.core.compute_target import ComputeTargetException\n",
82+
"from azureml.core.script_run_config import ScriptRunConfig\n",
8083
"from azureml.core.run import Run\n",
84+
"from azureml.data.datapath import DataPath\n",
8185
"from azureml.train.automl import AutoMLConfig\n",
82-
"from sklearn.datasets import fetch_20newsgroups"
86+
"from sklearn.datasets import fetch_20newsgroups\n",
87+
"from sklearn.metrics import classification_report"
8388
]
8489
},
8590
{
@@ -123,7 +128,7 @@
123128
"ws = Workspace.from_config()\n",
124129
"\n",
125130
"# Choose an experiment name.\n",
126-
"experiment_name = \"automl-nlp-text-multiclass\"\n",
131+
"experiment_name = \"automl-nlp-text-classification-multiclass\"\n",
127132
"\n",
128133
"experiment = Experiment(ws, experiment_name)\n",
129134
"\n",
@@ -143,7 +148,7 @@
143148
"metadata": {},
144149
"source": [
145150
"## Set up a compute cluster\n",
146-
"This section uses a user-provided compute cluster (named \"dist-compute\" in this example). If a cluster with this name does not exist in the user's workspace, the below code will create a new cluster. You can choose the parameters of the cluster as mentioned in the comments."
151+
"This section uses a user-provided compute cluster (named \"gpu-compute\" in this example). If a cluster with this name does not exist in the user's workspace, the below code will create a new cluster. You can choose the parameters of the cluster as mentioned in the comments."
147152
]
148153
},
149154
{
@@ -156,13 +161,10 @@
156161
},
157162
"outputs": [],
158163
"source": [
159-
"from azureml.core.compute import ComputeTarget, AmlCompute\n",
160-
"from azureml.core.compute_target import ComputeTargetException\n",
161-
"\n",
162164
"num_nodes = 1\n",
163165
"\n",
164166
"# Choose a name for your cluster.\n",
165-
"amlcompute_cluster_name = \"dist-compute\"\n",
167+
"amlcompute_cluster_name = \"gpu-compute\"\n",
166168
"\n",
167169
"# Verify that cluster does not exist already\n",
168170
"try:\n",
@@ -222,21 +224,19 @@
222224
" {feature_column_name: data.data, target_column_name: data.target}\n",
223225
" )\n",
224226
"\n",
225-
" data_train = data[:200]\n",
226-
" data_val = data[200:300]\n",
227-
" data_test = data[300:400]\n",
227+
" data_train = data.loc[:200]\n",
228+
" data_val = data.loc[200:300]\n",
229+
" data_test = data.loc[300:400]\n",
228230
"\n",
229-
" data_train = remove_blanks_20news(\n",
230-
" data_train, feature_column_name, target_column_name\n",
231-
" )\n",
232-
" data_val = remove_blanks_20news(data_val, feature_column_name, target_column_name)\n",
233-
" data_test = remove_blanks_20news(data_test, feature_column_name, target_column_name)\n",
231+
" data_train = remove_blanks_20news(data_train)\n",
232+
" data_val = remove_blanks_20news(data_val)\n",
233+
" data_test = remove_blanks_20news(data_test)\n",
234234
"\n",
235235
" return data_train, data_val, data_test\n",
236236
"\n",
237237
"\n",
238-
"def remove_blanks_20news(data, feature_column_name, target_column_name):\n",
239-
"\n",
238+
"def remove_blanks_20news(data):\n",
239+
" data = data.copy()\n",
240240
" data[feature_column_name] = (\n",
241241
" data[feature_column_name]\n",
242242
" .replace(r\"\\n\", \" \", regex=True)\n",
@@ -280,7 +280,12 @@
280280
"data_test.to_csv(test_data_fname, index=False)\n",
281281
"\n",
282282
"datastore = ws.get_default_datastore()\n",
283-
"datastore.upload(src_dir=data_dir, target_path=blobstore_datadir, overwrite=True)"
283+
"target = DataPath(\n",
284+
" datastore=datastore, path_on_datastore=blobstore_datadir, name=\"news_group_data\"\n",
285+
")\n",
286+
"Dataset.File.upload_directory(\n",
287+
" src_dir=data_dir, target=target, overwrite=True, show_progress=True\n",
288+
")"
284289
]
285290
},
286291
{
@@ -424,12 +429,7 @@
424429
"metadata": {},
425430
"outputs": [],
426431
"source": [
427-
"(\n",
428-
" best_run,\n",
429-
" best_model,\n",
430-
") = (\n",
431-
" automl_run.get_output()\n",
432-
") # You might see a warning about \"enable_distributed_dnn_training\". Please simply ignore.\n",
432+
"best_run, best_model = automl_run.get_output()\n",
433433
"best_run"
434434
]
435435
},
@@ -456,10 +456,7 @@
456456
"source": [
457457
"test_dataset = Dataset.Tabular.from_delimited_files(\n",
458458
" path=[(datastore, blobstore_datadir + \"/test_data.csv\")]\n",
459-
")\n",
460-
"\n",
461-
"# preview the first 3 rows of the dataset\n",
462-
"test_dataset.take(3).to_pandas_dataframe()"
459+
")"
463460
]
464461
},
465462
{
@@ -490,8 +487,7 @@
490487
},
491488
"outputs": [],
492489
"source": [
493-
"# Load training script run corresponding to AutoML run above.\n",
494-
"training_run_id = automl_run.id + \"_HD_0\"\n",
490+
"training_run_id = best_run.id\n",
495491
"training_run = Run(experiment, training_run_id)"
496492
]
497493
},
@@ -526,9 +522,6 @@
526522
},
527523
"outputs": [],
528524
"source": [
529-
"import tempfile\n",
530-
"from azureml.core.script_run_config import ScriptRunConfig\n",
531-
"\n",
532525
"scoring_args = arguments\n",
533526
"with tempfile.TemporaryDirectory() as tmpdir:\n",
534527
" # Download required files from training run into temp folder.\n",
@@ -640,8 +633,6 @@
640633
},
641634
"outputs": [],
642635
"source": [
643-
"from sklearn.metrics import classification_report\n",
644-
"\n",
645636
"print(\n",
646637
" classification_report(\n",
647638
" test_data_df[target_column_name], test_set_predictions_df[target_column_name]\n",
@@ -678,7 +669,7 @@
678669
"name": "python3-azureml"
679670
},
680671
"kernelspec": {
681-
"display_name": "Python 3.7.0 64-bit ('pypi': conda)",
672+
"display_name": "Python 3.6",
682673
"language": "python",
683674
"name": "python3"
684675
},

‎python-sdk/tutorials/automl-with-azureml/automl-nlp-multiclass/update_env.yml

+1-2
Original file line numberDiff line numberDiff line change
@@ -4,5 +4,4 @@ dependencies:
44
- pandas~=1.1.5
55

66
- pip:
7-
- azureml-automl-dnn-nlp==1.39.0
8-
- horovod==0.21.3
7+
- azureml-automl-dnn-nlp==1.39.0

0 commit comments

Comments (0)