|
153 | 153 | "### Load the config.yml file, which contains settings used across this pipeline"
|
154 | 154 | ]
|
155 | 155 | },
|
| 156 | + { |
| 157 | + "cell_type": "code", |
| 158 | + "execution_count": null, |
| 159 | + "metadata": { |
| 160 | + "tags": [] |
| 161 | + }, |
| 162 | + "outputs": [], |
| 163 | + "source": [] |
| 164 | + }, |
| 165 | + { |
| 166 | + "cell_type": "code", |
| 167 | + "execution_count": null, |
| 168 | + "metadata": { |
| 169 | + "tags": [] |
| 170 | + }, |
| 171 | + "outputs": [], |
| 172 | + "source": [ |
| 173 | + "from glob import glob\n", |
| 174 | + "from utils import load_config\n", |
| 175 | + "\n", |
| 176 | + "s3 = boto3.client('s3')\n", |
| 177 | + "\n", |
| 178 | + "config = load_config('config.yml')\n", |
| 179 | + "\n", |
| 180 | + "source_dir = config['scripts']['source_dir']\n", |
| 181 | + "bucket = config['aws']['s3_bucket']\n", |
| 182 | + "prefix = config['aws']['s3_prefix']\n", |
| 183 | + "\n", |
| 184 | + "files = glob(os.path.join(source_dir, \"*.py\")) + glob(os.path.join(source_dir, \"*.txt\"))\n", |
| 185 | + "\n", |
| 186 | + "for file in files:\n", |
| 187 | + " s3.upload_file(file, bucket, f\"{prefix}/{file}\")\n", |
| 188 | + " print(file, bucket, f\"{prefix}/{file}\")" |
| 189 | + ] |
| 190 | + }, |
156 | 191 | {
|
157 | 192 | "cell_type": "code",
|
158 | 193 | "execution_count": null,
|
|
176 | 211 | "## initialize the sagemaker session, region, role bucket and pipeline session\n",
|
177 | 212 | "session = sagemaker.session.Session()\n",
|
178 | 213 | "region = session.boto_region_name\n",
|
179 |
| - "pipeline_session = PipelineSession()\n", |
| 214 | + "pipeline_session = PipelineSession(default_bucket=config['aws']['s3_bucket'])\n", |
180 | 215 | "ci = boto3.client('sts').get_caller_identity()\n",
|
181 | 216 | "\n",
|
182 | 217 | "role_name = config['aws']['sagemaker_execution_role_name']\n",
|
|
229 | 264 | "# query for the training job, write it to query_training.py\n",
|
230 | 265 | "fpath: str = os.path.join(config['scripts']['source_dir'], config['scripts']['query'])\n",
|
231 | 266 | "logger.info(f\"writing training query to {fpath}\")\n",
|
232 |
| - "Path(fpath).write_text(f\"TRAINING_DATA_QUERY=\\\"\\\"\\\"{config['training_step']['query']}\\\"\\\"\\\"\")\n", |
| 267 | + "\n", |
| 268 | + "q = f\"\"\"\n", |
| 269 | + "TRAINING_TRUE_QUERY=\\\"\\\"\\\"{config['training_step']['query_true']}\\\"\\\"\\\"\n", |
| 270 | + "\\n\n", |
| 271 | + "TRAINING_NON_TRUE_QUERY=\\\"\\\"\\\"{config['training_step']['query_non_true']}\\\"\\\"\\\"\n", |
| 272 | + "\"\"\"\n", |
| 273 | + "\n", |
| 274 | + "Path(fpath).write_text(q)\n", |
| 275 | + "\n", |
233 | 276 | "\n",
|
234 | 277 | "# approval status for trained model\n",
|
235 | 278 | "model_approval_status = ParameterString(\n",
|
|
312 | 355 | "# A managed processor comes with a preconfigured container, so only specifying version is required.\n",
|
313 | 356 | "est_cls = sagemaker.sklearn.estimator.SKLearn\n",
|
314 | 357 | "\n",
|
| 358 | + "nw_cfg = config['aws']['network_config']\n", |
| 359 | + "\n", |
| 360 | + "network_config = sagemaker.network.NetworkConfig(\n", |
| 361 | + " enable_network_isolation=nw_cfg['enable_network_isolation'],\n", |
| 362 | + " security_group_ids=nw_cfg['security_group_ids'], \n", |
| 363 | + " subnets=nw_cfg['subnets']\n", |
| 364 | + ")\n", |
| 365 | + "\n", |
315 | 366 | "sklearn_processor = FrameworkProcessor(\n",
|
316 | 367 | " estimator_cls=est_cls,\n",
|
317 | 368 | " framework_version=config['training_step']['sklearn_framework_version'],\n",
|
|
320 | 371 | " instance_count=config['data_processing_step']['instance_count'],\n",
|
321 | 372 | " tags=config['data_processing_step']['tags'], \n",
|
322 | 373 | " sagemaker_session=pipeline_session,\n",
|
323 |
| - " base_job_name=config['pipeline']['base_job_name'], )\n", |
| 374 | + " base_job_name=config['pipeline']['base_job_name'], \n", |
| 375 | + " network_config=network_config\n", |
| 376 | + ")\n", |
324 | 377 | "\n",
|
325 | 378 | "outputs_preprocessor = [\n",
|
326 | 379 | " ProcessingOutput(\n",
|
|
435 | 488 | " \"features\": config['training_step']['training_features'],\n",
|
436 | 489 | " \"target\": config['training_step']['training_target'],\n",
|
437 | 490 | " },\n",
|
438 |
| - " tags=config['training_step']['tags']\n", |
| 491 | + " tags=config['training_step']['tags'],\n", |
| 492 | + " output_path=f\"s3://{bucket}/{prefix}\",\n", |
439 | 493 | ")\n",
|
440 | 494 | "\n",
|
441 | 495 | "# Create Hyperparameter tuner object. Ranges from https://docs.aws.amazon.com/sagemaker/latest/dg/xgboost-tuning.html\n",
|
|
538 | 592 | " )\n",
|
539 | 593 | " )\n",
|
540 | 594 | " ],\n",
|
541 |
| - " code = config['scripts']['evaluation'],\n", |
| 595 | + " code = f\"s3://{bucket}/{prefix}/{config['scripts']['evaluation']}\",\n", |
542 | 596 | " property_files=[evaluation_report],\n",
|
543 | 597 | " job_arguments=[\n",
|
544 | 598 | " \"--target\", target_parameter,\n",
|
|
559 | 613 | "The model is registered with the Model Registry with its approval status set to PendingManualApproval. This means the model cannot be deployed on a SageMaker Endpoint unless its status in the registry is changed to Approved, whether manually via the SageMaker console, programmatically, or through a Lambda function."
|
560 | 614 | ]
|
561 | 615 | },
|
| 616 | + { |
| 617 | + "cell_type": "code", |
| 618 | + "execution_count": null, |
| 619 | + "metadata": {}, |
| 620 | + "outputs": [], |
| 621 | + "source": [] |
| 622 | + }, |
562 | 623 | {
|
563 | 624 | "cell_type": "code",
|
564 | 625 | "execution_count": null,
|
|
704 | 765 | " step_preprocess_data, \n",
|
705 | 766 | " step_tuning, \n",
|
706 | 767 | " step_evaluate_model, \n",
|
707 |
| - " step_cond],\n", |
| 768 | + " step_cond\n", |
| 769 | + " ],\n", |
708 | 770 | ")"
|
709 | 771 | ]
|
710 | 772 | },
|
|
1408 | 1470 | ],
|
1409 | 1471 | "instance_type": "ml.t3.medium",
|
1410 | 1472 | "kernelspec": {
|
1411 |
| - "display_name": "Python 3", |
| 1473 | + "display_name": "Python 3 (Data Science 3.0)", |
1412 | 1474 | "language": "python",
|
1413 |
| - "name": "python3" |
| 1475 | + "name": "python3__SAGEMAKER_INTERNAL__arn:aws:sagemaker:us-east-1:081325390199:image/sagemaker-data-science-310-v1" |
1414 | 1476 | },
|
1415 | 1477 | "language_info": {
|
1416 | 1478 | "codemirror_mode": {
|
|
1422 | 1484 | "name": "python",
|
1423 | 1485 | "nbconvert_exporter": "python",
|
1424 | 1486 | "pygments_lexer": "ipython3",
|
1425 |
| - "version": "3.11.5" |
| 1487 | + "version": "3.10.6" |
1426 | 1488 | }
|
1427 | 1489 | },
|
1428 | 1490 | "nbformat": 4,
|
|
0 commit comments