Skip to content

Commit 51297a7

Browse files
gauravrajgururjainccgrajguru
authored
Extra params support for stable diffusion model (#3017)
* Notebooks update for extra parameters and inpainting task * online inpainting sdk * safe inpainting online * merge with main * deleting file from wrong location * code formatting * update mlflow version * repair scoring script * dev * conda update * remove code * reformat * reformat * format --------- Co-authored-by: Rupal Jain <[email protected]> Co-authored-by: grajguru <[email protected]>
1 parent 07d87f0 commit 51297a7

12 files changed

+413
-149
lines changed

cli/foundation-models/system/inference/text-to-image/scoring-files/docker_env/conda_dependencies.yaml

+7-7
Original file line numberDiff line numberDiff line change
@@ -4,16 +4,16 @@ dependencies:
44
- python=3.8.16
55
- pip<=23.1.2
66
- pip:
7-
- mlflow==2.3.2
7+
- mlflow==2.10.0
88
- torch==1.13.0
9-
- transformers==4.29.1
9+
- transformers==4.33.2
1010
- diffusers==0.23.0
1111
- accelerate==0.22.0
12-
- azureml-core==1.52.0
13-
- azureml-mlflow==1.52.0
14-
- azure-ai-contentsafety==1.0.0b1
12+
- azureml-core==1.56.0
13+
- azureml-mlflow==1.56.0
14+
- azure-ai-contentsafety==1.0.0
1515
- aiolimiter==1.1.0
1616
- azure-ai-mlmonitoring==0.1.0a3
17-
- azure-mgmt-cognitiveservices==13.4.0
18-
- azure-identity==1.13.0
17+
- azure-mgmt-cognitiveservices==13.5.0
18+
- azure-identity==1.16.0
1919
name: mlflow-env

cli/foundation-models/system/inference/text-to-image/text-to-image-inpainting-online-endpoint.sh

-7
Original file line numberDiff line numberDiff line change
@@ -57,13 +57,6 @@ az ml online-deployment create --file deploy-online.yaml $workspace_info --set \
5757
echo "deployment create failed"; exit 1;
5858
}
5959

60-
# get deployment name and set all traffic to the new deployment
61-
yaml_file="deploy-online.yaml"
62-
get_yaml_value() {
63-
grep "$1:" "$yaml_file" | awk '{print $2}' | sed 's/[",]//g'
64-
}
65-
deployment_name=$(get_yaml_value "name")
66-
6760
az ml online-endpoint update $workspace_info --name=$endpoint_name --traffic="$deployment_name=100" || {
6861
echo "Failed to set all traffic to the new deployment"
6962
exit 1

sdk/python/foundation-models/system/inference/text-to-image/safe-image-text-to-image-batch-deployment.ipynb

+2-2
Original file line numberDiff line numberDiff line change
@@ -308,7 +308,7 @@
308308
" print(\"---Creating environment---\")\n",
309309
" env = Environment(\n",
310310
" name=environment_name,\n",
311-
" build=BuildContext(path=\"./aacs-scoring-files/docker_env\"),\n",
311+
" build=BuildContext(path=\"./scoring-files/docker_env\"),\n",
312312
" )\n",
313313
" workspace_ml_client.environments.create_or_update(env)\n",
314314
" env = workspace_ml_client.environments.get(environment_name, label=\"latest\")\n",
@@ -448,7 +448,7 @@
448448
" model=model,\n",
449449
" environment=env,\n",
450450
" code_configuration=CodeConfiguration(\n",
451-
" code=\"aacs-scoring-files/score\",\n",
451+
" code=\"scoring-files/score\",\n",
452452
" scoring_script=\"score_batch.py\",\n",
453453
" ),\n",
454454
" compute=compute_name,\n",

sdk/python/foundation-models/system/inference/text-to-image/safe-text-to-image-inpainting-batch-deployment.ipynb

+2-2
Original file line numberDiff line numberDiff line change
@@ -311,7 +311,7 @@
311311
" print(\"---Creating environment---\")\n",
312312
" env = Environment(\n",
313313
" name=environment_name,\n",
314-
" build=BuildContext(path=\"./aacs-scoring-files/docker_env\"),\n",
314+
" build=BuildContext(path=\"./scoring-files/docker_env\"),\n",
315315
" )\n",
316316
" workspace_ml_client.environments.create_or_update(env)\n",
317317
" env = workspace_ml_client.environments.get(environment_name, label=\"latest\")\n",
@@ -451,7 +451,7 @@
451451
" model=model,\n",
452452
" environment=env,\n",
453453
" code_configuration=CodeConfiguration(\n",
454-
" code=\"aacs-scoring-files/score\",\n",
454+
" code=\"scoring-files/score\",\n",
455455
" scoring_script=\"score_batch.py\",\n",
456456
" ),\n",
457457
" compute=compute_name,\n",

sdk/python/foundation-models/system/inference/text-to-image/safe-text-to-image-inpainting-online-endpoint.ipynb

+104-69
Original file line numberDiff line numberDiff line change
@@ -324,49 +324,6 @@
324324
")"
325325
]
326326
},
327-
{
328-
"cell_type": "markdown",
329-
"metadata": {},
330-
"source": [
331-
"#### 3.1 Register Model in Workspace\n",
332-
"\n",
333-
"The above retrieved model from `azureml` registry will be registered within the user’s workspace. This registration will maintain the original name of the model, assign a unique version identifier (corresponding to the first field of the UUID), and label it as the “latest” version. Please note that this step take several minutes."
334-
]
335-
},
336-
{
337-
"cell_type": "code",
338-
"execution_count": null,
339-
"metadata": {},
340-
"outputs": [],
341-
"source": [
342-
"local_model_path = \"local_model\"\n",
343-
"\n",
344-
"registry_ml_client.models.download(\n",
345-
" name=model.name, version=model.version, download_path=local_model_path\n",
346-
")"
347-
]
348-
},
349-
{
350-
"cell_type": "code",
351-
"execution_count": null,
352-
"metadata": {},
353-
"outputs": [],
354-
"source": [
355-
"from azure.ai.ml.entities import Model\n",
356-
"from azure.ai.ml.constants import AssetTypes\n",
357-
"import os\n",
358-
"\n",
359-
"local_model = Model(\n",
360-
" path=os.path.join(local_model_path, model.name, \"mlflow_model_folder\"),\n",
361-
" type=AssetTypes.MLFLOW_MODEL,\n",
362-
" name=model.name,\n",
363-
" version=str(uuid4().fields[0]),\n",
364-
" description=\"Model created from local file for text to image deployment.\",\n",
365-
")\n",
366-
"\n",
367-
"model = workspace_ml_client.models.create_or_update(local_model)"
368-
]
369-
},
370327
{
371328
"cell_type": "markdown",
372329
"metadata": {},
@@ -439,7 +396,17 @@
439396
"cell_type": "markdown",
440397
"metadata": {},
441398
"source": [
442-
"Create a deployment. This step may take a several minutes."
399+
"#### Setup Deployment Parameters\n",
400+
"\n",
401+
"We utilize an optimized __foundation-model-inference__ container for model scoring. This container is designed to deliver high throughput and low latency using <a href=\"https://github.com/microsoft/DeepSpeed-MII\" target=\"_blank\"> Deepspeed-mii </a>. In this section, we introduce several environment variables that can be adjusted to customize a deployment for either high throughput or low latency scenarios.\n",
402+
"\n",
403+
"- __WORKER_COUNT__: The number of workers to use for inferencing. This is used as a proxy for the number of concurrent requests that the server should handle.\n",
404+
"- __TENSOR_PARALLEL__: The number of GPUs to use for tensor parallelism.\n",
405+
"- __NUM_REPLICAS__: The number of model instances to load for the deployment. This is used to increase throughput by loading multiple models on multiple GPUs, if the model is small enough to fit.\n",
406+
"\n",
407+
"`NUM_REPLICAS` and `TENSOR_PARALLEL` work hand-in-hand to determine the most optimal configuration to increase the throughput for the deployment without degrading too much on the latency. The total number of GPUs used for inference will be `NUM_REPLICAS` * `TENSOR_PARALLEL`. For example, if `NUM_REPLICAS` = 2 and `TENSOR_PARALLEL` = 2, then 4 GPUs will be used for inference. Ensure that the model you are deploying is small enough to fit on the number of GPUs you are using, specified by `TENSOR_PARALLEL`. For instance, if there are 4 GPUs available, and `TENSOR_PARALLEL` = 2, then the model must be small enough to fit on 2 GPUs. If the model is too large, then the deployment will fail.\n",
408+
"\n",
409+
"For stable diffusion model, the scoring script uses default `TENSOR_PARALLEL` = 1 and `NUM_REPLICAS` = number of GPUs in SKU for optimal balance of latency and throughput."
443410
]
444411
},
445412
{
@@ -455,15 +422,25 @@
455422
"\n",
456423
"REQUEST_TIMEOUT_MS = 90000\n",
457424
"\n",
458-
"deployment_env_vars = {\n",
425+
"acs_env_vars = {\n",
459426
" \"CONTENT_SAFETY_ACCOUNT_NAME\": aacs_name,\n",
460427
" \"CONTENT_SAFETY_ENDPOINT\": aacs_endpoint,\n",
461428
" \"CONTENT_SAFETY_KEY\": aacs_access_key if uai_client_id == \"\" else None,\n",
462429
" \"CONTENT_SAFETY_THRESHOLD\": content_severity_threshold,\n",
463430
" \"SUBSCRIPTION_ID\": subscription_id,\n",
464431
" \"RESOURCE_GROUP_NAME\": resource_group,\n",
465432
" \"UAI_CLIENT_ID\": uai_client_id,\n",
466-
"}"
433+
"}\n",
434+
"\n",
435+
"MAX_CONCURRENT_REQUESTS = (\n",
436+
" 2 # the maximum number of concurrent requests supported by the endpoint\n",
437+
")\n",
438+
"\n",
439+
"fm_container_default_env_vars = {\n",
440+
" \"WORKER_COUNT\": MAX_CONCURRENT_REQUESTS,\n",
441+
"}\n",
442+
"\n",
443+
"deployment_env_vars = {**fm_container_default_env_vars, **acs_env_vars}"
467444
]
468445
},
469446
{
@@ -474,22 +451,16 @@
474451
"source": [
475452
"from azure.ai.ml.entities import (\n",
476453
" OnlineRequestSettings,\n",
477-
" CodeConfiguration,\n",
478454
" ManagedOnlineDeployment,\n",
479455
" ProbeSettings,\n",
480456
")\n",
481457
"\n",
482-
"code_configuration = CodeConfiguration(\n",
483-
" code=\"./aacs-scoring-files/score/\", scoring_script=\"score_online.py\"\n",
484-
")\n",
485-
"\n",
486458
"deployment = ManagedOnlineDeployment(\n",
487459
" name=deployment_name,\n",
488460
" endpoint_name=endpoint_name,\n",
489461
" model=model.id,\n",
490462
" instance_type=sku_name,\n",
491463
" instance_count=1,\n",
492-
" code_configuration=code_configuration,\n",
493464
" environment_variables=deployment_env_vars,\n",
494465
" request_settings=OnlineRequestSettings(request_timeout_ms=REQUEST_TIMEOUT_MS),\n",
495466
" liveness_probe=ProbeSettings(\n",
@@ -524,24 +495,38 @@
524495
"\n",
525496
"We will fetch some sample data from the test dataset and submit to online endpoint for inference.\n",
526497
"\n",
498+
"### Supported Parameters\n",
499+
"\n",
500+
"- negative_prompt: The prompt to guide what to not include in image generation. Ignored when not using guidance (`guidance_scale < 1`).\n",
501+
"- num_inference_steps: The number of de-noising steps. More de-noising steps usually lead to a higher quality image at the expense of slower inference, defaults to 50.\n",
502+
"- guidance_scale: A higher guidance scale value encourages the model to generate images closely linked to the text `prompt` at the expense of lower image quality. Guidance scale is enabled when `guidance_scale > 1`, defaults to 7.5.\n",
503+
"\n",
504+
"> These `parameters` are optional inputs. If you need support for new parameters, please file a support ticket.\n",
505+
"\n",
527506
"The sample of input schema for inpainting task:\n",
528507
"```json\n",
529508
"{\n",
530509
" \"input_data\": {\n",
531-
" \"columns\": [\"prompt\", \"image\", \"mask_image\"],\n",
510+
" \"columns\": [\"prompt\", \"image\", \"mask_image\", \"negative_prompt\"],\n",
532511
" \"data\": [\n",
533512
" {\n",
534-
" \"prompt\": \"sample prompt\",\n",
535-
" \"image\": \"base image1\",\n",
536-
" \"mask_image\": \"mask image1\"\n",
513+
" \"prompt\": \"Face of a yellow cat, high resolution, sitting on a park bench\",\n",
514+
" \"image\": \"image1\",\n",
515+
" \"mask_image\": \"mask1\",\n",
516+
" \"negative_prompt\": \"blurry; cartoonish\"\n",
537517
" },\n",
538518
" {\n",
539-
" \"prompt\": \"sample prompt\",\n",
540-
" \"image\": \"base image2\",\n",
541-
" \"mask_image\": \"mask image2\"\n",
519+
" \"prompt\": \"Face of a green cat, high resolution, sitting on a park bench\",\n",
520+
" \"image\": \"image2\",\n",
521+
" \"mask_image\": \"mask2\",\n",
522+
" \"negative_prompt\": \"blurry; cartoonish\"\n",
542523
" }\n",
543524
" ],\n",
544-
" \"index\": [0, 1]\n",
525+
" \"index\": [0, 1],\n",
526+
" \"parameters\": {\n",
527+
" \"num_inference_steps\": 50,\n",
528+
" \"guidance_scale\": 7.5\n",
529+
" }\n",
545530
" }\n",
546531
"}\n",
547532
"```\n",
@@ -568,7 +553,7 @@
568553
"cell_type": "markdown",
569554
"metadata": {},
570555
"source": [
571-
"#### 5.1 Sample input for safe prompt."
556+
"#### 5.1.1 Sample input for safe prompt."
572557
]
573558
},
574559
{
@@ -601,7 +586,7 @@
601586
" \"mask_image\": base64.encodebytes(read_image(mask_image)).decode(\n",
602587
" \"utf-8\"\n",
603588
" ),\n",
604-
" \"prompt\": \"A cat sitting on a park bench in high resolution.\",\n",
589+
" \"prompt\": \"A yellow cat, high resolution, sitting on a park bench\",\n",
605590
" }\n",
606591
" ],\n",
607592
" }\n",
@@ -625,7 +610,26 @@
625610
" endpoint_name=endpoint.name,\n",
626611
" deployment_name=deployment.name,\n",
627612
" request_file=request_file_name,\n",
628-
")"
613+
")\n",
614+
"\n",
615+
"# Visualize the model output\n",
616+
"\n",
617+
"import io\n",
618+
"import base64\n",
619+
"from PIL import Image\n",
620+
"\n",
621+
"generations = json.loads(response)\n",
622+
"for generation in generations:\n",
623+
" print(f\"nsfw content detected: \", generation[\"nsfw_content_detected\"])\n",
624+
" img = Image.open(io.BytesIO(base64.b64decode(generation[\"generated_image\"])))\n",
625+
" display(img)"
626+
]
627+
},
628+
{
629+
"cell_type": "markdown",
630+
"metadata": {},
631+
"source": [
632+
"#### 5.1.2 Sample input for safe prompt."
629633
]
630634
},
631635
{
@@ -634,11 +638,40 @@
634638
"metadata": {},
635639
"outputs": [],
636640
"source": [
637-
"# Visualize the model output\n",
641+
"request_json = {\n",
642+
" \"input_data\": {\n",
643+
" \"columns\": [\"image\", \"mask_image\", \"prompt\", \"negative_prompt\"],\n",
644+
" \"data\": [\n",
645+
" {\n",
646+
" \"image\": base64.encodebytes(read_image(base_image)).decode(\"utf-8\"),\n",
647+
" \"mask_image\": base64.encodebytes(read_image(mask_image)).decode(\n",
648+
" \"utf-8\"\n",
649+
" ),\n",
650+
" \"prompt\": \"A yellow cat, high resolution, sitting on a park bench\",\n",
651+
" \"negative_prompt\": \"blurry; cartoonish\",\n",
652+
" }\n",
653+
" ],\n",
654+
" \"parameters\": {\"num_inference_steps\": 50, \"guidance_scale\": 7.5},\n",
655+
" }\n",
656+
"}\n",
638657
"\n",
639-
"import io\n",
640-
"import base64\n",
641-
"from PIL import Image\n",
658+
"request_file_name = \"sample_request_data.json\"\n",
659+
"\n",
660+
"with open(request_file_name, \"w\") as request_file:\n",
661+
" json.dump(request_json, request_file)"
662+
]
663+
},
664+
{
665+
"cell_type": "code",
666+
"execution_count": null,
667+
"metadata": {},
668+
"outputs": [],
669+
"source": [
670+
"response = workspace_ml_client.online_endpoints.invoke(\n",
671+
" endpoint_name=endpoint.name,\n",
672+
" deployment_name=deployment.name,\n",
673+
" request_file=request_file_name,\n",
674+
")\n",
642675
"\n",
643676
"generations = json.loads(response)\n",
644677
"for generation in generations:\n",
@@ -746,8 +779,10 @@
746779
}
747780
],
748781
"metadata": {
749-
"language_info": {
750-
"name": "ipython"
782+
"kernelspec": {
783+
"display_name": "Python 3.10 - SDK v2",
784+
"language": "python",
785+
"name": "python310-sdkv2"
751786
}
752787
},
753788
"nbformat": 4,

0 commit comments

Comments
 (0)