|
324 | 324 | ")"
|
325 | 325 | ]
|
326 | 326 | },
|
327 |
| - { |
328 |
| - "cell_type": "markdown", |
329 |
| - "metadata": {}, |
330 |
| - "source": [ |
331 |
| - "#### 3.1 Register Model in Workspace\n", |
332 |
| - "\n", |
333 |
| - "The above retrieved model from `azureml` registry will be registered within the user’s workspace. This registration will maintain the original name of the model, assign a unique version identifier (corresponding to the first field of the UUID), and label it as the “latest” version. Please note that this step take several minutes." |
334 |
| - ] |
335 |
| - }, |
336 |
| - { |
337 |
| - "cell_type": "code", |
338 |
| - "execution_count": null, |
339 |
| - "metadata": {}, |
340 |
| - "outputs": [], |
341 |
| - "source": [ |
342 |
| - "local_model_path = \"local_model\"\n", |
343 |
| - "\n", |
344 |
| - "registry_ml_client.models.download(\n", |
345 |
| - " name=model.name, version=model.version, download_path=local_model_path\n", |
346 |
| - ")" |
347 |
| - ] |
348 |
| - }, |
349 |
| - { |
350 |
| - "cell_type": "code", |
351 |
| - "execution_count": null, |
352 |
| - "metadata": {}, |
353 |
| - "outputs": [], |
354 |
| - "source": [ |
355 |
| - "from azure.ai.ml.entities import Model\n", |
356 |
| - "from azure.ai.ml.constants import AssetTypes\n", |
357 |
| - "import os\n", |
358 |
| - "\n", |
359 |
| - "local_model = Model(\n", |
360 |
| - " path=os.path.join(local_model_path, model.name, \"mlflow_model_folder\"),\n", |
361 |
| - " type=AssetTypes.MLFLOW_MODEL,\n", |
362 |
| - " name=model.name,\n", |
363 |
| - " version=str(uuid4().fields[0]),\n", |
364 |
| - " description=\"Model created from local file for text to image deployment.\",\n", |
365 |
| - ")\n", |
366 |
| - "\n", |
367 |
| - "model = workspace_ml_client.models.create_or_update(local_model)" |
368 |
| - ] |
369 |
| - }, |
370 | 327 | {
|
371 | 328 | "cell_type": "markdown",
|
372 | 329 | "metadata": {},
|
|
439 | 396 | "cell_type": "markdown",
|
440 | 397 | "metadata": {},
|
441 | 398 | "source": [
|
442 |
| - "Create a deployment. This step may take a several minutes." |
| 399 | + "#### Setup Deployment Parameters\n", |
| 400 | + "\n", |
| 401 | + "We utilize an optimized __foundation-model-inference__ container for model scoring. This container is designed to deliver high throughput and low latency using <a href=\"https://github.com/microsoft/DeepSpeed-MII\" target=\"_blank\"> Deepspeed-mii </a>. In this section, we introduce several environment variables that can be adjusted to customize a deployment for either high throughput or low latency scenarios.\n", |
| 402 | + "\n", |
| 403 | + "- __WORKER_COUNT__: The number of workers to use for inferencing. This is used as a proxy for the number of concurrent requests that the server should handle.\n", |
| 404 | + "- __TENSOR_PARALLEL__: The number of GPUs to use for tensor parallelism.\n", |
| 405 | + "- __NUM_REPLICAS__: The number of model instances to load for the deployment. This is used to increase throughput by loading multiple models on multiple GPUs, if the model is small enough to fit.\n", |
| 406 | + "\n", |
| 407 | + "`NUM_REPLICAS` and `TENSOR_PARALLEL` work hand-in-hand to determine the most optimal configuration to increase the throughput for the deployment without degrading too much on the latency. The total number of GPUs used for inference will be `NUM_REPLICAS` * `TENSOR_PARALLEL`. For example, if `NUM_REPLICAS` = 2 and `TENSOR_PARALLEL` = 2, then 4 GPUs will be used for inference. Ensure that the model you are deploying is small enough to fit on the number of GPUs you are using, specified by `TENSOR_PARALLEL`. For instance, if there are 4 GPUs available, and `TENSOR_PARALLEL` = 2, then the model must be small enough to fit on 2 GPUs. If the model is too large, then the deployment will fail.\n", |
| 408 | + "\n", |
| 409 | + "For the stable diffusion model, the scoring script uses the default `TENSOR_PARALLEL` = 1 and `NUM_REPLICAS` = number of GPUs in the SKU for an optimal balance of latency and throughput." |
443 | 410 | ]
|
444 | 411 | },
|
445 | 412 | {
|
|
455 | 422 | "\n",
|
456 | 423 | "REQUEST_TIMEOUT_MS = 90000\n",
|
457 | 424 | "\n",
|
458 |
| - "deployment_env_vars = {\n", |
| 425 | + "acs_env_vars = {\n", |
459 | 426 | " \"CONTENT_SAFETY_ACCOUNT_NAME\": aacs_name,\n",
|
460 | 427 | " \"CONTENT_SAFETY_ENDPOINT\": aacs_endpoint,\n",
|
461 | 428 | " \"CONTENT_SAFETY_KEY\": aacs_access_key if uai_client_id == \"\" else None,\n",
|
462 | 429 | " \"CONTENT_SAFETY_THRESHOLD\": content_severity_threshold,\n",
|
463 | 430 | " \"SUBSCRIPTION_ID\": subscription_id,\n",
|
464 | 431 | " \"RESOURCE_GROUP_NAME\": resource_group,\n",
|
465 | 432 | " \"UAI_CLIENT_ID\": uai_client_id,\n",
|
466 |
| - "}" |
| 433 | + "}\n", |
| 434 | + "\n", |
| 435 | + "MAX_CONCURRENT_REQUESTS = (\n", |
| 436 | + " 2 # the maximum number of concurrent requests supported by the endpoint\n", |
| 437 | + ")\n", |
| 438 | + "\n", |
| 439 | + "fm_container_default_env_vars = {\n", |
| 440 | + " \"WORKER_COUNT\": MAX_CONCURRENT_REQUESTS,\n", |
| 441 | + "}\n", |
| 442 | + "\n", |
| 443 | + "deployment_env_vars = {**fm_container_default_env_vars, **acs_env_vars}" |
467 | 444 | ]
|
468 | 445 | },
|
469 | 446 | {
|
|
474 | 451 | "source": [
|
475 | 452 | "from azure.ai.ml.entities import (\n",
|
476 | 453 | " OnlineRequestSettings,\n",
|
477 |
| - " CodeConfiguration,\n", |
478 | 454 | " ManagedOnlineDeployment,\n",
|
479 | 455 | " ProbeSettings,\n",
|
480 | 456 | ")\n",
|
481 | 457 | "\n",
|
482 |
| - "code_configuration = CodeConfiguration(\n", |
483 |
| - " code=\"./aacs-scoring-files/score/\", scoring_script=\"score_online.py\"\n", |
484 |
| - ")\n", |
485 |
| - "\n", |
486 | 458 | "deployment = ManagedOnlineDeployment(\n",
|
487 | 459 | " name=deployment_name,\n",
|
488 | 460 | " endpoint_name=endpoint_name,\n",
|
489 | 461 | " model=model.id,\n",
|
490 | 462 | " instance_type=sku_name,\n",
|
491 | 463 | " instance_count=1,\n",
|
492 |
| - " code_configuration=code_configuration,\n", |
493 | 464 | " environment_variables=deployment_env_vars,\n",
|
494 | 465 | " request_settings=OnlineRequestSettings(request_timeout_ms=REQUEST_TIMEOUT_MS),\n",
|
495 | 466 | " liveness_probe=ProbeSettings(\n",
|
|
524 | 495 | "\n",
|
525 | 496 | "We will fetch some sample data from the test dataset and submit to online endpoint for inference.\n",
|
526 | 497 | "\n",
|
| 498 | + "### Supported Parameters\n", |
| 499 | + "\n", |
| 500 | + "- negative_prompt: The prompt to guide what not to include in image generation. Ignored when not using guidance (`guidance_scale < 1`).\n", |
| 501 | + "- num_inference_steps: The number of de-noising steps. More de-noising steps usually lead to a higher quality image at the expense of slower inference, defaults to 50.\n", |
| 502 | + "- guidance_scale: A higher guidance scale value encourages the model to generate images closely linked to the text `prompt` at the expense of lower image quality. Guidance scale is enabled when `guidance_scale > 1`, defaults to 7.5.\n", |
| 503 | + "\n", |
| 504 | + "> These `parameters` are optional inputs. If you need support for new parameters, please file a support ticket.\n", |
| 505 | + "\n", |
527 | 506 | "The sample of input schema for inpainting task:\n",
|
528 | 507 | "```json\n",
|
529 | 508 | "{\n",
|
530 | 509 | " \"input_data\": {\n",
|
531 |
| - " \"columns\": [\"prompt\", \"image\", \"mask_image\"],\n", |
| 510 | + " \"columns\": [\"prompt\", \"image\", \"mask_image\", \"negative_prompt\"],\n", |
532 | 511 | " \"data\": [\n",
|
533 | 512 | " {\n",
|
534 |
| - " \"prompt\": \"sample prompt\",\n", |
535 |
| - " \"image\": \"base image1\",\n", |
536 |
| - " \"mask_image\": \"mask image1\"\n", |
| 513 | + " \"prompt\": \"Face of a yellow cat, high resolution, sitting on a park bench\",\n", |
| 514 | + " \"image\": \"image1\",\n", |
| 515 | + " \"mask_image\": \"mask1\",\n", |
| 516 | + " \"negative_prompt\": \"blurry; cartoonish\"\n", |
537 | 517 | " },\n",
|
538 | 518 | " {\n",
|
539 |
| - " \"prompt\": \"sample prompt\",\n", |
540 |
| - " \"image\": \"base image2\",\n", |
541 |
| - " \"mask_image\": \"mask image2\"\n", |
| 519 | + " \"prompt\": \"Face of a green cat, high resolution, sitting on a park bench\",\n", |
| 520 | + " \"image\": \"image2\",\n", |
| 521 | + " \"mask_image\": \"mask2\",\n", |
| 522 | + " \"negative_prompt\": \"blurry; cartoonish\"\n", |
542 | 523 | " }\n",
|
543 | 524 | " ],\n",
|
544 |
| - " \"index\": [0, 1]\n", |
| 525 | + " \"index\": [0, 1],\n", |
| 526 | + " \"parameters\": {\n", |
| 527 | + " \"num_inference_steps\": 50,\n", |
| 528 | + " \"guidance_scale\": 7.5\n", |
| 529 | + " }\n", |
545 | 530 | " }\n",
|
546 | 531 | "}\n",
|
547 | 532 | "```\n",
|
|
568 | 553 | "cell_type": "markdown",
|
569 | 554 | "metadata": {},
|
570 | 555 | "source": [
|
571 |
| - "#### 5.1 Sample input for safe prompt." |
| 556 | + "#### 5.1.1 Sample input for safe prompt." |
572 | 557 | ]
|
573 | 558 | },
|
574 | 559 | {
|
|
601 | 586 | " \"mask_image\": base64.encodebytes(read_image(mask_image)).decode(\n",
|
602 | 587 | " \"utf-8\"\n",
|
603 | 588 | " ),\n",
|
604 |
| - " \"prompt\": \"A cat sitting on a park bench in high resolution.\",\n", |
| 589 | + " \"prompt\": \"A yellow cat, high resolution, sitting on a park bench\",\n", |
605 | 590 | " }\n",
|
606 | 591 | " ],\n",
|
607 | 592 | " }\n",
|
|
625 | 610 | " endpoint_name=endpoint.name,\n",
|
626 | 611 | " deployment_name=deployment.name,\n",
|
627 | 612 | " request_file=request_file_name,\n",
|
628 |
| - ")" |
| 613 | + ")\n", |
| 614 | + "\n", |
| 615 | + "# Visualize the model output\n", |
| 616 | + "\n", |
| 617 | + "import io\n", |
| 618 | + "import base64\n", |
| 619 | + "from PIL import Image\n", |
| 620 | + "\n", |
| 621 | + "generations = json.loads(response)\n", |
| 622 | + "for generation in generations:\n", |
| 623 | + " print(f\"nsfw content detected: \", generation[\"nsfw_content_detected\"])\n", |
| 624 | + " img = Image.open(io.BytesIO(base64.b64decode(generation[\"generated_image\"])))\n", |
| 625 | + " display(img)" |
| 626 | + ] |
| 627 | + }, |
| 628 | + { |
| 629 | + "cell_type": "markdown", |
| 630 | + "metadata": {}, |
| 631 | + "source": [ |
| 632 | + "#### 5.1.2 Sample input for safe prompt with additional parameters." |
629 | 633 | ]
|
630 | 634 | },
|
631 | 635 | {
|
|
634 | 638 | "metadata": {},
|
635 | 639 | "outputs": [],
|
636 | 640 | "source": [
|
637 |
| - "# Visualize the model output\n", |
| 641 | + "request_json = {\n", |
| 642 | + " \"input_data\": {\n", |
| 643 | + " \"columns\": [\"image\", \"mask_image\", \"prompt\", \"negative_prompt\"],\n", |
| 644 | + " \"data\": [\n", |
| 645 | + " {\n", |
| 646 | + " \"image\": base64.encodebytes(read_image(base_image)).decode(\"utf-8\"),\n", |
| 647 | + " \"mask_image\": base64.encodebytes(read_image(mask_image)).decode(\n", |
| 648 | + " \"utf-8\"\n", |
| 649 | + " ),\n", |
| 650 | + " \"prompt\": \"A yellow cat, high resolution, sitting on a park bench\",\n", |
| 651 | + " \"negative_prompt\": \"blurry; cartoonish\",\n", |
| 652 | + " }\n", |
| 653 | + " ],\n", |
| 654 | + " \"parameters\": {\"num_inference_steps\": 50, \"guidance_scale\": 7.5},\n", |
| 655 | + " }\n", |
| 656 | + "}\n", |
638 | 657 | "\n",
|
639 |
| - "import io\n", |
640 |
| - "import base64\n", |
641 |
| - "from PIL import Image\n", |
| 658 | + "request_file_name = \"sample_request_data.json\"\n", |
| 659 | + "\n", |
| 660 | + "with open(request_file_name, \"w\") as request_file:\n", |
| 661 | + " json.dump(request_json, request_file)" |
| 662 | + ] |
| 663 | + }, |
| 664 | + { |
| 665 | + "cell_type": "code", |
| 666 | + "execution_count": null, |
| 667 | + "metadata": {}, |
| 668 | + "outputs": [], |
| 669 | + "source": [ |
| 670 | + "response = workspace_ml_client.online_endpoints.invoke(\n", |
| 671 | + " endpoint_name=endpoint.name,\n", |
| 672 | + " deployment_name=deployment.name,\n", |
| 673 | + " request_file=request_file_name,\n", |
| 674 | + ")\n", |
642 | 675 | "\n",
|
643 | 676 | "generations = json.loads(response)\n",
|
644 | 677 | "for generation in generations:\n",
|
|
746 | 779 | }
|
747 | 780 | ],
|
748 | 781 | "metadata": {
|
749 |
| - "language_info": { |
750 |
| - "name": "ipython" |
| 782 | + "kernelspec": { |
| 783 | + "display_name": "Python 3.10 - SDK v2", |
| 784 | + "language": "python", |
| 785 | + "name": "python310-sdkv2" |
751 | 786 | }
|
752 | 787 | },
|
753 | 788 | "nbformat": 4,
|
|
0 commit comments