diff --git a/.ci/recreate_dataproc_cluster.cloudbuild.yaml b/.ci/recreate_dataproc_cluster.cloudbuild.yaml
new file mode 100644
index 000000000000..215fa662defc
--- /dev/null
+++ b/.ci/recreate_dataproc_cluster.cloudbuild.yaml
@@ -0,0 +1,49 @@
+# Copyright 2026 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# NOTE: The _CLUSTER_NAME substitution variable defined here must match the
+# DATAPROC_LIST_JOBS_CLUSTER parameter defined in .ci/integration.cloudbuild.yaml
+# (default: cluster-36).
+
+steps:
+  # Runs the helper script that deletes the cluster if present and creates it.
+  - id: "recreate-dataproc-cluster"
+    name: "gcr.io/cloud-builders/gcloud:latest"
+    entrypoint: "bash"
+    env:
+      - "PROJECT_ID=$PROJECT_ID"
+      - "CLUSTER_NAME=$_CLUSTER_NAME"
+      - "REGION=$_REGION"
+      - "IMAGE_VERSION=$_IMAGE_VERSION"
+    # Substitutions are expanded directly in `args`, so the values reach the
+    # script without depending on `$$` escaping inside a `script:` body.
+    # Argument order: project ID, region, image version, cluster name.
+    args:
+      - ".ci/recreate_dataproc_cluster.sh"
+      - "$PROJECT_ID"
+      - "$_REGION"
+      - "$_IMAGE_VERSION"
+      - "$_CLUSTER_NAME"
+
+options:
+  automapSubstitutions: true
+  dynamicSubstitutions: true
+  logging: CLOUD_LOGGING_ONLY
+  pool:
+    name: projects/$PROJECT_ID/locations/us-central1/workerPools/integration-testing
+
+substitutions:
+  _CLUSTER_NAME: "cluster-36"
+  _REGION: "us-central1"
+  _IMAGE_VERSION: "2.3-debian12"
diff --git a/.ci/recreate_dataproc_cluster.sh b/.ci/recreate_dataproc_cluster.sh
new file mode 100644
index 000000000000..2126cf2cf881
--- /dev/null
+++ b/.ci/recreate_dataproc_cluster.sh
@@ -0,0 +1,84 @@
+#!/usr/bin/env bash
+# Copyright 2026 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Recreates a Dataproc cluster: deletes it if it already exists, then creates
+# it again with the requested image version.
+#
+# Usage: recreate_dataproc_cluster.sh <PROJECT_ID> <REGION> <IMAGE_VERSION> <CLUSTER_NAME>
+
+# -u additionally turns a reference to an unset variable into an error.
+set -euo pipefail
+
+if [ $# -lt 4 ]; then
+  echo "Error: Missing required arguments." >&2
+  echo "Usage: $0 <PROJECT_ID> <REGION> <IMAGE_VERSION> <CLUSTER_NAME>" >&2
+  exit 1
+fi
+
+PROJECT_ID="$1"
+REGION="$2"
+IMAGE_VERSION="$3"
+CLUSTER_NAME="$4"
+
+SERVICE_ACCOUNT="toolbox-identity@${PROJECT_ID}.iam.gserviceaccount.com"
+
+echo "=========================================================="
+echo "Recreating Dataproc cluster in project: ${PROJECT_ID}"
+echo "Region: ${REGION}"
+echo "Image Version: ${IMAGE_VERSION}"
+echo "Cluster Name: ${CLUSTER_NAME}"
+echo "Service Account: ${SERVICE_ACCOUNT}"
+echo "=========================================================="
+
+# Check if the cluster exists, capturing stdout and stderr to distinguish
+# NOT_FOUND from other errors. errexit is suspended so a failing describe does
+# not abort the script before its exit status is recorded.
+echo "Checking if cluster '${CLUSTER_NAME}' exists..."
+set +e
+DESCRIBE_OUT=$(gcloud dataproc clusters describe "${CLUSTER_NAME}" --region="${REGION}" --project="${PROJECT_ID}" 2>&1)
+DESCRIBE_STATUS=$?
+set -e
+
+if [ "${DESCRIBE_STATUS}" -eq 0 ]; then
+  echo "Cluster '${CLUSTER_NAME}' exists. Deleting it..."
+  gcloud dataproc clusters delete "${CLUSTER_NAME}" \
+    --region="${REGION}" \
+    --project="${PROJECT_ID}" \
+    --quiet
+  echo "Cluster '${CLUSTER_NAME}' deleted successfully."
+# Match inside the shell rather than piping through `grep -q`: under pipefail a
+# pipeline also fails when `echo` is killed by SIGPIPE after grep exits early.
+elif [[ "${DESCRIBE_OUT}" == *"NOT_FOUND"* ]]; then
+  echo "Cluster '${CLUSTER_NAME}' does not exist. Skipping deletion."
+else
+  echo "Error querying cluster existence: ${DESCRIBE_OUT}" >&2
+  exit "${DESCRIBE_STATUS}"
+fi
+
+# Create the cluster
+echo "Creating Dataproc cluster '${CLUSTER_NAME}'..."
+gcloud dataproc clusters create "${CLUSTER_NAME}" \
+  --region="${REGION}" \
+  --project="${PROJECT_ID}" \
+  --image-version="${IMAGE_VERSION}" \
+  --service-account="${SERVICE_ACCOUNT}" \
+  --scopes=cloud-platform \
+  --no-address \
+  --network=default \
+  --master-machine-type=n4-standard-2 \
+  --worker-machine-type=n4-standard-2 \
+  --num-workers=2
+
+echo "Cluster '${CLUSTER_NAME}' created successfully."