diff --git a/.github/workflows/check-release.yaml b/.github/workflows/check-release.yaml index 296b998157..c8022e934b 100644 --- a/.github/workflows/check-release.yaml +++ b/.github/workflows/check-release.yaml @@ -7,6 +7,9 @@ on: paths: - VERSION +permissions: + contents: read + concurrency: group: ${{ github.workflow }}-${{ github.ref }} cancel-in-progress: true @@ -20,7 +23,7 @@ jobs: steps: - name: Checkout source code - uses: actions/checkout@v5 + uses: actions/checkout@v5.0.0 with: fetch-depth: 0 diff --git a/.github/workflows/integration.yaml b/.github/workflows/integration.yaml index 26c4277ba1..afa404b202 100644 --- a/.github/workflows/integration.yaml +++ b/.github/workflows/integration.yaml @@ -11,6 +11,9 @@ on: - master - release-* +permissions: + contents: read + concurrency: group: ${{ github.workflow }}-${{ github.ref }}-${{ github.actor }} cancel-in-progress: true @@ -20,12 +23,12 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout source code - uses: actions/checkout@v5 + uses: actions/checkout@v5.0.0 with: fetch-depth: 0 - name: Set up Go - uses: actions/setup-go@v5 + uses: actions/setup-go@v6 with: go-version-file: go.mod @@ -77,12 +80,12 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout source code - uses: actions/checkout@v5 + uses: actions/checkout@v5.0.0 with: fetch-depth: 0 - name: Set up Go - uses: actions/setup-go@v5 + uses: actions/setup-go@v6 with: go-version-file: go.mod @@ -99,12 +102,12 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout source code - uses: actions/checkout@v5 + uses: actions/checkout@v5.0.0 with: fetch-depth: 0 - name: Set up Go - uses: actions/setup-go@v5 + uses: actions/setup-go@v6 with: go-version-file: go.mod @@ -130,7 +133,7 @@ jobs: echo "BRANCH=$BRANCH" >> "$GITHUB_OUTPUT" - name: Checkout source code - uses: actions/checkout@v5 + uses: actions/checkout@v5.0.0 with: fetch-depth: 0 @@ -146,7 +149,7 @@ jobs: version: ${{ steps.get_helm_version.outputs.HELM_VERSION }} - name: Set up chart-testing - uses: helm/chart-testing-action@v2.7.0 + uses: helm/chart-testing-action@v2.8.0 - name: Generate manifests run: | @@ -220,12 +223,12 @@ jobs: - v1.32.0 steps: - name: Checkout source code - uses: actions/checkout@v5 + uses: actions/checkout@v5.0.0 with: fetch-depth: 0 - name: Set up Go - uses: actions/setup-go@v5 + uses: actions/setup-go@v6 with: go-version-file: go.mod diff --git a/.github/workflows/release-helm-charts.yaml b/.github/workflows/release-helm-charts.yaml index f1ff28345b..50f675c87f 100644 --- a/.github/workflows/release-helm-charts.yaml +++ b/.github/workflows/release-helm-charts.yaml @@ -23,7 +23,7 @@ jobs: steps: - name: Checkout source code - uses: actions/checkout@v5 + uses: actions/checkout@v5.0.0 - name: Configure Git run: | @@ -71,7 +71,7 @@ jobs: cp *.tgz /tmp/charts - name: Checkout to branch gh-pages - uses: actions/checkout@v5 + uses: actions/checkout@v5.0.0 with: ref: gh-pages fetch-depth: 0 diff --git a/.github/workflows/release-latest-images.yaml b/.github/workflows/release-latest-images.yaml new file mode 100644 index 0000000000..d6fdb19a97 --- /dev/null +++ b/.github/workflows/release-latest-images.yaml @@ -0,0 +1,263 @@ +name: Release Latest Images + +on: + push: + branches: + - master + +permissions: + contents: read + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }}-${{ github.actor }} + cancel-in-progress: true + +env: + IMAGE_REGISTRY: ghcr.io + OPERATOR_IMAGE_REPOSITORY: ${{ github.repository }}/controller + KUBECTL_IMAGE_REPOSITORY: ${{ github.repository }}/kubectl + 
KUBECTL_VERSION: 1.33.2 + +jobs: + build_operator_images: + runs-on: ubuntu-latest + + permissions: + contents: read + packages: write + + strategy: + fail-fast: false + matrix: + platform: + - linux/amd64 + - linux/arm64 + + steps: + - name: Prepare + run: | + platform=${{ matrix.platform }} + echo "PLATFORM_PAIR=${platform//\//-}" >> $GITHUB_ENV + + - name: Checkout source code + uses: actions/checkout@v5 + + - name: Docker meta + id: meta + uses: docker/metadata-action@v5 + with: + images: ${{ env.IMAGE_REGISTRY }}/${{ env.OPERATOR_IMAGE_REPOSITORY }} + tags: | + type=raw,pattern={{version}},value=latest + + - name: Set up QEMU + uses: docker/setup-qemu-action@v3 + + - name: Set up Docker buildx + uses: docker/setup-buildx-action@v3 + + - name: Login to container registry + uses: docker/login-action@v3 + with: + registry: ${{ env.IMAGE_REGISTRY }} + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Build and push by digest + id: build + uses: docker/build-push-action@v6 + with: + platforms: ${{ matrix.platform }} + labels: ${{ steps.meta.outputs.labels }} + outputs: type=image,name=${{ env.IMAGE_REGISTRY }}/${{ env.OPERATOR_IMAGE_REPOSITORY }},push-by-digest=true,name-canonical=true,push=true + + - name: Export digest + run: | + mkdir -p /tmp/digests + digest="${{ steps.build.outputs.digest }}" + touch "/tmp/digests/${digest#sha256:}" + + - name: Upload digest + uses: actions/upload-artifact@v6 + with: + name: digests-operator-${{ env.PLATFORM_PAIR }} + path: /tmp/digests/* + if-no-files-found: error + retention-days: 1 + + build_kubectl_images: + runs-on: ubuntu-latest + + permissions: + contents: read + packages: write + + strategy: + fail-fast: false + matrix: + platform: + - linux/amd64 + - linux/arm64 + + steps: + - name: Prepare + run: | + platform=${{ matrix.platform }} + echo "PLATFORM_PAIR=${platform//\//-}" >> $GITHUB_ENV + + - name: Checkout source code + uses: actions/checkout@v5 + + - name: Docker meta + id: meta + uses: docker/metadata-action@v5 + with: + images: ${{ env.IMAGE_REGISTRY }}/${{ env.KUBECTL_IMAGE_REPOSITORY }} + tags: | + type=raw,value=latest + + - name: Set up QEMU + uses: docker/setup-qemu-action@v3 + + - name: Set up Docker buildx + uses: docker/setup-buildx-action@v3 + + - name: Login to container registry + uses: docker/login-action@v3 + with: + registry: ${{ env.IMAGE_REGISTRY }} + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Build and push by digest + id: build + uses: docker/build-push-action@v6 + with: + file: docker/Dockerfile.kubectl + platforms: ${{ matrix.platform }} + build-args: | + KUBECTL_VERSION=${{ env.KUBECTL_VERSION }} + labels: ${{ steps.meta.outputs.labels }} + outputs: type=image,name=${{ env.IMAGE_REGISTRY }}/${{ env.KUBECTL_IMAGE_REPOSITORY }},push-by-digest=true,name-canonical=true,push=true + + - name: Export digest + run: | + mkdir -p /tmp/digests + digest="${{ steps.build.outputs.digest }}" + touch "/tmp/digests/${digest#sha256:}" + + - name: Upload digest + uses: actions/upload-artifact@v6 + with: + name: digests-kubectl-${{ env.PLATFORM_PAIR }} + path: /tmp/digests/* + if-no-files-found: error + retention-days: 1 + + release_operator_images: + needs: + - build_operator_images + + runs-on: ubuntu-latest + + permissions: + contents: read + packages: write + + steps: + - name: Checkout source code + uses: actions/checkout@v5 + + - name: Read version from VERSION file + run: | + VERSION=$(cat VERSION) + echo "VERSION=${VERSION}" >> $GITHUB_ENV + + - name: Docker 
meta + id: meta + uses: docker/metadata-action@v5 + with: + images: ${{ env.IMAGE_REGISTRY }}/${{ env.OPERATOR_IMAGE_REPOSITORY }} + tags: | + type=raw,value=latest + + - name: Download digests + uses: actions/download-artifact@v7 + with: + path: /tmp/digests + pattern: digests-operator-* + merge-multiple: true + + - name: Set up Docker buildx + uses: docker/setup-buildx-action@v3 + + - name: Login to container registry + uses: docker/login-action@v3 + with: + registry: ${{ env.IMAGE_REGISTRY }} + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Create manifest list and push + working-directory: /tmp/digests + run: | + docker buildx imagetools create $(jq -cr '.tags | map("-t " + .) | join(" ")' <<< "$DOCKER_METADATA_OUTPUT_JSON") \ + $(printf '${{ env.IMAGE_REGISTRY }}/${{ env.OPERATOR_IMAGE_REPOSITORY }}@sha256:%s ' *) + + - name: Inspect image + run: | + docker buildx imagetools inspect ${{ env.IMAGE_REGISTRY }}/${{ env.OPERATOR_IMAGE_REPOSITORY }}:${{ steps.meta.outputs.version }} + + release_kubectl_images: + needs: + - build_kubectl_images + + runs-on: ubuntu-latest + + permissions: + contents: read + packages: write + + steps: + - name: Checkout source code + uses: actions/checkout@v5 + + - name: Read version from VERSION file + run: | + VERSION=$(cat VERSION) + echo "VERSION=${VERSION}" >> $GITHUB_ENV + + - name: Docker meta + id: meta + uses: docker/metadata-action@v5 + with: + images: ${{ env.IMAGE_REGISTRY }}/${{ env.KUBECTL_IMAGE_REPOSITORY }} + tags: | + type=raw,value=latest + + - name: Download digests + uses: actions/download-artifact@v7 + with: + path: /tmp/digests + pattern: digests-kubectl-* + merge-multiple: true + + - name: Set up Docker buildx + uses: docker/setup-buildx-action@v3 + + - name: Login to container registry + uses: docker/login-action@v3 + with: + registry: ${{ env.IMAGE_REGISTRY }} + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Create manifest list and push + working-directory: /tmp/digests + run: | + docker buildx imagetools create $(jq -cr '.tags | map("-t " + .) 
| join(" ")' <<< "$DOCKER_METADATA_OUTPUT_JSON") \ + $(printf '${{ env.IMAGE_REGISTRY }}/${{ env.KUBECTL_IMAGE_REPOSITORY }}@sha256:%s ' *) + + - name: Inspect image + run: | + docker buildx imagetools inspect ${{ env.IMAGE_REGISTRY }}/${{ env.KUBECTL_IMAGE_REPOSITORY }}:${{ steps.meta.outputs.version }} diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index c8f487b5e2..8a3a668fc2 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -7,6 +7,9 @@ on: paths: - VERSION +permissions: + contents: read + concurrency: group: ${{ github.workflow }}-${{ github.ref }} cancel-in-progress: true @@ -24,7 +27,7 @@ jobs: steps: - name: Checkout source code - uses: actions/checkout@v5 + uses: actions/checkout@v5.0.0 with: fetch-depth: 0 @@ -73,6 +76,10 @@ jobs: runs-on: ubuntu-latest + permissions: + contents: read + packages: write + strategy: fail-fast: false matrix: @@ -87,7 +94,7 @@ jobs: echo "PLATFORM_PAIR=${platform//\//-}" >> $GITHUB_ENV - name: Checkout source code - uses: actions/checkout@v5 + uses: actions/checkout@v5.0.0 - name: Read version from VERSION file run: | @@ -130,7 +137,7 @@ jobs: touch "/tmp/digests/${digest#sha256:}" - name: Upload digest - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@v6 with: name: digests-operator-${{ env.PLATFORM_PAIR }} path: /tmp/digests/* @@ -143,6 +150,10 @@ jobs: runs-on: ubuntu-latest + permissions: + contents: read + packages: write + strategy: fail-fast: false matrix: @@ -157,7 +168,7 @@ jobs: echo "PLATFORM_PAIR=${platform//\//-}" >> $GITHUB_ENV - name: Checkout source code - uses: actions/checkout@v5 + uses: actions/checkout@v5.0.0 - name: Read version from VERSION file run: | @@ -203,7 +214,7 @@ jobs: touch "/tmp/digests/${digest#sha256:}" - name: Upload digest - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@v6 with: name: digests-kubectl-${{ env.PLATFORM_PAIR }} path: /tmp/digests/* @@ -216,9 +227,13 @@ jobs: runs-on: ubuntu-latest + permissions: + contents: read + packages: write + steps: - name: Checkout source code - uses: actions/checkout@v5 + uses: actions/checkout@v5.0.0 - name: Read version from VERSION file run: | @@ -234,7 +249,7 @@ jobs: type=semver,pattern={{version}},value=${{ env.VERSION }} - name: Download digests - uses: actions/download-artifact@v5 + uses: actions/download-artifact@v7 with: path: /tmp/digests pattern: digests-operator-* @@ -266,9 +281,13 @@ jobs: runs-on: ubuntu-latest + permissions: + contents: read + packages: write + steps: - name: Checkout source code - uses: actions/checkout@v5 + uses: actions/checkout@v5.0.0 - name: Read version from VERSION file run: | @@ -284,7 +303,7 @@ jobs: type=semver,pattern={{version}},value=${{ env.VERSION }} - name: Download digests - uses: actions/download-artifact@v5 + uses: actions/download-artifact@v7 with: path: /tmp/digests pattern: digests-kubectl-* @@ -317,9 +336,12 @@ jobs: runs-on: ubuntu-latest + permissions: + contents: write + steps: - name: Checkout source code - uses: actions/checkout@v5 + uses: actions/checkout@v5.0.0 with: fetch-depth: 0 @@ -349,7 +371,7 @@ jobs: steps: - name: Checkout - uses: actions/checkout@v5 + uses: actions/checkout@v5.0.0 - name: Configure Git run: | diff --git a/.github/workflows/scorecard.yaml b/.github/workflows/scorecard.yaml new file mode 100644 index 0000000000..81dd2953a6 --- /dev/null +++ b/.github/workflows/scorecard.yaml @@ -0,0 +1,72 @@ +# This workflow uses actions that are not certified by GitHub. 
They are provided +# by a third-party and are governed by separate terms of service, privacy +# policy, and support documentation. + +name: Scorecard supply-chain security +on: + # For Branch-Protection check. Only the default branch is supported. See + # https://github.com/ossf/scorecard/blob/main/docs/checks.md#branch-protection + branch_protection_rule: + # To guarantee Maintained check is occasionally updated. See + # https://github.com/ossf/scorecard/blob/main/docs/checks.md#maintained + schedule: + - cron: '16 21 * * 1' + push: + branches: [ "master" ] + +# Declare default permissions as read only. +permissions: read-all + +jobs: + analysis: + name: Scorecard analysis + runs-on: ubuntu-latest + permissions: + # Needed to upload the results to code-scanning dashboard. + security-events: write + # Needed to publish results and get a badge (see publish_results below). + id-token: write + # Uncomment the permissions below if installing in a private repository. + # contents: read + # actions: read + + steps: + - name: "Checkout code" + uses: actions/checkout@ff7abcd0c3c05ccf6adc123a8cd1fd4fb30fb493 # v4.2.2 + with: + persist-credentials: false + + - name: "Run analysis" + uses: ossf/scorecard-action@4eaacf0543bb3f2c246792bd56e8cdeffafb205a # v2.4.3 + with: + results_file: results.sarif + results_format: sarif + # (Optional) "write" PAT token. Uncomment the `repo_token` line below if: + # - you want to enable the Branch-Protection check on a *public* repository, or + # - you are installing Scorecard on a *private* repository + # To create the PAT, follow the steps in https://github.com/ossf/scorecard-action#authentication-with-pat. + # repo_token: ${{ secrets.SCORECARD_TOKEN }} + + # Public repositories: + # - Publish results to OpenSSF REST API for easy access by consumers + # - Allows the repository to include the Scorecard badge. + # - See https://github.com/ossf/scorecard-action#publishing-results. + # For private repositories: + # - `publish_results` will always be set to `false`, regardless + # of the value entered here. + publish_results: true + + # Upload the results as artifacts (optional). Commenting out will disable uploads of run results in SARIF + # format to the repository Actions tab. + - name: "Upload artifact" + uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0 + with: + name: SARIF file + path: results.sarif + retention-days: 5 + + # Upload the results to GitHub's code scanning dashboard. 
+ - name: "Upload to code-scanning" + uses: github/codeql-action/upload-sarif@1b168cd39490f61582a9beae412bb7057a6b2c4e # v4.31.8 + with: + sarif_file: results.sarif diff --git a/.github/workflows/stale.yaml b/.github/workflows/stale.yaml index 5082cb7f8c..dc4f673db4 100644 --- a/.github/workflows/stale.yaml +++ b/.github/workflows/stale.yaml @@ -13,7 +13,7 @@ jobs: pull-requests: write steps: - - uses: actions/stale@v9 + - uses: actions/stale@v10 with: repo-token: ${{ secrets.GITHUB_TOKEN }} days-before-stale: 90 diff --git a/.github/workflows/trivy-image-scanning.yaml b/.github/workflows/trivy-image-scanning.yaml index d7d77f319e..6d017d4889 100644 --- a/.github/workflows/trivy-image-scanning.yaml +++ b/.github/workflows/trivy-image-scanning.yaml @@ -7,15 +7,20 @@ on: jobs: image-scanning: runs-on: ubuntu-latest + + permissions: + contents: read + security-events: write + steps: - name: Checkout code - uses: actions/checkout@v5 + uses: actions/checkout@v5.0.0 - name: Add image to environment run: make print-IMAGE >> $GITHUB_ENV - name: trivy scan for github security tab - uses: aquasecurity/trivy-action@0.32.0 + uses: aquasecurity/trivy-action@0.33.1 with: image-ref: '${{ env.IMAGE }}' format: 'sarif' @@ -26,7 +31,7 @@ jobs: timeout: 30m0s - name: Upload Trivy scan results to GitHub Security tab - uses: github/codeql-action/upload-sarif@v3 + uses: github/codeql-action/upload-sarif@v4.31.8 if: always() with: sarif_file: 'trivy-results.sarif' diff --git a/.gitignore b/.gitignore index 38996e3b2b..8617d20996 100644 --- a/.gitignore +++ b/.gitignore @@ -3,5 +3,6 @@ bin/ codecov.yaml cover.out +cover.html .DS_Store *.iml diff --git a/CHANGELOG.md b/CHANGELOG.md index 1cc3d6b454..1b373df039 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,85 @@ # Changelog +## [v2.4.0](https://github.com/kubeflow/spark-operator/tree/v2.4.0) (2025-11-14) + +### Features + +- fix(chart): add revisionHistoryLimit option ([#2625](https://github.com/kubeflow/spark-operator/pull/2625) by [@t3mi](https://github.com/t3mi)) +- Suspend/Resume feature on SparkApplication ([#2387](https://github.com/kubeflow/spark-operator/pull/2387) by [@everpeace](https://github.com/everpeace)) +- Set ControllerReference on driver pod and non-Controller OwnerReference on executor pod at submission time ([#2670](https://github.com/kubeflow/spark-operator/pull/2670) by [@everpeace](https://github.com/everpeace)) +- added SparkApp name validator to accept valid DNS-1035 format ([#2711](https://github.com/kubeflow/spark-operator/pull/2711) by [@aryankumar04](https://github.com/aryankumar04)) +- No propagate Kueue labels to driver and executor pods ([#2714](https://github.com/kubeflow/spark-operator/pull/2714) by [@everpeace](https://github.com/everpeace)) +- feat: add support for customizing connect service ([#2709](https://github.com/kubeflow/spark-operator/pull/2709) by [@ChenYi015](https://github.com/ChenYi015)) + +### Bug Fixes + +- Grant get/patch permissions for SparkConnect CRD to hook ClusterRole ([#2605](https://github.com/kubeflow/spark-operator/pull/2605) by [@ChenYi015](https://github.com/ChenYi015)) +- Grant create events permissions to Controller ([#2616](https://github.com/kubeflow/spark-operator/pull/2616) by [@Rockawear](https://github.com/Rockawear)) +- fix(connect): propagate executor memory setting to spark options ([#2656](https://github.com/kubeflow/spark-operator/pull/2656) by [@mrjoe7](https://github.com/mrjoe7)) +- [fix] add miss cli params. 
Add leader election params in chart ([#2657](https://github.com/kubeflow/spark-operator/pull/2657) by [@aagumin](https://github.com/aagumin)) +- fix: webhook encoder configuration ([#2664](https://github.com/kubeflow/spark-operator/pull/2664) by [@pvbouwel](https://github.com/pvbouwel)) +- bugfix: hook.upgradeCrd use cases ([#2663](https://github.com/kubeflow/spark-operator/pull/2663) by [@pvbouwel](https://github.com/pvbouwel)) +- Correct entrypoint.sh for Openshift ([#2645](https://github.com/kubeflow/spark-operator/pull/2645) by [@Reamer](https://github.com/Reamer)) +- Fix driver host configuration to handle IPv6 addresses ([#2703](https://github.com/kubeflow/spark-operator/pull/2703) by [@tiagotxm](https://github.com/tiagotxm)) +- Add permissions for sparkconnects/finalizers ([#2727](https://github.com/kubeflow/spark-operator/pull/2727) by [@ChenYi015](https://github.com/ChenYi015)) +- Remove redundant name and namespace context in logs ([#2723](https://github.com/kubeflow/spark-operator/pull/2723) by [@ChenYi015](https://github.com/ChenYi015)) +- Logging info when resources associated with SparkApplication still exist ([#2725](https://github.com/kubeflow/spark-operator/pull/2725) by [@ChenYi015](https://github.com/ChenYi015)) + +### Unit Tests + +- Add kube-scheduler podgroup unit tests ([#2689](https://github.com/kubeflow/spark-operator/pull/2689) by [@shadowinlife](https://github.com/shadowinlife)) +- Generate HTML coverage report after running unit tests ([#2691](https://github.com/kubeflow/spark-operator/pull/2691) by [@shadowinlife](https://github.com/shadowinlife)) +- Avoid 409 (Conflict) error at e2e tests with retry. ([#2695](https://github.com/kubeflow/spark-operator/pull/2695) by [@shadowinlife](https://github.com/shadowinlife)) +- Add unit test for sparkapplication_validator ([#2692](https://github.com/kubeflow/spark-operator/pull/2692) by [@shadowinlife](https://github.com/shadowinlife)) +- Add unit test for scheduledsparkapplication_validator ([#2694](https://github.com/kubeflow/spark-operator/pull/2694) by [@shadowinlife](https://github.com/shadowinlife)) +- Refactor the unit tests for web_ui.go ([#2696](https://github.com/kubeflow/spark-operator/pull/2696) by [@shadowinlife](https://github.com/shadowinlife)) + +### Refactor + +- unify SparkApplication defaulting logic ([#2671](https://github.com/kubeflow/spark-operator/pull/2671) by [@zhzhuang-zju](https://github.com/zhzhuang-zju)) +- Replace strconv.Atoi with strconv.ParseInt ([#2699](https://github.com/kubeflow/spark-operator/pull/2699) by [@ChenYi015](https://github.com/ChenYi015)) +- refactor: use ptr.To to replace util.XxxPtr ([#2693](https://github.com/kubeflow/spark-operator/pull/2693) by [@ChenYi015](https://github.com/ChenYi015)) + +### Dependencies + +- Bump actions/checkout from 4 to 5 ([#2623](https://github.com/kubeflow/spark-operator/pull/2623) by [@dependabot[bot]](https://github.com/apps/dependabot)) +- Bump actions/download-artifact from 4 to 5 ([#2624](https://github.com/kubeflow/spark-operator/pull/2624) by [@dependabot[bot]](https://github.com/apps/dependabot)) +- Bump helm.sh/helm/v3 from 3.18.4 to 3.18.5 ([#2627](https://github.com/kubeflow/spark-operator/pull/2627) by [@dependabot[bot]](https://github.com/apps/dependabot)) +- Bump github.com/go-viper/mapstructure/v2 from 2.3.0 to 2.4.0 ([#2635](https://github.com/kubeflow/spark-operator/pull/2635) by [@dependabot[bot]](https://github.com/apps/dependabot)) +- Bump actions/setup-go from 5 to 6 
([#2651](https://github.com/kubeflow/spark-operator/pull/2651) by [@dependabot[bot]](https://github.com/apps/dependabot)) +- Bump actions/stale from 9 to 10 ([#2649](https://github.com/kubeflow/spark-operator/pull/2649) by [@dependabot[bot]](https://github.com/apps/dependabot)) +- Bump aquasecurity/trivy-action from 0.32.0 to 0.33.1 ([#2650](https://github.com/kubeflow/spark-operator/pull/2650) by [@dependabot[bot]](https://github.com/apps/dependabot)) +- Bump github.com/golang/glog from 1.2.4 to 1.2.5 ([#2620](https://github.com/kubeflow/spark-operator/pull/2620) by [@dependabot[bot]](https://github.com/apps/dependabot)) +- Bump github.com/onsi/ginkgo/v2 from 2.23.3 to 2.26.0 ([#2660](https://github.com/kubeflow/spark-operator/pull/2660) by [@dependabot[bot]](https://github.com/apps/dependabot)) +- Bump sigs.k8s.io/scheduler-plugins from 0.31.8 to 0.32.7 ([#2675](https://github.com/kubeflow/spark-operator/pull/2675) by [@dependabot[bot]](https://github.com/apps/dependabot)) +- Bump github/codeql-action from 3 to 4 ([#2672](https://github.com/kubeflow/spark-operator/pull/2672) by [@dependabot[bot]](https://github.com/apps/dependabot)) +- Bump github.com/stretchr/testify from 1.10.0 to 1.11.1 ([#2673](https://github.com/kubeflow/spark-operator/pull/2673) by [@dependabot[bot]](https://github.com/apps/dependabot)) +- Bump golang.org/x/time from 0.9.0 to 0.14.0 ([#2686](https://github.com/kubeflow/spark-operator/pull/2686) by [@dependabot[bot]](https://github.com/apps/dependabot)) +- Bump github.com/prometheus/client_golang from 1.22.0 to 1.23.2 ([#2682](https://github.com/kubeflow/spark-operator/pull/2682) by [@dependabot[bot]](https://github.com/apps/dependabot)) +- Bump github.com/spf13/viper from 1.20.1 to 1.21.0 ([#2683](https://github.com/kubeflow/spark-operator/pull/2683) by [@dependabot[bot]](https://github.com/apps/dependabot)) +- Bump github/codeql-action from 3.29.2 to 4.31.0 ([#2684](https://github.com/kubeflow/spark-operator/pull/2684) by [@dependabot[bot]](https://github.com/apps/dependabot)) +- Bump github.com/containerd/containerd from 1.7.27 to 1.7.29 ([#2712](https://github.com/kubeflow/spark-operator/pull/2712) by [@dependabot[bot]](https://github.com/apps/dependabot)) +- Bump sigs.k8s.io/yaml from 1.5.0 to 1.6.0 ([#2704](https://github.com/kubeflow/spark-operator/pull/2704) by [@dependabot[bot]](https://github.com/apps/dependabot)) +- Bump github.com/spf13/cobra from 1.9.1 to 1.10.1 ([#2705](https://github.com/kubeflow/spark-operator/pull/2705) by [@dependabot[bot]](https://github.com/apps/dependabot)) +- Bump github/codeql-action from 4.31.0 to 4.31.2 ([#2707](https://github.com/kubeflow/spark-operator/pull/2707) by [@dependabot[bot]](https://github.com/apps/dependabot)) +- Bump actions/upload-artifact from 4 to 5 ([#2681](https://github.com/kubeflow/spark-operator/pull/2681) by [@dependabot[bot]](https://github.com/apps/dependabot)) +- Bump actions/download-artifact from 5 to 6 ([#2680](https://github.com/kubeflow/spark-operator/pull/2680) by [@dependabot[bot]](https://github.com/apps/dependabot)) +- Bump actions/checkout from 4.2.2 to 5.0.0 ([#2685](https://github.com/kubeflow/spark-operator/pull/2685) by [@dependabot[bot]](https://github.com/apps/dependabot)) +- Bump ossf/scorecard-action from 2.4.2 to 2.4.3 ([#2687](https://github.com/kubeflow/spark-operator/pull/2687) by [@dependabot[bot]](https://github.com/apps/dependabot)) +- Bump github.com/onsi/ginkgo/v2 from 2.26.0 to 2.27.2 ([#2719](https://github.com/kubeflow/spark-operator/pull/2719) by 
[@dependabot[bot]](https://github.com/apps/dependabot)) +- Bump golang.org/x/mod from 0.27.0 to 0.29.0 ([#2720](https://github.com/kubeflow/spark-operator/pull/2720) by [@dependabot[bot]](https://github.com/apps/dependabot)) +- Bump helm/chart-testing-action from 2.7.0 to 2.8.0 ([#2722](https://github.com/kubeflow/spark-operator/pull/2722) by [@dependabot[bot]](https://github.com/apps/dependabot)) + +### Misc + +- Add changelog for v2.3.0 ([#2614](https://github.com/kubeflow/spark-operator/pull/2614) by [@ChenYi015](https://github.com/ChenYi015)) +- Update README ([#2634](https://github.com/kubeflow/spark-operator/pull/2634) by [@ChenYi015](https://github.com/ChenYi015)) +- Add Spark Operator Roadmap ([#2622](https://github.com/kubeflow/spark-operator/pull/2622) by [@ChenYi015](https://github.com/ChenYi015)) +- CI: Add Scorecard workflow ([#2654](https://github.com/kubeflow/spark-operator/pull/2654) by [@AdamKorcz](https://github.com/AdamKorcz)) +- Update GitHub action workflow permissions ([#2678](https://github.com/kubeflow/spark-operator/pull/2678) by [@ChenYi015](https://github.com/ChenYi015)) + +[Full Changelog](https://github.com/kubeflow/spark-operator/compare/v2.3.0...v2.4.0) + ## [v2.3.0](https://github.com/kubeflow/spark-operator/tree/v2.3.0) (2025-07-22) ### Features diff --git a/Dockerfile b/Dockerfile index 62f555d1dd..350b478389 100644 --- a/Dockerfile +++ b/Dockerfile @@ -14,9 +14,9 @@ # limitations under the License. # -ARG SPARK_IMAGE=docker.io/library/spark:4.0.0 +ARG SPARK_IMAGE=docker.io/library/spark:4.0.1 -FROM golang:1.24.1 AS builder +FROM golang:1.24.10 AS builder WORKDIR /workspace diff --git a/Makefile b/Makefile index bd5968eb96..f295eead22 100644 --- a/Makefile +++ b/Makefile @@ -54,8 +54,8 @@ CONTROLLER_TOOLS_VERSION ?= v0.17.1 KIND_VERSION ?= v0.23.0 KIND_K8S_VERSION ?= v1.32.0 ENVTEST_VERSION ?= release-0.20 -# ENVTEST_K8S_VERSION refers to the version of kubebuilder assets to be downloaded by envtest binary. -ENVTEST_K8S_VERSION ?= 1.32.0 +# ENVTEST_K8S_VERSION is the version of Kubernetes to use for setting up ENVTEST binaries (i.e. 1.31) +ENVTEST_K8S_VERSION ?= $(shell go list -m -f "{{ .Version }}" k8s.io/api | awk -F'[v.]' '{printf "1.%d", $$3}') GOLANGCI_LINT_VERSION ?= v2.1.6 GEN_CRD_API_REFERENCE_DOCS_VERSION ?= v0.3.0 HELM_VERSION ?= $(shell grep -e '^ helm.sh/helm/v3 v' go.mod | cut -d ' ' -f 2) @@ -159,10 +159,13 @@ go-lint-fix: golangci-lint ## Run golangci-lint linter and perform fixes. $(GOLANGCI_LINT) run --fix .PHONY: unit-test -unit-test: envtest ## Run unit tests. +unit-test: setup-envtest ## Run unit tests. @echo "Running unit tests..." KUBEBUILDER_ASSETS="$(shell $(ENVTEST) use $(ENVTEST_K8S_VERSION) --bin-dir $(LOCALBIN) -p path)" go test $(shell go list ./... | grep -v /e2e) -coverprofile cover.out + @echo "Generating HTML coverage report..." + go tool cover -html=cover.out -o cover.html + @echo "Coverage report available at cover.html" .PHONY: e2e-test e2e-test: envtest ## Run the e2e tests against a Kind k8s instance that is spun up. @@ -300,6 +303,14 @@ kind: $(KIND) ## Download kind locally if necessary. $(KIND): $(LOCALBIN) $(call go-install-tool,$(KIND),sigs.k8s.io/kind,$(KIND_VERSION)) +.PHONY: setup-envtest +setup-envtest: envtest ## Download the binaries required for ENVTEST in the local bin directory. + @echo "Setting up envtest binaries for Kubernetes version $(ENVTEST_K8S_VERSION)..." 
+	@"$(ENVTEST)" use $(ENVTEST_K8S_VERSION) --bin-dir "$(LOCALBIN)" -p path || { \
+		echo "Error: Failed to set up envtest binaries for version $(ENVTEST_K8S_VERSION)."; \
+		exit 1; \
+	}
+
 .PHONY: envtest
 envtest: $(ENVTEST) ## Download setup-envtest locally if necessary.
 $(ENVTEST): $(LOCALBIN)
diff --git a/ROADMAP.md b/ROADMAP.md
new file mode 100644
index 0000000000..3060675790
--- /dev/null
+++ b/ROADMAP.md
@@ -0,0 +1,21 @@
+# Spark Operator ROADMAP
+
+## 2025
+
+- [x] A new CR to support Spark Connect ([#2569](https://github.com/kubeflow/spark-operator/pull/2569))
+- [x] Cert manger support ([#1178](https://github.com/kubeflow/spark-operator/issues/1178))
+- [ ] Improve controller performance
+- [ ] Notebooks integration
+- [ ] [Kubeflow SDK](https://github.com/kubeflow/sdk) integration
+- [ ] Rest API for submitting jobs
+- [ ] Deprecation of webhook by moving all functionality into the pod template ([#2502](https://github.com/kubeflow/spark-operator/issues/2502))
+- [ ] Watching namespaces based on labels
+- [ ] A web UI for visibility into currently running applications
+- [ ] Doc improvement
+
+## 2024
+
+- [x] Pod template support ([#2101](https://github.com/kubeflow/spark-operator/issues/2101))
+- [x] Spark Connect Support ([#1801](https://github.com/kubeflow/spark-operator/issues/1801))
+- [x] Support Yunikorn as a batch scheduler ([#2107](https://github.com/kubeflow/spark-operator/pull/2107))
+- [x] Improve test coverage to improve the confidence in releases, particularly with e2e tests
diff --git a/api/v1alpha1/sparkconnect_types.go b/api/v1alpha1/sparkconnect_types.go
index 5a457b3331..80f5ac5c0b 100644
--- a/api/v1alpha1/sparkconnect_types.go
+++ b/api/v1alpha1/sparkconnect_types.go
@@ -87,6 +87,10 @@ type SparkConnectSpec struct {
 // ServerSpec is specification of the Spark connect server.
 type ServerSpec struct {
 	SparkPodSpec `json:",inline"`
+
+	// Service exposes the Spark connect server.
+	// +optional
+	Service *corev1.Service `json:"service,omitempty"`
 }
 
 // ExecutorSpec is specification of the executor.
diff --git a/api/v1alpha1/zz_generated.deepcopy.go b/api/v1alpha1/zz_generated.deepcopy.go
index d9498121b0..24078ffad9 100644
--- a/api/v1alpha1/zz_generated.deepcopy.go
+++ b/api/v1alpha1/zz_generated.deepcopy.go
@@ -91,6 +91,11 @@ func (in *ExecutorSpec) DeepCopy() *ExecutorSpec {
 func (in *ServerSpec) DeepCopyInto(out *ServerSpec) {
 	*out = *in
 	in.SparkPodSpec.DeepCopyInto(&out.SparkPodSpec)
+	if in.Service != nil {
+		in, out := &in.Service, &out.Service
+		*out = new(v1.Service)
+		(*in).DeepCopyInto(*out)
+	}
 }
 
 // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ServerSpec.
diff --git a/api/v1beta2/defaults.go b/api/v1beta2/defaults.go
index a6e66d3217..50e0c3388b 100644
--- a/api/v1beta2/defaults.go
+++ b/api/v1beta2/defaults.go
@@ -16,7 +16,24 @@ limitations under the License.
 
 package v1beta2
 
-import "strconv"
+import (
+	"strconv"
+
+	"k8s.io/apimachinery/pkg/runtime"
+	"k8s.io/utils/ptr"
+)
+
+func addDefaultingFuncs(scheme *runtime.Scheme) error {
+	return RegisterDefaults(scheme)
+}
+
+// RegisterDefaults adds defaulters functions to the given scheme.
+// Public to allow building arbitrary schemes.
+// All generated defaulters are covering - they call all nested defaulters.
+func RegisterDefaults(scheme *runtime.Scheme) error {
+	scheme.AddTypeDefaultingFunc(&SparkApplication{}, func(obj interface{}) { SetSparkApplicationDefaults(obj.(*SparkApplication)) })
+	return nil
+}
 
 // SetSparkApplicationDefaults sets default values for certain fields of a SparkApplication.
 func SetSparkApplicationDefaults(app *SparkApplication) {
@@ -39,13 +56,12 @@ func SetSparkApplicationDefaults(app *SparkApplication) {
 	if app.Spec.RestartPolicy.Type != RestartPolicyNever {
 		// Default to 5 sec if the RestartPolicy is OnFailure or Always and these values aren't specified.
 		if app.Spec.RestartPolicy.OnFailureRetryInterval == nil {
-			app.Spec.RestartPolicy.OnFailureRetryInterval = new(int64)
-			*app.Spec.RestartPolicy.OnFailureRetryInterval = 5
+			app.Spec.RestartPolicy.OnFailureRetryInterval = ptr.To[int64](5)
 		}
 
 		if app.Spec.RestartPolicy.OnSubmissionFailureRetryInterval == nil {
 			app.Spec.RestartPolicy.OnSubmissionFailureRetryInterval = new(int64)
-			*app.Spec.RestartPolicy.OnSubmissionFailureRetryInterval = 5
+			app.Spec.RestartPolicy.OnSubmissionFailureRetryInterval = ptr.To[int64](5)
 		}
 	}
 
@@ -73,11 +89,26 @@ func setExecutorSpecDefaults(spec *ExecutorSpec, sparkConf map[string]string, al
 		spec.Memory = new(string)
 		*spec.Memory = "1g"
 	}
-	var dynalloc, _ = sparkConf["spark.dynamicallocation.enabled"]
-	if dynamic, _ := strconv.ParseBool(dynalloc); !dynamic && (allocSpec == nil || !allocSpec.Enabled) {
-		if _, exists := sparkConf["spark.executor.instances"]; !exists && spec.Instances == nil {
-			spec.Instances = new(int32)
-			*spec.Instances = 1
-		}
+
+	isDynamicAllocationEnabled := isDynamicAllocationEnabled(sparkConf, allocSpec)
+
+	if spec.Instances == nil &&
+		sparkConf["spark.executor.instances"] == "" &&
+		!isDynamicAllocationEnabled {
+		spec.Instances = ptr.To[int32](1)
+	}
+
+	// Set default for ShuffleTrackingEnabled to true if DynamicAllocation.enabled is true and
+	// DynamicAllocation.ShuffleTrackingEnabled is nil.
+	if isDynamicAllocationEnabled && allocSpec != nil && allocSpec.ShuffleTrackingEnabled == nil {
+		allocSpec.ShuffleTrackingEnabled = ptr.To(true)
+	}
+}
+
+func isDynamicAllocationEnabled(sparkConf map[string]string, allocSpec *DynamicAllocation) bool {
+	if allocSpec != nil {
+		return allocSpec.Enabled
 	}
+	dynamicAllocationConfVal, _ := strconv.ParseBool(sparkConf["spark.dynamicallocation.enabled"])
+	return dynamicAllocationConfVal
 }
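
The reworked defaulting above changes two behaviors: executor `Instances` now defaults to 1 only when dynamic allocation is disabled both in the spec and via `spark.dynamicallocation.enabled`, and `ShuffleTrackingEnabled` is defaulted to `true` when dynamic allocation is enabled in the spec but that field is unset. A minimal sketch of the resulting behavior; the module import path is an assumption (use the path declared in this repository's go.mod), and the surrounding field names are inferred from this diff:

```go
package main

import (
	"fmt"

	"k8s.io/utils/ptr"

	// Import path assumed; substitute the module path from this repository's go.mod.
	v1beta2 "github.com/kubeflow/spark-operator/v2/api/v1beta2"
)

func main() {
	// Dynamic allocation enabled in the spec: executor Instances stays unset,
	// and ShuffleTrackingEnabled is defaulted to true.
	withDynAlloc := &v1beta2.SparkApplication{
		Spec: v1beta2.SparkApplicationSpec{
			DynamicAllocation: &v1beta2.DynamicAllocation{Enabled: true},
		},
	}
	v1beta2.SetSparkApplicationDefaults(withDynAlloc)
	fmt.Println(withDynAlloc.Spec.Executor.Instances == nil)                 // true
	fmt.Println(*withDynAlloc.Spec.DynamicAllocation.ShuffleTrackingEnabled) // true

	// No dynamic allocation in the spec or spark conf: executor Instances falls back to 1.
	plain := &v1beta2.SparkApplication{}
	v1beta2.SetSparkApplicationDefaults(plain)
	fmt.Println(ptr.Deref(plain.Spec.Executor.Instances, 0)) // 1
}
```
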
diff --git a/api/v1beta2/groupversion_info.go b/api/v1beta2/groupversion_info.go
index 0f8277c704..027a07189d 100644
--- a/api/v1beta2/groupversion_info.go
+++ b/api/v1beta2/groupversion_info.go
@@ -20,17 +20,36 @@ limitations under the License.
 package v1beta2
 
 import (
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/runtime"
 	"k8s.io/apimachinery/pkg/runtime/schema"
-	"sigs.k8s.io/controller-runtime/pkg/scheme"
 )
 
 var (
 	// GroupVersion is group version used to register these objects.
 	GroupVersion = schema.GroupVersion{Group: "sparkoperator.k8s.io", Version: "v1beta2"}
 
-	// SchemeBuilder is used to add go types to the GroupVersionKind scheme.
-	SchemeBuilder = &scheme.Builder{GroupVersion: GroupVersion}
+	// SchemeBuilder is the scheme builder with scheme init functions.
+	SchemeBuilder = runtime.NewSchemeBuilder(addKnownTypes, addDefaultingFuncs)
 
 	// AddToScheme adds the types in this group-version to the given scheme.
 	AddToScheme = SchemeBuilder.AddToScheme
 )
+
+func init() {
+	SchemeBuilder.Register(addKnownTypes, addDefaultingFuncs)
+}
+
+// Adds the list of known types to Scheme.
+func addKnownTypes(scheme *runtime.Scheme) error {
+	scheme.AddKnownTypes(GroupVersion,
+		&SparkApplication{},
+		&SparkApplicationList{},
+		&ScheduledSparkApplication{},
+		&ScheduledSparkApplicationList{},
+	)
+
+	// AddToGroupVersion allows the serialization of client types like ListOptions.
+	metav1.AddToGroupVersion(scheme, SchemeGroupVersion)
+	return nil
+}
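
Because the v1beta2 SchemeBuilder is now a plain runtime.SchemeBuilder seeded with addKnownTypes and addDefaultingFuncs, a single AddToScheme call registers both the types and their defaulting functions, which is why the per-type init() registrations are removed in the *_types.go hunks below. A small usage sketch, with the module import path assumed rather than taken from go.mod:

```go
package main

import (
	"fmt"

	"k8s.io/apimachinery/pkg/runtime"

	// Import path assumed; substitute the module path from this repository's go.mod.
	v1beta2 "github.com/kubeflow/spark-operator/v2/api/v1beta2"
)

func main() {
	scheme := runtime.NewScheme()

	// Registers the SparkApplication/ScheduledSparkApplication types together with
	// the defaulting funcs wired up via runtime.NewSchemeBuilder above.
	if err := v1beta2.AddToScheme(scheme); err != nil {
		panic(err)
	}

	// scheme.Default invokes the defaulter registered with AddTypeDefaultingFunc,
	// i.e. SetSparkApplicationDefaults, with no webhook involved.
	app := &v1beta2.SparkApplication{}
	scheme.Default(app)
	fmt.Println(*app.Spec.Executor.Instances) // 1 (no dynamic allocation configured)
}
```
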
diff --git a/api/v1beta2/scheduledsparkapplication_types.go b/api/v1beta2/scheduledsparkapplication_types.go
index 9bc02da534..2801cd9247 100644
--- a/api/v1beta2/scheduledsparkapplication_types.go
+++ b/api/v1beta2/scheduledsparkapplication_types.go
@@ -23,10 +23,6 @@ import (
 // EDIT THIS FILE! THIS IS SCAFFOLDING FOR YOU TO OWN!
 // NOTE: json tags are required. Any new fields you add must have json tags for the fields to be serialized.
 
-func init() {
-	SchemeBuilder.Register(&ScheduledSparkApplication{}, &ScheduledSparkApplicationList{})
-}
-
 // ScheduledSparkApplicationSpec defines the desired state of ScheduledSparkApplication.
 type ScheduledSparkApplicationSpec struct {
 	// INSERT ADDITIONAL SPEC FIELDS - desired state of cluster
diff --git a/api/v1beta2/sparkapplication_types.go b/api/v1beta2/sparkapplication_types.go
index f5a94842d7..888359c5e6 100644
--- a/api/v1beta2/sparkapplication_types.go
+++ b/api/v1beta2/sparkapplication_types.go
@@ -25,10 +25,6 @@ import (
 // EDIT THIS FILE! THIS IS SCAFFOLDING FOR YOU TO OWN!
 // NOTE: json tags are required. Any new fields you add must have json tags for the fields to be serialized.
 
-func init() {
-	SchemeBuilder.Register(&SparkApplication{}, &SparkApplicationList{})
-}
-
 // SparkApplicationSpec defines the desired state of SparkApplication
 // It carries every pieces of information a spark-submit command takes and recognizes.
 type SparkApplicationSpec struct {
diff --git a/api/v1beta2/zz_generated.deepcopy.go b/api/v1beta2/zz_generated.deepcopy.go
index 3d5145e943..09fc3e1cc5 100644
--- a/api/v1beta2/zz_generated.deepcopy.go
+++ b/api/v1beta2/zz_generated.deepcopy.go
@@ -23,7 +23,7 @@ package v1beta2
 import (
 	"k8s.io/api/core/v1"
 	networkingv1 "k8s.io/api/networking/v1"
-	runtime "k8s.io/apimachinery/pkg/runtime"
+	"k8s.io/apimachinery/pkg/runtime"
 )
 
 // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
diff --git a/charts/spark-operator-chart/README.md b/charts/spark-operator-chart/README.md
index ddb863113b..7d4588090e 100644
--- a/charts/spark-operator-chart/README.md
+++ b/charts/spark-operator-chart/README.md
@@ -87,7 +87,11 @@ See [helm uninstall](https://helm.sh/docs/helm/helm_uninstall) for command docum
 | hook.image.registry | string | `"ghcr.io"` | Image registry. |
 | hook.image.repository | string | `"kubeflow/spark-operator/kubectl"` | Image repository. |
 | hook.image.tag | string | If not set, the chart appVersion will be used. | Image tag. |
+| hook.nodeSelector | object | `{}` | Node selector for the Helm hook Job. |
+| hook.affinity | object | `{}` | Affinity for the Helm hook Job. |
+| hook.tolerations | list | `[]` | List of node taints to tolerate for the Helm hook Job. |
 | controller.replicas | int | `1` | Number of replicas of controller. |
+| controller.featureGates | list | `[{"enabled":false,"name":"PartialRestart"},{"enabled":false,"name":"LoadSparkDefaults"}]` | Feature gates to enable or disable specific features. |
 | controller.revisionHistoryLimit | int | `10` | The number of old history to retain to allow rollback.
| | controller.leaderElection.enable | bool | `true` | Specifies whether to enable leader election for controller. | | controller.leaderElection.leaseDuration | string | `"15s"` | Leader election lease duration. | @@ -122,6 +126,7 @@ See [helm uninstall](https://helm.sh/docs/helm/helm_uninstall) for command docum | controller.priorityClassName | string | `""` | Priority class for controller pods. | | controller.podSecurityContext | object | `{"fsGroup":185}` | Security context for controller pods. | | controller.topologySpreadConstraints | list | `[]` | Topology spread constraints rely on node labels to identify the topology domain(s) that each Node is in. Ref: [Pod Topology Spread Constraints](https://kubernetes.io/docs/concepts/workloads/pods/pod-topology-spread-constraints/). The labelSelector field in topology spread constraint will be set to the selector labels for controller pods if not specified. | +| controller.hostUsers | string | `nil` | Whether to use user namespace or not Kubernetes version 1.30 for feature beta (1.33 for GA) or higher is required with support from OS and OCI runtime ref: https://kubernetes.io/docs/concepts/workloads/pods/user-namespaces/ | | controller.env | list | `[]` | Environment variables for controller containers. | | controller.envFrom | list | `[]` | Environment variable sources for controller containers. | | controller.volumeMounts | list | `[{"mountPath":"/tmp","name":"tmp","readOnly":false}]` | Volume mounts for controller containers. | @@ -164,6 +169,7 @@ See [helm uninstall](https://helm.sh/docs/helm/helm_uninstall) for command docum | webhook.priorityClassName | string | `""` | Priority class for webhook pods. | | webhook.podSecurityContext | object | `{"fsGroup":185}` | Security context for webhook pods. | | webhook.topologySpreadConstraints | list | `[]` | Topology spread constraints rely on node labels to identify the topology domain(s) that each Node is in. Ref: [Pod Topology Spread Constraints](https://kubernetes.io/docs/concepts/workloads/pods/pod-topology-spread-constraints/). The labelSelector field in topology spread constraint will be set to the selector labels for webhook pods if not specified. | +| webhook.hostUsers | string | `nil` | Whether to use user namespace or not Kubernetes version 1.30 for feature beta (1.33 for GA) or higher is required with support from OS and OCI runtime ref: https://kubernetes.io/docs/concepts/workloads/pods/user-namespaces/ | | webhook.env | list | `[]` | Environment variables for webhook containers. | | webhook.envFrom | list | `[]` | Environment variable sources for webhook containers. | | webhook.volumeMounts | list | `[{"mountPath":"/etc/k8s-webhook-server/serving-certs","name":"serving-certs","readOnly":false,"subPath":"serving-certs"}]` | Volume mounts for webhook containers. | diff --git a/charts/spark-operator-chart/crds/sparkoperator.k8s.io_sparkconnects.yaml b/charts/spark-operator-chart/crds/sparkoperator.k8s.io_sparkconnects.yaml index 769494f084..3f12bd5dd9 100644 --- a/charts/spark-operator-chart/crds/sparkoperator.k8s.io_sparkconnects.yaml +++ b/charts/spark-operator-chart/crds/sparkoperator.k8s.io_sparkconnects.yaml @@ -143,6 +143,537 @@ spec: description: Memory is the amount of memory to request for the pod. type: string + service: + description: Service exposes the Spark connect server. + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. 
+ Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + description: |- + Standard object's metadata. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#metadata + properties: + annotations: + additionalProperties: + type: string + type: object + finalizers: + items: + type: string + type: array + labels: + additionalProperties: + type: string + type: object + name: + type: string + namespace: + type: string + type: object + spec: + description: |- + Spec defines the behavior of a service. + https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#spec-and-status + properties: + allocateLoadBalancerNodePorts: + description: |- + allocateLoadBalancerNodePorts defines if NodePorts will be automatically + allocated for services with type LoadBalancer. Default is "true". It + may be set to "false" if the cluster load-balancer does not rely on + NodePorts. If the caller requests specific NodePorts (by specifying a + value), those requests will be respected, regardless of this field. + This field may only be set for services with type LoadBalancer and will + be cleared if the type is changed to any other type. + type: boolean + clusterIP: + description: |- + clusterIP is the IP address of the service and is usually assigned + randomly. If an address is specified manually, is in-range (as per + system configuration), and is not in use, it will be allocated to the + service; otherwise creation of the service will fail. This field may not + be changed through updates unless the type field is also being changed + to ExternalName (which requires this field to be blank) or the type + field is being changed from ExternalName (in which case this field may + optionally be specified, as describe above). Valid values are "None", + empty string (""), or a valid IP address. Setting this to "None" makes a + "headless service" (no virtual IP), which is useful when direct endpoint + connections are preferred and proxying is not required. Only applies to + types ClusterIP, NodePort, and LoadBalancer. If this field is specified + when creating a Service of type ExternalName, creation will fail. This + field will be wiped when updating a Service to type ExternalName. + More info: https://kubernetes.io/docs/concepts/services-networking/service/#virtual-ips-and-service-proxies + type: string + clusterIPs: + description: |- + ClusterIPs is a list of IP addresses assigned to this service, and are + usually assigned randomly. If an address is specified manually, is + in-range (as per system configuration), and is not in use, it will be + allocated to the service; otherwise creation of the service will fail. + This field may not be changed through updates unless the type field is + also being changed to ExternalName (which requires this field to be + empty) or the type field is being changed from ExternalName (in which + case this field may optionally be specified, as describe above). 
Valid + values are "None", empty string (""), or a valid IP address. Setting + this to "None" makes a "headless service" (no virtual IP), which is + useful when direct endpoint connections are preferred and proxying is + not required. Only applies to types ClusterIP, NodePort, and + LoadBalancer. If this field is specified when creating a Service of type + ExternalName, creation will fail. This field will be wiped when updating + a Service to type ExternalName. If this field is not specified, it will + be initialized from the clusterIP field. If this field is specified, + clients must ensure that clusterIPs[0] and clusterIP have the same + value. + + This field may hold a maximum of two entries (dual-stack IPs, in either order). + These IPs must correspond to the values of the ipFamilies field. Both + clusterIPs and ipFamilies are governed by the ipFamilyPolicy field. + More info: https://kubernetes.io/docs/concepts/services-networking/service/#virtual-ips-and-service-proxies + items: + type: string + type: array + x-kubernetes-list-type: atomic + externalIPs: + description: |- + externalIPs is a list of IP addresses for which nodes in the cluster + will also accept traffic for this service. These IPs are not managed by + Kubernetes. The user is responsible for ensuring that traffic arrives + at a node with this IP. A common example is external load-balancers + that are not part of the Kubernetes system. + items: + type: string + type: array + x-kubernetes-list-type: atomic + externalName: + description: |- + externalName is the external reference that discovery mechanisms will + return as an alias for this service (e.g. a DNS CNAME record). No + proxying will be involved. Must be a lowercase RFC-1123 hostname + (https://tools.ietf.org/html/rfc1123) and requires `type` to be "ExternalName". + type: string + externalTrafficPolicy: + description: |- + externalTrafficPolicy describes how nodes distribute service traffic they + receive on one of the Service's "externally-facing" addresses (NodePorts, + ExternalIPs, and LoadBalancer IPs). If set to "Local", the proxy will configure + the service in a way that assumes that external load balancers will take care + of balancing the service traffic between nodes, and so each node will deliver + traffic only to the node-local endpoints of the service, without masquerading + the client source IP. (Traffic mistakenly sent to a node with no endpoints will + be dropped.) The default value, "Cluster", uses the standard behavior of + routing to all endpoints evenly (possibly modified by topology and other + features). Note that traffic sent to an External IP or LoadBalancer IP from + within the cluster will always get "Cluster" semantics, but clients sending to + a NodePort from within the cluster may need to take traffic policy into account + when picking a node. + type: string + healthCheckNodePort: + description: |- + healthCheckNodePort specifies the healthcheck nodePort for the service. + This only applies when type is set to LoadBalancer and + externalTrafficPolicy is set to Local. If a value is specified, is + in-range, and is not in use, it will be used. If not specified, a value + will be automatically allocated. External systems (e.g. load-balancers) + can use this port to determine if a given node holds endpoints for this + service or not. If this field is specified when creating a Service + which does not need it, creation will fail. This field will be wiped + when updating a Service to no longer need it (e.g. changing type). 
+ This field cannot be updated once set. + format: int32 + type: integer + internalTrafficPolicy: + description: |- + InternalTrafficPolicy describes how nodes distribute service traffic they + receive on the ClusterIP. If set to "Local", the proxy will assume that pods + only want to talk to endpoints of the service on the same node as the pod, + dropping the traffic if there are no local endpoints. The default value, + "Cluster", uses the standard behavior of routing to all endpoints evenly + (possibly modified by topology and other features). + type: string + ipFamilies: + description: |- + IPFamilies is a list of IP families (e.g. IPv4, IPv6) assigned to this + service. This field is usually assigned automatically based on cluster + configuration and the ipFamilyPolicy field. If this field is specified + manually, the requested family is available in the cluster, + and ipFamilyPolicy allows it, it will be used; otherwise creation of + the service will fail. This field is conditionally mutable: it allows + for adding or removing a secondary IP family, but it does not allow + changing the primary IP family of the Service. Valid values are "IPv4" + and "IPv6". This field only applies to Services of types ClusterIP, + NodePort, and LoadBalancer, and does apply to "headless" services. + This field will be wiped when updating a Service to type ExternalName. + + This field may hold a maximum of two entries (dual-stack families, in + either order). These families must correspond to the values of the + clusterIPs field, if specified. Both clusterIPs and ipFamilies are + governed by the ipFamilyPolicy field. + items: + description: |- + IPFamily represents the IP Family (IPv4 or IPv6). This type is used + to express the family of an IP expressed by a type (e.g. service.spec.ipFamilies). + type: string + type: array + x-kubernetes-list-type: atomic + ipFamilyPolicy: + description: |- + IPFamilyPolicy represents the dual-stack-ness requested or required by + this Service. If there is no value provided, then this field will be set + to SingleStack. Services can be "SingleStack" (a single IP family), + "PreferDualStack" (two IP families on dual-stack configured clusters or + a single IP family on single-stack clusters), or "RequireDualStack" + (two IP families on dual-stack configured clusters, otherwise fail). The + ipFamilies and clusterIPs fields depend on the value of this field. This + field will be wiped when updating a service to type ExternalName. + type: string + loadBalancerClass: + description: |- + loadBalancerClass is the class of the load balancer implementation this Service belongs to. + If specified, the value of this field must be a label-style identifier, with an optional prefix, + e.g. "internal-vip" or "example.com/internal-vip". Unprefixed names are reserved for end-users. + This field can only be set when the Service type is 'LoadBalancer'. If not set, the default load + balancer implementation is used, today this is typically done through the cloud provider integration, + but should apply for any default implementation. If set, it is assumed that a load balancer + implementation is watching for Services with a matching class. Any default load balancer + implementation (e.g. cloud providers) should ignore Services that set this field. + This field can only be set when creating or updating a Service to type 'LoadBalancer'. + Once set, it can not be changed. This field will be wiped when a service is updated to a non 'LoadBalancer' type. 
+ type: string + loadBalancerIP: + description: |- + Only applies to Service Type: LoadBalancer. + This feature depends on whether the underlying cloud-provider supports specifying + the loadBalancerIP when a load balancer is created. + This field will be ignored if the cloud-provider does not support the feature. + Deprecated: This field was under-specified and its meaning varies across implementations. + Using it is non-portable and it may not support dual-stack. + Users are encouraged to use implementation-specific annotations when available. + type: string + loadBalancerSourceRanges: + description: |- + If specified and supported by the platform, this will restrict traffic through the cloud-provider + load-balancer will be restricted to the specified client IPs. This field will be ignored if the + cloud-provider does not support the feature." + More info: https://kubernetes.io/docs/tasks/access-application-cluster/create-external-load-balancer/ + items: + type: string + type: array + x-kubernetes-list-type: atomic + ports: + description: |- + The list of ports that are exposed by this service. + More info: https://kubernetes.io/docs/concepts/services-networking/service/#virtual-ips-and-service-proxies + items: + description: ServicePort contains information on service's + port. + properties: + appProtocol: + description: |- + The application protocol for this port. + This is used as a hint for implementations to offer richer behavior for protocols that they understand. + This field follows standard Kubernetes label syntax. + Valid values are either: + + * Un-prefixed protocol names - reserved for IANA standard service names (as per + RFC-6335 and https://www.iana.org/assignments/service-names). + + * Kubernetes-defined prefixed names: + * 'kubernetes.io/h2c' - HTTP/2 prior knowledge over cleartext as described in https://www.rfc-editor.org/rfc/rfc9113.html#name-starting-http-2-with-prior- + * 'kubernetes.io/ws' - WebSocket over cleartext as described in https://www.rfc-editor.org/rfc/rfc6455 + * 'kubernetes.io/wss' - WebSocket over TLS as described in https://www.rfc-editor.org/rfc/rfc6455 + + * Other protocols should use implementation-defined prefixed names such as + mycompany.com/my-custom-protocol. + type: string + name: + description: |- + The name of this port within the service. This must be a DNS_LABEL. + All ports within a ServiceSpec must have unique names. When considering + the endpoints for a Service, this must match the 'name' field in the + EndpointPort. + Optional if only one ServicePort is defined on this service. + type: string + nodePort: + description: |- + The port on each node on which this service is exposed when type is + NodePort or LoadBalancer. Usually assigned by the system. If a value is + specified, in-range, and not in use it will be used, otherwise the + operation will fail. If not specified, a port will be allocated if this + Service requires one. If this field is specified when creating a + Service which does not need it, creation will fail. This field will be + wiped when updating a Service to no longer need it (e.g. changing type + from NodePort to ClusterIP). + More info: https://kubernetes.io/docs/concepts/services-networking/service/#type-nodeport + format: int32 + type: integer + port: + description: The port that will be exposed by this + service. + format: int32 + type: integer + protocol: + default: TCP + description: |- + The IP protocol for this port. Supports "TCP", "UDP", and "SCTP". + Default is TCP. 
+ type: string + targetPort: + anyOf: + - type: integer + - type: string + description: |- + Number or name of the port to access on the pods targeted by the service. + Number must be in the range 1 to 65535. Name must be an IANA_SVC_NAME. + If this is a string, it will be looked up as a named port in the + target Pod's container ports. If this is not specified, the value + of the 'port' field is used (an identity map). + This field is ignored for services with clusterIP=None, and should be + omitted or set equal to the 'port' field. + More info: https://kubernetes.io/docs/concepts/services-networking/service/#defining-a-service + x-kubernetes-int-or-string: true + required: + - port + type: object + type: array + x-kubernetes-list-map-keys: + - port + - protocol + x-kubernetes-list-type: map + publishNotReadyAddresses: + description: |- + publishNotReadyAddresses indicates that any agent which deals with endpoints for this + Service should disregard any indications of ready/not-ready. + The primary use case for setting this field is for a StatefulSet's Headless Service to + propagate SRV DNS records for its Pods for the purpose of peer discovery. + The Kubernetes controllers that generate Endpoints and EndpointSlice resources for + Services interpret this to mean that all endpoints are considered "ready" even if the + Pods themselves are not. Agents which consume only Kubernetes generated endpoints + through the Endpoints or EndpointSlice resources can safely assume this behavior. + type: boolean + selector: + additionalProperties: + type: string + description: |- + Route service traffic to pods with label keys and values matching this + selector. If empty or not present, the service is assumed to have an + external process managing its endpoints, which Kubernetes will not + modify. Only applies to types ClusterIP, NodePort, and LoadBalancer. + Ignored if type is ExternalName. + More info: https://kubernetes.io/docs/concepts/services-networking/service/ + type: object + x-kubernetes-map-type: atomic + sessionAffinity: + description: |- + Supports "ClientIP" and "None". Used to maintain session affinity. + Enable client IP based session affinity. + Must be ClientIP or None. + Defaults to None. + More info: https://kubernetes.io/docs/concepts/services-networking/service/#virtual-ips-and-service-proxies + type: string + sessionAffinityConfig: + description: sessionAffinityConfig contains the configurations + of session affinity. + properties: + clientIP: + description: clientIP contains the configurations + of Client IP based session affinity. + properties: + timeoutSeconds: + description: |- + timeoutSeconds specifies the seconds of ClientIP type session sticky time. + The value must be >0 && <=86400(for 1 day) if ServiceAffinity == "ClientIP". + Default value is 10800(for 3 hours). + format: int32 + type: integer + type: object + type: object + trafficDistribution: + description: |- + TrafficDistribution offers a way to express preferences for how traffic is + distributed to Service endpoints. Implementations can use this field as a + hint, but are not required to guarantee strict adherence. If the field is + not set, the implementation will apply its default routing strategy. If set + to "PreferClose", implementations should prioritize endpoints that are + topologically close (e.g., same zone). + This is a beta field and requires enabling ServiceTrafficDistribution feature. + type: string + type: + description: |- + type determines how the Service is exposed. Defaults to ClusterIP. 
Valid + options are ExternalName, ClusterIP, NodePort, and LoadBalancer. + "ClusterIP" allocates a cluster-internal IP address for load-balancing + to endpoints. Endpoints are determined by the selector or if that is not + specified, by manual construction of an Endpoints object or + EndpointSlice objects. If clusterIP is "None", no virtual IP is + allocated and the endpoints are published as a set of endpoints rather + than a virtual IP. + "NodePort" builds on ClusterIP and allocates a port on every node which + routes to the same endpoints as the clusterIP. + "LoadBalancer" builds on NodePort and creates an external load-balancer + (if supported in the current cloud) which routes to the same endpoints + as the clusterIP. + "ExternalName" aliases this service to the specified externalName. + Several other fields do not apply to ExternalName services. + More info: https://kubernetes.io/docs/concepts/services-networking/service/#publishing-services-service-types + type: string + type: object + status: + description: |- + Most recently observed status of the service. + Populated by the system. + Read-only. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#spec-and-status + properties: + conditions: + description: Current service state + items: + description: Condition contains details for one aspect + of the current state of this API Resource. + properties: + lastTransitionTime: + description: |- + lastTransitionTime is the last time the condition transitioned from one status to another. + This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable. + format: date-time + type: string + message: + description: |- + message is a human readable message indicating details about the transition. + This may be an empty string. + maxLength: 32768 + type: string + observedGeneration: + description: |- + observedGeneration represents the .metadata.generation that the condition was set based upon. + For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date + with respect to the current state of the instance. + format: int64 + minimum: 0 + type: integer + reason: + description: |- + reason contains a programmatic identifier indicating the reason for the condition's last transition. + Producers of specific condition types may define expected values and meanings for this field, + and whether the values are considered a guaranteed API. + The value should be a CamelCase string. + This field may not be empty. + maxLength: 1024 + minLength: 1 + pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ + type: string + status: + description: status of the condition, one of True, + False, Unknown. + enum: + - "True" + - "False" + - Unknown + type: string + type: + description: type of condition in CamelCase or in + foo.example.com/CamelCase. + maxLength: 316 + pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ + type: string + required: + - lastTransitionTime + - message + - reason + - status + - type + type: object + type: array + x-kubernetes-list-map-keys: + - type + x-kubernetes-list-type: map + loadBalancer: + description: |- + LoadBalancer contains the current status of the load-balancer, + if one is present. + properties: + ingress: + description: |- + Ingress is a list containing ingress points for the load-balancer. 
+ Traffic intended for the service should be sent to these ingress points. + items: + description: |- + LoadBalancerIngress represents the status of a load-balancer ingress point: + traffic intended for the service should be sent to an ingress point. + properties: + hostname: + description: |- + Hostname is set for load-balancer ingress points that are DNS based + (typically AWS load-balancers) + type: string + ip: + description: |- + IP is set for load-balancer ingress points that are IP based + (typically GCE or OpenStack load-balancers) + type: string + ipMode: + description: |- + IPMode specifies how the load-balancer IP behaves, and may only be specified when the ip field is specified. + Setting this to "VIP" indicates that traffic is delivered to the node with + the destination set to the load-balancer's IP and port. + Setting this to "Proxy" indicates that traffic is delivered to the node or pod with + the destination set to the node's IP and node port or the pod's IP and port. + Service implementations may use this information to adjust traffic routing. + type: string + ports: + description: |- + Ports is a list of records of service ports + If used, every port defined in the service should have an entry in it + items: + description: PortStatus represents the error + condition of a service port + properties: + error: + description: |- + Error is to record the problem with the service port + The format of the error shall comply with the following rules: + - built-in error values shall be specified in this file and those shall use + CamelCase names + - cloud provider specific error values must have names that comply with the + format foo.example.com/CamelCase. + maxLength: 316 + pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ + type: string + port: + description: Port is the port number of + the service port of which status is + recorded here + format: int32 + type: integer + protocol: + description: |- + Protocol is the protocol of the service port of which status is recorded here + The supported values are: "TCP", "UDP", "SCTP" + type: string + required: + - error + - port + - protocol + type: object + type: array + x-kubernetes-list-type: atomic + type: object + type: array + x-kubernetes-list-type: atomic + type: object + type: object + type: object template: description: |- Template is a pod template that can be used to define the driver or executor pod configurations that Spark configurations do not support. 
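
The schema above embeds a complete core/v1 Service object (metadata, spec, status) under the SparkConnect service field, so how the Spark Connect server is exposed can be tuned per resource. A minimal sketch of what a user manifest might look like follows; the group/version, the gRPC port (15002 is the conventional Spark Connect port), and the idea that omitted fields fall back to operator defaults are assumptions for illustration, not taken from this diff:

    apiVersion: sparkoperator.k8s.io/v1alpha1   # assumed CRD group/version
    kind: SparkConnect
    metadata:
      name: spark-connect
      namespace: default
    spec:
      sparkVersion: 4.0.1
      # Customize the Service fronting the Spark Connect server.
      service:
        metadata:
          labels:
            app.kubernetes.io/name: spark-connect
        spec:
          type: ClusterIP
          ports:
            - name: grpc        # illustrative; only "port" is required per entry
              port: 15002       # conventional Spark Connect gRPC port
              targetPort: 15002
              protocol: TCP

The selector is left unset on the assumption that the controller wires the Service to the server pod; that wiring is not part of this diff.
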
diff --git a/charts/spark-operator-chart/templates/controller/_helpers.tpl b/charts/spark-operator-chart/templates/controller/_helpers.tpl index 9ea9cdc052..a349a0c1de 100644 --- a/charts/spark-operator-chart/templates/controller/_helpers.tpl +++ b/charts/spark-operator-chart/templates/controller/_helpers.tpl @@ -199,6 +199,7 @@ Create the role policy rules for the controller in every Spark job namespace - scheduledsparkapplications/status - scheduledsparkapplications/finalizers - sparkconnects/status + - sparkconnects/finalizers verbs: - get - update diff --git a/charts/spark-operator-chart/templates/controller/deployment.yaml b/charts/spark-operator-chart/templates/controller/deployment.yaml index 2fd428e033..5f386eb7f5 100644 --- a/charts/spark-operator-chart/templates/controller/deployment.yaml +++ b/charts/spark-operator-chart/templates/controller/deployment.yaml @@ -124,6 +124,9 @@ spec: {{- if .Values.controller.maxTrackedExecutorPerApp }} - --max-tracked-executor-per-app={{ .Values.controller.maxTrackedExecutorPerApp }} {{- end }} + {{- if .Values.controller.featureGates }} + - --feature-gates={{ range $index, $gate := .Values.controller.featureGates }}{{ if $index }},{{ end }}{{ $gate.name }}={{ $gate.enabled }}{{ end }} + {{- end }} {{- if or .Values.prometheus.metrics.enable .Values.controller.pprof.enable }} ports: {{- if .Values.controller.pprof.enable }} @@ -184,6 +187,9 @@ spec: affinity: {{- toYaml . | nindent 8 }} {{- end }} + {{- if and (semverCompare ">=1.30-0" .Capabilities.KubeVersion.Version) (kindIs "bool" .Values.controller.hostUsers) }} + hostUsers: {{ .Values.controller.hostUsers }} + {{- end }} {{- with .Values.controller.tolerations }} tolerations: {{- toYaml . | nindent 6 }} diff --git a/charts/spark-operator-chart/templates/hook/cluster_role.yaml b/charts/spark-operator-chart/templates/hook/cluster_role.yaml index d03e46d603..e1658064dd 100644 --- a/charts/spark-operator-chart/templates/hook/cluster_role.yaml +++ b/charts/spark-operator-chart/templates/hook/cluster_role.yaml @@ -38,4 +38,5 @@ rules: verbs: - get - patch + - create {{- end }} diff --git a/charts/spark-operator-chart/templates/hook/job.yaml b/charts/spark-operator-chart/templates/hook/job.yaml index 4d0e94b595..74cb706f3e 100644 --- a/charts/spark-operator-chart/templates/hook/job.yaml +++ b/charts/spark-operator-chart/templates/hook/job.yaml @@ -38,6 +38,7 @@ spec: args: - apply - --server-side + - --force-conflicts - -f - /etc/spark-operator/crds resources: @@ -58,6 +59,18 @@ spec: imagePullSecrets: {{- toYaml . | nindent 8 }} {{- end }} + {{- with .Values.hook.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.hook.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.hook.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} serviceAccountName: {{ include "spark-operator.hook.serviceAccount.name" . }} restartPolicy: Never {{- end }} diff --git a/charts/spark-operator-chart/templates/webhook/deployment.yaml b/charts/spark-operator-chart/templates/webhook/deployment.yaml index 1cdf569e18..1b56ccfd2c 100644 --- a/charts/spark-operator-chart/templates/webhook/deployment.yaml +++ b/charts/spark-operator-chart/templates/webhook/deployment.yaml @@ -144,6 +144,9 @@ spec: affinity: {{- toYaml . 
| nindent 8 }} {{- end }} + {{- if and (semverCompare ">=1.30-0" .Capabilities.KubeVersion.Version) (kindIs "bool" .Values.webhook.hostUsers) }} + hostUsers: {{ .Values.webhook.hostUsers }} + {{- end }} {{- with .Values.webhook.tolerations }} tolerations: {{- toYaml . | nindent 8 }} diff --git a/charts/spark-operator-chart/tests/controller/deployment_test.yaml b/charts/spark-operator-chart/tests/controller/deployment_test.yaml index 5cf7497f88..9b7d727abe 100644 --- a/charts/spark-operator-chart/tests/controller/deployment_test.yaml +++ b/charts/spark-operator-chart/tests/controller/deployment_test.yaml @@ -668,6 +668,71 @@ tests: - failedTemplate: errorMessage: "controller.replicas must be greater than 1 to enable topology spread constraints for controller pods" + - it: Should include hostUsers when kubernetes version >= 1.30 and hostUsers is true + capabilities: + majorVersion: 1 + minorVersion: 30 + set: + controller: + hostUsers: true + asserts: + - equal: + path: spec.template.spec.hostUsers + value: true + + - it: Should include hostUsers when kubernetes version >= 1.30 and hostUsers is false + capabilities: + majorVersion: 1 + minorVersion: 34 + set: + controller: + hostUsers: false + asserts: + - equal: + path: spec.template.spec.hostUsers + value: false + + - it: Should not contain hostUsers when kubernetes version < 1.29 and hostUsers is set + capabilities: + majorVersion: 1 + minorVersion: 29 + set: + controller: + hostUsers: true + asserts: + - notExists: + path: spec.template.spec.hostUsers + + - it: Should not include hostUsers when kubernetes version >= 1.30 but value not set + capabilities: + majorVersion: 1 + minorVersion: 34 + asserts: + - notExists: + path: spec.template.spec.hostUsers + + - it: Should should NOT include hostUsers when kubernetes version >= 1.30 but value is string + capabilities: + majorVersion: 1 + minorVersion: 34 + set: + controller: + hostUsers: "true" + asserts: + - notExists: + path: spec.template.spec.hostUsers + + - it: Should not include hostUsers when kubernetes version >= 1.30 but value is null + capabilities: + majorVersion: 1 + minorVersion: 34 + set: + controller: + hostUsers: null + asserts: + - notExists: + path: spec.template.spec.hostUsers + - it: Should contain `--pprof-bind-address` arg if `controller.pprof.enable` is set to `true` set: controller: @@ -763,3 +828,33 @@ tests: path: spec.template.spec.containers[?(@.name=="spark-operator-controller")].args content: --leader-election-retry-period=15s + - it: Should not contain `--feature-gates` arg if `controller.featureGates` is not set + asserts: + - notContains: + path: spec.template.spec.containers[?(@.name=="spark-operator-controller")].args + pattern: --feature-gates=.* + + - it: Should contain `--feature-gates` arg with single feature gate if `controller.featureGates` is set + set: + controller: + featureGates: + - name: PartialRestart + enabled: true + asserts: + - contains: + path: spec.template.spec.containers[?(@.name=="spark-operator-controller")].args + content: --feature-gates=PartialRestart=true + + - it: Should contain `--feature-gates` arg with multiple feature gates if `controller.featureGates` is set + set: + controller: + featureGates: + - name: PartialRestart + enabled: true + - name: AnotherFeature + enabled: false + asserts: + - contains: + path: spec.template.spec.containers[?(@.name=="spark-operator-controller")].args + content: --feature-gates=PartialRestart=true,AnotherFeature=false + diff --git a/charts/spark-operator-chart/tests/hook/cluster_role_test.yaml 
b/charts/spark-operator-chart/tests/hook/cluster_role_test.yaml index 527fc068a4..fa06315b09 100644 --- a/charts/spark-operator-chart/tests/hook/cluster_role_test.yaml +++ b/charts/spark-operator-chart/tests/hook/cluster_role_test.yaml @@ -55,3 +55,4 @@ tests: verbs: - get - patch + - create diff --git a/charts/spark-operator-chart/tests/hook/job_test.yaml b/charts/spark-operator-chart/tests/hook/job_test.yaml index 9cc8989771..3da77372df 100644 --- a/charts/spark-operator-chart/tests/hook/job_test.yaml +++ b/charts/spark-operator-chart/tests/hook/job_test.yaml @@ -65,3 +65,86 @@ tests: - equal: path: spec.template.spec.containers[*].imagePullPolicy value: Always + +- it: Should add nodeSelector if `hook.nodeSelector` is set + set: + hook: + upgradeCrd: true + nodeSelector: + key1: value1 + key2: value2 + asserts: + - equal: + path: spec.template.spec.nodeSelector.key1 + value: value1 + - equal: + path: spec.template.spec.nodeSelector.key2 + value: value2 + +- it: Should add affinity if `hook.affinity` is set + set: + hook: + upgradeCrd: true + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: topology.kubernetes.io/zone + operator: In + values: + - antarctica-east1 + - antarctica-west1 + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 1 + preference: + matchExpressions: + - key: another-node-label-key + operator: In + values: + - another-node-label-value + asserts: + - equal: + path: spec.template.spec.affinity + value: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: topology.kubernetes.io/zone + operator: In + values: + - antarctica-east1 + - antarctica-west1 + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 1 + preference: + matchExpressions: + - key: another-node-label-key + operator: In + values: + - another-node-label-value + +- it: Should add tolerations if `hook.tolerations` is set + set: + hook: + upgradeCrd: true + tolerations: + - key: key1 + operator: Equal + value: value1 + effect: NoSchedule + - key: key2 + operator: Exists + effect: NoSchedule + asserts: + - equal: + path: spec.template.spec.tolerations + value: + - key: key1 + operator: Equal + value: value1 + effect: NoSchedule + - key: key2 + operator: Exists + effect: NoSchedule diff --git a/charts/spark-operator-chart/tests/webhook/deployment_test.yaml b/charts/spark-operator-chart/tests/webhook/deployment_test.yaml index e7d7446179..9f33b28b07 100644 --- a/charts/spark-operator-chart/tests/webhook/deployment_test.yaml +++ b/charts/spark-operator-chart/tests/webhook/deployment_test.yaml @@ -579,3 +579,68 @@ tests: asserts: - failedTemplate: errorMessage: "webhook.replicas must be greater than 1 to enable topology spread constraints for webhook pods" + + - it: Should include hostUsers when kubernetes version >= 1.30 and hostUsers is true + capabilities: + majorVersion: 1 + minorVersion: 30 + set: + webhook: + hostUsers: true + asserts: + - equal: + path: spec.template.spec.hostUsers + value: true + + - it: Should include hostUsers when kubernetes version >= 1.30 and hostUsers is false + capabilities: + majorVersion: 1 + minorVersion: 34 + set: + webhook: + hostUsers: false + asserts: + - equal: + path: spec.template.spec.hostUsers + value: false + + - it: Should not contain hostUsers when kubernetes version < 1.29 and hostUsers is set + capabilities: + majorVersion: 1 + minorVersion: 29 + set: + webhook: + hostUsers: true + asserts: + - notExists: + 
path: spec.template.spec.hostUsers + + - it: Should not include hostUsers when kubernetes version >= 1.30 but value not set + capabilities: + majorVersion: 1 + minorVersion: 34 + asserts: + - notExists: + path: spec.template.spec.hostUsers + + - it: Should should NOT include hostUsers when kubernetes version >= 1.30 but value is string + capabilities: + majorVersion: 1 + minorVersion: 34 + set: + webhook: + hostUsers: "true" + asserts: + - notExists: + path: spec.template.spec.hostUsers + + - it: Should not include hostUsers when kubernetes version >= 1.30 but value is null + capabilities: + majorVersion: 1 + minorVersion: 34 + set: + webhook: + hostUsers: null + asserts: + - notExists: + path: spec.template.spec.hostUsers diff --git a/charts/spark-operator-chart/values.yaml b/charts/spark-operator-chart/values.yaml index be8d64dd9a..5ab2740cb4 100644 --- a/charts/spark-operator-chart/values.yaml +++ b/charts/spark-operator-chart/values.yaml @@ -55,11 +55,26 @@ hook: # -- Image tag. # @default -- If not set, the chart appVersion will be used. tag: "" + # -- Node selector for the Helm hook Job. + nodeSelector: {} + + # -- Affinity for the Helm hook Job. + affinity: {} + + # -- List of node taints to tolerate for the Helm hook Job. + tolerations: [] controller: # -- Number of replicas of controller. replicas: 1 + # -- Feature gates to enable or disable specific features. + featureGates: + - name: PartialRestart + enabled: false + - name: LoadSparkDefaults + enabled: false + # -- The number of old history to retain to allow rollback. revisionHistoryLimit: 10 @@ -183,6 +198,11 @@ controller: # topologyKey: kubernetes.io/hostname # whenUnsatisfiable: DoNotSchedule + # -- Whether to use user namespace or not + # Kubernetes version 1.30 for feature beta (1.33 for GA) or higher is required with support from OS and OCI runtime + # ref: https://kubernetes.io/docs/concepts/workloads/pods/user-namespaces/ + hostUsers: null + # -- Environment variables for controller containers. env: [] @@ -353,6 +373,11 @@ webhook: # topologyKey: kubernetes.io/hostname # whenUnsatisfiable: DoNotSchedule + # -- Whether to use user namespace or not + # Kubernetes version 1.30 for feature beta (1.33 for GA) or higher is required with support from OS and OCI runtime + # ref: https://kubernetes.io/docs/concepts/workloads/pods/user-namespaces/ + hostUsers: null + # -- Environment variables for webhook containers. env: [] diff --git a/cmd/operator/controller/start.go b/cmd/operator/controller/start.go index 1800802da3..921ef0059a 100644 --- a/cmd/operator/controller/start.go +++ b/cmd/operator/controller/start.go @@ -29,6 +29,11 @@ import ( // to ensure that exec-entrypoint and run can make use of them. _ "k8s.io/client-go/plugin/pkg/client/auth" + // Import features package to register feature gates. 
+ _ "github.com/kubeflow/spark-operator/v2/pkg/features" + utilfeature "k8s.io/apiserver/pkg/util/feature" + "k8s.io/client-go/rest" + "github.com/spf13/cobra" "github.com/spf13/viper" "go.uber.org/zap" @@ -37,10 +42,7 @@ import ( corev1 "k8s.io/api/core/v1" networkingv1 "k8s.io/api/networking/v1" "k8s.io/apimachinery/pkg/labels" - "k8s.io/apimachinery/pkg/runtime" - utilruntime "k8s.io/apimachinery/pkg/util/runtime" "k8s.io/client-go/kubernetes" - clientgoscheme "k8s.io/client-go/kubernetes/scheme" "k8s.io/utils/clock" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/cache" @@ -50,7 +52,6 @@ import ( logzap "sigs.k8s.io/controller-runtime/pkg/log/zap" metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server" ctrlwebhook "sigs.k8s.io/controller-runtime/pkg/webhook" - schedulingv1alpha1 "sigs.k8s.io/scheduler-plugins/apis/scheduling/v1alpha1" sparkoperator "github.com/kubeflow/spark-operator/v2" "github.com/kubeflow/spark-operator/v2/api/v1alpha1" @@ -64,12 +65,12 @@ import ( "github.com/kubeflow/spark-operator/v2/internal/scheduler/volcano" "github.com/kubeflow/spark-operator/v2/internal/scheduler/yunikorn" "github.com/kubeflow/spark-operator/v2/pkg/common" + operatorscheme "github.com/kubeflow/spark-operator/v2/pkg/scheme" "github.com/kubeflow/spark-operator/v2/pkg/util" // +kubebuilder:scaffold:imports ) var ( - scheme = runtime.NewScheme() logger = ctrl.Log.WithName("") ) @@ -124,15 +125,6 @@ var ( zapOptions = logzap.Options{} ) -func init() { - utilruntime.Must(clientgoscheme.AddToScheme(scheme)) - utilruntime.Must(schedulingv1alpha1.AddToScheme(scheme)) - - utilruntime.Must(v1alpha1.AddToScheme(scheme)) - utilruntime.Must(v1beta2.AddToScheme(scheme)) - // +kubebuilder:scaffold:scheme -} - func NewStartCommand() *cobra.Command { var ingressTLSstring string var ingressAnnotationsString string @@ -209,6 +201,8 @@ func NewStartCommand() *cobra.Command { zapOptions.BindFlags(flagSet) command.Flags().AddGoFlagSet(flagSet) + utilfeature.DefaultMutableFeatureGate.AddFlag(command.Flags()) + return command } @@ -217,6 +211,7 @@ func start() { // Create the client rest config. Use kubeConfig if given, otherwise assume in-cluster. cfg, err := ctrl.GetConfig() + cfg.WarningHandler = rest.NoWarnings{} if err != nil { logger.Error(err, "failed to get kube config") os.Exit(1) @@ -225,7 +220,7 @@ func start() { // Create the manager. tlsOptions := newTLSOptions() mgr, err := ctrl.NewManager(cfg, ctrl.Options{ - Scheme: scheme, + Scheme: operatorscheme.ControllerScheme, Cache: newCacheOptions(), Metrics: metricsserver.Options{ BindAddress: metricsBindAddress, @@ -386,14 +381,14 @@ func newTLSOptions() []func(c *tls.Config) { // newCacheOptions creates and returns a cache.Options instance configured with default namespaces and object caching settings. 
func newCacheOptions() cache.Options { defaultNamespaces := make(map[string]cache.Config) - if !util.ContainsString(namespaces, cache.AllNamespaces) { + if !slices.Contains(namespaces, cache.AllNamespaces) { for _, ns := range namespaces { defaultNamespaces[ns] = cache.Config{} } } options := cache.Options{ - Scheme: scheme, + Scheme: operatorscheme.ControllerScheme, DefaultNamespaces: defaultNamespaces, ByObject: map[client.Object]cache.ByObject{ &corev1.Pod{}: { diff --git a/cmd/operator/webhook/start.go b/cmd/operator/webhook/start.go index cf5f6046ba..bcfa425449 100644 --- a/cmd/operator/webhook/start.go +++ b/cmd/operator/webhook/start.go @@ -21,6 +21,7 @@ import ( "crypto/tls" "flag" "os" + "slices" "time" // Import all Kubernetes client auth plugins (e.g. Azure, GCP, OIDC, etc.) @@ -36,10 +37,7 @@ import ( "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/fields" "k8s.io/apimachinery/pkg/labels" - "k8s.io/apimachinery/pkg/runtime" - utilruntime "k8s.io/apimachinery/pkg/util/runtime" "k8s.io/apimachinery/pkg/util/wait" - clientgoscheme "k8s.io/client-go/kubernetes/scheme" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/cache" "sigs.k8s.io/controller-runtime/pkg/client" @@ -55,12 +53,11 @@ import ( "github.com/kubeflow/spark-operator/v2/internal/webhook" "github.com/kubeflow/spark-operator/v2/pkg/certificate" "github.com/kubeflow/spark-operator/v2/pkg/common" - "github.com/kubeflow/spark-operator/v2/pkg/util" + operatorscheme "github.com/kubeflow/spark-operator/v2/pkg/scheme" // +kubebuilder:scaffold:imports ) var ( - scheme = runtime.NewScheme() logger = ctrl.Log.WithName("") ) @@ -110,13 +107,6 @@ var ( zapOptions = logzap.Options{} ) -func init() { - utilruntime.Must(clientgoscheme.AddToScheme(scheme)) - - utilruntime.Must(v1beta2.AddToScheme(scheme)) - // +kubebuilder:scaffold:scheme -} - func NewStartCommand() *cobra.Command { var command = &cobra.Command{ Use: "start", @@ -195,7 +185,7 @@ func start() { // Create the manager. tlsOptions := newTLSOptions() mgr, err := ctrl.NewManager(cfg, ctrl.Options{ - Scheme: scheme, + Scheme: operatorscheme.WebhookScheme, Cache: newCacheOptions(), Metrics: metricsserver.Options{ BindAddress: metricsBindAddress, @@ -294,6 +284,7 @@ func start() { For(&v1beta2.SparkApplication{}). WithDefaulter(webhook.NewSparkApplicationDefaulter()). WithValidator(webhook.NewSparkApplicationValidator(mgr.GetClient(), enableResourceQuotaEnforcement)). + WithLogConstructor(webhook.LogConstructor). Complete(); err != nil { logger.Error(err, "Failed to create mutating webhook for Spark application") os.Exit(1) @@ -303,6 +294,7 @@ func start() { For(&v1beta2.ScheduledSparkApplication{}). WithDefaulter(webhook.NewScheduledSparkApplicationDefaulter()). WithValidator(webhook.NewScheduledSparkApplicationValidator()). + WithLogConstructor(webhook.LogConstructor). Complete(); err != nil { logger.Error(err, "Failed to create mutating webhook for Scheduled Spark application") os.Exit(1) @@ -311,6 +303,7 @@ func start() { if err := ctrl.NewWebhookManagedBy(mgr). For(&corev1.Pod{}). WithDefaulter(webhook.NewSparkPodDefaulter(mgr.GetClient(), namespaces)). + WithLogConstructor(webhook.LogConstructor). 
Complete(); err != nil { logger.Error(err, "Failed to create mutating webhook for Spark pod") os.Exit(1) @@ -341,19 +334,12 @@ func setupLog() { logzap.UseFlagOptions(&zapOptions), func(o *logzap.Options) { o.Development = development - }, func(o *logzap.Options) { o.ZapOpts = append(o.ZapOpts, zap.AddCaller()) - }, func(o *logzap.Options) { - var config zapcore.EncoderConfig - if !development { - config = zap.NewProductionEncoderConfig() - } else { - config = zap.NewDevelopmentEncoderConfig() - } - config.EncodeLevel = zapcore.CapitalColorLevelEncoder - config.EncodeTime = zapcore.ISO8601TimeEncoder - config.EncodeCaller = zapcore.ShortCallerEncoder - o.Encoder = zapcore.NewConsoleEncoder(config) + o.EncoderConfigOptions = append(o.EncoderConfigOptions, func(config *zapcore.EncoderConfig) { + config.EncodeLevel = zapcore.CapitalLevelEncoder + config.EncodeTime = zapcore.ISO8601TimeEncoder + config.EncodeCaller = zapcore.ShortCallerEncoder + }) }), ) } @@ -380,7 +366,7 @@ func newTLSOptions() []func(c *tls.Config) { // newCacheOptions creates and returns a cache.Options instance configured with default namespaces and object caching settings. func newCacheOptions() cache.Options { defaultNamespaces := make(map[string]cache.Config) - if !util.ContainsString(namespaces, cache.AllNamespaces) { + if !slices.Contains(namespaces, cache.AllNamespaces) { for _, ns := range namespaces { defaultNamespaces[ns] = cache.Config{} } @@ -411,7 +397,7 @@ func newCacheOptions() cache.Options { } options := cache.Options{ - Scheme: scheme, + Scheme: operatorscheme.WebhookScheme, DefaultNamespaces: defaultNamespaces, ByObject: byObject, } diff --git a/config/crd/bases/sparkoperator.k8s.io_sparkconnects.yaml b/config/crd/bases/sparkoperator.k8s.io_sparkconnects.yaml index 769494f084..3f12bd5dd9 100644 --- a/config/crd/bases/sparkoperator.k8s.io_sparkconnects.yaml +++ b/config/crd/bases/sparkoperator.k8s.io_sparkconnects.yaml @@ -143,6 +143,537 @@ spec: description: Memory is the amount of memory to request for the pod. type: string + service: + description: Service exposes the Spark connect server. + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + description: |- + Standard object's metadata. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#metadata + properties: + annotations: + additionalProperties: + type: string + type: object + finalizers: + items: + type: string + type: array + labels: + additionalProperties: + type: string + type: object + name: + type: string + namespace: + type: string + type: object + spec: + description: |- + Spec defines the behavior of a service. 
+ https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#spec-and-status + properties: + allocateLoadBalancerNodePorts: + description: |- + allocateLoadBalancerNodePorts defines if NodePorts will be automatically + allocated for services with type LoadBalancer. Default is "true". It + may be set to "false" if the cluster load-balancer does not rely on + NodePorts. If the caller requests specific NodePorts (by specifying a + value), those requests will be respected, regardless of this field. + This field may only be set for services with type LoadBalancer and will + be cleared if the type is changed to any other type. + type: boolean + clusterIP: + description: |- + clusterIP is the IP address of the service and is usually assigned + randomly. If an address is specified manually, is in-range (as per + system configuration), and is not in use, it will be allocated to the + service; otherwise creation of the service will fail. This field may not + be changed through updates unless the type field is also being changed + to ExternalName (which requires this field to be blank) or the type + field is being changed from ExternalName (in which case this field may + optionally be specified, as describe above). Valid values are "None", + empty string (""), or a valid IP address. Setting this to "None" makes a + "headless service" (no virtual IP), which is useful when direct endpoint + connections are preferred and proxying is not required. Only applies to + types ClusterIP, NodePort, and LoadBalancer. If this field is specified + when creating a Service of type ExternalName, creation will fail. This + field will be wiped when updating a Service to type ExternalName. + More info: https://kubernetes.io/docs/concepts/services-networking/service/#virtual-ips-and-service-proxies + type: string + clusterIPs: + description: |- + ClusterIPs is a list of IP addresses assigned to this service, and are + usually assigned randomly. If an address is specified manually, is + in-range (as per system configuration), and is not in use, it will be + allocated to the service; otherwise creation of the service will fail. + This field may not be changed through updates unless the type field is + also being changed to ExternalName (which requires this field to be + empty) or the type field is being changed from ExternalName (in which + case this field may optionally be specified, as describe above). Valid + values are "None", empty string (""), or a valid IP address. Setting + this to "None" makes a "headless service" (no virtual IP), which is + useful when direct endpoint connections are preferred and proxying is + not required. Only applies to types ClusterIP, NodePort, and + LoadBalancer. If this field is specified when creating a Service of type + ExternalName, creation will fail. This field will be wiped when updating + a Service to type ExternalName. If this field is not specified, it will + be initialized from the clusterIP field. If this field is specified, + clients must ensure that clusterIPs[0] and clusterIP have the same + value. + + This field may hold a maximum of two entries (dual-stack IPs, in either order). + These IPs must correspond to the values of the ipFamilies field. Both + clusterIPs and ipFamilies are governed by the ipFamilyPolicy field. 
+ More info: https://kubernetes.io/docs/concepts/services-networking/service/#virtual-ips-and-service-proxies + items: + type: string + type: array + x-kubernetes-list-type: atomic + externalIPs: + description: |- + externalIPs is a list of IP addresses for which nodes in the cluster + will also accept traffic for this service. These IPs are not managed by + Kubernetes. The user is responsible for ensuring that traffic arrives + at a node with this IP. A common example is external load-balancers + that are not part of the Kubernetes system. + items: + type: string + type: array + x-kubernetes-list-type: atomic + externalName: + description: |- + externalName is the external reference that discovery mechanisms will + return as an alias for this service (e.g. a DNS CNAME record). No + proxying will be involved. Must be a lowercase RFC-1123 hostname + (https://tools.ietf.org/html/rfc1123) and requires `type` to be "ExternalName". + type: string + externalTrafficPolicy: + description: |- + externalTrafficPolicy describes how nodes distribute service traffic they + receive on one of the Service's "externally-facing" addresses (NodePorts, + ExternalIPs, and LoadBalancer IPs). If set to "Local", the proxy will configure + the service in a way that assumes that external load balancers will take care + of balancing the service traffic between nodes, and so each node will deliver + traffic only to the node-local endpoints of the service, without masquerading + the client source IP. (Traffic mistakenly sent to a node with no endpoints will + be dropped.) The default value, "Cluster", uses the standard behavior of + routing to all endpoints evenly (possibly modified by topology and other + features). Note that traffic sent to an External IP or LoadBalancer IP from + within the cluster will always get "Cluster" semantics, but clients sending to + a NodePort from within the cluster may need to take traffic policy into account + when picking a node. + type: string + healthCheckNodePort: + description: |- + healthCheckNodePort specifies the healthcheck nodePort for the service. + This only applies when type is set to LoadBalancer and + externalTrafficPolicy is set to Local. If a value is specified, is + in-range, and is not in use, it will be used. If not specified, a value + will be automatically allocated. External systems (e.g. load-balancers) + can use this port to determine if a given node holds endpoints for this + service or not. If this field is specified when creating a Service + which does not need it, creation will fail. This field will be wiped + when updating a Service to no longer need it (e.g. changing type). + This field cannot be updated once set. + format: int32 + type: integer + internalTrafficPolicy: + description: |- + InternalTrafficPolicy describes how nodes distribute service traffic they + receive on the ClusterIP. If set to "Local", the proxy will assume that pods + only want to talk to endpoints of the service on the same node as the pod, + dropping the traffic if there are no local endpoints. The default value, + "Cluster", uses the standard behavior of routing to all endpoints evenly + (possibly modified by topology and other features). + type: string + ipFamilies: + description: |- + IPFamilies is a list of IP families (e.g. IPv4, IPv6) assigned to this + service. This field is usually assigned automatically based on cluster + configuration and the ipFamilyPolicy field. 
If this field is specified + manually, the requested family is available in the cluster, + and ipFamilyPolicy allows it, it will be used; otherwise creation of + the service will fail. This field is conditionally mutable: it allows + for adding or removing a secondary IP family, but it does not allow + changing the primary IP family of the Service. Valid values are "IPv4" + and "IPv6". This field only applies to Services of types ClusterIP, + NodePort, and LoadBalancer, and does apply to "headless" services. + This field will be wiped when updating a Service to type ExternalName. + + This field may hold a maximum of two entries (dual-stack families, in + either order). These families must correspond to the values of the + clusterIPs field, if specified. Both clusterIPs and ipFamilies are + governed by the ipFamilyPolicy field. + items: + description: |- + IPFamily represents the IP Family (IPv4 or IPv6). This type is used + to express the family of an IP expressed by a type (e.g. service.spec.ipFamilies). + type: string + type: array + x-kubernetes-list-type: atomic + ipFamilyPolicy: + description: |- + IPFamilyPolicy represents the dual-stack-ness requested or required by + this Service. If there is no value provided, then this field will be set + to SingleStack. Services can be "SingleStack" (a single IP family), + "PreferDualStack" (two IP families on dual-stack configured clusters or + a single IP family on single-stack clusters), or "RequireDualStack" + (two IP families on dual-stack configured clusters, otherwise fail). The + ipFamilies and clusterIPs fields depend on the value of this field. This + field will be wiped when updating a service to type ExternalName. + type: string + loadBalancerClass: + description: |- + loadBalancerClass is the class of the load balancer implementation this Service belongs to. + If specified, the value of this field must be a label-style identifier, with an optional prefix, + e.g. "internal-vip" or "example.com/internal-vip". Unprefixed names are reserved for end-users. + This field can only be set when the Service type is 'LoadBalancer'. If not set, the default load + balancer implementation is used, today this is typically done through the cloud provider integration, + but should apply for any default implementation. If set, it is assumed that a load balancer + implementation is watching for Services with a matching class. Any default load balancer + implementation (e.g. cloud providers) should ignore Services that set this field. + This field can only be set when creating or updating a Service to type 'LoadBalancer'. + Once set, it can not be changed. This field will be wiped when a service is updated to a non 'LoadBalancer' type. + type: string + loadBalancerIP: + description: |- + Only applies to Service Type: LoadBalancer. + This feature depends on whether the underlying cloud-provider supports specifying + the loadBalancerIP when a load balancer is created. + This field will be ignored if the cloud-provider does not support the feature. + Deprecated: This field was under-specified and its meaning varies across implementations. + Using it is non-portable and it may not support dual-stack. + Users are encouraged to use implementation-specific annotations when available. + type: string + loadBalancerSourceRanges: + description: |- + If specified and supported by the platform, this will restrict traffic through the cloud-provider + load-balancer will be restricted to the specified client IPs. 
This field will be ignored if the + cloud-provider does not support the feature." + More info: https://kubernetes.io/docs/tasks/access-application-cluster/create-external-load-balancer/ + items: + type: string + type: array + x-kubernetes-list-type: atomic + ports: + description: |- + The list of ports that are exposed by this service. + More info: https://kubernetes.io/docs/concepts/services-networking/service/#virtual-ips-and-service-proxies + items: + description: ServicePort contains information on service's + port. + properties: + appProtocol: + description: |- + The application protocol for this port. + This is used as a hint for implementations to offer richer behavior for protocols that they understand. + This field follows standard Kubernetes label syntax. + Valid values are either: + + * Un-prefixed protocol names - reserved for IANA standard service names (as per + RFC-6335 and https://www.iana.org/assignments/service-names). + + * Kubernetes-defined prefixed names: + * 'kubernetes.io/h2c' - HTTP/2 prior knowledge over cleartext as described in https://www.rfc-editor.org/rfc/rfc9113.html#name-starting-http-2-with-prior- + * 'kubernetes.io/ws' - WebSocket over cleartext as described in https://www.rfc-editor.org/rfc/rfc6455 + * 'kubernetes.io/wss' - WebSocket over TLS as described in https://www.rfc-editor.org/rfc/rfc6455 + + * Other protocols should use implementation-defined prefixed names such as + mycompany.com/my-custom-protocol. + type: string + name: + description: |- + The name of this port within the service. This must be a DNS_LABEL. + All ports within a ServiceSpec must have unique names. When considering + the endpoints for a Service, this must match the 'name' field in the + EndpointPort. + Optional if only one ServicePort is defined on this service. + type: string + nodePort: + description: |- + The port on each node on which this service is exposed when type is + NodePort or LoadBalancer. Usually assigned by the system. If a value is + specified, in-range, and not in use it will be used, otherwise the + operation will fail. If not specified, a port will be allocated if this + Service requires one. If this field is specified when creating a + Service which does not need it, creation will fail. This field will be + wiped when updating a Service to no longer need it (e.g. changing type + from NodePort to ClusterIP). + More info: https://kubernetes.io/docs/concepts/services-networking/service/#type-nodeport + format: int32 + type: integer + port: + description: The port that will be exposed by this + service. + format: int32 + type: integer + protocol: + default: TCP + description: |- + The IP protocol for this port. Supports "TCP", "UDP", and "SCTP". + Default is TCP. + type: string + targetPort: + anyOf: + - type: integer + - type: string + description: |- + Number or name of the port to access on the pods targeted by the service. + Number must be in the range 1 to 65535. Name must be an IANA_SVC_NAME. + If this is a string, it will be looked up as a named port in the + target Pod's container ports. If this is not specified, the value + of the 'port' field is used (an identity map). + This field is ignored for services with clusterIP=None, and should be + omitted or set equal to the 'port' field. 
+ More info: https://kubernetes.io/docs/concepts/services-networking/service/#defining-a-service + x-kubernetes-int-or-string: true + required: + - port + type: object + type: array + x-kubernetes-list-map-keys: + - port + - protocol + x-kubernetes-list-type: map + publishNotReadyAddresses: + description: |- + publishNotReadyAddresses indicates that any agent which deals with endpoints for this + Service should disregard any indications of ready/not-ready. + The primary use case for setting this field is for a StatefulSet's Headless Service to + propagate SRV DNS records for its Pods for the purpose of peer discovery. + The Kubernetes controllers that generate Endpoints and EndpointSlice resources for + Services interpret this to mean that all endpoints are considered "ready" even if the + Pods themselves are not. Agents which consume only Kubernetes generated endpoints + through the Endpoints or EndpointSlice resources can safely assume this behavior. + type: boolean + selector: + additionalProperties: + type: string + description: |- + Route service traffic to pods with label keys and values matching this + selector. If empty or not present, the service is assumed to have an + external process managing its endpoints, which Kubernetes will not + modify. Only applies to types ClusterIP, NodePort, and LoadBalancer. + Ignored if type is ExternalName. + More info: https://kubernetes.io/docs/concepts/services-networking/service/ + type: object + x-kubernetes-map-type: atomic + sessionAffinity: + description: |- + Supports "ClientIP" and "None". Used to maintain session affinity. + Enable client IP based session affinity. + Must be ClientIP or None. + Defaults to None. + More info: https://kubernetes.io/docs/concepts/services-networking/service/#virtual-ips-and-service-proxies + type: string + sessionAffinityConfig: + description: sessionAffinityConfig contains the configurations + of session affinity. + properties: + clientIP: + description: clientIP contains the configurations + of Client IP based session affinity. + properties: + timeoutSeconds: + description: |- + timeoutSeconds specifies the seconds of ClientIP type session sticky time. + The value must be >0 && <=86400(for 1 day) if ServiceAffinity == "ClientIP". + Default value is 10800(for 3 hours). + format: int32 + type: integer + type: object + type: object + trafficDistribution: + description: |- + TrafficDistribution offers a way to express preferences for how traffic is + distributed to Service endpoints. Implementations can use this field as a + hint, but are not required to guarantee strict adherence. If the field is + not set, the implementation will apply its default routing strategy. If set + to "PreferClose", implementations should prioritize endpoints that are + topologically close (e.g., same zone). + This is a beta field and requires enabling ServiceTrafficDistribution feature. + type: string + type: + description: |- + type determines how the Service is exposed. Defaults to ClusterIP. Valid + options are ExternalName, ClusterIP, NodePort, and LoadBalancer. + "ClusterIP" allocates a cluster-internal IP address for load-balancing + to endpoints. Endpoints are determined by the selector or if that is not + specified, by manual construction of an Endpoints object or + EndpointSlice objects. If clusterIP is "None", no virtual IP is + allocated and the endpoints are published as a set of endpoints rather + than a virtual IP. 
+ "NodePort" builds on ClusterIP and allocates a port on every node which + routes to the same endpoints as the clusterIP. + "LoadBalancer" builds on NodePort and creates an external load-balancer + (if supported in the current cloud) which routes to the same endpoints + as the clusterIP. + "ExternalName" aliases this service to the specified externalName. + Several other fields do not apply to ExternalName services. + More info: https://kubernetes.io/docs/concepts/services-networking/service/#publishing-services-service-types + type: string + type: object + status: + description: |- + Most recently observed status of the service. + Populated by the system. + Read-only. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#spec-and-status + properties: + conditions: + description: Current service state + items: + description: Condition contains details for one aspect + of the current state of this API Resource. + properties: + lastTransitionTime: + description: |- + lastTransitionTime is the last time the condition transitioned from one status to another. + This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable. + format: date-time + type: string + message: + description: |- + message is a human readable message indicating details about the transition. + This may be an empty string. + maxLength: 32768 + type: string + observedGeneration: + description: |- + observedGeneration represents the .metadata.generation that the condition was set based upon. + For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date + with respect to the current state of the instance. + format: int64 + minimum: 0 + type: integer + reason: + description: |- + reason contains a programmatic identifier indicating the reason for the condition's last transition. + Producers of specific condition types may define expected values and meanings for this field, + and whether the values are considered a guaranteed API. + The value should be a CamelCase string. + This field may not be empty. + maxLength: 1024 + minLength: 1 + pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ + type: string + status: + description: status of the condition, one of True, + False, Unknown. + enum: + - "True" + - "False" + - Unknown + type: string + type: + description: type of condition in CamelCase or in + foo.example.com/CamelCase. + maxLength: 316 + pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ + type: string + required: + - lastTransitionTime + - message + - reason + - status + - type + type: object + type: array + x-kubernetes-list-map-keys: + - type + x-kubernetes-list-type: map + loadBalancer: + description: |- + LoadBalancer contains the current status of the load-balancer, + if one is present. + properties: + ingress: + description: |- + Ingress is a list containing ingress points for the load-balancer. + Traffic intended for the service should be sent to these ingress points. + items: + description: |- + LoadBalancerIngress represents the status of a load-balancer ingress point: + traffic intended for the service should be sent to an ingress point. 
+ properties: + hostname: + description: |- + Hostname is set for load-balancer ingress points that are DNS based + (typically AWS load-balancers) + type: string + ip: + description: |- + IP is set for load-balancer ingress points that are IP based + (typically GCE or OpenStack load-balancers) + type: string + ipMode: + description: |- + IPMode specifies how the load-balancer IP behaves, and may only be specified when the ip field is specified. + Setting this to "VIP" indicates that traffic is delivered to the node with + the destination set to the load-balancer's IP and port. + Setting this to "Proxy" indicates that traffic is delivered to the node or pod with + the destination set to the node's IP and node port or the pod's IP and port. + Service implementations may use this information to adjust traffic routing. + type: string + ports: + description: |- + Ports is a list of records of service ports + If used, every port defined in the service should have an entry in it + items: + description: PortStatus represents the error + condition of a service port + properties: + error: + description: |- + Error is to record the problem with the service port + The format of the error shall comply with the following rules: + - built-in error values shall be specified in this file and those shall use + CamelCase names + - cloud provider specific error values must have names that comply with the + format foo.example.com/CamelCase. + maxLength: 316 + pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ + type: string + port: + description: Port is the port number of + the service port of which status is + recorded here + format: int32 + type: integer + protocol: + description: |- + Protocol is the protocol of the service port of which status is recorded here + The supported values are: "TCP", "UDP", "SCTP" + type: string + required: + - error + - port + - protocol + type: object + type: array + x-kubernetes-list-type: atomic + type: object + type: array + x-kubernetes-list-type: atomic + type: object + type: object + type: object template: description: |- Template is a pod template that can be used to define the driver or executor pod configurations that Spark configurations do not support. 
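
Pulling together the chart changes earlier in this diff: controller.featureGates is rendered into a single --feature-gates=Name=bool,... argument, hostUsers is emitted on the controller and webhook pod specs only when the cluster is Kubernetes >= 1.30 and the value is an actual boolean, and the CRD-upgrade hook Job now honors nodeSelector, affinity, and tolerations. A hedged values.yaml sketch of how these knobs combine (the gate names come from the chart defaults; the node selector and toleration values are illustrative):

    controller:
      featureGates:
        - name: PartialRestart        # chart-default gates; toggle as needed
          enabled: true
        - name: LoadSparkDefaults
          enabled: false
      # Rendered only on Kubernetes >= 1.30 and only for boolean values;
      # strings such as "true" are ignored by the template.
      hostUsers: false

    webhook:
      hostUsers: false

    hook:
      nodeSelector:
        kubernetes.io/os: linux       # illustrative scheduling constraint for the hook Job
      tolerations:
        - key: node-role.kubernetes.io/control-plane   # illustrative
          operator: Exists
          effect: NoSchedule

With these values the controller container receives --feature-gates=PartialRestart=true,LoadSparkDefaults=false, the same shape exercised by the deployment unit tests added above.
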
diff --git a/config/samples/v1beta2_scheduledsparkapplication.yaml b/config/samples/v1beta2_scheduledsparkapplication.yaml index 4808f5837d..34eeac0516 100644 --- a/config/samples/v1beta2_scheduledsparkapplication.yaml +++ b/config/samples/v1beta2_scheduledsparkapplication.yaml @@ -11,11 +11,11 @@ spec: template: type: Scala mode: cluster - image: docker.io/library/spark:4.0.0 + image: docker.io/library/spark:4.0.1 imagePullPolicy: IfNotPresent mainClass: org.apache.spark.examples.SparkPi mainApplicationFile: local:///opt/spark/examples/jars/spark-examples.jar - sparkVersion: 4.0.0 + sparkVersion: 4.0.1 restartPolicy: type: Never driver: diff --git a/config/samples/v1beta2_sparkapplication.yaml b/config/samples/v1beta2_sparkapplication.yaml index 788ef981d6..ef1b39e3a7 100644 --- a/config/samples/v1beta2_sparkapplication.yaml +++ b/config/samples/v1beta2_sparkapplication.yaml @@ -8,11 +8,11 @@ metadata: spec: type: Scala mode: cluster - image: docker.io/library/spark:4.0.0 + image: docker.io/library/spark:4.0.1 imagePullPolicy: IfNotPresent mainClass: org.apache.spark.examples.SparkPi mainApplicationFile: local:///opt/spark/examples/jars/spark-examples.jar - sparkVersion: 4.0.0 + sparkVersion: 4.0.1 driver: labels: version: 4.0.0 diff --git a/entrypoint.sh b/entrypoint.sh index 38ee7e9264..7cdf382aa8 100755 --- a/entrypoint.sh +++ b/entrypoint.sh @@ -16,7 +16,7 @@ if ! getent passwd "$myuid" &> /dev/null; then NSS_WRAPPER_GROUP="$(mktemp)" export LD_PRELOAD="$wrapper" NSS_WRAPPER_PASSWD NSS_WRAPPER_GROUP mygid="$(id -g)" - printf 'spark:x:%s:%s:${SPARK_USER_NAME:-anonymous uid}:%s:/bin/false\n' "$myuid" "$mygid" "$SPARK_HOME" > "$NSS_WRAPPER_PASSWD" + printf 'spark:x:%s:%s:%s:%s:/bin/false\n' "$myuid" "$mygid" "${SPARK_USER_NAME:-anonymous uid}" "$SPARK_HOME" > "$NSS_WRAPPER_PASSWD" printf 'spark:x:%s:\n' "$mygid" > "$NSS_WRAPPER_GROUP" break fi diff --git a/examples/spark-pi-configmap.yaml b/examples/spark-pi-configmap.yaml index de9a38b269..b9d0797850 100644 --- a/examples/spark-pi-configmap.yaml +++ b/examples/spark-pi-configmap.yaml @@ -21,11 +21,11 @@ metadata: spec: type: Scala mode: cluster - image: docker.io/library/spark:4.0.0 + image: docker.io/library/spark:4.0.1 imagePullPolicy: IfNotPresent mainClass: org.apache.spark.examples.SparkPi mainApplicationFile: local:///opt/spark/examples/jars/spark-examples.jar - sparkVersion: 4.0.0 + sparkVersion: 4.0.1 restartPolicy: type: Never volumes: diff --git a/examples/spark-pi-custom-resource.yaml b/examples/spark-pi-custom-resource.yaml index fa2e91512e..22a7b96ef7 100644 --- a/examples/spark-pi-custom-resource.yaml +++ b/examples/spark-pi-custom-resource.yaml @@ -21,11 +21,11 @@ metadata: spec: type: Scala mode: cluster - image: docker.io/library/spark:4.0.0 + image: docker.io/library/spark:4.0.1 imagePullPolicy: IfNotPresent mainClass: org.apache.spark.examples.SparkPi mainApplicationFile: local:///opt/spark/examples/jars/spark-examples.jar - sparkVersion: 4.0.0 + sparkVersion: 4.0.1 restartPolicy: type: Never driver: diff --git a/examples/spark-pi-dynamic-allocation.yaml b/examples/spark-pi-dynamic-allocation.yaml index d90e8a99f5..dc3eeb87fe 100644 --- a/examples/spark-pi-dynamic-allocation.yaml +++ b/examples/spark-pi-dynamic-allocation.yaml @@ -21,11 +21,11 @@ metadata: spec: type: Scala mode: cluster - image: docker.io/library/spark:4.0.0 + image: docker.io/library/spark:4.0.1 imagePullPolicy: IfNotPresent mainClass: org.apache.spark.examples.SparkPi mainApplicationFile: local:///opt/spark/examples/jars/spark-examples.jar - 
sparkVersion: 4.0.0 + sparkVersion: 4.0.1 driver: cores: 1 memory: 512m diff --git a/examples/spark-pi-kube-scheduler.yaml b/examples/spark-pi-kube-scheduler.yaml index eeef6ab5be..ea76241200 100644 --- a/examples/spark-pi-kube-scheduler.yaml +++ b/examples/spark-pi-kube-scheduler.yaml @@ -21,11 +21,11 @@ metadata: spec: type: Scala mode: cluster - image: docker.io/library/spark:4.0.0 + image: docker.io/library/spark:4.0.1 imagePullPolicy: IfNotPresent mainClass: org.apache.spark.examples.SparkPi mainApplicationFile: local:///opt/spark/examples/jars/spark-examples.jar - sparkVersion: 4.0.0 + sparkVersion: 4.0.1 driver: cores: 1 memory: 512m diff --git a/examples/spark-pi-pod-template.yaml b/examples/spark-pi-pod-template.yaml index 5fd38b9f98..f642d0248a 100644 --- a/examples/spark-pi-pod-template.yaml +++ b/examples/spark-pi-pod-template.yaml @@ -57,8 +57,8 @@ metadata: spec: type: Scala mode: cluster - sparkVersion: 4.0.0 - image: docker.io/library/spark:4.0.0 + sparkVersion: 4.0.1 + image: docker.io/library/spark:4.0.1 imagePullPolicy: IfNotPresent mainApplicationFile: local:///opt/spark/examples/jars/spark-examples.jar mainClass: org.apache.spark.examples.SparkPi diff --git a/examples/spark-pi-prometheus-servlet.yaml b/examples/spark-pi-prometheus-servlet.yaml index d5b2256f88..0ceba63c30 100644 --- a/examples/spark-pi-prometheus-servlet.yaml +++ b/examples/spark-pi-prometheus-servlet.yaml @@ -21,10 +21,10 @@ spec: type: Python pythonVersion: "3" mode: cluster - image: docker.io/library/spark:4.0.0 + image: docker.io/library/spark:4.0.1 imagePullPolicy: IfNotPresent mainApplicationFile: local:///opt/spark/examples/src/main/python/pi.py - sparkVersion: 4.0.0 + sparkVersion: 4.0.1 sparkConf: # Expose Spark metrics for Prometheus "spark.kubernetes.driver.annotation.prometheus.io/scrape": "true" diff --git a/examples/spark-pi-prometheus.yaml b/examples/spark-pi-prometheus.yaml index 4fe4c31504..997d9e563e 100644 --- a/examples/spark-pi-prometheus.yaml +++ b/examples/spark-pi-prometheus.yaml @@ -22,13 +22,13 @@ metadata: spec: type: Scala mode: cluster - image: {IMAGE_REGISTRY}/{IMAGE_REPOSITORY}/docker.io/library/spark:4.0.0-gcs-prometheus + image: {IMAGE_REGISTRY}/{IMAGE_REPOSITORY}/docker.io/library/spark:4.0.1-gcs-prometheus imagePullPolicy: Always mainClass: org.apache.spark.examples.SparkPi mainApplicationFile: local:///opt/spark/examples/jars/spark-examples.jar arguments: - "100000" - sparkVersion: 4.0.0 + sparkVersion: 4.0.1 restartPolicy: type: Never driver: diff --git a/examples/spark-pi-python.yaml b/examples/spark-pi-python.yaml index 03b9a5bbb6..8fc281a9a2 100644 --- a/examples/spark-pi-python.yaml +++ b/examples/spark-pi-python.yaml @@ -22,10 +22,10 @@ spec: type: Python pythonVersion: "3" mode: cluster - image: docker.io/library/spark:4.0.0 + image: docker.io/library/spark:4.0.1 imagePullPolicy: IfNotPresent mainApplicationFile: local:///opt/spark/examples/src/main/python/pi.py - sparkVersion: 4.0.0 + sparkVersion: 4.0.1 driver: cores: 1 memory: 512m diff --git a/examples/spark-pi-scheduled.yaml b/examples/spark-pi-scheduled.yaml index d1ba35352b..b9f402d093 100644 --- a/examples/spark-pi-scheduled.yaml +++ b/examples/spark-pi-scheduled.yaml @@ -25,11 +25,11 @@ spec: template: type: Scala mode: cluster - image: docker.io/library/spark:4.0.0 + image: docker.io/library/spark:4.0.1 imagePullPolicy: IfNotPresent mainClass: org.apache.spark.examples.SparkPi mainApplicationFile: local:///opt/spark/examples/jars/spark-examples.jar - sparkVersion: 4.0.0 + sparkVersion: 4.0.1 
restartPolicy: type: Never driver: diff --git a/examples/spark-pi-ttl.yaml b/examples/spark-pi-ttl.yaml index 57a75257bb..b4e7fbb56a 100644 --- a/examples/spark-pi-ttl.yaml +++ b/examples/spark-pi-ttl.yaml @@ -21,11 +21,11 @@ metadata: spec: type: Scala mode: cluster - image: docker.io/library/spark:4.0.0 + image: docker.io/library/spark:4.0.1 imagePullPolicy: IfNotPresent mainClass: org.apache.spark.examples.SparkPi mainApplicationFile: local:///opt/spark/examples/jars/spark-examples.jar - sparkVersion: 4.0.0 + sparkVersion: 4.0.1 timeToLiveSeconds: 30 driver: cores: 1 diff --git a/examples/spark-pi-volcano.yaml b/examples/spark-pi-volcano.yaml index 0ed872b4a0..0eea16226b 100644 --- a/examples/spark-pi-volcano.yaml +++ b/examples/spark-pi-volcano.yaml @@ -21,11 +21,11 @@ metadata: spec: type: Scala mode: cluster - image: docker.io/library/spark:4.0.0 + image: docker.io/library/spark:4.0.1 imagePullPolicy: IfNotPresent mainClass: org.apache.spark.examples.SparkPi mainApplicationFile: local:///opt/spark/examples/jars/spark-examples.jar - sparkVersion: 4.0.0 + sparkVersion: 4.0.1 driver: cores: 1 memory: 512m diff --git a/examples/spark-pi-yunikorn.yaml b/examples/spark-pi-yunikorn.yaml index 0407504442..93077c7b2e 100644 --- a/examples/spark-pi-yunikorn.yaml +++ b/examples/spark-pi-yunikorn.yaml @@ -21,11 +21,11 @@ metadata: spec: type: Scala mode: cluster - image: docker.io/library/spark:4.0.0 + image: docker.io/library/spark:4.0.1 imagePullPolicy: IfNotPresent mainClass: org.apache.spark.examples.SparkPi mainApplicationFile: local:///opt/spark/examples/jars/spark-examples.jar - sparkVersion: 4.0.0 + sparkVersion: 4.0.1 driver: cores: 1 memory: 512m diff --git a/examples/spark-pi.yaml b/examples/spark-pi.yaml index fb1c2a1143..7f9eecf264 100644 --- a/examples/spark-pi.yaml +++ b/examples/spark-pi.yaml @@ -21,13 +21,13 @@ metadata: spec: type: Scala mode: cluster - image: docker.io/library/spark:4.0.0 + image: docker.io/library/spark:4.0.1 imagePullPolicy: IfNotPresent mainClass: org.apache.spark.examples.SparkPi mainApplicationFile: local:///opt/spark/examples/jars/spark-examples.jar arguments: - "5000" - sparkVersion: 4.0.0 + sparkVersion: 4.0.1 driver: labels: version: 4.0.0 diff --git a/examples/sparkconnect/spark-connect.yaml b/examples/sparkconnect/spark-connect.yaml index 10cc4ad56b..87fb5fb7a3 100644 --- a/examples/sparkconnect/spark-connect.yaml +++ b/examples/sparkconnect/spark-connect.yaml @@ -19,7 +19,7 @@ metadata: name: spark-connect namespace: default spec: - sparkVersion: 4.0.0 + sparkVersion: 4.0.1 server: template: metadata: @@ -32,7 +32,7 @@ spec: spec: containers: - name: spark-kubernetes-driver - image: spark:4.0.0 + image: docker.io/library/spark:4.0.1 imagePullPolicy: Always resources: requests: @@ -67,7 +67,7 @@ spec: spec: containers: - name: spark-kubernetes-executor - image: spark:4.0.0 + image: docker.io/library/spark:4.0.1 imagePullPolicy: Always securityContext: capabilities: diff --git a/go.mod b/go.mod index a00a3b943f..8d0c05aef2 100644 --- a/go.mod +++ b/go.mod @@ -1,32 +1,35 @@ module github.com/kubeflow/spark-operator/v2 -go 1.24.1 +go 1.24.10 tool k8s.io/code-generator require ( github.com/go-logr/logr v1.4.3 - github.com/golang/glog v1.2.4 + github.com/golang/glog v1.2.5 github.com/google/uuid v1.6.0 - github.com/onsi/ginkgo/v2 v2.23.3 - github.com/onsi/gomega v1.37.0 - github.com/prometheus/client_golang v1.22.0 + github.com/onsi/ginkgo/v2 v2.27.2 + github.com/onsi/gomega v1.38.2 + github.com/prometheus/client_golang v1.23.2 github.com/robfig/cron/v3 
v3.0.1 - github.com/spf13/cobra v1.9.1 - github.com/spf13/viper v1.20.1 - github.com/stretchr/testify v1.10.0 + github.com/spf13/cobra v1.10.1 + github.com/spf13/viper v1.21.0 + github.com/stretchr/testify v1.11.1 go.uber.org/zap v1.27.0 - golang.org/x/mod v0.26.0 - golang.org/x/time v0.9.0 + golang.org/x/mod v0.29.0 + golang.org/x/time v0.14.0 helm.sh/helm/v3 v3.18.5 k8s.io/api v0.33.3 k8s.io/apiextensions-apiserver v0.33.3 k8s.io/apimachinery v0.33.3 + k8s.io/apiserver v0.33.3 k8s.io/client-go v0.33.3 + k8s.io/component-base v0.33.3 + k8s.io/klog/v2 v2.130.1 k8s.io/utils v0.0.0-20241104100929-3ea5e8cea738 sigs.k8s.io/controller-runtime v0.20.4 - sigs.k8s.io/scheduler-plugins v0.31.8 - sigs.k8s.io/yaml v1.5.0 + sigs.k8s.io/scheduler-plugins v0.32.7 + sigs.k8s.io/yaml v1.6.0 volcano.sh/apis v1.10.0 ) @@ -36,7 +39,7 @@ require ( github.com/BurntSushi/toml v1.5.0 // indirect github.com/MakeNowJust/heredoc v1.0.0 // indirect github.com/Masterminds/goutils v1.1.1 // indirect - github.com/Masterminds/semver/v3 v3.3.0 // indirect + github.com/Masterminds/semver/v3 v3.4.0 // indirect github.com/Masterminds/sprig/v3 v3.3.0 // indirect github.com/Masterminds/squirrel v1.5.4 // indirect github.com/asaskevich/govalidator v0.0.0-20230301143203-a9d515a09cc2 // indirect @@ -44,7 +47,7 @@ require ( github.com/blang/semver/v4 v4.0.0 // indirect github.com/cespare/xxhash/v2 v2.3.0 // indirect github.com/chai2010/gettext-go v1.0.3 // indirect - github.com/containerd/containerd v1.7.27 // indirect + github.com/containerd/containerd v1.7.29 // indirect github.com/containerd/errdefs v0.3.0 // indirect github.com/containerd/log v0.1.0 // indirect github.com/containerd/platforms v0.2.1 // indirect @@ -55,7 +58,7 @@ require ( github.com/evanphx/json-patch/v5 v5.9.11 // indirect github.com/exponent-io/jsonpath v0.0.0-20210407135951-1de76d718b3f // indirect github.com/fatih/color v1.17.0 // indirect - github.com/fsnotify/fsnotify v1.8.0 // indirect + github.com/fsnotify/fsnotify v1.9.0 // indirect github.com/fxamacker/cbor/v2 v2.7.0 // indirect github.com/go-errors/errors v1.5.1 // indirect github.com/go-gorp/gorp/v3 v3.1.0 // indirect @@ -72,7 +75,7 @@ require ( github.com/google/gnostic-models v0.6.9 // indirect github.com/google/go-cmp v0.7.0 // indirect github.com/google/gofuzz v1.2.0 // indirect - github.com/google/pprof v0.0.0-20241210010833-40e02aabc2ad // indirect + github.com/google/pprof v0.0.0-20250403155104-27863c87afa6 // indirect github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510 // indirect github.com/gorilla/websocket v1.5.4-0.20250319132907-e064f32e3674 // indirect github.com/gosuri/uitable v0.0.4 // indirect @@ -105,51 +108,51 @@ require ( github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f // indirect github.com/opencontainers/go-digest v1.0.0 // indirect github.com/opencontainers/image-spec v1.1.1 // indirect - github.com/pelletier/go-toml/v2 v2.2.3 // indirect + github.com/pelletier/go-toml/v2 v2.2.4 // indirect github.com/peterbourgon/diskv v2.0.1+incompatible // indirect github.com/pkg/errors v0.9.1 // indirect github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect - github.com/prometheus/client_model v0.6.1 // indirect - github.com/prometheus/common v0.62.0 // indirect - github.com/prometheus/procfs v0.15.1 // indirect + github.com/prometheus/client_model v0.6.2 // indirect + github.com/prometheus/common v0.66.1 // indirect + github.com/prometheus/procfs v0.16.1 // indirect github.com/rivo/uniseg v0.4.7 // indirect github.com/rubenv/sql-migrate 
v1.8.0 // indirect github.com/russross/blackfriday/v2 v2.1.0 // indirect - github.com/sagikazarmark/locafero v0.7.0 // indirect + github.com/sagikazarmark/locafero v0.11.0 // indirect github.com/santhosh-tekuri/jsonschema/v6 v6.0.2 // indirect github.com/shopspring/decimal v1.4.0 // indirect github.com/sirupsen/logrus v1.9.3 // indirect - github.com/sourcegraph/conc v0.3.0 // indirect - github.com/spf13/afero v1.12.0 // indirect - github.com/spf13/cast v1.7.1 // indirect - github.com/spf13/pflag v1.0.7 // indirect + github.com/sourcegraph/conc v0.3.1-0.20240121214520-5f936abd7ae8 // indirect + github.com/spf13/afero v1.15.0 // indirect + github.com/spf13/cast v1.10.0 // indirect + github.com/spf13/pflag v1.0.10 // indirect github.com/subosito/gotenv v1.6.0 // indirect github.com/x448/float16 v0.8.4 // indirect github.com/xlab/treeprint v1.2.0 // indirect + go.opentelemetry.io/otel v1.33.0 // indirect + go.opentelemetry.io/otel/trace v1.33.0 // indirect go.uber.org/multierr v1.11.0 // indirect go.yaml.in/yaml/v2 v2.4.2 // indirect - go.yaml.in/yaml/v3 v3.0.3 // indirect - golang.org/x/crypto v0.40.0 // indirect - golang.org/x/net v0.41.0 // indirect - golang.org/x/oauth2 v0.28.0 // indirect - golang.org/x/sync v0.16.0 // indirect - golang.org/x/sys v0.34.0 // indirect - golang.org/x/term v0.33.0 // indirect - golang.org/x/text v0.27.0 // indirect - golang.org/x/tools v0.34.0 // indirect + go.yaml.in/yaml/v3 v3.0.4 // indirect + golang.org/x/crypto v0.42.0 // indirect + golang.org/x/net v0.44.0 // indirect + golang.org/x/oauth2 v0.30.0 // indirect + golang.org/x/sync v0.17.0 // indirect + golang.org/x/sys v0.36.0 // indirect + golang.org/x/term v0.35.0 // indirect + golang.org/x/text v0.29.0 // indirect + golang.org/x/tools v0.37.0 // indirect + golang.org/x/tools/go/packages/packagestest v0.1.1-deprecated // indirect gomodules.xyz/jsonpatch/v2 v2.4.0 // indirect - google.golang.org/genproto/googleapis/rpc v0.0.0-20241223144023-3abc09e42ca8 // indirect - google.golang.org/grpc v1.68.1 // indirect - google.golang.org/protobuf v1.36.5 // indirect + google.golang.org/genproto/googleapis/rpc v0.0.0-20250102185135-69823020774d // indirect + google.golang.org/grpc v1.69.4 // indirect + google.golang.org/protobuf v1.36.8 // indirect gopkg.in/evanphx/json-patch.v4 v4.12.0 // indirect gopkg.in/inf.v0 v0.9.1 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect - k8s.io/apiserver v0.33.3 // indirect k8s.io/cli-runtime v0.33.3 // indirect - k8s.io/code-generator v0.32.5 // indirect - k8s.io/component-base v0.33.3 // indirect + k8s.io/code-generator v0.32.7 // indirect k8s.io/gengo/v2 v2.0.0-20240911193312-2b36238f13e9 // indirect - k8s.io/klog/v2 v2.130.1 // indirect k8s.io/kube-openapi v0.0.0-20250318190949-c8a335a9a2ff // indirect k8s.io/kubectl v0.33.3 // indirect oras.land/oras-go/v2 v2.6.0 // indirect diff --git a/go.sum b/go.sum index 29b8c402b8..92c341b9f3 100644 --- a/go.sum +++ b/go.sum @@ -14,8 +14,8 @@ github.com/MakeNowJust/heredoc v1.0.0 h1:cXCdzVdstXyiTqTvfqk9SDHpKNjxuom+DOlyEeQ github.com/MakeNowJust/heredoc v1.0.0/go.mod h1:mG5amYoWBHf8vpLOuehzbGGw0EHxpZZ6lCpQ4fNJ8LE= github.com/Masterminds/goutils v1.1.1 h1:5nUrii3FMTL5diU80unEVvNevw1nH4+ZV4DSLVJLSYI= github.com/Masterminds/goutils v1.1.1/go.mod h1:8cTjp+g8YejhMuvIA5y2vz3BpJxksy863GQaJW2MFNU= -github.com/Masterminds/semver/v3 v3.3.0 h1:B8LGeaivUe71a5qox1ICM/JLl0NqZSW5CHyL+hmvYS0= -github.com/Masterminds/semver/v3 v3.3.0/go.mod h1:4V+yj/TJE1HU9XfppCwVMZq3I84lprf4nC11bSS5beM= +github.com/Masterminds/semver/v3 v3.4.0 
h1:Zog+i5UMtVoCU8oKka5P7i9q9HgrJeGzI9SA1Xbatp0= +github.com/Masterminds/semver/v3 v3.4.0/go.mod h1:4V+yj/TJE1HU9XfppCwVMZq3I84lprf4nC11bSS5beM= github.com/Masterminds/sprig/v3 v3.3.0 h1:mQh0Yrg1XPo6vjYXgtf5OtijNAKJRNcTdOOGZe3tPhs= github.com/Masterminds/sprig/v3 v3.3.0/go.mod h1:Zy1iXRYNqNLUolqCpL4uhk6SHUMAOSCzdgBfDb35Lz0= github.com/Masterminds/squirrel v1.5.4 h1:uUcX/aBc8O7Fg9kaISIUsHXdKuqehiXAMQTYX8afzqM= @@ -36,8 +36,8 @@ github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UF github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/chai2010/gettext-go v1.0.3 h1:9liNh8t+u26xl5ddmWLmsOsdNLwkdRTg5AG+JnTiM80= github.com/chai2010/gettext-go v1.0.3/go.mod h1:y+wnP2cHYaVj19NZhYKAwEMH2CI1gNHeQQ+5AjwawxA= -github.com/containerd/containerd v1.7.27 h1:yFyEyojddO3MIGVER2xJLWoCIn+Up4GaHFquP7hsFII= -github.com/containerd/containerd v1.7.27/go.mod h1:xZmPnl75Vc+BLGt4MIfu6bp+fy03gdHAn9bz+FreFR0= +github.com/containerd/containerd v1.7.29 h1:90fWABQsaN9mJhGkoVnuzEY+o1XDPbg9BTC9QTAHnuE= +github.com/containerd/containerd v1.7.29/go.mod h1:azUkWcOvHrWvaiUjSQH0fjzuHIwSPg1WL5PshGP4Szs= github.com/containerd/errdefs v0.3.0 h1:FSZgGOeK4yuT/+DnF07/Olde/q4KBoMsaamhXxIMDp4= github.com/containerd/errdefs v0.3.0/go.mod h1:+YBYIdtsnF4Iw6nWZhJcqGSg/dwvV7tyJ/kCkyJ2k+M= github.com/containerd/log v0.1.0 h1:TCJt7ioM2cr/tfR8GPbGf9/VRAX8D2B4PjzCpfX540I= @@ -85,10 +85,16 @@ github.com/foxcpp/go-mockdns v1.1.0 h1:jI0rD8M0wuYAxL7r/ynTrCQQq0BVqfB99Vgk7Dlme github.com/foxcpp/go-mockdns v1.1.0/go.mod h1:IhLeSFGed3mJIAXPH2aiRQB+kqz7oqu8ld2qVbOu7Wk= github.com/frankban/quicktest v1.14.6 h1:7Xjx+VpznH+oBnejlPUj8oUpdxnVs4f8XU8WnHkI4W8= github.com/frankban/quicktest v1.14.6/go.mod h1:4ptaffx2x8+WTWXmUCuVU6aPUX1/Mz7zb5vbUoiM6w0= -github.com/fsnotify/fsnotify v1.8.0 h1:dAwr6QBTBZIkG8roQaJjGof0pp0EeF+tNV7YBP3F/8M= -github.com/fsnotify/fsnotify v1.8.0/go.mod h1:8jBTzvmWwFyi3Pb8djgCCO5IBqzKJ/Jwo8TRcHyHii0= +github.com/fsnotify/fsnotify v1.9.0 h1:2Ml+OJNzbYCTzsxtv8vKSFD9PbJjmhYF14k/jKC7S9k= +github.com/fsnotify/fsnotify v1.9.0/go.mod h1:8jBTzvmWwFyi3Pb8djgCCO5IBqzKJ/Jwo8TRcHyHii0= github.com/fxamacker/cbor/v2 v2.7.0 h1:iM5WgngdRBanHcxugY4JySA0nk1wZorNOpTgCMedv5E= github.com/fxamacker/cbor/v2 v2.7.0/go.mod h1:pxXPTn3joSm21Gbwsv0w9OSA2y1HFR9qXEeXQVeNoDQ= +github.com/gkampitakis/ciinfo v0.3.2 h1:JcuOPk8ZU7nZQjdUhctuhQofk7BGHuIy0c9Ez8BNhXs= +github.com/gkampitakis/ciinfo v0.3.2/go.mod h1:1NIwaOcFChN4fa/B0hEBdAb6npDlFL8Bwx4dfRLRqAo= +github.com/gkampitakis/go-diff v1.3.2 h1:Qyn0J9XJSDTgnsgHRdz9Zp24RaJeKMUHg2+PDZZdC4M= +github.com/gkampitakis/go-diff v1.3.2/go.mod h1:LLgOrpqleQe26cte8s36HTWcTmMEur6OPYerdAAS9tk= +github.com/gkampitakis/go-snaps v0.5.15 h1:amyJrvM1D33cPHwVrjo9jQxX8g/7E2wYdZ+01KS3zGE= +github.com/gkampitakis/go-snaps v0.5.15/go.mod h1:HNpx/9GoKisdhw9AFOBT1N7DBs9DiHo/hGheFGBZ+mc= github.com/go-errors/errors v1.5.1 h1:ZwEMSLRCapFLflTpT7NKaAc7ukJ8ZPEjzlxt8rPN8bk= github.com/go-errors/errors v1.5.1/go.mod h1:sIVyrIiJhuEF+Pj9Ebtd6P/rEYROXFi3BopGUQ5a5Og= github.com/go-gorp/gorp/v3 v3.1.0 h1:ItKF/Vbuj31dmV4jxA1qblpSwkl9g1typ24xoe70IGs= @@ -113,10 +119,12 @@ github.com/go-viper/mapstructure/v2 v2.4.0 h1:EBsztssimR/CONLSZZ04E8qAkxNYq4Qp9L github.com/go-viper/mapstructure/v2 v2.4.0/go.mod h1:oJDH3BJKyqBA2TXFhDsKDGDTlndYOZ6rGS0BRZIxGhM= github.com/gobwas/glob v0.2.3 h1:A4xDbljILXROh+kObIiy5kIaPYD8e96x1tgBhUI5J+Y= github.com/gobwas/glob v0.2.3/go.mod h1:d3Ez4x06l9bZtSvzIay5+Yzi0fmZzPgnTbPcKjJAkT8= +github.com/goccy/go-yaml v1.18.0 
h1:8W7wMFS12Pcas7KU+VVkaiCng+kG8QiFeFwzFb+rwuw= +github.com/goccy/go-yaml v1.18.0/go.mod h1:XBurs7gK8ATbW4ZPGKgcbrY1Br56PdM69F7LkFRi1kA= github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= -github.com/golang/glog v1.2.4 h1:CNNw5U8lSiiBk7druxtSHHTsRWcxKoac6kZKm2peBBc= -github.com/golang/glog v1.2.4/go.mod h1:6AhwSGph0fcJtXVM/PEHPqZlFeoLxhs7/t5UDAwmO+w= +github.com/golang/glog v1.2.5 h1:DrW6hGnjIhtvhOIiAKT6Psh/Kd/ldepEa81DKeiRJ5I= +github.com/golang/glog v1.2.5/go.mod h1:6AhwSGph0fcJtXVM/PEHPqZlFeoLxhs7/t5UDAwmO+w= github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek= github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps= github.com/google/btree v1.1.3 h1:CVpQJjYgC4VbzxeGVHfvZrv1ctoYCAI8vbl07Fcxlyg= @@ -129,8 +137,8 @@ github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= github.com/google/gofuzz v1.2.0 h1:xRy4A+RhZaiKjJ1bPfwQ8sedCA+YS2YcCHW6ec7JMi0= github.com/google/gofuzz v1.2.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= -github.com/google/pprof v0.0.0-20241210010833-40e02aabc2ad h1:a6HEuzUHeKH6hwfN/ZoQgRgVIWFJljSWa/zetS2WTvg= -github.com/google/pprof v0.0.0-20241210010833-40e02aabc2ad/go.mod h1:vavhavw2zAxS5dIdcRluK6cSGGPlZynqzFM8NdvU144= +github.com/google/pprof v0.0.0-20250403155104-27863c87afa6 h1:BHT72Gu3keYf3ZEu2J0b1vyeLSOYI8bm5wbJM/8yDe8= +github.com/google/pprof v0.0.0-20250403155104-27863c87afa6/go.mod h1:boTsfXsheKC2y+lKOCMpSfarhxDeIzfZG1jqGcPl3cA= github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510 h1:El6M4kTTCOh6aBiKaUGG7oYTSPP8MxqL4YI3kZKwcP4= github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510/go.mod h1:pupxD2MaaD3pAXIBCelhxNneeOaAeabZDe5s4K6zSpQ= github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= @@ -165,6 +173,8 @@ github.com/jmoiron/sqlx v1.4.0 h1:1PLqN7S1UYp5t4SrVVnt4nUVNemrDAtxlulVe+Qgm3o= github.com/jmoiron/sqlx v1.4.0/go.mod h1:ZrZ7UsYB/weZdl2Bxg6jCRO9c3YHl8r3ahlKmRT4JLY= github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY= github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y= +github.com/joshdk/go-junit v1.0.0 h1:S86cUKIdwBHWwA6xCmFlf3RTLfVXYQfvanM5Uh+K6GE= +github.com/joshdk/go-junit v1.0.0/go.mod h1:TiiV0PqkaNfFXjEiyjWM3XXrhVyCa1K4Zfga6W52ung= github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= @@ -187,6 +197,8 @@ github.com/liggitt/tabwriter v0.0.0-20181228230101-89fcab3d43de h1:9TO3cAIGXtEhn github.com/liggitt/tabwriter v0.0.0-20181228230101-89fcab3d43de/go.mod h1:zAbeS9B/r2mtpb6U+EI2rYA5OAXxsYw6wTamcNW+zcE= github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0= github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc= +github.com/maruel/natural v1.1.1 h1:Hja7XhhmvEFhcByqDoHz9QZbkWey+COd9xWfCfn1ioo= +github.com/maruel/natural v1.1.1/go.mod h1:v+Rfd79xlw1AgVBjbO0BEQmptqb5HvL/k9GRHB7ZKEg= github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA= github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg= github.com/mattn/go-isatty 
v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM= @@ -196,6 +208,8 @@ github.com/mattn/go-runewidth v0.0.15 h1:UNAjwbU9l54TA3KzvqLGxwWjHmMgBUVhBiTjelZ github.com/mattn/go-runewidth v0.0.15/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w= github.com/mattn/go-sqlite3 v1.14.22 h1:2gZY6PC6kBnID23Tichd1K+Z0oS6nE/XwU+Vz/5o4kU= github.com/mattn/go-sqlite3 v1.14.22/go.mod h1:Uh1q+B4BYcTPb+yiD3kU8Ct7aC0hY9fxUwlHK0RXw+Y= +github.com/mfridman/tparse v0.18.0 h1:wh6dzOKaIwkUGyKgOntDW4liXSo37qg5AXbIhkMV3vE= +github.com/mfridman/tparse v0.18.0/go.mod h1:gEvqZTuCgEhPbYk/2lS3Kcxg1GmTxxU7kTC8DvP0i/A= github.com/miekg/dns v1.1.57 h1:Jzi7ApEIzwEPLHWRcafCN9LZSBbqQpxjt/wpgvg7wcM= github.com/miekg/dns v1.1.57/go.mod h1:uqRjCRUuEAA6qsOiJvDd+CFo/vW+y5WR6SNmHE55hZk= github.com/mitchellh/copystructure v1.2.0 h1:vpKXTN4ewci03Vljg/q9QvCGUDttBOGBIa15WveJJGw= @@ -219,16 +233,16 @@ github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f h1:y5//uYreIhSUg3J1GEMiLbxo1LJaP8RfCpH6pymGZus= github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f/go.mod h1:ZdcZmHo+o7JKHSa8/e818NopupXU1YMK5fe1lsApnBw= -github.com/onsi/ginkgo/v2 v2.23.3 h1:edHxnszytJ4lD9D5Jjc4tiDkPBZ3siDeJJkUZJJVkp0= -github.com/onsi/ginkgo/v2 v2.23.3/go.mod h1:zXTP6xIp3U8aVuXN8ENK9IXRaTjFnpVB9mGmaSRvxnM= -github.com/onsi/gomega v1.37.0 h1:CdEG8g0S133B4OswTDC/5XPSzE1OeP29QOioj2PID2Y= -github.com/onsi/gomega v1.37.0/go.mod h1:8D9+Txp43QWKhM24yyOBEdpkzN8FvJyAwecBgsU4KU0= +github.com/onsi/ginkgo/v2 v2.27.2 h1:LzwLj0b89qtIy6SSASkzlNvX6WktqurSHwkk2ipF/Ns= +github.com/onsi/ginkgo/v2 v2.27.2/go.mod h1:ArE1D/XhNXBXCBkKOLkbsb2c81dQHCRcF5zwn/ykDRo= +github.com/onsi/gomega v1.38.2 h1:eZCjf2xjZAqe+LeWvKb5weQ+NcPwX84kqJ0cZNxok2A= +github.com/onsi/gomega v1.38.2/go.mod h1:W2MJcYxRGV63b418Ai34Ud0hEdTVXq9NW9+Sx6uXf3k= github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8Oi/yOhh5U= github.com/opencontainers/go-digest v1.0.0/go.mod h1:0JzlMkj0TRzQZfJkVvzbP0HBR3IKzErnv2BNG4W4MAM= github.com/opencontainers/image-spec v1.1.1 h1:y0fUlFfIZhPF1W537XOLg0/fcx6zcHCJwooC2xJA040= github.com/opencontainers/image-spec v1.1.1/go.mod h1:qpqAh3Dmcf36wStyyWU+kCeDgrGnAve2nCC8+7h8Q0M= -github.com/pelletier/go-toml/v2 v2.2.3 h1:YmeHyLY8mFWbdkNWwpr+qIL2bEqT0o95WSdkNHvL12M= -github.com/pelletier/go-toml/v2 v2.2.3/go.mod h1:MfCQTFTvCcUyyvvwm1+G6H/jORL20Xlb6rzQu9GuUkc= +github.com/pelletier/go-toml/v2 v2.2.4 h1:mye9XuhQ6gvn5h28+VilKrrPoQVanw5PMw/TB0t5Ec4= +github.com/pelletier/go-toml/v2 v2.2.4/go.mod h1:2gIqNv+qfxSVS7cM2xJQKtLSTLUE9V8t9Stt+h56mCY= github.com/peterbourgon/diskv v2.0.1+incompatible h1:UBdAOUP5p4RWqPBg048CAvpKN+vxiaj6gdUUzhl4XmI= github.com/peterbourgon/diskv v2.0.1+incompatible/go.mod h1:uqqh8zWWbv1HBMNONnaR/tNboyR3/BZd58JJSHlUSCU= github.com/phayes/freeport v0.0.0-20220201140144-74d24b5ae9f5 h1:Ii+DKncOVM8Cu1Hc+ETb5K+23HdAMvESYE3ZJ5b5cMI= @@ -240,14 +254,14 @@ github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRI github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/poy/onpar v1.1.2 h1:QaNrNiZx0+Nar5dLgTVp5mXkyoVFIbepjyEoGSnhbAY= github.com/poy/onpar v1.1.2/go.mod h1:6X8FLNoxyr9kkmnlqpK6LSoiOtrO6MICtWwEuWkLjzg= -github.com/prometheus/client_golang v1.22.0 h1:rb93p9lokFEsctTys46VnV1kLCDpVZ0a/Y92Vm0Zc6Q= 
-github.com/prometheus/client_golang v1.22.0/go.mod h1:R7ljNsLXhuQXYZYtw6GAE9AZg8Y7vEW5scdCXrWRXC0= -github.com/prometheus/client_model v0.6.1 h1:ZKSh/rekM+n3CeS952MLRAdFwIKqeY8b62p8ais2e9E= -github.com/prometheus/client_model v0.6.1/go.mod h1:OrxVMOVHjw3lKMa8+x6HeMGkHMQyHDk9E3jmP2AmGiY= -github.com/prometheus/common v0.62.0 h1:xasJaQlnWAeyHdUBeGjXmutelfJHWMRr+Fg4QszZ2Io= -github.com/prometheus/common v0.62.0/go.mod h1:vyBcEuLSvWos9B1+CyL7JZ2up+uFzXhkqml0W5zIY1I= -github.com/prometheus/procfs v0.15.1 h1:YagwOFzUgYfKKHX6Dr+sHT7km/hxC76UB0learggepc= -github.com/prometheus/procfs v0.15.1/go.mod h1:fB45yRUv8NstnjriLhBQLuOUt+WW4BsoGhij/e3PBqk= +github.com/prometheus/client_golang v1.23.2 h1:Je96obch5RDVy3FDMndoUsjAhG5Edi49h0RJWRi/o0o= +github.com/prometheus/client_golang v1.23.2/go.mod h1:Tb1a6LWHB3/SPIzCoaDXI4I8UHKeFTEQ1YCr+0Gyqmg= +github.com/prometheus/client_model v0.6.2 h1:oBsgwpGs7iVziMvrGhE53c/GrLUsZdHnqNwqPLxwZyk= +github.com/prometheus/client_model v0.6.2/go.mod h1:y3m2F6Gdpfy6Ut/GBsUqTWZqCUvMVzSfMLjcu6wAwpE= +github.com/prometheus/common v0.66.1 h1:h5E0h5/Y8niHc5DlaLlWLArTQI7tMrsfQjHV+d9ZoGs= +github.com/prometheus/common v0.66.1/go.mod h1:gcaUsgf3KfRSwHY4dIMXLPV0K/Wg1oZ8+SbZk/HH/dA= +github.com/prometheus/procfs v0.16.1 h1:hZ15bTNuirocR6u0JZ6BAHHmwS1p8B4P6MRqxtzMyRg= +github.com/prometheus/procfs v0.16.1/go.mod h1:teAbpZRB1iIAJYREa1LsoWUXykVXA1KlTmWl8x/U+Is= github.com/redis/go-redis/extra/rediscmd/v9 v9.0.5 h1:EaDatTxkdHG+U3Bk4EUr+DZ7fOGwTfezUiUJMaIcaho= github.com/redis/go-redis/extra/rediscmd/v9 v9.0.5/go.mod h1:fyalQWdtzDBECAQFBJuQe5bzQ02jGd5Qcbgb97Flm7U= github.com/redis/go-redis/extra/redisotel/v9 v9.0.5 h1:EfpWLLCyXw8PSM2/XNJLjI3Pb27yVE+gIAfeqp8LUCc= @@ -259,14 +273,14 @@ github.com/rivo/uniseg v0.4.7 h1:WUdvkW8uEhrYfLC4ZzdpI2ztxP1I582+49Oc5Mq64VQ= github.com/rivo/uniseg v0.4.7/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88= github.com/robfig/cron/v3 v3.0.1 h1:WdRxkvbJztn8LMz/QEvLN5sBU+xKpSqwwUO1Pjr4qDs= github.com/robfig/cron/v3 v3.0.1/go.mod h1:eQICP3HwyT7UooqI/z+Ov+PtYAWygg1TEWWzGIFLtro= -github.com/rogpeppe/go-internal v1.12.0 h1:exVL4IDcn6na9z1rAb56Vxr+CgyK3nn3O+epU5NdKM8= -github.com/rogpeppe/go-internal v1.12.0/go.mod h1:E+RYuTGaKKdloAfM02xzb0FW3Paa99yedzYV+kq4uf4= +github.com/rogpeppe/go-internal v1.13.1 h1:KvO1DLK/DRN07sQ1LQKScxyZJuNnedQ5/wKSR38lUII= +github.com/rogpeppe/go-internal v1.13.1/go.mod h1:uMEvuHeurkdAXX61udpOXGD/AzZDWNMNyH2VO9fmH0o= github.com/rubenv/sql-migrate v1.8.0 h1:dXnYiJk9k3wetp7GfQbKJcPHjVJL6YK19tKj8t2Ns0o= github.com/rubenv/sql-migrate v1.8.0/go.mod h1:F2bGFBwCU+pnmbtNYDeKvSuvL6lBVtXDXUUv5t+u1qw= github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk= github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= -github.com/sagikazarmark/locafero v0.7.0 h1:5MqpDsTGNDhY8sGp0Aowyf0qKsPrhewaLSsFaodPcyo= -github.com/sagikazarmark/locafero v0.7.0/go.mod h1:2za3Cg5rMaTMoG/2Ulr9AwtFaIppKXTRYnozin4aB5k= +github.com/sagikazarmark/locafero v0.11.0 h1:1iurJgmM9G3PA/I+wWYIOw/5SyBtxapeHDcg+AAIFXc= +github.com/sagikazarmark/locafero v0.11.0/go.mod h1:nVIGvgyzw595SUSUE6tvCp3YYTeHs15MvlmU87WwIik= github.com/santhosh-tekuri/jsonschema/v6 v6.0.2 h1:KRzFb2m7YtdldCEkzs6KqmJw4nqEVZGK7IN2kJkjTuQ= github.com/santhosh-tekuri/jsonschema/v6 v6.0.2/go.mod h1:JXeL+ps8p7/KNMjDQk3TCwPpBy0wYklyWTfbkIzdIFU= github.com/sergi/go-diff v1.2.0 h1:XU+rvMAioB0UC3q1MFrIQy4Vo5/4VsRDQQXHsEya6xQ= @@ -275,19 +289,19 @@ github.com/shopspring/decimal v1.4.0 h1:bxl37RwXBklmTi0C79JfXCEBD1cqqHt0bbgBAGFp 
github.com/shopspring/decimal v1.4.0/go.mod h1:gawqmDU56v4yIKSwfBSFip1HdCCXN8/+DMd9qYNcwME= github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ= github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ= -github.com/sourcegraph/conc v0.3.0 h1:OQTbbt6P72L20UqAkXXuLOj79LfEanQ+YQFNpLA9ySo= -github.com/sourcegraph/conc v0.3.0/go.mod h1:Sdozi7LEKbFPqYX2/J+iBAM6HpqSLTASQIKqDmF7Mt0= -github.com/spf13/afero v1.12.0 h1:UcOPyRBYczmFn6yvphxkn9ZEOY65cpwGKb5mL36mrqs= -github.com/spf13/afero v1.12.0/go.mod h1:ZTlWwG4/ahT8W7T0WQ5uYmjI9duaLQGy3Q2OAl4sk/4= -github.com/spf13/cast v1.7.1 h1:cuNEagBQEHWN1FnbGEjCXL2szYEXqfJPbP2HNUaca9Y= -github.com/spf13/cast v1.7.1/go.mod h1:ancEpBxwJDODSW/UG4rDrAqiKolqNNh2DX3mk86cAdo= -github.com/spf13/cobra v1.9.1 h1:CXSaggrXdbHK9CF+8ywj8Amf7PBRmPCOJugH954Nnlo= -github.com/spf13/cobra v1.9.1/go.mod h1:nDyEzZ8ogv936Cinf6g1RU9MRY64Ir93oCnqb9wxYW0= -github.com/spf13/pflag v1.0.6/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= -github.com/spf13/pflag v1.0.7 h1:vN6T9TfwStFPFM5XzjsvmzZkLuaLX+HS+0SeFLRgU6M= -github.com/spf13/pflag v1.0.7/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= -github.com/spf13/viper v1.20.1 h1:ZMi+z/lvLyPSCoNtFCpqjy0S4kPbirhpTMwl8BkW9X4= -github.com/spf13/viper v1.20.1/go.mod h1:P9Mdzt1zoHIG8m2eZQinpiBjo6kCmZSKBClNNqjJvu4= +github.com/sourcegraph/conc v0.3.1-0.20240121214520-5f936abd7ae8 h1:+jumHNA0Wrelhe64i8F6HNlS8pkoyMv5sreGx2Ry5Rw= +github.com/sourcegraph/conc v0.3.1-0.20240121214520-5f936abd7ae8/go.mod h1:3n1Cwaq1E1/1lhQhtRK2ts/ZwZEhjcQeJQ1RuC6Q/8U= +github.com/spf13/afero v1.15.0 h1:b/YBCLWAJdFWJTN9cLhiXXcD7mzKn9Dm86dNnfyQw1I= +github.com/spf13/afero v1.15.0/go.mod h1:NC2ByUVxtQs4b3sIUphxK0NioZnmxgyCrfzeuq8lxMg= +github.com/spf13/cast v1.10.0 h1:h2x0u2shc1QuLHfxi+cTJvs30+ZAHOGRic8uyGTDWxY= +github.com/spf13/cast v1.10.0/go.mod h1:jNfB8QC9IA6ZuY2ZjDp0KtFO2LZZlg4S/7bzP6qqeHo= +github.com/spf13/cobra v1.10.1 h1:lJeBwCfmrnXthfAupyUTzJ/J4Nc1RsHC/mSRU2dll/s= +github.com/spf13/cobra v1.10.1/go.mod h1:7SmJGaTHFVBY0jW4NXGluQoLvhqFQM+6XSKD+P4XaB0= +github.com/spf13/pflag v1.0.9/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= +github.com/spf13/pflag v1.0.10 h1:4EBh2KAYBwaONj6b2Ye1GiHfwjqyROoF4RwYO+vPwFk= +github.com/spf13/pflag v1.0.10/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= +github.com/spf13/viper v1.21.0 h1:x5S+0EU27Lbphp4UKm1C+1oQO+rKx36vfCoaVebLFSU= +github.com/spf13/viper v1.21.0/go.mod h1:P0lhsswPGWD/1lZJ9ny3fYnVqxiegrlNrEmgLjbTCAY= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.5.2 h1:xuMeJ0Sdp5ZMRXx/aWO6RZxdr3beISkG5/G/aIRr3pY= github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA= @@ -295,10 +309,18 @@ github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXf github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= -github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA= -github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= +github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U= +github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U= github.com/subosito/gotenv v1.6.0 
h1:9NlTDc1FTs4qu0DDq7AEtTPNw6SVm7uBMsUCUjABIf8= github.com/subosito/gotenv v1.6.0/go.mod h1:Dk4QP5c2W3ibzajGcXpNraDfq2IrhjMIvMSWPKKo0FU= +github.com/tidwall/gjson v1.18.0 h1:FIDeeyB800efLX89e5a8Y0BNH+LOngJyGrIWxG2FKQY= +github.com/tidwall/gjson v1.18.0/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk= +github.com/tidwall/match v1.1.1 h1:+Ho715JplO36QYgwN9PGYNhgZvoUSc9X2c80KVTi+GA= +github.com/tidwall/match v1.1.1/go.mod h1:eRSPERbgtNPcGhD8UCthc6PmLEQXEWd3PRB5JTxsfmM= +github.com/tidwall/pretty v1.2.1 h1:qjsOFOWWQl+N3RsoF5/ssm1pHmJJwhjlSbZ51I6wMl4= +github.com/tidwall/pretty v1.2.1/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU= +github.com/tidwall/sjson v1.2.5 h1:kLy8mja+1c9jlljvWTlSazM7cKDRfJuR/bOJhcY5NcY= +github.com/tidwall/sjson v1.2.5/go.mod h1:Fvgq9kS/6ociJEDnK0Fk1cpYF4FIW6ZF7LAe+6jwd28= github.com/x448/float16 v0.8.4 h1:qLwI1I70+NjRFUR3zs1JPUCgaCXSh3SW62uAKT1mSBM= github.com/x448/float16 v0.8.4/go.mod h1:14CWIYCyZA/cWjXOioeEpHeN/83MdbZDRQHoFcYsOfg= github.com/xlab/treeprint v1.2.0 h1:HzHnuAF1plUN2zGlAFHbSQP2qJ0ZAD3XF5XD7OesXRQ= @@ -359,30 +381,30 @@ go.uber.org/zap v1.27.0 h1:aJMhYGrd5QSmlpLMr2MftRKl7t8J8PTZPA732ud/XR8= go.uber.org/zap v1.27.0/go.mod h1:GB2qFLM7cTU87MWRP2mPIjqfIDnGu+VIO4V/SdhGo2E= go.yaml.in/yaml/v2 v2.4.2 h1:DzmwEr2rDGHl7lsFgAHxmNz/1NlQ7xLIrlN2h5d1eGI= go.yaml.in/yaml/v2 v2.4.2/go.mod h1:081UH+NErpNdqlCXm3TtEran0rJZGxAYx9hb/ELlsPU= -go.yaml.in/yaml/v3 v3.0.3 h1:bXOww4E/J3f66rav3pX3m8w6jDE4knZjGOw8b5Y6iNE= -go.yaml.in/yaml/v3 v3.0.3/go.mod h1:tBHosrYAkRZjRAOREWbDnBXUf08JOwYq++0QNwQiWzI= +go.yaml.in/yaml/v3 v3.0.4 h1:tfq32ie2Jv2UxXFdLJdh3jXuOzWiL1fo0bu/FbuKpbc= +go.yaml.in/yaml/v3 v3.0.4/go.mod h1:DhzuOOF2ATzADvBadXxruRBLzYTpT36CKvDb3+aBEFg= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= -golang.org/x/crypto v0.40.0 h1:r4x+VvoG5Fm+eJcxMaY8CQM7Lb0l1lsmjGBQ6s8BfKM= -golang.org/x/crypto v0.40.0/go.mod h1:Qr1vMER5WyS2dfPHAlsOj01wgLbsyWtFn/aY+5+ZdxY= +golang.org/x/crypto v0.42.0 h1:chiH31gIWm57EkTXpwnqf8qeuMUi0yekh6mT2AvFlqI= +golang.org/x/crypto v0.42.0/go.mod h1:4+rDnOTJhQCx2q7/j6rAN5XDw8kPjeaXEUR2eL94ix8= golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= -golang.org/x/mod v0.26.0 h1:EGMPT//Ezu+ylkCijjPc+f4Aih7sZvaAr+O3EHBxvZg= -golang.org/x/mod v0.26.0/go.mod h1:/j6NAhSk8iQ723BGAUyoAcn7SlD7s15Dp9Nd/SfeaFQ= +golang.org/x/mod v0.29.0 h1:HV8lRxZC4l2cr3Zq1LvtOsi/ThTgWnUk/y64QSs8GwA= +golang.org/x/mod v0.29.0/go.mod h1:NyhrlYXJ2H4eJiRy/WDBO6HMqZQ6q9nk4JzS3NuCK+w= golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= -golang.org/x/net v0.41.0 h1:vBTly1HeNPEn3wtREYfy4GZ/NECgw2Cnl+nK6Nz3uvw= -golang.org/x/net v0.41.0/go.mod h1:B/K4NNqkfmg07DQYrbwvSluqCJOOXwUjeb/5lOisjbA= -golang.org/x/oauth2 v0.28.0 h1:CrgCKl8PPAVtLnU3c+EDw6x11699EWlsDeWNWKdIOkc= -golang.org/x/oauth2 v0.28.0/go.mod 
h1:onh5ek6nERTohokkhCD/y2cV4Do3fxFHFuAejCkRWT8= +golang.org/x/net v0.44.0 h1:evd8IRDyfNBMBTTY5XRF1vaZlD+EmWx6x8PkhR04H/I= +golang.org/x/net v0.44.0/go.mod h1:ECOoLqd5U3Lhyeyo/QDCEVQ4sNgYsqvCZ722XogGieY= +golang.org/x/oauth2 v0.30.0 h1:dnDm7JmhM45NNpd8FDDeLhK6FwqbOf4MLCM9zb1BOHI= +golang.org/x/oauth2 v0.30.0/go.mod h1:B++QgG3ZKulg6sRPGD/mqlHQs5rB3Ml9erfeDY7xKlU= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.16.0 h1:ycBJEhp9p4vXvUZNszeOq0kGTPghopOL8q0fq3vstxw= -golang.org/x/sync v0.16.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA= +golang.org/x/sync v0.17.0 h1:l60nONMj9l5drqw6jlhIELNv9I0A4OFgRsG9k2oT9Ug= +golang.org/x/sync v0.17.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= @@ -390,37 +412,41 @@ golang.org/x/sys v0.0.0-20210616094352-59db8d763f22/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.34.0 h1:H5Y5sJ2L2JRdyv7ROF1he/lPdvFsd0mJHFw2ThKHxLA= -golang.org/x/sys v0.34.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= -golang.org/x/term v0.33.0 h1:NuFncQrRcaRvVmgRkvM3j/F00gWIAlcmlB8ACEKmGIg= -golang.org/x/term v0.33.0/go.mod h1:s18+ql9tYWp1IfpV9DmCtQDDSRBUjKaw9M1eAv5UeF0= +golang.org/x/sys v0.36.0 h1:KVRy2GtZBrk1cBYA7MKu5bEZFxQk4NIDV6RLVcC8o0k= +golang.org/x/sys v0.36.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= +golang.org/x/term v0.35.0 h1:bZBVKBudEyhRcajGcNc3jIfWPqV4y/Kt2XcoigOWtDQ= +golang.org/x/term v0.35.0/go.mod h1:TPGtkTLesOwf2DE8CgVYiZinHAOuy5AYUYT1lENIZnA= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= -golang.org/x/text v0.27.0 h1:4fGWRpyh641NLlecmyl4LOe6yDdfaYNrGb2zdfo4JV4= -golang.org/x/text v0.27.0/go.mod h1:1D28KMCvyooCX9hBiosv5Tz/+YLxj0j7XhWjpSUF7CU= -golang.org/x/time v0.9.0 h1:EsRrnYcQiGH+5FfbgvV4AP7qEZstoyrHB0DzarOQ4ZY= -golang.org/x/time v0.9.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM= +golang.org/x/text v0.29.0 h1:1neNs90w9YzJ9BocxfsQNHKuAT4pkghyXc4nhZ6sJvk= +golang.org/x/text v0.29.0/go.mod h1:7MhJOA9CD2qZyOKYazxdYMF85OwPdEr9jTtBpO7ydH4= +golang.org/x/time v0.14.0 h1:MRx4UaLrDotUKUdCIqzPC48t1Y9hANFKIRpNx+Te8PI= +golang.org/x/time v0.14.0/go.mod h1:eL/Oa2bBBK0TkX57Fyni+NgnyQQN4LitPmob2Hjnqw4= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod 
h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= -golang.org/x/tools v0.34.0 h1:qIpSLOxeCYGg9TrcJokLBG4KFA6d795g0xkBkiESGlo= -golang.org/x/tools v0.34.0/go.mod h1:pAP9OwEaY1CAW3HOmg3hLZC5Z0CCmzjAF2UQMSqNARg= +golang.org/x/tools v0.37.0 h1:DVSRzp7FwePZW356yEAChSdNcQo6Nsp+fex1SUW09lE= +golang.org/x/tools v0.37.0/go.mod h1:MBN5QPQtLMHVdvsbtarmTNukZDdgwdwlO5qGacAzF0w= +golang.org/x/tools/go/expect v0.1.0-deprecated h1:jY2C5HGYR5lqex3gEniOQL0r7Dq5+VGVgY1nudX5lXY= +golang.org/x/tools/go/expect v0.1.0-deprecated/go.mod h1:eihoPOH+FgIqa3FpoTwguz/bVUSGBlGQU67vpBeOrBY= +golang.org/x/tools/go/packages/packagestest v0.1.1-deprecated h1:1h2MnaIAIXISqTFKdENegdpAgUXz6NrPEsbIeWaBRvM= +golang.org/x/tools/go/packages/packagestest v0.1.1-deprecated/go.mod h1:RVAQXBGNv1ib0J382/DPCRS/BPnsGebyM1Gj5VSDpG8= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= gomodules.xyz/jsonpatch/v2 v2.4.0 h1:Ci3iUJyx9UeRx7CeFN8ARgGbkESwJK+KB9lLcWxY/Zw= gomodules.xyz/jsonpatch/v2 v2.4.0/go.mod h1:AH3dM2RI6uoBZxn3LVrfvJ3E0/9dG4cSrbuBJT4moAY= -google.golang.org/genproto v0.0.0-20241118233622-e639e219e697 h1:ToEetK57OidYuqD4Q5w+vfEnPvPpuTwedCNVohYJfNk= +google.golang.org/genproto v0.0.0-20240123012728-ef4313101c80 h1:KAeGQVN3M9nD0/bQXnr/ClcEMJ968gUXJQ9pwfSynuQ= google.golang.org/genproto/googleapis/api v0.0.0-20241209162323-e6fa225c2576 h1:CkkIfIt50+lT6NHAVoRYEyAvQGFM7xEwXUUywFvEb3Q= google.golang.org/genproto/googleapis/api v0.0.0-20241209162323-e6fa225c2576/go.mod h1:1R3kvZ1dtP3+4p4d3G8uJ8rFk/fWlScl38vanWACI08= -google.golang.org/genproto/googleapis/rpc v0.0.0-20241223144023-3abc09e42ca8 h1:TqExAhdPaB60Ux47Cn0oLV07rGnxZzIsaRhQaqS666A= -google.golang.org/genproto/googleapis/rpc v0.0.0-20241223144023-3abc09e42ca8/go.mod h1:lcTa1sDdWEIHMWlITnIczmw5w60CF9ffkb8Z+DVmmjA= -google.golang.org/grpc v1.68.1 h1:oI5oTa11+ng8r8XMMN7jAOmWfPZWbYpCFaMUTACxkM0= -google.golang.org/grpc v1.68.1/go.mod h1:+q1XYFJjShcqn0QZHvCyeR4CXPA+llXIeUIfIe00waw= -google.golang.org/protobuf v1.36.5 h1:tPhr+woSbjfYvY6/GPufUoYizxw1cF/yFoxJ2fmpwlM= -google.golang.org/protobuf v1.36.5/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE= +google.golang.org/genproto/googleapis/rpc v0.0.0-20250102185135-69823020774d h1:xJJRGY7TJcvIlpSrN3K6LAWgNFUILlO+OMAqtg9aqnw= +google.golang.org/genproto/googleapis/rpc v0.0.0-20250102185135-69823020774d/go.mod h1:3ENsm/5D1mzDyhpzeRi1NR784I0BcofWBoSc5QqqMK4= +google.golang.org/grpc v1.69.4 h1:MF5TftSMkd8GLw/m0KM6V8CMOCY6NZ1NQDPGFgbTt4A= +google.golang.org/grpc v1.69.4/go.mod h1:vyjdE6jLBI76dgpDojsFGNaHlxdjXN9ghpnd2o7JGZ4= +google.golang.org/protobuf v1.36.8 h1:xHScyCOEuuwZEc6UtSOvPbAT4zRh0xcNRYekJwfqyMc= +google.golang.org/protobuf v1.36.8/go.mod h1:fuxRtAxBytpl4zzqUh6/eyUujkJdNiuEkXntxiD/uRU= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= @@ -474,12 +500,12 @@ sigs.k8s.io/kustomize/kyaml v0.19.0/go.mod h1:FeKD5jEOH+FbZPpqUghBP8mrLjJ3+zD3/r sigs.k8s.io/randfill 
v0.0.0-20250304075658-069ef1bbf016/go.mod h1:XeLlZ/jmk4i1HRopwe7/aU3H5n1zNUcX6TM94b3QxOY= sigs.k8s.io/randfill v1.0.0 h1:JfjMILfT8A6RbawdsK2JXGBR5AQVfd+9TbzrlneTyrU= sigs.k8s.io/randfill v1.0.0/go.mod h1:XeLlZ/jmk4i1HRopwe7/aU3H5n1zNUcX6TM94b3QxOY= -sigs.k8s.io/scheduler-plugins v0.31.8 h1:Ie2EFRnkE9T2tBjxwypww7hJJyPRIwrXJNZeNxjP6QY= -sigs.k8s.io/scheduler-plugins v0.31.8/go.mod h1:KkcXEbf9CYaoZ5ntbAMSYmquPq9MtSfXVpI31R6mHeM= +sigs.k8s.io/scheduler-plugins v0.32.7 h1:fGr4JKraaTe6it4PIqUlXStfctFKYxJgYkDsiU6699o= +sigs.k8s.io/scheduler-plugins v0.32.7/go.mod h1:Oem5rktj6wgFr2SUqcaInUTIBX8tlY8c4qid5vp2lBw= sigs.k8s.io/structured-merge-diff/v4 v4.6.0 h1:IUA9nvMmnKWcj5jl84xn+T5MnlZKThmUW1TdblaLVAc= sigs.k8s.io/structured-merge-diff/v4 v4.6.0/go.mod h1:dDy58f92j70zLsuZVuUX5Wp9vtxXpaZnkPGWeqDfCps= sigs.k8s.io/yaml v1.4.0/go.mod h1:Ejl7/uTz7PSA4eKMyQCUTnhZYNmLIl+5c2lQPGR2BPY= -sigs.k8s.io/yaml v1.5.0 h1:M10b2U7aEUY6hRtU870n2VTPgR5RZiL/I6Lcc2F4NUQ= -sigs.k8s.io/yaml v1.5.0/go.mod h1:wZs27Rbxoai4C0f8/9urLZtZtF3avA3gKvGyPdDqTO4= +sigs.k8s.io/yaml v1.6.0 h1:G8fkbMSAFqgEFgh4b1wmtzDnioxFCUgTZhlbj5P9QYs= +sigs.k8s.io/yaml v1.6.0/go.mod h1:796bPqUfzR/0jLAl6XjHl3Ck7MiyVv8dbTdyT3/pMf4= volcano.sh/apis v1.10.0 h1:Z9eLwibQmhpFmYGLWxjsTWwsYeTEKvvjFcLptmP2qxE= volcano.sh/apis v1.10.0/go.mod h1:z8hhFZ2qcUMR1JIjVYmBqL98CVaXNzsQAcqKiytQW9s= diff --git a/internal/controller/mutatingwebhookconfiguration/controller.go b/internal/controller/mutatingwebhookconfiguration/controller.go index 6ff7dfc0db..956d41e50d 100644 --- a/internal/controller/mutatingwebhookconfiguration/controller.go +++ b/internal/controller/mutatingwebhookconfiguration/controller.go @@ -19,6 +19,7 @@ package mutatingwebhookconfiguration import ( "context" "fmt" + "strings" admissionregistrationv1 "k8s.io/api/admissionregistration/v1" "k8s.io/apimachinery/pkg/types" @@ -29,6 +30,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/reconcile" "github.com/kubeflow/spark-operator/v2/pkg/certificate" + "github.com/kubeflow/spark-operator/v2/pkg/util" ) var ( @@ -55,8 +57,14 @@ func NewReconciler(client client.Client, certProvider *certificate.Provider, nam } func (r *Reconciler) SetupWithManager(mgr ctrl.Manager, options controller.Options) error { + kind := "MutatingWebhookConfiguration" + name := strings.ToLower(kind) + + // Use a custom log constructor. + options.LogConstructor = util.NewLogConstructor(mgr.GetLogger(), kind) + return ctrl.NewControllerManagedBy(mgr). - Named("mutating-webhook-configuration-controller"). + Named(name). Watches( &admissionregistrationv1.MutatingWebhookConfiguration{}, NewEventHandler(), diff --git a/internal/controller/scheduledsparkapplication/controller.go b/internal/controller/scheduledsparkapplication/controller.go index 521d5a6329..dee0c96c0e 100644 --- a/internal/controller/scheduledsparkapplication/controller.go +++ b/internal/controller/scheduledsparkapplication/controller.go @@ -34,6 +34,7 @@ import ( "k8s.io/apimachinery/pkg/types" "k8s.io/client-go/tools/record" "k8s.io/utils/clock" + "k8s.io/utils/ptr" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/builder" "sigs.k8s.io/controller-runtime/pkg/client" @@ -206,8 +207,14 @@ func (r *Reconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Resu // SetupWithManager sets up the controller with the Manager. func (r *Reconciler) SetupWithManager(mgr ctrl.Manager, options controller.Options) error { + kind := "ScheduledSparkApplication" + name := strings.ToLower(kind) + + // Use a custom log constructor. 
+ options.LogConstructor = util.NewLogConstructor(mgr.GetLogger(), kind) + return ctrl.NewControllerManagedBy(mgr). - Named("scheduled-spark-application-controller"). + Named(name). Watches( &v1beta2.ScheduledSparkApplication{}, NewEventHandler(), @@ -246,7 +253,7 @@ func (r *Reconciler) createSparkApplication( Kind: reflect.TypeOf(v1beta2.ScheduledSparkApplication{}).Name(), Name: scheduledApp.Name, UID: scheduledApp.UID, - BlockOwnerDeletion: util.BoolPtr(true), + BlockOwnerDeletion: ptr.To(true), }}, }, Spec: scheduledApp.Spec.Template, diff --git a/internal/controller/scheduledsparkapplication/controller_test.go b/internal/controller/scheduledsparkapplication/controller_test.go index 5b7de14a5d..77897152c4 100644 --- a/internal/controller/scheduledsparkapplication/controller_test.go +++ b/internal/controller/scheduledsparkapplication/controller_test.go @@ -26,10 +26,10 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" "k8s.io/utils/clock" + "k8s.io/utils/ptr" "sigs.k8s.io/controller-runtime/pkg/reconcile" "github.com/kubeflow/spark-operator/v2/api/v1beta2" - "github.com/kubeflow/spark-operator/v2/pkg/util" ) var _ = Describe("ScheduledSparkApplication Controller", func() { @@ -62,7 +62,7 @@ var _ = Describe("ScheduledSparkApplication Controller", func() { RestartPolicy: v1beta2.RestartPolicy{ Type: v1beta2.RestartPolicyNever, }, - MainApplicationFile: util.StringPtr("local:///dummy.jar"), + MainApplicationFile: ptr.To("local:///dummy.jar"), }, }, // TODO(user): Specify other spec details if needed. diff --git a/internal/controller/scheduledsparkapplication/suite_test.go b/internal/controller/scheduledsparkapplication/suite_test.go index 372343e4bb..8c4ac3dfdb 100644 --- a/internal/controller/scheduledsparkapplication/suite_test.go +++ b/internal/controller/scheduledsparkapplication/suite_test.go @@ -63,7 +63,7 @@ var _ = BeforeSuite(func() { // Note that you must have the required binaries setup under the bin directory to perform // the tests directly. When we run make test it will be setup and used automatically. BinaryAssetsDirectory: filepath.Join("..", "..", "..", "bin", "k8s", - fmt.Sprintf("1.32.0-%s-%s", runtime.GOOS, runtime.GOARCH)), + fmt.Sprintf("1.33.0-%s-%s", runtime.GOOS, runtime.GOARCH)), } var err error diff --git a/internal/controller/sparkapplication/controller.go b/internal/controller/sparkapplication/controller.go index 8114b11429..a49e1e495a 100644 --- a/internal/controller/sparkapplication/controller.go +++ b/internal/controller/sparkapplication/controller.go @@ -21,6 +21,7 @@ import ( "fmt" "os" "strconv" + "strings" "time" "github.com/golang/glog" @@ -35,7 +36,6 @@ import ( "k8s.io/client-go/tools/record" "k8s.io/client-go/util/retry" "k8s.io/utils/ptr" - ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/builder" "sigs.k8s.io/controller-runtime/pkg/client" @@ -242,8 +242,14 @@ func (r *Reconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Resu // SetupWithManager sets up the controller with the Manager. func (r *Reconciler) SetupWithManager(mgr ctrl.Manager, options controller.Options) error { + kind := "SparkApplication" + name := strings.ToLower(kind) + + // Use a custom log constructor. + options.LogConstructor = util.NewLogConstructor(mgr.GetLogger(), kind) + return ctrl.NewControllerManagedBy(mgr). - Named("spark-application-controller"). + Named(name). 
Watches( &corev1.Pod{}, NewSparkPodEventHandler(mgr.GetClient(), r.options.SparkExecutorMetrics), @@ -496,8 +502,9 @@ func (r *Reconciler) reconcilePendingRerunSparkApplication(ctx context.Context, if r.validateSparkResourceDeletion(ctx, app) { logger.Info("Successfully deleted resources associated with SparkApplication", "state", app.Status.AppState.State) r.recordSparkApplicationEvent(app) - r.resetSparkApplicationStatus(app) r.submitSparkApplication(ctx, app) + } else { + logger.Info("Resources associated with SparkApplication still exist") } if err := r.updateSparkApplicationStatus(ctx, app); err != nil { return err @@ -567,6 +574,7 @@ func (r *Reconciler) reconcileSucceedingSparkApplication(ctx context.Context, re logger.Error(err, "failed to delete spark resources") return err } + r.resetSparkApplicationStatus(app) app.Status.AppState.State = v1beta2.ApplicationStatePendingRerun } else { app.Status.AppState.State = v1beta2.ApplicationStateCompleted @@ -612,6 +620,7 @@ func (r *Reconciler) reconcileFailingSparkApplication(ctx context.Context, req c logger.Error(err, "failed to delete spark resources") return err } + r.resetSparkApplicationStatus(app) app.Status.AppState.State = v1beta2.ApplicationStatePendingRerun } else { // If we're waiting before retrying then reconcile will not modify anything, so we need to requeue. @@ -742,7 +751,7 @@ func (r *Reconciler) reconcileSuspendingSparkApplication(ctx context.Context, re r.recordSparkApplicationEvent(app) if err := r.deleteSparkResources(ctx, app); err != nil { - logger.Error(err, "failed to delete spark resources", "name", app.Name, "namespace", app.Namespace) + logger.Error(err, "failed to delete spark resources") return err } @@ -758,7 +767,7 @@ func (r *Reconciler) reconcileSuspendingSparkApplication(ctx context.Context, re }, ) if retryErr != nil { - logger.Error(retryErr, "Failed to reconcile SparkApplication", "name", key.Name, "namespace", key.Namespace) + logger.Error(retryErr, "Failed to reconcile SparkApplication") return ctrl.Result{}, retryErr } return ctrl.Result{}, nil @@ -780,7 +789,7 @@ func (r *Reconciler) reconcileSuspendedSparkApplication(ctx context.Context, req app := old.DeepCopy() if r.validateSparkResourceDeletion(ctx, app) { - logger.Info("Successfully deleted resources associated with SparkApplication", "name", app.Name, "namespace", app.Namespace, "state", app.Status.AppState.State) + logger.Info("Successfully deleted resources associated with SparkApplication", "state", app.Status.AppState.State) r.resetSparkApplicationStatus(app) r.recordSparkApplicationEvent(app) if !ptr.Deref(app.Spec.Suspend, false) { @@ -789,9 +798,7 @@ func (r *Reconciler) reconcileSuspendedSparkApplication(ctx context.Context, req } } } else { - err := fmt.Errorf("resources associated with SparkApplication still exist: %s/%s", app.Namespace, app.Name) - logger.Error(err, "Failed to confirm being deleted resources associated with SparkApplication", "name", app.Name, "namespace", app.Namespace, "state", app.Status.AppState.State) - return err + logger.Info("Resources associated with SparkApplication still exist") } if err := r.updateSparkApplicationStatus(ctx, app); err != nil { return err @@ -800,7 +807,7 @@ func (r *Reconciler) reconcileSuspendedSparkApplication(ctx context.Context, req }, ) if retryErr != nil { - logger.Error(retryErr, "Failed to reconcile SparkApplication", "name", key.Name, "namespace", key.Namespace) + logger.Error(retryErr, "Failed to reconcile SparkApplication") return ctrl.Result{}, retryErr } return 
ctrl.Result{}, nil @@ -831,7 +838,7 @@ func (r *Reconciler) reconcileResumingSparkApplication(ctx context.Context, req }, ) if retryErr != nil { - logger.Error(retryErr, "Failed to reconcile SparkApplication", "name", key.Name, "namespace", key.Namespace) + logger.Error(retryErr, "Failed to reconcile SparkApplication") return ctrl.Result{Requeue: true}, retryErr } return ctrl.Result{}, nil @@ -858,7 +865,7 @@ func (r *Reconciler) transitionToSuspending(ctx context.Context, req ctrl.Reques }, ) if retryErr != nil { - logger.Error(retryErr, "Failed to reconcile SparkApplication", "name", key.Name, "namespace", key.Namespace) + logger.Error(retryErr, "Failed to reconcile SparkApplication") return ctrl.Result{Requeue: true}, retryErr } return ctrl.Result{}, nil @@ -916,7 +923,10 @@ func (r *Reconciler) submitSparkApplication(ctx context.Context, app *v1beta2.Sp } // Use batch scheduler to perform scheduling task before submitting (before build command arguments). - if needScheduling, scheduler := r.shouldDoBatchScheduling(ctx, app); needScheduling { + if needScheduling, scheduler, err := r.shouldDoBatchScheduling(ctx, app); err != nil { + submitErr = fmt.Errorf("failed during batch scheduler setup or check: %v", err) + return + } else if needScheduling { logger.Info("Do batch scheduling for SparkApplication") if err := scheduler.Schedule(app); err != nil { submitErr = fmt.Errorf("failed to process batch scheduler: %v", err) @@ -1169,7 +1179,7 @@ func (r *Reconciler) deleteWebUIService(ctx context.Context, app *v1beta2.SparkA }, }, &client.DeleteOptions{ - GracePeriodSeconds: util.Int64Ptr(0), + GracePeriodSeconds: ptr.To[int64](0), }, ); err != nil && !errors.IsNotFound(err) { return err @@ -1195,7 +1205,7 @@ func (r *Reconciler) deleteWebUIIngress(ctx context.Context, app *v1beta2.SparkA }, }, &client.DeleteOptions{ - GracePeriodSeconds: util.Int64Ptr(0), + GracePeriodSeconds: ptr.To[int64](0), }, ); err != nil && !errors.IsNotFound(err) { return err @@ -1213,7 +1223,7 @@ func (r *Reconciler) deleteWebUIIngress(ctx context.Context, app *v1beta2.SparkA }, }, &client.DeleteOptions{ - GracePeriodSeconds: util.Int64Ptr(0), + GracePeriodSeconds: ptr.To[int64](0), }, ); err != nil && !errors.IsNotFound(err) { return err @@ -1367,20 +1377,20 @@ func (r *Reconciler) recordExecutorEvent(app *v1beta2.SparkApplication, state v1 func (r *Reconciler) resetSparkApplicationStatus(app *v1beta2.SparkApplication) { status := &app.Status switch status.AppState.State { - case v1beta2.ApplicationStateInvalidating: + case v1beta2.ApplicationStateSucceeding, v1beta2.ApplicationStateFailing: status.SparkApplicationID = "" - status.SubmissionAttempts = 0 - status.ExecutionAttempts = 0 - status.LastSubmissionAttemptTime = metav1.Time{} status.TerminationTime = metav1.Time{} status.AppState.ErrorMessage = "" + status.DriverInfo = v1beta2.DriverInfo{} status.ExecutorState = nil - case v1beta2.ApplicationStatePendingRerun: + case v1beta2.ApplicationStateInvalidating: status.SparkApplicationID = "" status.SubmissionAttempts = 0 + status.ExecutionAttempts = 0 status.LastSubmissionAttemptTime = metav1.Time{} - status.DriverInfo = v1beta2.DriverInfo{} + status.TerminationTime = metav1.Time{} status.AppState.ErrorMessage = "" + status.DriverInfo = v1beta2.DriverInfo{} status.ExecutorState = nil case v1beta2.ApplicationStateSuspended: status.SparkApplicationID = "" @@ -1390,11 +1400,10 @@ func (r *Reconciler) resetSparkApplicationStatus(app *v1beta2.SparkApplication) } } -func (r *Reconciler) shouldDoBatchScheduling(ctx 
context.Context, app *v1beta2.SparkApplication) (bool, scheduler.Interface) { - logger := log.FromContext(ctx) +func (r *Reconciler) shouldDoBatchScheduling(ctx context.Context, app *v1beta2.SparkApplication) (bool, scheduler.Interface, error) { // If batch scheduling isn't enabled if r.registry == nil { - return false, nil + return false, nil, nil } schedulerName := r.options.DefaultBatchScheduler @@ -1404,7 +1413,7 @@ func (r *Reconciler) shouldDoBatchScheduling(ctx context.Context, app *v1beta2.S // If both the default and app batch scheduler are unspecified or empty if schedulerName == "" { - return false, nil + return false, nil, nil } var err error @@ -1431,15 +1440,14 @@ func (r *Reconciler) shouldDoBatchScheduling(ctx context.Context, app *v1beta2.S } if err != nil || scheduler == nil { - logger.Error(err, "Failed to get scheduler for SparkApplication", "scheduler", schedulerName) - return false, nil + return false, nil, fmt.Errorf("failed to get scheduler %s: %v", schedulerName, err) } - return scheduler.ShouldSchedule(app), scheduler + return scheduler.ShouldSchedule(app), scheduler, nil } // Clean up when the spark application is terminated. func (r *Reconciler) cleanUpOnTermination(ctx context.Context, _, newApp *v1beta2.SparkApplication) error { - if needScheduling, scheduler := r.shouldDoBatchScheduling(ctx, newApp); needScheduling { + if needScheduling, scheduler, _ := r.shouldDoBatchScheduling(ctx, newApp); needScheduling { if err := scheduler.Cleanup(newApp); err != nil { return err } diff --git a/internal/controller/sparkapplication/controller_test.go b/internal/controller/sparkapplication/controller_test.go index 5be49553ff..aa56015a27 100644 --- a/internal/controller/sparkapplication/controller_test.go +++ b/internal/controller/sparkapplication/controller_test.go @@ -56,7 +56,7 @@ var _ = Describe("SparkApplication Controller", func() { Namespace: appNamespace, }, Spec: v1beta2.SparkApplicationSpec{ - MainApplicationFile: util.StringPtr("local:///dummy.jar"), + MainApplicationFile: ptr.To("local:///dummy.jar"), }, } ingressKey := types.NamespacedName{ @@ -174,7 +174,7 @@ var _ = Describe("SparkApplication Controller", func() { Namespace: appNamespace, }, Spec: v1beta2.SparkApplicationSpec{ - MainApplicationFile: util.StringPtr("local:///dummy.jar"), + MainApplicationFile: ptr.To("local:///dummy.jar"), SparkUIOptions: &v1beta2.SparkUIConfiguration{ IngressTLS: ingressTLS, IngressAnnotations: ingressAnnotations, @@ -290,7 +290,7 @@ var _ = Describe("SparkApplication Controller", func() { Namespace: appNamespace, }, Spec: v1beta2.SparkApplicationSpec{ - MainApplicationFile: util.StringPtr("local:///dummy.jar"), + MainApplicationFile: ptr.To("local:///dummy.jar"), }, } v1beta2.SetSparkApplicationDefaults(app) @@ -369,7 +369,7 @@ var _ = Describe("SparkApplication Controller", func() { Namespace: appNamespace, }, Spec: v1beta2.SparkApplicationSpec{ - MainApplicationFile: util.StringPtr("local:///dummy.jar"), + MainApplicationFile: ptr.To("local:///dummy.jar"), }, } v1beta2.SetSparkApplicationDefaults(app) @@ -439,7 +439,7 @@ var _ = Describe("SparkApplication Controller", func() { }, }, Spec: v1beta2.SparkApplicationSpec{ - MainApplicationFile: util.StringPtr("local:///dummy.jar"), + MainApplicationFile: ptr.To("local:///dummy.jar"), }, } v1beta2.SetSparkApplicationDefaults(app) @@ -494,7 +494,7 @@ var _ = Describe("SparkApplication Controller", func() { Namespace: appNamespace, }, Spec: v1beta2.SparkApplicationSpec{ - MainApplicationFile: 
util.StringPtr("local:///dummy.jar"), + MainApplicationFile: ptr.To("local:///dummy.jar"), }, } v1beta2.SetSparkApplicationDefaults(app) @@ -514,7 +514,7 @@ var _ = Describe("SparkApplication Controller", func() { By("Set TimeToLiveSeconds and make the SparkApplication expired") app := &v1beta2.SparkApplication{} Expect(k8sClient.Get(ctx, key, app)).To(Succeed()) - app.Spec.TimeToLiveSeconds = util.Int64Ptr(60) + app.Spec.TimeToLiveSeconds = ptr.To[int64](60) Expect(k8sClient.Update(ctx, app)).To(Succeed()) app.Status.TerminationTime = metav1.NewTime(time.Now().Add(-2 * time.Minute)) Expect(k8sClient.Status().Update(ctx, app)).To(Succeed()) @@ -554,7 +554,7 @@ var _ = Describe("SparkApplication Controller", func() { Namespace: appNamespace, }, Spec: v1beta2.SparkApplicationSpec{ - MainApplicationFile: util.StringPtr("local:///dummy.jar"), + MainApplicationFile: ptr.To("local:///dummy.jar"), }, } v1beta2.SetSparkApplicationDefaults(app) @@ -609,7 +609,7 @@ var _ = Describe("SparkApplication Controller", func() { Namespace: appNamespace, }, Spec: v1beta2.SparkApplicationSpec{ - MainApplicationFile: util.StringPtr("local:///dummy.jar"), + MainApplicationFile: ptr.To("local:///dummy.jar"), }, } v1beta2.SetSparkApplicationDefaults(app) @@ -629,7 +629,7 @@ var _ = Describe("SparkApplication Controller", func() { By("Set TimeToLiveSeconds and make the SparkApplication expired") app := &v1beta2.SparkApplication{} Expect(k8sClient.Get(ctx, key, app)).To(Succeed()) - app.Spec.TimeToLiveSeconds = util.Int64Ptr(60) + app.Spec.TimeToLiveSeconds = ptr.To[int64](60) Expect(k8sClient.Update(ctx, app)).To(Succeed()) app.Status.TerminationTime = metav1.NewTime(time.Now().Add(-2 * time.Minute)) Expect(k8sClient.Status().Update(ctx, app)).To(Succeed()) @@ -669,7 +669,7 @@ var _ = Describe("SparkApplication Controller", func() { Namespace: appNamespace, }, Spec: v1beta2.SparkApplicationSpec{ - MainApplicationFile: util.StringPtr("local:///dummy.jar"), + MainApplicationFile: ptr.To("local:///dummy.jar"), }, } v1beta2.SetSparkApplicationDefaults(app) @@ -757,6 +757,279 @@ var _ = Describe("SparkApplication Controller", func() { }) }) + Context("When reconciling a succeeding SparkApplication with Always retry policy", func() { + ctx := context.Background() + appName := "test" + appNamespace := "default" + key := types.NamespacedName{ + Name: appName, + Namespace: appNamespace, + } + + BeforeEach(func() { + By("Creating a SparkApplication with Always restart policy") + app := &v1beta2.SparkApplication{ + ObjectMeta: metav1.ObjectMeta{ + Name: appName, + Namespace: appNamespace, + }, + Spec: v1beta2.SparkApplicationSpec{ + MainApplicationFile: ptr.To("local:///dummy.jar"), + RestartPolicy: v1beta2.RestartPolicy{ + Type: v1beta2.RestartPolicyAlways, + }, + }, + } + v1beta2.SetSparkApplicationDefaults(app) + Expect(k8sClient.Create(ctx, app)).Should(Succeed()) + + By("Creating a driver pod with Succeeded phase") + driver := createDriverPod(appName, appNamespace) + Expect(k8sClient.Create(ctx, driver)).Should(Succeed()) + driver.Status.Phase = corev1.PodSucceeded + Expect(k8sClient.Status().Update(ctx, driver)).Should(Succeed()) + + By("Creating a executor pod with Succeeded phase") + executor := createExecutorPod(appName, appNamespace, 1) + Expect(k8sClient.Create(ctx, executor)).To(Succeed()) + executor.Status.Phase = corev1.PodSucceeded + Expect(k8sClient.Status().Update(ctx, executor)).To(Succeed()) + + By("Updating the SparkApplication state to Succeeding") + now := time.Now() + app.Status.SparkApplicationID = 
"test-app-id" + app.Status.SubmissionAttempts = 1 + app.Status.LastSubmissionAttemptTime = metav1.NewTime(now.Add(-5 * time.Minute)) + app.Status.ExecutionAttempts = 1 + app.Status.TerminationTime = metav1.NewTime(now.Add(-30 * time.Second)) + app.Status.AppState.State = v1beta2.ApplicationStateSucceeding + app.Status.DriverInfo.PodName = driver.Name + app.Status.ExecutorState = map[string]v1beta2.ExecutorState{ + executor.Name: util.GetExecutorState(executor), + } + Expect(k8sClient.Status().Update(ctx, app)).Should(Succeed()) + }) + + AfterEach(func() { + By("Deleting the test SparkApplication") + app := &v1beta2.SparkApplication{ + ObjectMeta: metav1.ObjectMeta{ + Name: appName, + Namespace: appNamespace, + }, + } + Expect(client.IgnoreNotFound(k8sClient.Delete(ctx, app))).Should(Succeed()) + + By("Deleting the driver pod") + driverKey := getDriverNamespacedName(appName, appNamespace) + driver := &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: driverKey.Name, + Namespace: driverKey.Namespace, + }, + } + Expect(client.IgnoreNotFound(k8sClient.Delete(ctx, driver))).Should(Succeed()) + + By("Deleting the executor pods") + executorKey := getExecutorNamespacedName(appName, appNamespace, 1) + executor := &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: executorKey.Name, + Namespace: executorKey.Namespace, + }, + } + Expect(client.IgnoreNotFound(k8sClient.Delete(ctx, executor))).Should(Succeed()) + }) + + It("Should reset SparkApplication status when transitioning to PendingRerun", func() { + By("Reconciling the failing SparkApplication") + reconciler := sparkapplication.NewReconciler( + nil, + k8sClient.Scheme(), + k8sClient, + record.NewFakeRecorder(3), + nil, + &sparkapplication.SparkSubmitter{}, + sparkapplication.Options{Namespaces: []string{appNamespace}}, + ) + _, err := reconciler.Reconcile(ctx, reconcile.Request{NamespacedName: key}) + Expect(err).NotTo(HaveOccurred()) + + By("Checking whether the SparkApplication status has been reset") + app := &v1beta2.SparkApplication{} + Expect(k8sClient.Get(ctx, key, app)).NotTo(HaveOccurred()) + Expect(app.Status.SparkApplicationID).To(BeEmpty()) + Expect(app.Status.TerminationTime).To(BeZero()) + Expect(app.Status.AppState.State).To(Equal(v1beta2.ApplicationStatePendingRerun)) + Expect(app.Status.AppState.ErrorMessage).To(BeEmpty()) + Expect(app.Status.DriverInfo).To(BeZero()) + Expect(app.Status.ExecutorState).To(BeEmpty()) + }) + + It("Should delete existing driver and executor pods when transitioning to PendingRerun", func() { + By("Reconciling the failing SparkApplication") + reconciler := sparkapplication.NewReconciler( + nil, + k8sClient.Scheme(), + k8sClient, + record.NewFakeRecorder(3), + nil, + &sparkapplication.SparkSubmitter{}, + sparkapplication.Options{Namespaces: []string{appNamespace}}, + ) + _, err := reconciler.Reconcile(ctx, reconcile.Request{NamespacedName: key}) + Expect(err).NotTo(HaveOccurred()) + + By("Checking whether the driver pod has been deleted") + driverKey := getDriverNamespacedName(appName, appNamespace) + driverPod := &corev1.Pod{} + Expect(errors.IsNotFound(k8sClient.Get(ctx, driverKey, driverPod))).To(BeTrue()) + + By("Checking whether the executor pods has been deleted") + executorKey := getDriverNamespacedName(appName, appNamespace) + executorPod := &corev1.Pod{} + Expect(errors.IsNotFound(k8sClient.Get(ctx, executorKey, executorPod))).To(BeTrue()) + }) + }) + + Context("When reconciling a failing SparkApplication which should retry", func() { + ctx := context.Background() + appName := "test" + 
appNamespace := "default" + key := types.NamespacedName{ + Name: appName, + Namespace: appNamespace, + } + + BeforeEach(func() { + By("Creating a SparkApplication with OnFailure restart policy") + app := &v1beta2.SparkApplication{ + ObjectMeta: metav1.ObjectMeta{ + Name: appName, + Namespace: appNamespace, + }, + Spec: v1beta2.SparkApplicationSpec{ + MainApplicationFile: ptr.To("local:///dummy.jar"), + RestartPolicy: v1beta2.RestartPolicy{ + Type: v1beta2.RestartPolicyOnFailure, + OnFailureRetries: ptr.To[int32](3), + OnFailureRetryInterval: ptr.To[int64](10), + }, + }, + } + v1beta2.SetSparkApplicationDefaults(app) + Expect(k8sClient.Create(ctx, app)).Should(Succeed()) + + By("Creating a driver pod with Failed phase") + driver := createDriverPod(appName, appNamespace) + Expect(k8sClient.Create(ctx, driver)).Should(Succeed()) + driver.Status.Phase = corev1.PodFailed + Expect(k8sClient.Status().Update(ctx, driver)).Should(Succeed()) + + By("Creating a executor pod with Failed phase") + executor := createExecutorPod(appName, appNamespace, 1) + Expect(k8sClient.Create(ctx, executor)).To(Succeed()) + executor.Status.Phase = corev1.PodFailed + Expect(k8sClient.Status().Update(ctx, executor)).To(Succeed()) + + By("Updating the SparkApplication state to Failing") + now := time.Now() + app.Status.SparkApplicationID = "test-app-id" + app.Status.SubmissionAttempts = 1 + app.Status.LastSubmissionAttemptTime = metav1.NewTime(now.Add(-5 * time.Minute)) + app.Status.ExecutionAttempts = 1 + app.Status.TerminationTime = metav1.NewTime(now.Add(-30 * time.Second)) + app.Status.AppState.State = v1beta2.ApplicationStateFailing + app.Status.AppState.ErrorMessage = "Driver pod failed" + app.Status.DriverInfo.PodName = driver.Name + app.Status.ExecutorState = map[string]v1beta2.ExecutorState{ + executor.Name: util.GetExecutorState(executor), + } + Expect(k8sClient.Status().Update(ctx, app)).Should(Succeed()) + }) + + AfterEach(func() { + By("Deleting the test SparkApplication") + app := &v1beta2.SparkApplication{ + ObjectMeta: metav1.ObjectMeta{ + Name: appName, + Namespace: appNamespace, + }, + } + Expect(client.IgnoreNotFound(k8sClient.Delete(ctx, app))).Should(Succeed()) + + By("Deleting the driver pod") + driverKey := getDriverNamespacedName(appName, appNamespace) + driver := &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: driverKey.Name, + Namespace: driverKey.Namespace, + }, + } + Expect(client.IgnoreNotFound(k8sClient.Delete(ctx, driver))).Should(Succeed()) + + By("Deleting the executor pods") + executorKey := getExecutorNamespacedName(appName, appNamespace, 1) + executor := &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: executorKey.Name, + Namespace: executorKey.Namespace, + }, + } + Expect(client.IgnoreNotFound(k8sClient.Delete(ctx, executor))).Should(Succeed()) + }) + + It("Should reset SparkApplication status when transitioning to PendingRerun", func() { + By("Reconciling the failing SparkApplication") + reconciler := sparkapplication.NewReconciler( + nil, + k8sClient.Scheme(), + k8sClient, + record.NewFakeRecorder(3), + nil, + &sparkapplication.SparkSubmitter{}, + sparkapplication.Options{Namespaces: []string{appNamespace}}, + ) + _, err := reconciler.Reconcile(ctx, reconcile.Request{NamespacedName: key}) + Expect(err).NotTo(HaveOccurred()) + + By("Checking whether the SparkApplication status has been reset") + app := &v1beta2.SparkApplication{} + Expect(k8sClient.Get(ctx, key, app)).NotTo(HaveOccurred()) + Expect(app.Status.SparkApplicationID).To(BeEmpty()) + 
Expect(app.Status.TerminationTime).To(BeZero()) + Expect(app.Status.AppState.State).To(Equal(v1beta2.ApplicationStatePendingRerun)) + Expect(app.Status.AppState.ErrorMessage).To(BeEmpty()) + Expect(app.Status.DriverInfo).To(BeZero()) + Expect(app.Status.ExecutorState).To(BeEmpty()) + }) + + It("Should delete existing driver and executor pods when transitioning to PendingRerun", func() { + By("Reconciling the failing SparkApplication") + reconciler := sparkapplication.NewReconciler( + nil, + k8sClient.Scheme(), + k8sClient, + record.NewFakeRecorder(3), + nil, + &sparkapplication.SparkSubmitter{}, + sparkapplication.Options{Namespaces: []string{appNamespace}}, + ) + _, err := reconciler.Reconcile(ctx, reconcile.Request{NamespacedName: key}) + Expect(err).NotTo(HaveOccurred()) + + By("Checking whether the driver pod has been deleted") + driverKey := getDriverNamespacedName(appName, appNamespace) + driverPod := &corev1.Pod{} + Expect(errors.IsNotFound(k8sClient.Get(ctx, driverKey, driverPod))).To(BeTrue()) + + By("Checking whether the executor pods have been deleted") + executorKey := getExecutorNamespacedName(appName, appNamespace, 1) + executorPod := &corev1.Pod{} + Expect(errors.IsNotFound(k8sClient.Get(ctx, executorKey, executorPod))).To(BeTrue()) + }) + }) + Context("Suspend and Resume", func() { ctx := context.Background() appName := "test" @@ -774,7 +1047,7 @@ var _ = Describe("SparkApplication Controller", func() { Namespace: appNamespace, }, Spec: v1beta2.SparkApplicationSpec{ - MainApplicationFile: util.StringPtr("local:///dummy.jar"), + MainApplicationFile: ptr.To("local:///dummy.jar"), }, } v1beta2.SetSparkApplicationDefaults(app) diff --git a/internal/controller/sparkapplication/driveringress_test.go b/internal/controller/sparkapplication/driveringress_test.go index 7b26d596c2..267ba8d2c6 100644 --- a/internal/controller/sparkapplication/driveringress_test.go +++ b/internal/controller/sparkapplication/driveringress_test.go @@ -26,11 +26,11 @@ import ( corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/util/intstr" + "k8s.io/utils/ptr" "sigs.k8s.io/controller-runtime/pkg/client/fake" "github.com/kubeflow/spark-operator/v2/api/v1beta2" "github.com/kubeflow/spark-operator/v2/pkg/common" - "github.com/kubeflow/spark-operator/v2/pkg/util" ) func TestCreateDriverIngressService(t *testing.T) { @@ -111,7 +111,7 @@ func TestCreateDriverIngressService(t *testing.T) { Spec: v1beta2.SparkApplicationSpec{ DriverIngressOptions: []v1beta2.DriverIngressConfiguration{ { - ServicePort: util.Int32Ptr(8888), + ServicePort: ptr.To[int32](8888), ServiceLabels: map[string]string{"foo": "bar"}, }, }, @@ -130,7 +130,7 @@ func TestCreateDriverIngressService(t *testing.T) { Spec: v1beta2.SparkApplicationSpec{ DriverIngressOptions: []v1beta2.DriverIngressConfiguration{ { - ServicePort: util.Int32Ptr(8888), + ServicePort: ptr.To[int32](8888), }, }, }, @@ -166,7 +166,7 @@ func TestCreateDriverIngressService(t *testing.T) { Spec: v1beta2.SparkApplicationSpec{ DriverIngressOptions: []v1beta2.DriverIngressConfiguration{ { - ServicePort: util.Int32Ptr(4041), + ServicePort: ptr.To[int32](4041), }, }, }, @@ -184,7 +184,7 @@ func TestCreateDriverIngressService(t *testing.T) { Spec: v1beta2.SparkApplicationSpec{ DriverIngressOptions: []v1beta2.DriverIngressConfiguration{ { - ServicePort: util.Int32Ptr(8888), + ServicePort: ptr.To[int32](8888), ServiceType: &serviceTypeNodePort, }, }, @@ -222,7 +222,7 @@ func TestCreateDriverIngressService(t *testing.T) { Spec: 
v1beta2.SparkApplicationSpec{ DriverIngressOptions: []v1beta2.DriverIngressConfiguration{ { - ServicePort: util.Int32Ptr(8888), + ServicePort: ptr.To[int32](8888), ServiceAnnotations: map[string]string{ "key": "value", }, @@ -243,7 +243,7 @@ func TestCreateDriverIngressService(t *testing.T) { Spec: v1beta2.SparkApplicationSpec{ DriverIngressOptions: []v1beta2.DriverIngressConfiguration{ { - ServicePort: util.Int32Ptr(8888), + ServicePort: ptr.To[int32](8888), ServiceLabels: map[string]string{ "foo": "bar", }, diff --git a/internal/controller/sparkapplication/event_filter.go b/internal/controller/sparkapplication/event_filter.go index 85738a593a..f125f904df 100644 --- a/internal/controller/sparkapplication/event_filter.go +++ b/internal/controller/sparkapplication/event_filter.go @@ -18,6 +18,7 @@ package sparkapplication import ( "context" + "reflect" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/equality" @@ -30,6 +31,7 @@ import ( "github.com/go-logr/logr" "github.com/kubeflow/spark-operator/v2/api/v1beta2" + "github.com/kubeflow/spark-operator/v2/pkg/features" "github.com/kubeflow/spark-operator/v2/pkg/util" ) @@ -175,12 +177,30 @@ func (f *EventFilter) Update(e event.UpdateEvent) bool { // This is currently best effort as we can potentially miss updates and end up in an inconsistent state. if !equality.Semantic.DeepEqual(oldApp.Spec, newApp.Spec) { - // Only Spec.Suspend can be updated - oldApp.Spec.Suspend = newApp.Spec.Suspend - if equality.Semantic.DeepEqual(oldApp.Spec, newApp.Spec) { + // Only Spec.Suspend can be updated without any action + oldAppCopy := oldApp.DeepCopy() + oldAppCopy.Spec.Suspend = newApp.Spec.Suspend + if equality.Semantic.DeepEqual(oldAppCopy.Spec, newApp.Spec) { return true } + // Check if only webhook-patched fields changed (requires PartialRestart feature gate). + // These fields are applied by the mutating webhook when new pods are created, + // so we don't need to trigger a reconcile - the webhook cache will automatically + // use the new values for any newly created pods. + if features.Enabled(features.PartialRestart) && f.isWebhookPatchedFieldsOnlyChange(oldApp, newApp) { + f.logger.Info("Only webhook-patched fields changed, skipping reconcile", + "name", newApp.Name, "namespace", newApp.Namespace) + f.recorder.Eventf( + newApp, + corev1.EventTypeNormal, + "SparkApplicationWebhookFieldsUpdated", + "SparkApplication %s webhook-patched fields updated, new pods will use updated values", + newApp.Name, + ) + return false + } + // Force-set the application status to Invalidating which handles clean-up and application re-run. newApp.Status.AppState.State = v1beta2.ApplicationStateInvalidating f.logger.Info("Updating SparkApplication status", "name", newApp.Name, "namespace", newApp.Namespace, " oldState", oldApp.Status.AppState.State, "newState", newApp.Status.AppState.State) @@ -224,3 +244,76 @@ func (f *EventFilter) Generic(e event.GenericEvent) bool { func (f *EventFilter) filter(app *v1beta2.SparkApplication) bool { return f.namespaces[metav1.NamespaceAll] || f.namespaces[app.Namespace] } + +// isWebhookPatchedFieldsOnlyChange checks if the spec changes only involve fields +// that are patched by the mutating webhook when pods are created. +// These fields don't require a full application restart because: +// 1. They don't affect already running pods +// 2. 
The webhook will automatically apply the new values to any newly created pods +// +// Currently supported webhook-patched fields for Executor: +// - PriorityClassName +// - NodeSelector +// - Tolerations +// - Affinity +// - SchedulerName +// +// Note: Driver field changes still require full restart since the driver pod +// is not recreated during the application lifecycle. +func (f *EventFilter) isWebhookPatchedFieldsOnlyChange(oldApp, newApp *v1beta2.SparkApplication) bool { + // First check if there are any webhook-patched field changes + if !hasExecutorWebhookFieldChanges(oldApp, newApp) { + // No webhook fields changed, so this is not a "webhook-only" change + return false + } + + // Create copies to compare non-webhook fields + oldCopy := oldApp.DeepCopy() + newCopy := newApp.DeepCopy() + + // Zero out webhook-patched executor fields in both copies + clearWebhookPatchedExecutorFields(&oldCopy.Spec.Executor) + clearWebhookPatchedExecutorFields(&newCopy.Spec.Executor) + + // Also zero out Suspend field as it's handled separately + oldCopy.Spec.Suspend = nil + newCopy.Spec.Suspend = nil + + // If specs are equal after clearing webhook-patched fields, + // then only webhook-patched fields changed + return equality.Semantic.DeepEqual(oldCopy.Spec, newCopy.Spec) +} + +// clearWebhookPatchedExecutorFields zeros out the executor fields that are +// patched by the mutating webhook. +func clearWebhookPatchedExecutorFields(executor *v1beta2.ExecutorSpec) { + executor.PriorityClassName = nil + executor.NodeSelector = nil + executor.Tolerations = nil + executor.Affinity = nil + executor.SchedulerName = nil +} + +// hasExecutorWebhookFieldChanges checks if any webhook-patched executor fields changed. +// This is useful for logging which fields triggered the skip. +func hasExecutorWebhookFieldChanges(oldApp, newApp *v1beta2.SparkApplication) bool { + oldExec := &oldApp.Spec.Executor + newExec := &newApp.Spec.Executor + + if !reflect.DeepEqual(oldExec.PriorityClassName, newExec.PriorityClassName) { + return true + } + if !reflect.DeepEqual(oldExec.NodeSelector, newExec.NodeSelector) { + return true + } + if !reflect.DeepEqual(oldExec.Tolerations, newExec.Tolerations) { + return true + } + if !reflect.DeepEqual(oldExec.Affinity, newExec.Affinity) { + return true + } + if !reflect.DeepEqual(oldExec.SchedulerName, newExec.SchedulerName) { + return true + } + return false +} diff --git a/internal/controller/sparkapplication/event_filter_test.go b/internal/controller/sparkapplication/event_filter_test.go new file mode 100644 index 0000000000..e868f9572b --- /dev/null +++ b/internal/controller/sparkapplication/event_filter_test.go @@ -0,0 +1,294 @@ +/* +Copyright 2025 The Kubeflow authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + https://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
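The PartialRestart logic above boils down to a clear-and-compare pattern: deep-copy both specs, zero the fields the mutating webhook owns, and skip the reconcile only if the remainders are semantically equal. A toy sketch of the same idea with hypothetical field names (the real implementation uses the operator's v1beta2 types and apimachinery's semantic equality):

package main

import (
	"fmt"
	"reflect"
)

// executorSpec is a toy stand-in for the executor section of a SparkApplication spec.
type executorSpec struct {
	Instances     int32
	NodeSelector  map[string]string
	SchedulerName *string
}

// clearWebhookManagedFields zeros the fields the mutating webhook applies to newly created pods.
func clearWebhookManagedFields(e *executorSpec) {
	e.NodeSelector = nil
	e.SchedulerName = nil
}

// onlyWebhookFieldsChanged reports whether old and new differ solely in webhook-managed fields.
func onlyWebhookFieldsChanged(oldSpec, newSpec executorSpec) bool {
	if reflect.DeepEqual(oldSpec, newSpec) {
		return false // nothing changed at all, so not a "webhook fields only" change
	}
	// Value copies are enough here because clearing only nils out map/pointer fields.
	oldCopy, newCopy := oldSpec, newSpec
	clearWebhookManagedFields(&oldCopy)
	clearWebhookManagedFields(&newCopy)
	return reflect.DeepEqual(oldCopy, newCopy)
}

func main() {
	oldSpec := executorSpec{Instances: 2}
	newSpec := executorSpec{Instances: 2, NodeSelector: map[string]string{"node-type": "spark"}}
	fmt.Println(onlyWebhookFieldsChanged(oldSpec, newSpec)) // true: reconcile can be skipped

	newSpec.Instances = 4
	fmt.Println(onlyWebhookFieldsChanged(oldSpec, newSpec)) // false: still needs a full restart
}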
+*/ + +package sparkapplication + +import ( + "testing" + + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "sigs.k8s.io/controller-runtime/pkg/log" + + "github.com/kubeflow/spark-operator/v2/api/v1beta2" +) + +func TestIsWebhookPatchedFieldsOnlyChange(t *testing.T) { + logger := log.Log.WithName("test") + filter := &EventFilter{ + logger: logger, + } + + baseApp := func() *v1beta2.SparkApplication { + return &v1beta2.SparkApplication{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-app", + Namespace: "default", + }, + Spec: v1beta2.SparkApplicationSpec{ + Type: v1beta2.SparkApplicationTypeScala, + SparkVersion: "3.5.0", + Driver: v1beta2.DriverSpec{ + SparkPodSpec: v1beta2.SparkPodSpec{ + Cores: func() *int32 { v := int32(1); return &v }(), + }, + }, + Executor: v1beta2.ExecutorSpec{ + SparkPodSpec: v1beta2.SparkPodSpec{ + Cores: func() *int32 { v := int32(2); return &v }(), + }, + Instances: func() *int32 { v := int32(2); return &v }(), + }, + }, + } + } + + tests := []struct { + name string + oldApp *v1beta2.SparkApplication + newApp *v1beta2.SparkApplication + expected bool + }{ + { + name: "no changes", + oldApp: baseApp(), + newApp: baseApp(), + expected: false, // No changes at all, so not "only webhook fields changed" + }, + { + name: "only executor priorityClassName changed", + oldApp: baseApp(), + newApp: func() *v1beta2.SparkApplication { + app := baseApp() + app.Spec.Executor.PriorityClassName = func() *string { v := "high-priority"; return &v }() + return app + }(), + expected: true, + }, + { + name: "only executor nodeSelector changed", + oldApp: baseApp(), + newApp: func() *v1beta2.SparkApplication { + app := baseApp() + app.Spec.Executor.NodeSelector = map[string]string{"node-type": "spark"} + return app + }(), + expected: true, + }, + { + name: "only executor tolerations changed", + oldApp: baseApp(), + newApp: func() *v1beta2.SparkApplication { + app := baseApp() + app.Spec.Executor.Tolerations = []corev1.Toleration{ + {Key: "spark", Operator: corev1.TolerationOpExists}, + } + return app + }(), + expected: true, + }, + { + name: "only executor affinity changed", + oldApp: baseApp(), + newApp: func() *v1beta2.SparkApplication { + app := baseApp() + app.Spec.Executor.Affinity = &corev1.Affinity{ + NodeAffinity: &corev1.NodeAffinity{ + RequiredDuringSchedulingIgnoredDuringExecution: &corev1.NodeSelector{ + NodeSelectorTerms: []corev1.NodeSelectorTerm{ + { + MatchExpressions: []corev1.NodeSelectorRequirement{ + {Key: "type", Operator: corev1.NodeSelectorOpIn, Values: []string{"spark"}}, + }, + }, + }, + }, + }, + } + return app + }(), + expected: true, + }, + { + name: "only executor schedulerName changed", + oldApp: baseApp(), + newApp: func() *v1beta2.SparkApplication { + app := baseApp() + app.Spec.Executor.SchedulerName = func() *string { v := "volcano"; return &v }() + return app + }(), + expected: true, + }, + { + name: "multiple executor webhook fields changed", + oldApp: baseApp(), + newApp: func() *v1beta2.SparkApplication { + app := baseApp() + app.Spec.Executor.PriorityClassName = func() *string { v := "high-priority"; return &v }() + app.Spec.Executor.NodeSelector = map[string]string{"node-type": "spark"} + app.Spec.Executor.Tolerations = []corev1.Toleration{ + {Key: "spark", Operator: corev1.TolerationOpExists}, + } + return app + }(), + expected: true, + }, + { + name: "driver cores changed - requires full restart", + oldApp: baseApp(), + newApp: func() *v1beta2.SparkApplication { + app := baseApp() + app.Spec.Driver.Cores = func() 
*int32 { v := int32(2); return &v }() + return app + }(), + expected: false, + }, + { + name: "executor instances changed - requires full restart", + oldApp: baseApp(), + newApp: func() *v1beta2.SparkApplication { + app := baseApp() + app.Spec.Executor.Instances = func() *int32 { v := int32(4); return &v }() + return app + }(), + expected: false, + }, + { + name: "sparkVersion changed - requires full restart", + oldApp: baseApp(), + newApp: func() *v1beta2.SparkApplication { + app := baseApp() + app.Spec.SparkVersion = "3.5.1" + return app + }(), + expected: false, + }, + { + name: "executor webhook field and non-webhook field both changed", + oldApp: baseApp(), + newApp: func() *v1beta2.SparkApplication { + app := baseApp() + app.Spec.Executor.PriorityClassName = func() *string { v := "high-priority"; return &v }() + app.Spec.Executor.Instances = func() *int32 { v := int32(4); return &v }() + return app + }(), + expected: false, + }, + { + name: "driver priorityClassName changed - requires full restart", + oldApp: baseApp(), + newApp: func() *v1beta2.SparkApplication { + app := baseApp() + app.Spec.Driver.PriorityClassName = func() *string { v := "high-priority"; return &v }() + return app + }(), + expected: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := filter.isWebhookPatchedFieldsOnlyChange(tt.oldApp, tt.newApp) + if result != tt.expected { + t.Errorf("isWebhookPatchedFieldsOnlyChange() = %v, expected %v", result, tt.expected) + } + }) + } +} + +func TestHasExecutorWebhookFieldChanges(t *testing.T) { + baseApp := func() *v1beta2.SparkApplication { + return &v1beta2.SparkApplication{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-app", + Namespace: "default", + }, + Spec: v1beta2.SparkApplicationSpec{ + Type: v1beta2.SparkApplicationTypeScala, + SparkVersion: "3.5.0", + Executor: v1beta2.ExecutorSpec{ + SparkPodSpec: v1beta2.SparkPodSpec{ + Cores: func() *int32 { v := int32(2); return &v }(), + }, + Instances: func() *int32 { v := int32(2); return &v }(), + }, + }, + } + } + + tests := []struct { + name string + oldApp *v1beta2.SparkApplication + newApp *v1beta2.SparkApplication + expected bool + }{ + { + name: "no changes", + oldApp: baseApp(), + newApp: baseApp(), + expected: false, + }, + { + name: "priorityClassName changed", + oldApp: baseApp(), + newApp: func() *v1beta2.SparkApplication { + app := baseApp() + app.Spec.Executor.PriorityClassName = func() *string { v := "high"; return &v }() + return app + }(), + expected: true, + }, + { + name: "nodeSelector changed", + oldApp: baseApp(), + newApp: func() *v1beta2.SparkApplication { + app := baseApp() + app.Spec.Executor.NodeSelector = map[string]string{"key": "value"} + return app + }(), + expected: true, + }, + { + name: "tolerations changed", + oldApp: baseApp(), + newApp: func() *v1beta2.SparkApplication { + app := baseApp() + app.Spec.Executor.Tolerations = []corev1.Toleration{{Key: "key"}} + return app + }(), + expected: true, + }, + { + name: "instances changed - not a webhook field", + oldApp: baseApp(), + newApp: func() *v1beta2.SparkApplication { + app := baseApp() + app.Spec.Executor.Instances = func() *int32 { v := int32(4); return &v }() + return app + }(), + expected: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := hasExecutorWebhookFieldChanges(tt.oldApp, tt.newApp) + if result != tt.expected { + t.Errorf("hasExecutorWebhookFieldChanges() = %v, expected %v", result, tt.expected) + } + }) + } +} diff --git 
a/internal/controller/sparkapplication/monitoring_config_test.go b/internal/controller/sparkapplication/monitoring_config_test.go index 6aaededb81..8538ec5a9b 100644 --- a/internal/controller/sparkapplication/monitoring_config_test.go +++ b/internal/controller/sparkapplication/monitoring_config_test.go @@ -24,6 +24,7 @@ import ( "github.com/stretchr/testify/assert" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/utils/ptr" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/client/fake" @@ -128,19 +129,19 @@ func TestConfigPrometheusMonitoring(t *testing.T) { }, Spec: v1beta2.SparkApplicationSpec{ Driver: v1beta2.DriverSpec{ - JavaOptions: util.StringPtr("-XX:+PrintGCDetails -XX:+PrintGCTimeStamps"), + JavaOptions: ptr.To("-XX:+PrintGCDetails -XX:+PrintGCTimeStamps"), }, Executor: v1beta2.ExecutorSpec{ - JavaOptions: util.StringPtr("-XX:+PrintGCDetails -XX:+PrintGCTimeStamps"), + JavaOptions: ptr.To("-XX:+PrintGCDetails -XX:+PrintGCTimeStamps"), }, Monitoring: &v1beta2.MonitoringSpec{ ExposeDriverMetrics: true, ExposeExecutorMetrics: true, - MetricsProperties: util.StringPtr("testcase2dummy"), + MetricsProperties: ptr.To("testcase2dummy"), Prometheus: &v1beta2.PrometheusSpec{ JmxExporterJar: "/prometheus/exporter.jar", - Port: util.Int32Ptr(8091), - Configuration: util.StringPtr("testcase2dummy"), + Port: ptr.To[int32](8091), + Configuration: ptr.To("testcase2dummy"), }, }, }, @@ -159,19 +160,19 @@ func TestConfigPrometheusMonitoring(t *testing.T) { }, Spec: v1beta2.SparkApplicationSpec{ Driver: v1beta2.DriverSpec{ - JavaOptions: util.StringPtr("-XX:+PrintGCDetails -XX:+PrintGCTimeStamps"), + JavaOptions: ptr.To("-XX:+PrintGCDetails -XX:+PrintGCTimeStamps"), }, Executor: v1beta2.ExecutorSpec{ - JavaOptions: util.StringPtr("-XX:+PrintGCDetails -XX:+PrintGCTimeStamps"), + JavaOptions: ptr.To("-XX:+PrintGCDetails -XX:+PrintGCTimeStamps"), }, Monitoring: &v1beta2.MonitoringSpec{ ExposeDriverMetrics: true, ExposeExecutorMetrics: true, - MetricsProperties: util.StringPtr("testcase3dummy"), + MetricsProperties: ptr.To("testcase3dummy"), Prometheus: &v1beta2.PrometheusSpec{ JmxExporterJar: "/prometheus/exporter.jar", - Port: util.Int32Ptr(8091), - ConfigFile: util.StringPtr("testcase3dummy.yaml"), + Port: ptr.To[int32](8091), + ConfigFile: ptr.To("testcase3dummy.yaml"), }, }, }, @@ -189,19 +190,19 @@ func TestConfigPrometheusMonitoring(t *testing.T) { }, Spec: v1beta2.SparkApplicationSpec{ Driver: v1beta2.DriverSpec{ - JavaOptions: util.StringPtr("-XX:+PrintGCDetails -XX:+PrintGCTimeStamps"), + JavaOptions: ptr.To("-XX:+PrintGCDetails -XX:+PrintGCTimeStamps"), }, Executor: v1beta2.ExecutorSpec{ - JavaOptions: util.StringPtr("-XX:+PrintGCDetails -XX:+PrintGCTimeStamps"), + JavaOptions: ptr.To("-XX:+PrintGCDetails -XX:+PrintGCTimeStamps"), }, Monitoring: &v1beta2.MonitoringSpec{ ExposeDriverMetrics: true, ExposeExecutorMetrics: true, - MetricsPropertiesFile: util.StringPtr("/testcase4dummy/metrics.properties"), + MetricsPropertiesFile: ptr.To("/testcase4dummy/metrics.properties"), Prometheus: &v1beta2.PrometheusSpec{ JmxExporterJar: "/prometheus/exporter.jar", - Port: util.Int32Ptr(8091), - ConfigFile: util.StringPtr("testcase4dummy.yaml"), + Port: ptr.To[int32](8091), + ConfigFile: ptr.To("testcase4dummy.yaml"), }, }, }, @@ -219,18 +220,18 @@ func TestConfigPrometheusMonitoring(t *testing.T) { }, Spec: v1beta2.SparkApplicationSpec{ Driver: v1beta2.DriverSpec{ - JavaOptions: util.StringPtr("-XX:+PrintGCDetails -XX:+PrintGCTimeStamps"), 
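The test hunks above and below are part of a repository-wide move from the operator's util.Int32Ptr/Int64Ptr/StringPtr helpers to the generic k8s.io/utils/ptr package, which covers every pointer type with a single function. A small usage sketch:

package main

import (
	"fmt"

	"k8s.io/utils/ptr"
)

func main() {
	// ptr.To replaces the per-type helpers: one generic constructor for any value type.
	port := ptr.To[int32](8091)               // was util.Int32Ptr(8091)
	ttl := ptr.To[int64](60)                  // was util.Int64Ptr(60)
	jar := ptr.To("/prometheus/exporter.jar") // was util.StringPtr(...); the type argument is inferred

	// ptr.Deref reads through a possibly-nil pointer with a fallback default.
	fmt.Println(ptr.Deref(port, 0), ptr.Deref(ttl, 0), *jar)
}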
+ JavaOptions: ptr.To("-XX:+PrintGCDetails -XX:+PrintGCTimeStamps"), }, Executor: v1beta2.ExecutorSpec{ - JavaOptions: util.StringPtr("-XX:+PrintGCDetails -XX:+PrintGCTimeStamps"), + JavaOptions: ptr.To("-XX:+PrintGCDetails -XX:+PrintGCTimeStamps"), }, Monitoring: &v1beta2.MonitoringSpec{ ExposeDriverMetrics: true, ExposeExecutorMetrics: true, - MetricsPropertiesFile: util.StringPtr("/testcase5dummy/metrics.properties"), + MetricsPropertiesFile: ptr.To("/testcase5dummy/metrics.properties"), Prometheus: &v1beta2.PrometheusSpec{ JmxExporterJar: "/prometheus/exporter.jar", - Port: util.Int32Ptr(8091), + Port: ptr.To[int32](8091), }, }, }, @@ -255,7 +256,7 @@ func TestConfigPrometheusMonitoring(t *testing.T) { JmxExporterJar: "/prometheus/exporter.jar", }, }, - Driver: v1beta2.DriverSpec{JavaOptions: util.StringPtr("testdummy")}, + Driver: v1beta2.DriverSpec{JavaOptions: ptr.To("testdummy")}, }, }, metricsProperties: common.DefaultMetricsProperties, @@ -277,7 +278,7 @@ func TestConfigPrometheusMonitoring(t *testing.T) { JmxExporterJar: "/prometheus/exporter.jar", }, }, - Executor: v1beta2.ExecutorSpec{JavaOptions: util.StringPtr("testdummy")}, + Executor: v1beta2.ExecutorSpec{JavaOptions: ptr.To("testdummy")}, }, }, metricsProperties: common.DefaultMetricsProperties, @@ -297,8 +298,8 @@ func TestConfigPrometheusMonitoring(t *testing.T) { ExposeExecutorMetrics: true, Prometheus: &v1beta2.PrometheusSpec{ JmxExporterJar: "/prometheus/exporter.jar", - Port: util.Int32Ptr(1000), - PortName: util.StringPtr("metrics-port"), + Port: ptr.To[int32](1000), + PortName: ptr.To("metrics-port"), }, }, }, @@ -317,15 +318,15 @@ func TestConfigPrometheusMonitoring(t *testing.T) { }, Spec: v1beta2.SparkApplicationSpec{ Driver: v1beta2.DriverSpec{ - JavaOptions: util.StringPtr("-XX:+PrintGCDetails -XX:+PrintGCTimeStamps"), + JavaOptions: ptr.To("-XX:+PrintGCDetails -XX:+PrintGCTimeStamps"), }, Executor: v1beta2.ExecutorSpec{ - JavaOptions: util.StringPtr("-XX:+PrintGCDetails -XX:+PrintGCTimeStamps"), + JavaOptions: ptr.To("-XX:+PrintGCDetails -XX:+PrintGCTimeStamps"), }, Monitoring: &v1beta2.MonitoringSpec{ ExposeDriverMetrics: false, ExposeExecutorMetrics: false, - MetricsProperties: util.StringPtr("testcase2dummy"), + MetricsProperties: ptr.To("testcase2dummy"), }, }, }, diff --git a/internal/controller/sparkapplication/submission.go b/internal/controller/sparkapplication/submission.go index dc7aae8e00..aafd6abc84 100644 --- a/internal/controller/sparkapplication/submission.go +++ b/internal/controller/sparkapplication/submission.go @@ -22,12 +22,18 @@ import ( "os" "os/exec" "path/filepath" + "reflect" + "slices" "strings" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "sigs.k8s.io/controller-runtime/pkg/log" "github.com/kubeflow/spark-operator/v2/api/v1beta2" "github.com/kubeflow/spark-operator/v2/pkg/common" + "github.com/kubeflow/spark-operator/v2/pkg/features" "github.com/kubeflow/spark-operator/v2/pkg/util" ) @@ -94,6 +100,7 @@ func buildSparkSubmitArgs(app *v1beta2.SparkApplication) ([]string, error) { deployModeOption, mainClassOption, nameOption, + loadSparkDefaultsOption, dependenciesOption, namespaceOption, imageOption, @@ -385,6 +392,13 @@ func driverConfOption(app *v1beta2.SparkApplication) ([]string, error) { // Populate SparkApplication labels to driver pod for key, value := range app.Labels { + // Don't propagate Kueue labels to driver pod. + // This is a quick workaround to avoid issues in Kueue integration. 
+ // The community may consider better label propagation control mechanisms in the future. + // ref: https://github.com/kubeflow/spark-operator/issues/2669#issuecomment-3500165528 + if strings.HasPrefix(key, common.KueueLabelPrefix) { + continue + } property = fmt.Sprintf(common.SparkKubernetesDriverLabelTemplate, key) args = append(args, "--conf", fmt.Sprintf("%s=%s", property, value)) } @@ -730,6 +744,13 @@ func executorConfOption(app *v1beta2.SparkApplication) ([]string, error) { // Populate SparkApplication labels to executor pod for key, value := range app.Labels { + // Don't propagate Kueue labels to executor pods. + // This is a quick workaround to avoid issues in Kueue integration. + // The community may consider better label propagation control mechanisms in the future. + // ref: https://github.com/kubeflow/spark-operator/issues/2669#issuecomment-3500165528 + if strings.HasPrefix(key, common.KueueLabelPrefix) { + continue + } property := fmt.Sprintf(common.SparkKubernetesExecutorLabelTemplate, key) args = append(args, "--conf", fmt.Sprintf("%s=%s", property, value)) } @@ -1061,12 +1082,27 @@ func applicationOption(app *v1beta2.SparkApplication) ([]string, error) { // driverPodTemplateOption returns the driver pod template arguments. func driverPodTemplateOption(app *v1beta2.SparkApplication) ([]string, error) { - if app.Spec.Driver.Template == nil { - return []string{}, nil + template := app.Spec.Driver.Template + // Spark expects the template to have a driver container. + // If the user specifies a driver pod template, it is the user's + // responsibility to ensure the template has a driver container. + if template == nil { + template = &corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + Containers: []corev1.Container{{Name: common.SparkDriverContainerName}}, + }, + } + } + + ownerReference := util.GetOwnerReference(app) + if !slices.ContainsFunc(template.OwnerReferences, func(r metav1.OwnerReference) bool { + return reflect.DeepEqual(r, ownerReference) + }) { + template.OwnerReferences = append(template.OwnerReferences, ownerReference) } podTemplateFile := fmt.Sprintf("/tmp/spark/%s/driver-pod-template.yaml", app.Status.SubmissionID) - if err := util.WriteObjectToFile(app.Spec.Driver.Template, podTemplateFile); err != nil { + if err := util.WriteObjectToFile(template, podTemplateFile); err != nil { return []string{}, fmt.Errorf("failed to write driver pod template to file: %v", err) } @@ -1081,12 +1117,33 @@ func driverPodTemplateOption(app *v1beta2.SparkApplication) ([]string, error) { // executorPodTemplateOption returns the executor pod template arguments. func executorPodTemplateOption(app *v1beta2.SparkApplication) ([]string, error) { - if app.Spec.Executor.Template == nil { - return []string{}, nil + template := app.Spec.Executor.Template + + // Spark expects the template to have an executor container. + // If the user specifies an executor pod template, it is the user's + // responsibility to ensure the template has an executor container. + if template == nil { + template = &corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + Containers: []corev1.Container{{Name: common.Spark3DefaultExecutorContainerName}}, + }, + } + } + + // We set a non-controller owner reference so that + // other controllers (e.g. 
Kueue) can recognize the executor pods + // are the children of the SparkApplication + ownerReference := util.GetOwnerReference(app) + ownerReference.Controller = nil + ownerReference.BlockOwnerDeletion = nil + if !slices.ContainsFunc(template.OwnerReferences, func(r metav1.OwnerReference) bool { + return reflect.DeepEqual(r, ownerReference) + }) { + template.OwnerReferences = append(template.OwnerReferences, ownerReference) } podTemplateFile := fmt.Sprintf("/tmp/spark/%s/executor-pod-template.yaml", app.Status.SubmissionID) - if err := util.WriteObjectToFile(app.Spec.Executor.Template, podTemplateFile); err != nil { + if err := util.WriteObjectToFile(template, podTemplateFile); err != nil { return []string{}, fmt.Errorf("failed to write executor pod template to file: %v", err) } @@ -1098,3 +1155,12 @@ func executorPodTemplateOption(app *v1beta2.SparkApplication) ([]string, error) } return args, nil } + +// loadSparkDefaultsOption adds `--load-spark-defaults` flag to the command when feature gate `LoadSparkDefaults` is enabled. +func loadSparkDefaultsOption(_ *v1beta2.SparkApplication) ([]string, error) { + args := []string{} + if features.Enabled(features.LoadSparkDefaults) { + args = append(args, "--load-spark-defaults") + } + return args, nil +} diff --git a/internal/controller/sparkapplication/submission_test.go b/internal/controller/sparkapplication/submission_test.go index e555af0c4f..e9290f75d5 100644 --- a/internal/controller/sparkapplication/submission_test.go +++ b/internal/controller/sparkapplication/submission_test.go @@ -18,13 +18,19 @@ package sparkapplication import ( "fmt" + "os" "slices" + "strings" "testing" + "github.com/google/uuid" "github.com/stretchr/testify/assert" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" + "k8s.io/utils/ptr" + "sigs.k8s.io/yaml" "github.com/kubeflow/spark-operator/v2/api/v1beta2" "github.com/kubeflow/spark-operator/v2/pkg/common" @@ -68,16 +74,16 @@ func TestExecutorConfOption(t *testing.T) { }, Spec: v1beta2.SparkApplicationSpec{ Executor: v1beta2.ExecutorSpec{ - CoreRequest: util.StringPtr("2"), + CoreRequest: ptr.To("2"), SparkPodSpec: v1beta2.SparkPodSpec{ - Image: util.StringPtr("custom-executor:v1"), - Cores: util.Int32Ptr(4), - CoreLimit: util.StringPtr("4"), - Memory: util.StringPtr("4g"), - MemoryOverhead: util.StringPtr("1g"), + Image: ptr.To("custom-executor:v1"), + Cores: ptr.To[int32](4), + CoreLimit: ptr.To("4"), + Memory: ptr.To("4g"), + MemoryOverhead: ptr.To("1g"), }, - Instances: util.Int32Ptr(3), - DeleteOnTermination: util.BoolPtr(true), + Instances: ptr.To[int32](3), + DeleteOnTermination: ptr.To(true), }, }, }, @@ -96,6 +102,31 @@ func TestExecutorConfOption(t *testing.T) { "--conf", fmt.Sprintf("%s=%t", common.SparkKubernetesExecutorDeleteOnTermination, true), }, }, + { + name: "kueue labels on SparkApplication should not be propagated to executor conf", + app: &v1beta2.SparkApplication{ + ObjectMeta: metav1.ObjectMeta{ + Name: "spark-kueue", + Labels: map[string]string{ + "environment": "production", + "kueue.x-k8s.io/queue-name": "high-priority", + }, + }, + Status: v1beta2.SparkApplicationStatus{ + SubmissionID: "minimal-123", + }, + Spec: v1beta2.SparkApplicationSpec{ + Executor: v1beta2.ExecutorSpec{}, + }, + }, + expected: []string{ + "--conf", fmt.Sprintf("%s=%s", fmt.Sprintf(common.SparkKubernetesExecutorLabelTemplate, "environment"), "production"), + "--conf", fmt.Sprintf("%s=%s", 
fmt.Sprintf(common.SparkKubernetesExecutorLabelTemplate, common.LabelSparkAppName), "spark-kueue"), + "--conf", fmt.Sprintf("%s=%s", fmt.Sprintf(common.SparkKubernetesExecutorLabelTemplate, common.LabelLaunchedBySparkOperator), "true"), + "--conf", fmt.Sprintf("%s=%s", fmt.Sprintf(common.SparkKubernetesExecutorLabelTemplate, common.LabelMutatedBySparkOperator), "true"), + "--conf", fmt.Sprintf("%s=%s", fmt.Sprintf(common.SparkKubernetesExecutorLabelTemplate, common.LabelSubmissionID), "minimal-123"), + }, + }, } for _, testCase := range tests { @@ -239,10 +270,10 @@ func TestDriverConfOption(t *testing.T) { Spec: v1beta2.SparkApplicationSpec{ Driver: v1beta2.DriverSpec{ SparkPodSpec: v1beta2.SparkPodSpec{ - Cores: util.Int32Ptr(2), - CoreLimit: util.StringPtr("2"), - Memory: util.StringPtr("2g"), - Image: util.StringPtr("spark-driver:latest"), + Cores: ptr.To[int32](2), + CoreLimit: ptr.To("2"), + Memory: ptr.To("2g"), + Image: ptr.To("spark-driver:latest"), ConfigMaps: []v1beta2.NamePath{{Name: "driver-config", Path: "/etc/config"}}, }, }, @@ -281,6 +312,31 @@ func TestDriverConfOption(t *testing.T) { "--conf", fmt.Sprintf("%s=%s", fmt.Sprintf(common.SparkKubernetesDriverLabelTemplate, common.LabelSubmissionID), "minimal-123"), }, }, + { + name: "kueue labels on SparkApplication should not be propagated to driver conf", + app: &v1beta2.SparkApplication{ + ObjectMeta: metav1.ObjectMeta{ + Name: "spark-minimal", + Labels: map[string]string{ + "environment": "production", + "kueue.x-k8s.io/queue-name": "high-priority", + }, + }, + Status: v1beta2.SparkApplicationStatus{ + SubmissionID: "minimal-123", + }, + Spec: v1beta2.SparkApplicationSpec{ + Driver: v1beta2.DriverSpec{}, + }, + }, + expected: []string{ + "--conf", fmt.Sprintf("%s=%s", fmt.Sprintf(common.SparkKubernetesDriverLabelTemplate, "environment"), "production"), + "--conf", fmt.Sprintf("%s=%s", fmt.Sprintf(common.SparkKubernetesDriverLabelTemplate, common.LabelSparkAppName), "spark-minimal"), + "--conf", fmt.Sprintf("%s=%s", fmt.Sprintf(common.SparkKubernetesDriverLabelTemplate, common.LabelLaunchedBySparkOperator), "true"), + "--conf", fmt.Sprintf("%s=%s", fmt.Sprintf(common.SparkKubernetesDriverLabelTemplate, common.LabelMutatedBySparkOperator), "true"), + "--conf", fmt.Sprintf("%s=%s", fmt.Sprintf(common.SparkKubernetesDriverLabelTemplate, common.LabelSubmissionID), "minimal-123"), + }, + }, { name: "driver with labels and annotations", app: &v1beta2.SparkApplication{ @@ -345,6 +401,217 @@ func TestDriverConfOption(t *testing.T) { } } +func TestDriverPodTemplateContents(t *testing.T) { + appName := "test" + uid := types.UID(uuid.New().String()) + appOwnerReference := util.GetOwnerReference(&v1beta2.SparkApplication{ + ObjectMeta: metav1.ObjectMeta{ + Name: appName, + UID: uid, + }, + }) + for name, tc := range map[string]struct { + app *v1beta2.SparkApplication + expectedTemplate corev1.PodTemplateSpec + }{ + "no pod template": { + app: &v1beta2.SparkApplication{}, + expectedTemplate: corev1.PodTemplateSpec{ + ObjectMeta: metav1.ObjectMeta{ + OwnerReferences: []metav1.OwnerReference{appOwnerReference}, + }, + Spec: corev1.PodSpec{ + Containers: []corev1.Container{{ + Name: common.SparkDriverContainerName, + }}, + }, + }, + }, + "with pod template": { + app: &v1beta2.SparkApplication{ + Spec: v1beta2.SparkApplicationSpec{ + Driver: v1beta2.DriverSpec{ + SparkPodSpec: v1beta2.SparkPodSpec{ + Template: &corev1.PodTemplateSpec{ + ObjectMeta: metav1.ObjectMeta{ + OwnerReferences: []metav1.OwnerReference{{Name: "owner-in-template"}}, 
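The pod-template tests around this point exercise how the SparkApplication is appended to the executor template as a non-controller owner reference, so that other controllers such as Kueue can link executor pods back to the application without competing for controller ownership. A hedged sketch of building such a reference directly with the Kubernetes API types (the apiVersion, kind, name, and UID below are illustrative values; the operator builds the reference through its util.GetOwnerReference helper):

package main

import (
	"fmt"

	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/types"
	"k8s.io/utils/ptr"
)

func main() {
	// A controller-style owner reference, roughly what an operator sets on resources it fully owns.
	ref := metav1.OwnerReference{
		APIVersion:         "sparkoperator.k8s.io/v1beta2",
		Kind:               "SparkApplication",
		Name:               "test",
		UID:                types.UID("a2f4c9d0-0000-0000-0000-000000000000"),
		Controller:         ptr.To(true),
		BlockOwnerDeletion: ptr.To(true),
	}

	// For executor pod templates, the change above clears the controller bits, so the
	// reference only expresses parentage and leaves room for another controller to
	// manage or admit the pods.
	ref.Controller = nil
	ref.BlockOwnerDeletion = nil

	fmt.Printf("%+v\n", ref)
}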
+ }, + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + {Name: common.SparkDriverContainerName, Image: "image"}, + }, + }, + }, + }, + }, + }, + }, + expectedTemplate: corev1.PodTemplateSpec{ + ObjectMeta: metav1.ObjectMeta{ + OwnerReferences: []metav1.OwnerReference{ + {Name: "owner-in-template"}, + appOwnerReference, + }, + }, + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + {Name: common.SparkDriverContainerName, Image: "image"}, + }, + }, + }, + }, + "pod template already has owner references": { + app: &v1beta2.SparkApplication{ + Spec: v1beta2.SparkApplicationSpec{ + Driver: v1beta2.DriverSpec{ + SparkPodSpec: v1beta2.SparkPodSpec{ + Template: &corev1.PodTemplateSpec{ + ObjectMeta: metav1.ObjectMeta{ + OwnerReferences: []metav1.OwnerReference{{Name: "owner-in-template"}, appOwnerReference}, + }, + }, + }, + }, + }, + }, + expectedTemplate: corev1.PodTemplateSpec{ + ObjectMeta: metav1.ObjectMeta{ + OwnerReferences: []metav1.OwnerReference{ + {Name: "owner-in-template"}, + appOwnerReference, + }, + }, + }, + }, + } { + t.Run(name, func(t *testing.T) { + submissionID := "TestDriverPodTemplateOption-" + strings.ReplaceAll(name, " ", "_") + "-" + uuid.New().String() + tc.app.Name = appName + tc.app.UID = uid + tc.app.Status.SubmissionID = submissionID + + _, err := driverPodTemplateOption(tc.app) + assert.NoError(t, err) + defer func() { _ = os.RemoveAll(fmt.Sprintf("/tmp/spark/%s", submissionID)) }() + + expectedBytes, err := yaml.Marshal(tc.expectedTemplate) + assert.NoError(t, err) + + actualBytes, err := os.ReadFile(fmt.Sprintf("/tmp/spark/%s/driver-pod-template.yaml", submissionID)) + assert.NoError(t, err) + + assert.Equal(t, string(expectedBytes), string(actualBytes)) + }) + } +} + +func TestExecutorPodTemplateContents(t *testing.T) { + appName := "test" + uid := types.UID(uuid.New().String()) + appNonControllerOwnerReference := util.GetOwnerReference(&v1beta2.SparkApplication{ + ObjectMeta: metav1.ObjectMeta{ + Name: appName, + UID: uid, + }, + }) + appNonControllerOwnerReference.Controller = nil + appNonControllerOwnerReference.BlockOwnerDeletion = nil + + for name, tc := range map[string]struct { + app *v1beta2.SparkApplication + expectedTemplate corev1.PodTemplateSpec + }{ + "no pod template": { + app: &v1beta2.SparkApplication{}, + expectedTemplate: corev1.PodTemplateSpec{ + ObjectMeta: metav1.ObjectMeta{ + OwnerReferences: []metav1.OwnerReference{appNonControllerOwnerReference}, + }, + Spec: corev1.PodSpec{ + Containers: []corev1.Container{{ + Name: common.Spark3DefaultExecutorContainerName, + }}, + }, + }, + }, + "with pod template": { + app: &v1beta2.SparkApplication{ + Spec: v1beta2.SparkApplicationSpec{ + Executor: v1beta2.ExecutorSpec{ + SparkPodSpec: v1beta2.SparkPodSpec{ + Template: &corev1.PodTemplateSpec{ + ObjectMeta: metav1.ObjectMeta{ + OwnerReferences: []metav1.OwnerReference{{Name: "owner-in-template"}}, + }, + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + {Name: common.Spark3DefaultExecutorContainerName, Image: "image"}, + }, + }, + }, + }, + }, + }, + }, + expectedTemplate: corev1.PodTemplateSpec{ + ObjectMeta: metav1.ObjectMeta{ + OwnerReferences: []metav1.OwnerReference{ + {Name: "owner-in-template"}, + appNonControllerOwnerReference, + }, + }, + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + {Name: common.Spark3DefaultExecutorContainerName, Image: "image"}, + }, + }, + }, + }, + "pod template already has owner references": { + app: &v1beta2.SparkApplication{ + Spec: v1beta2.SparkApplicationSpec{ + Executor: 
v1beta2.ExecutorSpec{ + SparkPodSpec: v1beta2.SparkPodSpec{ + Template: &corev1.PodTemplateSpec{ + ObjectMeta: metav1.ObjectMeta{ + OwnerReferences: []metav1.OwnerReference{{Name: "owner-in-template"}, appNonControllerOwnerReference}, + }, + }, + }, + }, + }, + }, + expectedTemplate: corev1.PodTemplateSpec{ + ObjectMeta: metav1.ObjectMeta{ + OwnerReferences: []metav1.OwnerReference{ + {Name: "owner-in-template"}, + appNonControllerOwnerReference, + }, + }, + }, + }, + } { + t.Run(name, func(t *testing.T) { + submissionID := "TestExecutorPodTemplateOption-" + strings.ReplaceAll(name, " ", "_") + "-" + uuid.New().String() + tc.app.Name = appName + tc.app.UID = uid + tc.app.Status.SubmissionID = submissionID + + _, err := executorPodTemplateOption(tc.app) + assert.NoError(t, err) + defer func() { _ = os.RemoveAll(fmt.Sprintf("/tmp/spark/%s", submissionID)) }() + + expectedBytes, err := yaml.Marshal(tc.expectedTemplate) + assert.NoError(t, err) + + actualBytes, err := os.ReadFile(fmt.Sprintf("/tmp/spark/%s/executor-pod-template.yaml", submissionID)) + assert.NoError(t, err) + + assert.Equal(t, string(expectedBytes), string(actualBytes)) + }) + } +} + // import ( // "fmt" // "os" @@ -916,10 +1183,10 @@ func TestDriverConfOption(t *testing.T) { // Spec: v1beta2.SparkApplicationSpec{ // DynamicAllocation: &v1beta2.DynamicAllocation{ // Enabled: true, -// InitialExecutors: util.Int32Ptr(2), -// MinExecutors: util.Int32Ptr(0), -// MaxExecutors: util.Int32Ptr(10), -// ShuffleTrackingTimeout: util.Int64Ptr(6000000), +// InitialExecutors: ptr.To[int32](2), +// MinExecutors: ptr.To[int32](0), +// MaxExecutors: ptr.To[int32](10), +// ShuffleTrackingTimeout: ptr.To[int64](6000000), // }, // }, // } @@ -954,7 +1221,7 @@ func TestDriverConfOption(t *testing.T) { // }, // Spec: v1beta2.SparkApplicationSpec{ // Mode: v1beta2.ClusterMode, -// ProxyUser: util.StringPtr("foo"), +// ProxyUser: ptr.To("foo"), // }, // } diff --git a/internal/controller/sparkapplication/suite_test.go b/internal/controller/sparkapplication/suite_test.go index 480d4dc7e7..bcc374bf04 100644 --- a/internal/controller/sparkapplication/suite_test.go +++ b/internal/controller/sparkapplication/suite_test.go @@ -63,7 +63,7 @@ var _ = BeforeSuite(func() { // Note that you must have the required binaries setup under the bin directory to perform // the tests directly. When we run make test it will be setup and used automatically. BinaryAssetsDirectory: filepath.Join("..", "..", "..", "bin", "k8s", - fmt.Sprintf("1.32.0-%s-%s", runtime.GOOS, runtime.GOARCH)), + fmt.Sprintf("1.33.0-%s-%s", runtime.GOOS, runtime.GOARCH)), } var err error diff --git a/internal/controller/sparkapplication/web_ui.go b/internal/controller/sparkapplication/web_ui.go index bb14ccf6b1..667c7c75be 100644 --- a/internal/controller/sparkapplication/web_ui.go +++ b/internal/controller/sparkapplication/web_ui.go @@ -108,7 +108,7 @@ func getWebUITargetPort(app *v1beta2.SparkApplication) (int32, error) { if !ok { return common.DefaultSparkWebUIPort, nil } - port, err := strconv.Atoi(portStr) + port, err := strconv.ParseInt(portStr, 10, 32) if err != nil { return common.DefaultSparkWebUIPort, nil } diff --git a/internal/controller/sparkapplication/web_ui_test.go b/internal/controller/sparkapplication/web_ui_test.go index a2d1566f33..4c8a59312b 100644 --- a/internal/controller/sparkapplication/web_ui_test.go +++ b/internal/controller/sparkapplication/web_ui_test.go @@ -16,640 +16,363 @@ limitations under the License. 
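Earlier in this change, web_ui.go switches getWebUITargetPort from strconv.Atoi to strconv.ParseInt(portStr, 10, 32), so a spark.ui.port value that does not fit in an int32 fails to parse and falls back to the default instead of being silently truncated by a later int32 conversion. A small standalone illustration of the difference:

package main

import (
	"fmt"
	"strconv"
)

const defaultSparkWebUIPort int32 = 4040

// parseUIPort mimics the fixed behavior: values outside the int32 range fail at parse
// time and fall back to the default, rather than being truncated by a later conversion.
func parseUIPort(portStr string) int32 {
	port, err := strconv.ParseInt(portStr, 10, 32)
	if err != nil {
		return defaultSparkWebUIPort
	}
	return int32(port)
}

func main() {
	fmt.Println(parseUIPort("4041"))       // 4041
	fmt.Println(parseUIPort("not-a-port")) // 4040 (default)
	fmt.Println(parseUIPort("4294971436")) // 4040 (out of int32 range; Atoi would have let it truncate to 4140)
}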
package sparkapplication -// func TestCreateSparkUIService(t *testing.T) { -// type testcase struct { -// name string -// app *v1beta2.SparkApplication -// expectedService SparkService -// expectedSelector map[string]string -// expectError bool -// } -// testFn := func(test testcase, t *testing.T) { -// fakeClient := fake.NewSimpleClientset() -// util.IngressCapabilities = map[string]bool{"networking.k8s.io/v1": true} -// sparkService, err := createSparkUIService(test.app, fakeClient) -// if err != nil { -// if test.expectError { -// return -// } -// t.Fatal(err) -// } -// if sparkService.serviceName != test.expectedService.serviceName { -// t.Errorf("%s: for service name wanted %s got %s", test.name, test.expectedService.serviceName, sparkService.serviceName) -// } -// service, err := fakeClient.CoreV1(). -// Services(test.app.Namespace). -// Get(context.TODO(), sparkService.serviceName, metav1.GetOptions{}) -// if err != nil { -// if test.expectError { -// return -// } -// t.Fatal(err) -// } -// if service.Labels[common.SparkAppNameLabel] != test.app.Name { -// t.Errorf("%s: service of app %s has the wrong labels", test.name, test.app.Name) -// } -// if !reflect.DeepEqual(test.expectedSelector, service.Spec.Selector) { -// t.Errorf("%s: for label selector wanted %s got %s", test.name, test.expectedSelector, service.Spec.Selector) -// } -// if service.Spec.Type != test.expectedService.serviceType { -// t.Errorf("%s: for service type wanted %s got %s", test.name, test.expectedService.serviceType, service.Spec.Type) -// } -// if len(service.Spec.Ports) != 1 { -// t.Errorf("%s: wanted a single port got %d ports", test.name, len(service.Spec.Ports)) -// } -// port := service.Spec.Ports[0] -// if port.Port != test.expectedService.servicePort { -// t.Errorf("%s: unexpected port wanted %d got %d", test.name, test.expectedService.servicePort, port.Port) -// } -// if port.Name != test.expectedService.servicePortName { -// t.Errorf("%s: unexpected port name wanted %s got %s", test.name, test.expectedService.servicePortName, port.Name) -// } -// serviceAnnotations := service.ObjectMeta.Annotations -// if !reflect.DeepEqual(serviceAnnotations, test.expectedService.serviceAnnotations) { -// t.Errorf("%s: unexpected annotations wanted %s got %s", test.name, test.expectedService.serviceAnnotations, serviceAnnotations) -// } -// serviceLabels := service.ObjectMeta.Labels -// if !reflect.DeepEqual(serviceLabels, test.expectedService.serviceLabels) { -// t.Errorf("%s: unexpected labels wanted %s got %s", test.name, test.expectedService.serviceLabels, serviceLabels) -// } -// } -// defaultPort := defaultSparkWebUIPort -// defaultPortName := defaultSparkWebUIPortName -// app1 := &v1beta2.SparkApplication{ -// ObjectMeta: metav1.ObjectMeta{ -// Name: "foo1", -// Namespace: "default", -// UID: "foo-123", -// }, -// Spec: v1beta2.SparkApplicationSpec{ -// SparkConf: map[string]string{ -// sparkUIPortConfigurationKey: "4041", -// }, -// }, -// Status: v1beta2.SparkApplicationStatus{ -// SparkApplicationID: "foo-1", -// ExecutionAttempts: 1, -// }, -// } -// app2 := &v1beta2.SparkApplication{ -// ObjectMeta: metav1.ObjectMeta{ -// Name: "foo2", -// Namespace: "default", -// UID: "foo-123", -// }, -// Status: v1beta2.SparkApplicationStatus{ -// SparkApplicationID: "foo-2", -// ExecutionAttempts: 2, -// }, -// } -// app3 := &v1beta2.SparkApplication{ -// ObjectMeta: metav1.ObjectMeta{ -// Name: "foo3", -// Namespace: "default", -// UID: "foo-123", -// }, -// Spec: v1beta2.SparkApplicationSpec{ -// SparkConf: 
map[string]string{ -// sparkUIPortConfigurationKey: "4041x", -// }, -// }, -// Status: v1beta2.SparkApplicationStatus{ -// SparkApplicationID: "foo-3", -// }, -// } -// var appPort int32 = 80 -// app4 := &v1beta2.SparkApplication{ -// ObjectMeta: metav1.ObjectMeta{ -// Name: "foo4", -// Namespace: "default", -// UID: "foo-123", -// }, -// Spec: v1beta2.SparkApplicationSpec{ -// SparkUIOptions: &v1beta2.SparkUIConfiguration{ -// ServicePort: &appPort, -// IngressAnnotations: nil, -// IngressTLS: nil, -// }, -// SparkConf: map[string]string{ -// sparkUIPortConfigurationKey: "4041", -// }, -// }, -// Status: v1beta2.SparkApplicationStatus{ -// SparkApplicationID: "foo-3", -// }, -// } -// var serviceTypeNodePort apiv1.ServiceType = apiv1.ServiceTypeNodePort -// app5 := &v1beta2.SparkApplication{ -// ObjectMeta: metav1.ObjectMeta{ -// Name: "foo5", -// Namespace: "default", -// UID: "foo-123", -// }, -// Spec: v1beta2.SparkApplicationSpec{ -// SparkUIOptions: &v1beta2.SparkUIConfiguration{ -// ServiceType: &serviceTypeNodePort, -// }, -// }, -// Status: v1beta2.SparkApplicationStatus{ -// SparkApplicationID: "foo-2", -// ExecutionAttempts: 2, -// }, -// } -// appPortName := "http-spark-test" -// app6 := &v1beta2.SparkApplication{ -// ObjectMeta: metav1.ObjectMeta{ -// Name: "foo6", -// Namespace: "default", -// UID: "foo-123", -// }, -// Spec: v1beta2.SparkApplicationSpec{ -// SparkUIOptions: &v1beta2.SparkUIConfiguration{ -// ServicePort: &appPort, -// ServicePortName: &appPortName, -// }, -// }, -// Status: v1beta2.SparkApplicationStatus{ -// SparkApplicationID: "foo-6", -// }, -// } -// app7 := &v1beta2.SparkApplication{ -// ObjectMeta: metav1.ObjectMeta{ -// Name: "foo7", -// Namespace: "default", -// UID: "foo-123", -// }, -// Spec: v1beta2.SparkApplicationSpec{ -// SparkUIOptions: &v1beta2.SparkUIConfiguration{ -// ServiceAnnotations: map[string]string{ -// "key": "value", -// }, -// }, -// }, -// Status: v1beta2.SparkApplicationStatus{ -// SparkApplicationID: "foo-7", -// ExecutionAttempts: 1, -// }, -// } -// app8 := &v1beta2.SparkApplication{ -// ObjectMeta: metav1.ObjectMeta{ -// Name: "foo8", -// Namespace: "default", -// UID: "foo-123", -// }, -// Spec: v1beta2.SparkApplicationSpec{ -// SparkUIOptions: &v1beta2.SparkUIConfiguration{ -// ServiceLabels: map[string]string{ -// "sparkoperator.k8s.io/app-name": "foo8", -// "key": "value", -// }, -// }, -// }, -// Status: v1beta2.SparkApplicationStatus{ -// SparkApplicationID: "foo-8", -// ExecutionAttempts: 1, -// }, -// } -// testcases := []testcase{ -// { -// name: "service with custom serviceport and serviceport and target port are same", -// app: app1, -// expectedService: SparkService{ -// serviceName: fmt.Sprintf("%s-ui-svc", app1.GetName()), -// serviceType: apiv1.ServiceTypeClusterIP, -// servicePortName: defaultPortName, -// servicePort: 4041, -// serviceLabels: map[string]string{ -// "sparkoperator.k8s.io/app-name": "foo1", -// }, -// targetPort: intstr.IntOrString{ -// Type: intstr.Int, -// IntVal: int32(4041), -// }, -// }, -// expectedSelector: map[string]string{ -// common.SparkAppNameLabel: "foo1", -// common.SparkRoleLabel: common.SparkDriverRole, -// }, -// expectError: false, -// }, -// { -// name: "service with default port", -// app: app2, -// expectedService: SparkService{ -// serviceName: fmt.Sprintf("%s-ui-svc", app2.GetName()), -// serviceType: apiv1.ServiceTypeClusterIP, -// servicePortName: defaultPortName, -// servicePort: int32(defaultPort), -// serviceLabels: map[string]string{ -// 
"sparkoperator.k8s.io/app-name": "foo2", -// }, -// }, -// expectedSelector: map[string]string{ -// common.SparkAppNameLabel: "foo2", -// common.SparkRoleLabel: common.SparkDriverRole, -// }, -// expectError: false, -// }, -// { -// name: "service with custom serviceport and serviceport and target port are different", -// app: app4, -// expectedService: SparkService{ -// serviceName: fmt.Sprintf("%s-ui-svc", app4.GetName()), -// serviceType: apiv1.ServiceTypeClusterIP, -// servicePortName: defaultPortName, -// servicePort: 80, -// serviceLabels: map[string]string{ -// "sparkoperator.k8s.io/app-name": "foo4", -// }, -// targetPort: intstr.IntOrString{ -// Type: intstr.Int, -// IntVal: int32(4041), -// }, -// }, -// expectedSelector: map[string]string{ -// common.SparkAppNameLabel: "foo4", -// common.SparkRoleLabel: common.SparkDriverRole, -// }, -// expectError: false, -// }, -// { -// name: "service with custom servicetype", -// app: app5, -// expectedService: SparkService{ -// serviceName: fmt.Sprintf("%s-ui-svc", app5.GetName()), -// serviceType: apiv1.ServiceTypeNodePort, -// servicePortName: defaultPortName, -// servicePort: int32(defaultPort), -// serviceLabels: map[string]string{ -// "sparkoperator.k8s.io/app-name": "foo5", -// }, -// }, -// expectedSelector: map[string]string{ -// common.SparkAppNameLabel: "foo5", -// common.SparkRoleLabel: common.SparkDriverRole, -// }, -// expectError: false, -// }, -// { -// name: "service with custom serviceportname", -// app: app6, -// expectedService: SparkService{ -// serviceName: fmt.Sprintf("%s-ui-svc", app6.GetName()), -// serviceType: apiv1.ServiceTypeClusterIP, -// servicePortName: "http-spark-test", -// servicePort: int32(80), -// serviceLabels: map[string]string{ -// "sparkoperator.k8s.io/app-name": "foo6", -// }, -// }, -// expectedSelector: map[string]string{ -// common.SparkAppNameLabel: "foo6", -// common.SparkRoleLabel: common.SparkDriverRole, -// }, -// expectError: false, -// }, -// { -// name: "service with annotation", -// app: app7, -// expectedService: SparkService{ -// serviceName: fmt.Sprintf("%s-ui-svc", app7.GetName()), -// serviceType: apiv1.ServiceTypeClusterIP, -// servicePortName: defaultPortName, -// servicePort: defaultPort, -// serviceAnnotations: map[string]string{ -// "key": "value", -// }, -// serviceLabels: map[string]string{ -// "sparkoperator.k8s.io/app-name": "foo7", -// }, -// targetPort: intstr.IntOrString{ -// Type: intstr.Int, -// IntVal: int32(4041), -// }, -// }, -// expectedSelector: map[string]string{ -// common.SparkAppNameLabel: "foo7", -// common.SparkRoleLabel: common.SparkDriverRole, -// }, -// expectError: false, -// }, -// { -// name: "service with custom labels", -// app: app8, -// expectedService: SparkService{ -// serviceName: fmt.Sprintf("%s-ui-svc", app8.GetName()), -// serviceType: apiv1.ServiceTypeClusterIP, -// servicePortName: defaultPortName, -// servicePort: defaultPort, -// serviceLabels: map[string]string{ -// "sparkoperator.k8s.io/app-name": "foo8", -// "key": "value", -// }, -// targetPort: intstr.IntOrString{ -// Type: intstr.Int, -// IntVal: int32(4041), -// }, -// }, -// expectedSelector: map[string]string{ -// common.SparkAppNameLabel: "foo8", -// common.SparkRoleLabel: common.SparkDriverRole, -// }, -// expectError: false, -// }, -// { -// name: "service with bad port configurations", -// app: app3, -// expectError: true, -// }, -// } -// for _, test := range testcases { -// testFn(test, t) -// } -// } +import ( + "context" + "testing" -// func TestCreateSparkUIIngress(t 
*testing.T) { -// type testcase struct { -// name string -// app *v1beta2.SparkApplication -// expectedIngress SparkIngress -// expectError bool -// } + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/types" + "sigs.k8s.io/controller-runtime/pkg/client/fake" -// testFn := func(test testcase, t *testing.T, ingressURLFormat string, ingressClassName string) { -// fakeClient := fake.NewSimpleClientset() -// sparkService, err := createSparkUIService(test.app, fakeClient) -// if err != nil { -// t.Fatal(err) -// } -// ingressURL, err := getDriverIngressURL(ingressURLFormat, test.app.Name, test.app.Namespace) -// if err != nil { -// t.Fatal(err) -// } -// sparkIngress, err := createSparkUIIngress(test.app, *sparkService, ingressURL, ingressClassName, fakeClient) -// if err != nil { -// if test.expectError { -// return -// } -// t.Fatal(err) -// } -// if sparkIngress.ingressName != test.expectedIngress.ingressName { -// t.Errorf("Ingress name wanted %s got %s", test.expectedIngress.ingressName, sparkIngress.ingressName) -// } -// if sparkIngress.ingressURL.String() != test.expectedIngress.ingressURL.String() { -// t.Errorf("Ingress URL wanted %s got %s", test.expectedIngress.ingressURL, sparkIngress.ingressURL) -// } -// ingress, err := fakeClient.NetworkingV1().Ingresses(test.app.Namespace). -// Get(context.TODO(), sparkIngress.ingressName, metav1.GetOptions{}) -// if err != nil { -// t.Fatal(err) -// } -// if len(ingress.Annotations) != 0 { -// for key, value := range ingress.Annotations { -// if test.expectedIngress.annotations[key] != ingress.Annotations[key] { -// t.Errorf("Expected annotation: %s=%s but found : %s=%s", key, value, key, ingress.Annotations[key]) -// } -// } -// } -// if len(ingress.Spec.TLS) != 0 { -// for _, ingressTls := range ingress.Spec.TLS { -// if ingressTls.Hosts[0] != test.expectedIngress.ingressTLS[0].Hosts[0] { -// t.Errorf("Expected ingressTls host: %s but found : %s", test.expectedIngress.ingressTLS[0].Hosts[0], ingressTls.Hosts[0]) -// } -// if ingressTls.SecretName != test.expectedIngress.ingressTLS[0].SecretName { -// t.Errorf("Expected ingressTls secretName: %s but found : %s", test.expectedIngress.ingressTLS[0].SecretName, ingressTls.SecretName) -// } -// } -// } -// if ingress.Labels[common.SparkAppNameLabel] != test.app.Name { -// t.Errorf("Ingress of app %s has the wrong labels", test.app.Name) -// } + "github.com/kubeflow/spark-operator/v2/api/v1beta2" + "github.com/kubeflow/spark-operator/v2/pkg/common" +) -// if len(ingress.Spec.Rules) != 1 { -// t.Errorf("No Ingress rules found.") -// } -// ingressRule := ingress.Spec.Rules[0] -// // If we have a path, then the ingress adds capture groups -// if ingressRule.IngressRuleValue.HTTP.Paths[0].Path != "" && ingressRule.IngressRuleValue.HTTP.Paths[0].Path != "/" { -// test.expectedIngress.ingressURL.Path = test.expectedIngress.ingressURL.Path + "(/|$)(.*)" -// } -// if ingressRule.Host+ingressRule.IngressRuleValue.HTTP.Paths[0].Path != test.expectedIngress.ingressURL.Host+test.expectedIngress.ingressURL.Path { +func TestConfigWebUI(t *testing.T) { + ctx := context.Background() + appBase := &v1beta2.SparkApplication{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-app", + Namespace: "default", + }, + Status: v1beta2.SparkApplicationStatus{ + SparkApplicationID: "test-app-1", + }, + } -// t.Errorf("Ingress of app %s has the wrong host %s", 
ingressRule.Host+ingressRule.IngressRuleValue.HTTP.Paths[0].Path, test.expectedIngress.ingressURL.Host+test.expectedIngress.ingressURL.Path) -// } + tests := []struct { + name string + options Options + app *v1beta2.SparkApplication + wantConf map[string]string + }{ + { + name: "ui service disabled", + options: Options{ + EnableUIService: false, + IngressURLFormat: "ingress.example.com/{{ $appName }}", + }, + app: appBase.DeepCopy(), + wantConf: nil, + }, + { + name: "ingress format without path", + options: Options{ + EnableUIService: true, + IngressURLFormat: "test-app.ingress.example.com", + }, + app: appBase.DeepCopy(), + wantConf: nil, + }, + { + name: "ingress format with path", + options: Options{ + EnableUIService: true, + IngressURLFormat: "ingress.example.com/{{ $appNamespace }}/{{ $appName }}", + }, + app: appBase.DeepCopy(), + wantConf: map[string]string{ + common.SparkUIProxyBase: "/default/test-app", + common.SparkUIProxyRedirectURI: "/", + }, + }, + { + name: "existing config preserved", + options: Options{ + EnableUIService: true, + IngressURLFormat: "ingress.example.com/{{ $appNamespace }}/{{ $appName }}", + }, + app: func() *v1beta2.SparkApplication { + app := appBase.DeepCopy() + app.Spec.SparkConf = map[string]string{ + common.SparkUIProxyBase: "/custom", + common.SparkUIProxyRedirectURI: "/keep", + "spark.executor.instances": "2", + } + return app + }(), + wantConf: map[string]string{ + common.SparkUIProxyBase: "/custom", + common.SparkUIProxyRedirectURI: "/keep", + "spark.executor.instances": "2", + }, + }, + } -// if len(ingressRule.IngressRuleValue.HTTP.Paths) != 1 { -// t.Errorf("No Ingress paths found.") -// } -// ingressPath := ingressRule.IngressRuleValue.HTTP.Paths[0] -// if ingressPath.Backend.Service.Name != sparkService.serviceName { -// t.Errorf("Service name wanted %s got %s", sparkService.serviceName, ingressPath.Backend.Service.Name) -// } -// if *ingressPath.PathType != networkingv1.PathTypeImplementationSpecific { -// t.Errorf("PathType wanted %s got %s", networkingv1.PathTypeImplementationSpecific, *ingressPath.PathType) -// } -// if ingressPath.Backend.Service.Port.Number != sparkService.servicePort { -// t.Errorf("Service port wanted %v got %v", sparkService.servicePort, ingressPath.Backend.Service.Port.Number) -// } -// } + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + reconciler := &Reconciler{options: tc.options} + err := reconciler.configWebUI(ctx, tc.app) + require.NoError(t, err) -// var appPort int32 = 80 -// app1 := &v1beta2.SparkApplication{ -// ObjectMeta: metav1.ObjectMeta{ -// Name: "foo", -// Namespace: "default", -// UID: "foo-123", -// }, -// Status: v1beta2.SparkApplicationStatus{ -// SparkApplicationID: "foo-1", -// DriverInfo: v1beta2.DriverInfo{ -// WebUIServiceName: "blah-service", -// }, -// }, -// } -// app2 := &v1beta2.SparkApplication{ -// ObjectMeta: metav1.ObjectMeta{ -// Name: "foo", -// Namespace: "default", -// UID: "foo-123", -// }, -// Spec: v1beta2.SparkApplicationSpec{ -// SparkUIOptions: &v1beta2.SparkUIConfiguration{ -// ServicePort: &appPort, -// IngressAnnotations: map[string]string{ -// "kubernetes.io/ingress.class": "nginx", -// "nginx.ingress.kubernetes.io/force-ssl-redirect": "true", -// }, -// }, -// }, -// Status: v1beta2.SparkApplicationStatus{ -// SparkApplicationID: "foo-1", -// DriverInfo: v1beta2.DriverInfo{ -// WebUIServiceName: "blah-service", -// }, -// }, -// } -// app3 := &v1beta2.SparkApplication{ -// ObjectMeta: metav1.ObjectMeta{ -// Name: "foo", -// Namespace: "default", 
-// UID: "foo-123", -// }, -// Spec: v1beta2.SparkApplicationSpec{ -// SparkUIOptions: &v1beta2.SparkUIConfiguration{ -// ServicePort: &appPort, -// IngressAnnotations: map[string]string{ -// "kubernetes.io/ingress.class": "nginx", -// "nginx.ingress.kubernetes.io/force-ssl-redirect": "true", -// }, -// IngressTLS: []networkingv1.IngressTLS{ -// {Hosts: []string{"host1", "host2"}, SecretName: "secret"}, -// }, -// }, -// }, -// Status: v1beta2.SparkApplicationStatus{ -// SparkApplicationID: "foo-1", -// DriverInfo: v1beta2.DriverInfo{ -// WebUIServiceName: "blah-service", -// }, -// }, -// } -// app4 := &v1beta2.SparkApplication{ -// ObjectMeta: metav1.ObjectMeta{ -// Name: "foo", -// Namespace: "default", -// UID: "foo-123", -// }, -// Spec: v1beta2.SparkApplicationSpec{ -// SparkUIOptions: &v1beta2.SparkUIConfiguration{ -// ServicePort: &appPort, -// IngressAnnotations: map[string]string{ -// "kubernetes.io/ingress.class": "nginx", -// }, -// IngressTLS: []networkingv1.IngressTLS{ -// {Hosts: []string{"host1", "host2"}, SecretName: ""}, -// }, -// }, -// }, -// Status: v1beta2.SparkApplicationStatus{ -// SparkApplicationID: "foo-1", -// DriverInfo: v1beta2.DriverInfo{ -// WebUIServiceName: "blah-service", -// }, -// }, -// } + if tc.wantConf == nil { + assert.Nil(t, tc.app.Spec.SparkConf) + return + } -// testcases := []testcase{ -// { -// name: "simple ingress object", -// app: app1, -// expectedIngress: SparkIngress{ -// ingressName: fmt.Sprintf("%s-ui-ingress", app1.GetName()), -// ingressURL: parseURLAndAssertError(app1.GetName()+".ingress.clusterName.com", t), -// }, -// expectError: false, -// }, -// { -// name: "ingress with annotations and without tls configuration", -// app: app2, -// expectedIngress: SparkIngress{ -// ingressName: fmt.Sprintf("%s-ui-ingress", app2.GetName()), -// ingressURL: parseURLAndAssertError(app2.GetName()+".ingress.clusterName.com", t), -// annotations: map[string]string{ -// "kubernetes.io/ingress.class": "nginx", -// "nginx.ingress.kubernetes.io/force-ssl-redirect": "true", -// }, -// }, -// expectError: false, -// }, -// { -// name: "ingress with annotations and tls configuration", -// app: app3, -// expectedIngress: SparkIngress{ -// ingressName: fmt.Sprintf("%s-ui-ingress", app3.GetName()), -// ingressURL: parseURLAndAssertError(app3.GetName()+".ingress.clusterName.com", t), -// annotations: map[string]string{ -// "kubernetes.io/ingress.class": "nginx", -// "nginx.ingress.kubernetes.io/force-ssl-redirect": "true", -// }, -// ingressTLS: []networkingv1.IngressTLS{ -// {Hosts: []string{"host1", "host2"}, SecretName: "secret"}, -// }, -// }, -// expectError: false, -// }, -// { -// name: "ingress with incomplete list of annotations", -// app: app4, -// expectedIngress: SparkIngress{ -// ingressName: fmt.Sprintf("%s-ui-ingress", app4.GetName()), -// ingressURL: parseURLAndAssertError(app3.GetName()+".ingress.clusterName.com", t), -// annotations: map[string]string{ -// "kubernetes.io/ingress.class": "nginx", -// "nginx.ingress.kubernetes.io/force-ssl-redirect": "true", -// }, -// ingressTLS: []networkingv1.IngressTLS{ -// {Hosts: []string{"host1", "host2"}, SecretName: ""}, -// }, -// }, -// expectError: true, -// }, -// } + assert.Equal(t, tc.wantConf, tc.app.Spec.SparkConf) + }) + } +} -// for _, test := range testcases { -// testFn(test, t, "{{$appName}}.ingress.clusterName.com", "") -// } +func TestGetWebUIServicePortName(t *testing.T) { + appBase := &v1beta2.SparkApplication{Spec: v1beta2.SparkApplicationSpec{}} -// testcases = []testcase{ -// { 
-// name: "simple ingress object with ingress URL Format with path", -// app: app1, -// expectedIngress: SparkIngress{ -// ingressName: fmt.Sprintf("%s-ui-ingress", app1.GetName()), -// ingressURL: parseURLAndAssertError("ingress.clusterName.com/"+app1.GetNamespace()+"/"+app1.GetName(), t), -// annotations: map[string]string{ -// "nginx.ingress.kubernetes.io/rewrite-target": "/$2", -// }, -// }, -// expectError: false, -// }, -// } + tests := []struct { + name string + app *v1beta2.SparkApplication + want string + }{ + { + name: "default port name when options nil", + app: appBase.DeepCopy(), + want: common.DefaultSparkWebUIPortName, + }, + { + name: "default port name when ServicePortName nil", + app: func() *v1beta2.SparkApplication { + app := appBase.DeepCopy() + app.Spec.SparkUIOptions = &v1beta2.SparkUIConfiguration{} + return app + }(), + want: common.DefaultSparkWebUIPortName, + }, + { + name: "custom service port name", + app: func() *v1beta2.SparkApplication { + app := appBase.DeepCopy() + name := "custom-port" + app.Spec.SparkUIOptions = &v1beta2.SparkUIConfiguration{ServicePortName: &name} + return app + }(), + want: "custom-port", + }, + } -// for _, test := range testcases { -// testFn(test, t, "ingress.clusterName.com/{{$appNamespace}}/{{$appName}}", "") -// } + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + got := getWebUIServicePortName(tc.app) + assert.Equal(t, tc.want, got) + }) + } +} -// testcases = []testcase{ -// { -// name: "simple ingress object with ingressClassName set", -// app: app1, -// expectedIngress: SparkIngress{ -// ingressName: fmt.Sprintf("%s-ui-ingress", app1.GetName()), -// ingressURL: parseURLAndAssertError(app1.GetName()+".ingress.clusterName.com", t), -// ingressClassName: "nginx", -// }, -// expectError: false, -// }, -// } -// for _, test := range testcases { -// testFn(test, t, "{{$appName}}.ingress.clusterName.com", "nginx") -// } -// } +func TestGetWebUIServicePort(t *testing.T) { + appBase := &v1beta2.SparkApplication{Spec: v1beta2.SparkApplicationSpec{}} -// func parseURLAndAssertError(testURL string, t *testing.T) *url.URL { -// fallbackURL, _ := url.Parse("http://example.com") -// parsedURL, err := url.Parse(testURL) -// if err != nil { -// t.Errorf("failed to parse the url: %s", testURL) -// return fallbackURL -// } -// if parsedURL.Scheme == "" { -// //url does not contain any scheme, adding http:// so url.Parse can function correctly -// parsedURL, err = url.Parse("http://" + testURL) -// if err != nil { -// t.Errorf("failed to parse the url: %s", testURL) -// return fallbackURL -// } -// } -// return parsedURL -// } + tests := []struct { + name string + app *v1beta2.SparkApplication + want int32 + }{ + { + name: "port derived from spark conf when options nil", + app: func() *v1beta2.SparkApplication { + app := appBase.DeepCopy() + app.Spec.SparkConf = map[string]string{ + common.SparkUIPortKey: "4041", + } + return app + }(), + want: 4041, + }, + { + name: "custom service port from options", + app: func() *v1beta2.SparkApplication { + app := appBase.DeepCopy() + port := int32(18080) + app.Spec.SparkUIOptions = &v1beta2.SparkUIConfiguration{ServicePort: &port} + return app + }(), + want: 18080, + }, + { + name: "invalid conf falls back to default", + app: func() *v1beta2.SparkApplication { + app := appBase.DeepCopy() + app.Spec.SparkConf = map[string]string{ + common.SparkUIPortKey: "not-a-port", + } + return app + }(), + want: common.DefaultSparkWebUIPort, + }, + } + + for _, tc := range tests { + t.Run(tc.name, 
func(t *testing.T) { + got, err := getWebUIServicePort(tc.app) + require.NoError(t, err) + assert.Equal(t, tc.want, got) + }) + } +} + +func TestGetWebUITargetPort(t *testing.T) { + appBase := &v1beta2.SparkApplication{Spec: v1beta2.SparkApplicationSpec{}} + + tests := []struct { + name string + app *v1beta2.SparkApplication + want int32 + }{ + { + name: "target port from spark conf", + app: func() *v1beta2.SparkApplication { + app := appBase.DeepCopy() + app.Spec.SparkConf = map[string]string{ + common.SparkUIPortKey: "4045", + } + return app + }(), + want: 4045, + }, + { + name: "invalid value returns default", + app: func() *v1beta2.SparkApplication { + app := appBase.DeepCopy() + app.Spec.SparkConf = map[string]string{ + common.SparkUIPortKey: "invalid", + } + return app + }(), + want: common.DefaultSparkWebUIPort, + }, + { + name: "missing value returns default", + app: appBase.DeepCopy(), + want: common.DefaultSparkWebUIPort, + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + got, err := getWebUITargetPort(tc.app) + require.NoError(t, err) + assert.Equal(t, tc.want, got) + }) + } +} + +func TestCreateWebUIService(t *testing.T) { + scheme := runtime.NewScheme() + require.NoError(t, corev1.AddToScheme(scheme)) + require.NoError(t, v1beta2.AddToScheme(scheme)) + + ctx := context.Background() + + tests := []struct { + name string + app *v1beta2.SparkApplication + wantPort int32 + wantTarget int32 + wantPortName string + wantType corev1.ServiceType + wantLabels map[string]string + wantAnnots map[string]string + }{ + { + name: "defaults with spark conf port override", + app: func() *v1beta2.SparkApplication { + app := &v1beta2.SparkApplication{ + ObjectMeta: metav1.ObjectMeta{ + Name: "ui-default", + Namespace: "default", + }, + Spec: v1beta2.SparkApplicationSpec{ + SparkConf: map[string]string{ + common.SparkUIPortKey: "4041", + }, + }, + } + return app + }(), + wantPort: 4041, + wantTarget: 4041, + wantPortName: common.DefaultSparkWebUIPortName, + wantType: corev1.ServiceTypeClusterIP, + wantLabels: map[string]string{ + common.LabelSparkAppName: "ui-default", + }, + wantAnnots: nil, + }, + { + name: "custom service port and annotations", + app: func() *v1beta2.SparkApplication { + port := int32(80) + portName := "http-spark" + app := &v1beta2.SparkApplication{ + ObjectMeta: metav1.ObjectMeta{ + Name: "ui-custom", + Namespace: "default", + }, + Spec: v1beta2.SparkApplicationSpec{ + SparkConf: map[string]string{ + common.SparkUIPortKey: "4045", + }, + SparkUIOptions: &v1beta2.SparkUIConfiguration{ + ServicePort: &port, + ServicePortName: &portName, + ServiceAnnotations: map[string]string{"key": "value"}, + ServiceLabels: map[string]string{"custom": "label"}, + }, + }, + } + return app + }(), + wantPort: 80, + wantTarget: 4045, + wantPortName: "http-spark", + wantType: corev1.ServiceTypeClusterIP, + wantLabels: map[string]string{ + "custom": "label", + }, + wantAnnots: map[string]string{"key": "value"}, + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + client := fake.NewClientBuilder().WithScheme(scheme).Build() + reconciler := &Reconciler{client: client} + + svc, err := reconciler.createWebUIService(ctx, tc.app) + require.NoError(t, err) + + assert.Equal(t, tc.wantPort, svc.servicePort) + assert.Equal(t, tc.wantTarget, svc.targetPort.IntVal) + assert.Equal(t, tc.wantPortName, svc.servicePortName) + assert.Equal(t, tc.wantType, svc.serviceType) + + created := &corev1.Service{} + err = client.Get(ctx, types.NamespacedName{Name: 
svc.serviceName, Namespace: tc.app.Namespace}, created) + require.NoError(t, err) + + if tc.wantLabels != nil { + assert.Equal(t, tc.wantLabels, created.Labels) + } else { + assert.Equal(t, map[string]string{common.LabelSparkAppName: tc.app.Name}, created.Labels) + } + + if tc.wantAnnots != nil { + assert.Equal(t, tc.wantAnnots, created.Annotations) + } else { + assert.Nil(t, created.Annotations) + } + + require.Len(t, created.Spec.Ports, 1) + assert.Equal(t, tc.wantPort, created.Spec.Ports[0].Port) + assert.Equal(t, tc.wantPortName, created.Spec.Ports[0].Name) + assert.Equal(t, tc.wantTarget, created.Spec.Ports[0].TargetPort.IntVal) + assert.Equal(t, tc.wantType, created.Spec.Type) + assert.Equal(t, map[string]string{ + common.LabelSparkAppName: tc.app.Name, + common.LabelSparkRole: common.SparkRoleDriver, + }, created.Spec.Selector) + }) + } +} diff --git a/internal/controller/sparkconnect/options.go b/internal/controller/sparkconnect/options.go index 69806b5c17..2ac424ded7 100644 --- a/internal/controller/sparkconnect/options.go +++ b/internal/controller/sparkconnect/options.go @@ -170,7 +170,9 @@ func driverConfOption(conn *v1alpha1.SparkConnect) ([]string, error) { } args = append(args, "--conf", "spark.driver.bindAddress=0.0.0.0") - args = append(args, "--conf", "spark.driver.host=${POD_IP}") + + driverHost := "$(host=${POD_IP}; if [[ $host == *:* ]] && [[ $host != \\[* ]]; then echo \"[$host]\"; else echo \"$host\"; fi)" + args = append(args, "--conf", fmt.Sprintf("spark.driver.host=%s", driverHost)) args = append(args, "--conf", "spark.driver.port=7078") // Driver pod name diff --git a/internal/controller/sparkconnect/reconciler.go b/internal/controller/sparkconnect/reconciler.go index a11fd5640b..c2ba34bb21 100644 --- a/internal/controller/sparkconnect/reconciler.go +++ b/internal/controller/sparkconnect/reconciler.go @@ -88,6 +88,11 @@ func NewReconciler( // SetupWithManager sets up the SparkConnect reconciler with the manager. func (r *Reconciler) SetupWithManager(mgr ctrl.Manager, options controller.Options) error { + kind := "SparkConnect" + + // Use a custom log constructor. + options.LogConstructor = util.NewLogConstructor(mgr.GetLogger(), kind) + return ctrl.NewControllerManagedBy(mgr). For(&v1alpha1.SparkConnect{}). Owns( @@ -447,16 +452,18 @@ func (r *Reconciler) mutateServerPod(_ context.Context, conn *v1alpha1.SparkConn // createOrUpdateServerService creates or updates the server service for the SparkConnect resource. func (r *Reconciler) createOrUpdateServerService(ctx context.Context, conn *v1alpha1.SparkConnect) error { logger := ctrl.LoggerFrom(ctx) - logger.V(1).Info("Create or update server service") - svc := &corev1.Service{ - ObjectMeta: metav1.ObjectMeta{ - Name: GetServerServiceName(conn), - Namespace: conn.Namespace, - }, + // Use the service specified in the server spec if provided. + svc := conn.Spec.Server.Service + if svc == nil { + svc = &corev1.Service{} } + svc.Name = GetServerServiceName(conn) + // Namespace provided by user will be ignored. + svc.Namespace = conn.Namespace - _, err := controllerutil.CreateOrUpdate(ctx, r.client, svc, func() error { + // Create or update server service. 
+ opResult, err := controllerutil.CreateOrUpdate(ctx, r.client, svc, func() error { if err := r.mutateServerService(ctx, conn, svc); err != nil { return fmt.Errorf("failed to mutate server service: %v", err) } @@ -465,6 +472,12 @@ func (r *Reconciler) createOrUpdateServerService(ctx context.Context, conn *v1al if err != nil { return fmt.Errorf("failed to create or update server service: %v", err) } + switch opResult { + case controllerutil.OperationResultCreated: + logger.Info("Server service created") + case controllerutil.OperationResultUpdated: + logger.Info("Server service updated") + } // Update SparkConnect status. conn.Status.Server.ServiceName = svc.Name @@ -475,7 +488,6 @@ func (r *Reconciler) mutateServerService(_ context.Context, conn *v1alpha1.SparkConnect, svc *corev1.Service) error { if svc.CreationTimestamp.IsZero() { - svc.Spec.Type = corev1.ServiceTypeClusterIP svc.Spec.Ports = []corev1.ServicePort{ { Name: "driver-rpc",
diff --git a/internal/controller/sparkconnect/suite_test.go b/internal/controller/sparkconnect/suite_test.go new file mode 100644 index 0000000000..9ee243aff2 --- /dev/null +++ b/internal/controller/sparkconnect/suite_test.go @@ -0,0 +1,89 @@ +/* +Copyright 2025 The Kubeflow authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + https://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package sparkconnect + +import ( + "fmt" + "path/filepath" + "runtime" + "testing" + + "github.com/kubeflow/spark-operator/v2/api/v1alpha1" + "github.com/kubeflow/spark-operator/v2/api/v1beta2" + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + + "k8s.io/client-go/kubernetes/scheme" + "k8s.io/client-go/rest" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/envtest" + "sigs.k8s.io/controller-runtime/pkg/log" + "sigs.k8s.io/controller-runtime/pkg/log/zap" + // +kubebuilder:scaffold:imports +) + +// These tests use Ginkgo (BDD-style Go testing framework). Refer to +// http://onsi.github.io/ginkgo/ to learn more about Ginkgo. + +var cfg *rest.Config +var k8sClient client.Client +var testEnv *envtest.Environment + +func TestSparkConnectController(t *testing.T) { + RegisterFailHandler(Fail) + + RunSpecs(t, "SparkConnect Controller Suite") +} + +var _ = BeforeSuite(func() { + log.SetLogger(zap.New(zap.WriteTo(GinkgoWriter), zap.UseDevMode(true))) + + By("bootstrapping test environment") + testEnv = &envtest.Environment{ + CRDDirectoryPaths: []string{filepath.Join("..", "..", "..", "config", "crd", "bases")}, + ErrorIfCRDPathMissing: true, + + // The BinaryAssetsDirectory is only required if you want to run the tests directly + // without calling the makefile target test. If not set, it will look for the + // default path defined in controller-runtime, which is /usr/local/kubebuilder/. + // Note that you must have the required binaries set up under the bin directory to perform + // the tests directly. When we run make test it will be set up and used automatically. + BinaryAssetsDirectory: filepath.Join("..", "..", "..", "bin", "k8s", + fmt.Sprintf("1.33.0-%s-%s", runtime.GOOS, runtime.GOARCH)), + } + + var err error + // cfg is defined in this file globally. + cfg, err = testEnv.Start() + Expect(err).NotTo(HaveOccurred()) + Expect(cfg).NotTo(BeNil()) + + Expect(v1alpha1.AddToScheme(scheme.Scheme)).Should(Succeed()) + Expect(v1beta2.AddToScheme(scheme.Scheme)).Should(Succeed()) + // +kubebuilder:scaffold:scheme + + k8sClient, err = client.New(cfg, client.Options{Scheme: scheme.Scheme}) + Expect(err).NotTo(HaveOccurred()) + Expect(k8sClient).NotTo(BeNil()) + +}) + +var _ = AfterSuite(func() { + By("tearing down the test environment") + err := testEnv.Stop() + Expect(err).NotTo(HaveOccurred()) +})
diff --git a/internal/controller/sparkconnect/util.go b/internal/controller/sparkconnect/util.go index 5a7223f96c..8a02d15986 100644 --- a/internal/controller/sparkconnect/util.go +++ b/internal/controller/sparkconnect/util.go @@ -65,6 +65,13 @@ func GetServerPodName(conn *v1alpha1.SparkConnect) string { // GetServerServiceName returns the name of the server service for SparkConnect. func GetServerServiceName(conn *v1alpha1.SparkConnect) string { + // Use the service specified in the server spec if provided. + svc := conn.Spec.Server.Service + if svc != nil { + return svc.Name + } + + // Otherwise, use the default service name. return fmt.Sprintf("%s-server", conn.Name) }
diff --git a/internal/controller/sparkconnect/util_test.go b/internal/controller/sparkconnect/util_test.go new file mode 100644 index 0000000000..f933ec63fc --- /dev/null +++ b/internal/controller/sparkconnect/util_test.go @@ -0,0 +1,125 @@ +/* +Copyright 2025 The Kubeflow authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + https://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package sparkconnect + +import ( + . "github.com/onsi/ginkgo/v2" + .
"github.com/onsi/gomega" + + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + "github.com/kubeflow/spark-operator/v2/api/v1alpha1" + "github.com/kubeflow/spark-operator/v2/pkg/common" +) + +var _ = Describe("Util functions", func() { + var conn *v1alpha1.SparkConnect + + BeforeEach(func() { + conn = &v1alpha1.SparkConnect{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-spark-connect", + Namespace: "test-namespace", + }, + Spec: v1alpha1.SparkConnectSpec{ + SparkVersion: "4.0.0", + Server: v1alpha1.ServerSpec{ + SparkPodSpec: v1alpha1.SparkPodSpec{}, + }, + Executor: v1alpha1.ExecutorSpec{ + SparkPodSpec: v1alpha1.SparkPodSpec{}, + }, + }, + } + }) + + Context("GetCommonLabels", func() { + It("should return correct common labels", func() { + labels := GetCommonLabels(conn) + Expect(labels).To(HaveLen(2)) + Expect(labels).To(HaveKeyWithValue(common.LabelCreatedBySparkOperator, "true")) + Expect(labels).To(HaveKeyWithValue(common.LabelSparkConnectName, "test-spark-connect")) + }) + }) + + Context("GetServerSelectorLabels", func() { + It("should return correct server selector labels", func() { + labels := GetServerSelectorLabels(conn) + Expect(labels).To(HaveLen(4)) + Expect(labels).To(HaveKeyWithValue(common.LabelLaunchedBySparkOperator, "true")) + Expect(labels).To(HaveKeyWithValue(common.LabelSparkConnectName, "test-spark-connect")) + Expect(labels).To(HaveKeyWithValue(common.LabelSparkRole, common.SparkRoleConnectServer)) + Expect(labels).To(HaveKeyWithValue(common.LabelSparkVersion, "4.0.0")) + }) + }) + + Context("GetExecutorSelectorLabels", func() { + It("should return correct executor selector labels", func() { + labels := GetExecutorSelectorLabels(conn) + Expect(labels).To(HaveLen(3)) + Expect(labels).To(HaveKeyWithValue(common.LabelLaunchedBySparkOperator, "true")) + Expect(labels).To(HaveKeyWithValue(common.LabelSparkConnectName, "test-spark-connect")) + Expect(labels).To(HaveKeyWithValue(common.LabelSparkRole, common.SparkRoleExecutor)) + }) + }) + + Context("GetConfigMapName", func() { + It("should return correct config map name", func() { + name := GetConfigMapName(conn) + Expect(name).To(Equal("test-spark-connect-conf")) + }) + }) + + Context("GetServerPodName", func() { + It("should return correct server pod name", func() { + name := GetServerPodName(conn) + Expect(name).To(Equal("test-spark-connect-server")) + }) + }) + + Context("GetServerServiceName", func() { + When("service is not specified in server spec", func() { + It("should return default server service name", func() { + name := GetServerServiceName(conn) + Expect(name).To(Equal("test-spark-connect-server")) + }) + }) + + When("service is specified in server spec", func() { + BeforeEach(func() { + conn.Spec.Server.Service = &corev1.Service{ + ObjectMeta: metav1.ObjectMeta{ + Name: "custom-service-name", + }, + } + }) + + It("should return the specified service name", func() { + name := GetServerServiceName(conn) + Expect(name).To(Equal("custom-service-name")) + }) + }) + }) + + Context("GetServerServiceHost", func() { + It("should return correct server service host", func() { + host := GetServerServiceHost(conn) + Expect(host).To(Equal("test-spark-connect-server.test-namespace.svc.cluster.local")) + }) + }) +}) diff --git a/internal/controller/validatingwebhookconfiguration/controller.go b/internal/controller/validatingwebhookconfiguration/controller.go index 63f308c795..8688ae2c9f 100644 --- a/internal/controller/validatingwebhookconfiguration/controller.go +++ 
b/internal/controller/validatingwebhookconfiguration/controller.go @@ -19,6 +19,7 @@ package validatingwebhookconfiguration import ( "context" "fmt" + "strings" admissionregistrationv1 "k8s.io/api/admissionregistration/v1" "k8s.io/apimachinery/pkg/types" @@ -29,6 +30,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/reconcile" "github.com/kubeflow/spark-operator/v2/pkg/certificate" + "github.com/kubeflow/spark-operator/v2/pkg/util" ) var ( @@ -55,8 +57,14 @@ func NewReconciler(client client.Client, certProvider *certificate.Provider, nam } func (r *Reconciler) SetupWithManager(mgr ctrl.Manager, options controller.Options) error { + kind := "ValidatingWebhookConfiguration" + name := strings.ToLower(kind) + + // Use a custom log constructor. + options.LogConstructor = util.NewLogConstructor(mgr.GetLogger(), kind) + return ctrl.NewControllerManagedBy(mgr). - Named("validating-webhook-configuration-controller"). + Named(name). Watches( &admissionregistrationv1.ValidatingWebhookConfiguration{}, NewEventHandler(), diff --git a/internal/scheduler/kubescheduler/scheduler.go b/internal/scheduler/kubescheduler/scheduler.go index 697e463f9b..002eb4e504 100644 --- a/internal/scheduler/kubescheduler/scheduler.go +++ b/internal/scheduler/kubescheduler/scheduler.go @@ -139,7 +139,8 @@ func (s *Scheduler) syncPodGroup(podGroup *schedulingv1alpha1.PodGroup) error { Name: podGroup.Name, } - if err := s.client.Get(context.TODO(), key, &schedulingv1alpha1.PodGroup{}); err != nil { + existing := &schedulingv1alpha1.PodGroup{} + if err := s.client.Get(context.TODO(), key, existing); err != nil { if !errors.IsNotFound(err) { return err } @@ -151,6 +152,8 @@ func (s *Scheduler) syncPodGroup(podGroup *schedulingv1alpha1.PodGroup) error { return nil } + podGroup.SetResourceVersion(existing.GetResourceVersion()) + if err := s.client.Update(context.TODO(), podGroup); err != nil { return err } diff --git a/internal/scheduler/kubescheduler/scheduler_test.go b/internal/scheduler/kubescheduler/scheduler_test.go new file mode 100644 index 0000000000..337a96bffb --- /dev/null +++ b/internal/scheduler/kubescheduler/scheduler_test.go @@ -0,0 +1,208 @@ +/* +Copyright 2024 The Kubeflow authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + https://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package kubescheduler + +import ( + "context" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/types" + "k8s.io/utils/ptr" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/client/fake" + + "github.com/kubeflow/spark-operator/v2/api/v1beta2" + "github.com/kubeflow/spark-operator/v2/pkg/util" + schedulingv1alpha1 "sigs.k8s.io/scheduler-plugins/apis/scheduling/v1alpha1" +) + +func TestFactoryWithValidConfig(t *testing.T) { + scheme := newTestScheme(t) + fakeClient := fake.NewClientBuilder().WithScheme(scheme).Build() + + cfg := &Config{SchedulerName: Name, Client: fakeClient} + sch, err := Factory(cfg) + + require.NoError(t, err) + assert.Equal(t, Name, sch.Name()) +} + +func TestFactoryWithInvalidConfig(t *testing.T) { + _, err := Factory(struct{}{}) + require.Error(t, err) +} + +func TestSchedulerName(t *testing.T) { + sch, _ := newTestScheduler(t) + assert.Equal(t, Name, sch.Name()) +} + +func TestShouldScheduleAlwaysTrue(t *testing.T) { + sch, _ := newTestScheduler(t) + app := newTestSparkApplication() + + assert.True(t, sch.ShouldSchedule(app)) +} + +func TestScheduleCreatesPodGroupAndLabelsApp(t *testing.T) { + sch, cl := newTestScheduler(t) + app := newTestSparkApplication() + + err := sch.Schedule(app) + require.NoError(t, err) + + assert.Equal(t, getPodGroupName(app), app.Labels[schedulingv1alpha1.PodGroupLabel]) + + created := &schedulingv1alpha1.PodGroup{} + err = cl.Get(context.Background(), types.NamespacedName{Namespace: app.Namespace, Name: getPodGroupName(app)}, created) + require.NoError(t, err) + + assert.Equal(t, int32(1), created.Spec.MinMember) + assertResourceListEqual(t, created.Spec.MinResources, expectedMinResources(app)) + require.Len(t, created.OwnerReferences, 1) + assert.Equal(t, app.Name, created.OwnerReferences[0].Name) + assert.NotNil(t, created.OwnerReferences[0].Controller) + assert.True(t, *created.OwnerReferences[0].Controller) +} + +func TestScheduleUpdatesExistingPodGroup(t *testing.T) { + app := newTestSparkApplication() + existing := &schedulingv1alpha1.PodGroup{ + ObjectMeta: metav1.ObjectMeta{ + Name: getPodGroupName(app), + Namespace: app.Namespace, + ResourceVersion: "1", + Labels: map[string]string{"existing": "label"}, + }, + Spec: schedulingv1alpha1.PodGroupSpec{ + MinMember: 5, + MinResources: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("100m"), + corev1.ResourceMemory: resource.MustParse("100Mi"), + }, + }, + } + + sch, cl := newTestScheduler(t, existing) + app.Labels = map[string]string{"preserve": "me"} + + err := sch.Schedule(app) + require.NoError(t, err) + + updated := &schedulingv1alpha1.PodGroup{} + err = cl.Get(context.Background(), types.NamespacedName{Namespace: app.Namespace, Name: getPodGroupName(app)}, updated) + require.NoError(t, err) + + assert.Equal(t, int32(1), updated.Spec.MinMember) + assertResourceListEqual(t, updated.Spec.MinResources, expectedMinResources(app)) + require.Len(t, updated.OwnerReferences, 1) + assert.Equal(t, app.Name, updated.OwnerReferences[0].Name) + assert.Equal(t, "me", app.Labels["preserve"]) + assert.Equal(t, getPodGroupName(app), app.Labels[schedulingv1alpha1.PodGroupLabel]) +} + +func TestCleanupDeletesPodGroup(t *testing.T) { + app := newTestSparkApplication() + existing := &schedulingv1alpha1.PodGroup{ + ObjectMeta: 
metav1.ObjectMeta{ + Name: getPodGroupName(app), + Namespace: app.Namespace, + }, + } + + sch, cl := newTestScheduler(t, existing) + + err := sch.Cleanup(app) + require.NoError(t, err) + + err = cl.Get(context.Background(), types.NamespacedName{Namespace: app.Namespace, Name: getPodGroupName(app)}, &schedulingv1alpha1.PodGroup{}) + require.Error(t, err) + assert.True(t, client.IgnoreNotFound(err) == nil) +} + +func TestCleanupIgnoresNotFound(t *testing.T) { + sch, _ := newTestScheduler(t) + err := sch.Cleanup(newTestSparkApplication()) + assert.NoError(t, err) +} + +func newTestScheduler(t *testing.T, objs ...client.Object) (*Scheduler, client.Client) { + t.Helper() + + scheme := newTestScheme(t) + cl := fake.NewClientBuilder().WithScheme(scheme).WithObjects(objs...).Build() + return &Scheduler{name: Name, client: cl}, cl +} + +func newTestScheme(t *testing.T) *runtime.Scheme { + t.Helper() + + scheme := runtime.NewScheme() + require.NoError(t, v1beta2.AddToScheme(scheme)) + require.NoError(t, schedulingv1alpha1.AddToScheme(scheme)) + return scheme +} + +func newTestSparkApplication() *v1beta2.SparkApplication { + return &v1beta2.SparkApplication{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-app", + Namespace: "test-ns", + }, + Spec: v1beta2.SparkApplicationSpec{ + Type: v1beta2.SparkApplicationTypeScala, + SparkVersion: "3.5.0", + Driver: v1beta2.DriverSpec{ + SparkPodSpec: v1beta2.SparkPodSpec{ + Cores: ptr.To[int32](1), + Memory: ptr.To("1Gi"), + MemoryOverhead: ptr.To("256Mi"), + }, + }, + Executor: v1beta2.ExecutorSpec{ + Instances: ptr.To[int32](2), + SparkPodSpec: v1beta2.SparkPodSpec{ + Cores: ptr.To[int32](1), + Memory: ptr.To("2Gi"), + MemoryOverhead: ptr.To("512Mi"), + }, + }, + }, + } +} + +func expectedMinResources(app *v1beta2.SparkApplication) corev1.ResourceList { + return util.SumResourceList([]corev1.ResourceList{util.GetDriverRequestResource(app), util.GetExecutorRequestResource(app)}) +} + +func assertResourceListEqual(t *testing.T, actual, expected corev1.ResourceList) { + t.Helper() + + assert.Equal(t, len(expected), len(actual)) + for name, exp := range expected { + got, ok := actual[name] + if assert.Truef(t, ok, "missing resource %s", name) { + assert.Zerof(t, exp.Cmp(got), "resource %s mismatch: want %s, got %s", name, exp.String(), got.String()) + } + } +} diff --git a/internal/scheduler/volcano/scheduler_test.go b/internal/scheduler/volcano/scheduler_test.go index 3f2f3b3042..acb49a60b7 100644 --- a/internal/scheduler/volcano/scheduler_test.go +++ b/internal/scheduler/volcano/scheduler_test.go @@ -25,6 +25,7 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" clienttesting "k8s.io/client-go/testing" + "k8s.io/utils/ptr" "volcano.sh/apis/pkg/apis/scheduling/v1beta1" fakevolcanoclientset "volcano.sh/apis/pkg/client/clientset/versioned/fake" @@ -53,12 +54,12 @@ func TestSchedule(t *testing.T) { SparkPodSpec: v1beta2.SparkPodSpec{}, }, Executor: v1beta2.ExecutorSpec{ - Instances: util.Int32Ptr(1), + Instances: ptr.To[int32](1), SparkPodSpec: v1beta2.SparkPodSpec{}, }, BatchSchedulerOptions: &v1beta2.BatchSchedulerConfiguration{ - Queue: util.StringPtr("high-priority"), - PriorityClassName: util.StringPtr("high"), + Queue: ptr.To("high-priority"), + PriorityClassName: ptr.To("high"), }, }, }, @@ -79,11 +80,11 @@ func TestSchedule(t *testing.T) { SparkPodSpec: v1beta2.SparkPodSpec{}, }, Executor: v1beta2.ExecutorSpec{ - Instances: util.Int32Ptr(1), + Instances: ptr.To[int32](1), SparkPodSpec: v1beta2.SparkPodSpec{}, }, 
BatchSchedulerOptions: &v1beta2.BatchSchedulerConfiguration{ - Queue: util.StringPtr("batch-queue"), + Queue: ptr.To("batch-queue"), }, }, }, @@ -102,15 +103,15 @@ func TestSchedule(t *testing.T) { Mode: v1beta2.DeployModeClient, Driver: v1beta2.DriverSpec{ SparkPodSpec: v1beta2.SparkPodSpec{ - Memory: util.StringPtr("2g"), - Cores: util.Int32Ptr(1), + Memory: ptr.To("2g"), + Cores: ptr.To[int32](1), }, }, Executor: v1beta2.ExecutorSpec{ - Instances: util.Int32Ptr(2), + Instances: ptr.To[int32](2), SparkPodSpec: v1beta2.SparkPodSpec{ - Memory: util.StringPtr("1g"), - Cores: util.Int32Ptr(1), + Memory: ptr.To("1g"), + Cores: ptr.To[int32](1), }, }, BatchSchedulerOptions: &v1beta2.BatchSchedulerConfiguration{ @@ -137,15 +138,15 @@ func TestSchedule(t *testing.T) { Mode: v1beta2.DeployModeClient, Driver: v1beta2.DriverSpec{ SparkPodSpec: v1beta2.SparkPodSpec{ - Memory: util.StringPtr("2g"), - Cores: util.Int32Ptr(1), + Memory: ptr.To("2g"), + Cores: ptr.To[int32](1), }, }, Executor: v1beta2.ExecutorSpec{ - Instances: util.Int32Ptr(2), + Instances: ptr.To[int32](2), SparkPodSpec: v1beta2.SparkPodSpec{ - Memory: util.StringPtr("1g"), - Cores: util.Int32Ptr(1), + Memory: ptr.To("1g"), + Cores: ptr.To[int32](1), }, }, BatchSchedulerOptions: nil, @@ -166,19 +167,19 @@ func TestSchedule(t *testing.T) { Mode: v1beta2.DeployModeCluster, Driver: v1beta2.DriverSpec{ SparkPodSpec: v1beta2.SparkPodSpec{ - Memory: util.StringPtr("2g"), - Cores: util.Int32Ptr(1), + Memory: ptr.To("2g"), + Cores: ptr.To[int32](1), }, }, Executor: v1beta2.ExecutorSpec{ - Instances: util.Int32Ptr(3), + Instances: ptr.To[int32](3), SparkPodSpec: v1beta2.SparkPodSpec{ - Memory: util.StringPtr("1g"), - Cores: util.Int32Ptr(1), + Memory: ptr.To("1g"), + Cores: ptr.To[int32](1), }, }, BatchSchedulerOptions: &v1beta2.BatchSchedulerConfiguration{ - Queue: util.StringPtr("gpu-queue"), + Queue: ptr.To("gpu-queue"), Resources: corev1.ResourceList{ corev1.ResourceCPU: resource.MustParse("6"), corev1.ResourceMemory: resource.MustParse("12Gi"), @@ -202,15 +203,15 @@ func TestSchedule(t *testing.T) { Mode: v1beta2.DeployModeCluster, Driver: v1beta2.DriverSpec{ SparkPodSpec: v1beta2.SparkPodSpec{ - Memory: util.StringPtr("2g"), - Cores: util.Int32Ptr(1), + Memory: ptr.To("2g"), + Cores: ptr.To[int32](1), }, }, Executor: v1beta2.ExecutorSpec{ - Instances: util.Int32Ptr(2), + Instances: ptr.To[int32](2), SparkPodSpec: v1beta2.SparkPodSpec{ - Memory: util.StringPtr("1g"), - Cores: util.Int32Ptr(1), + Memory: ptr.To("1g"), + Cores: ptr.To[int32](1), }, }, BatchSchedulerOptions: nil, diff --git a/internal/scheduler/yunikorn/resourceusage/resource_usage_test.go b/internal/scheduler/yunikorn/resourceusage/resource_usage_test.go index 1c08cc2dcd..93e36df9f4 100644 --- a/internal/scheduler/yunikorn/resourceusage/resource_usage_test.go +++ b/internal/scheduler/yunikorn/resourceusage/resource_usage_test.go @@ -20,8 +20,7 @@ import ( "testing" "github.com/stretchr/testify/assert" - - "github.com/kubeflow/spark-operator/v2/pkg/util" + "k8s.io/utils/ptr" ) func TestCpuRequest(t *testing.T) { @@ -31,9 +30,9 @@ func TestCpuRequest(t *testing.T) { expected string }{ {nil, nil, "1"}, - {util.Int32Ptr(1), nil, "1"}, - {nil, util.StringPtr("1"), "1"}, - {util.Int32Ptr(1), util.StringPtr("500m"), "500m"}, + {ptr.To[int32](1), nil, "1"}, + {nil, ptr.To("1"), "1"}, + {ptr.To[int32](1), ptr.To("500m"), "500m"}, } for _, tc := range testCases { diff --git a/internal/scheduler/yunikorn/scheduler.go b/internal/scheduler/yunikorn/scheduler.go index 
d092c85c33..dc4b4acc62 100644 --- a/internal/scheduler/yunikorn/scheduler.go +++ b/internal/scheduler/yunikorn/scheduler.go @@ -22,6 +22,7 @@ import ( "maps" corev1 "k8s.io/api/core/v1" + "k8s.io/utils/ptr" "github.com/kubeflow/spark-operator/v2/api/v1beta2" "github.com/kubeflow/spark-operator/v2/internal/scheduler" @@ -111,8 +112,8 @@ func (s *Scheduler) Schedule(app *v1beta2.SparkApplication) error { // Ensure that the driver and executors pods are scheduled by Yunikorn // if it is installed with the admissions controller disabled - app.Spec.Driver.SchedulerName = util.StringPtr(SchedulerName) - app.Spec.Executor.SchedulerName = util.StringPtr(SchedulerName) + app.Spec.Driver.SchedulerName = ptr.To(SchedulerName) + app.Spec.Executor.SchedulerName = ptr.To(SchedulerName) // Yunikorn re-uses the application ID set by the driver under the label "spark-app-selector", // so there is no need to set an application ID diff --git a/internal/scheduler/yunikorn/scheduler_test.go b/internal/scheduler/yunikorn/scheduler_test.go index 50f92e6cf3..25ef89a386 100644 --- a/internal/scheduler/yunikorn/scheduler_test.go +++ b/internal/scheduler/yunikorn/scheduler_test.go @@ -22,9 +22,9 @@ import ( "github.com/stretchr/testify/assert" corev1 "k8s.io/api/core/v1" + "k8s.io/utils/ptr" "github.com/kubeflow/spark-operator/v2/api/v1beta2" - "github.com/kubeflow/spark-operator/v2/pkg/util" ) func TestSchedule(t *testing.T) { @@ -40,21 +40,21 @@ func TestSchedule(t *testing.T) { Type: v1beta2.SparkApplicationTypeScala, Driver: v1beta2.DriverSpec{ SparkPodSpec: v1beta2.SparkPodSpec{ - Cores: util.Int32Ptr(1), - CoreLimit: util.StringPtr("1200m"), - Memory: util.StringPtr("512m"), + Cores: ptr.To[int32](1), + CoreLimit: ptr.To("1200m"), + Memory: ptr.To("512m"), }, }, Executor: v1beta2.ExecutorSpec{ - Instances: util.Int32Ptr(2), + Instances: ptr.To[int32](2), SparkPodSpec: v1beta2.SparkPodSpec{ - Cores: util.Int32Ptr(1), - CoreLimit: util.StringPtr("1200m"), - Memory: util.StringPtr("512m"), + Cores: ptr.To[int32](1), + CoreLimit: ptr.To("1200m"), + Memory: ptr.To("512m"), }, }, BatchSchedulerOptions: &v1beta2.BatchSchedulerConfiguration{ - Queue: util.StringPtr("root.default"), + Queue: ptr.To("root.default"), }, }, }, @@ -82,29 +82,29 @@ func TestSchedule(t *testing.T) { app: &v1beta2.SparkApplication{ Spec: v1beta2.SparkApplicationSpec{ Type: v1beta2.SparkApplicationTypePython, - MemoryOverheadFactor: util.StringPtr("0.3"), + MemoryOverheadFactor: ptr.To("0.3"), Driver: v1beta2.DriverSpec{ - CoreRequest: util.StringPtr("2000m"), + CoreRequest: ptr.To("2000m"), SparkPodSpec: v1beta2.SparkPodSpec{ - Cores: util.Int32Ptr(4), - Memory: util.StringPtr("8g"), + Cores: ptr.To[int32](4), + Memory: ptr.To("8g"), }, }, Executor: v1beta2.ExecutorSpec{ - Instances: util.Int32Ptr(4), + Instances: ptr.To[int32](4), SparkPodSpec: v1beta2.SparkPodSpec{ - MemoryOverhead: util.StringPtr("2g"), - Cores: util.Int32Ptr(8), - Memory: util.StringPtr("64g"), + MemoryOverhead: ptr.To("2g"), + Cores: ptr.To[int32](8), + Memory: ptr.To("64g"), }, }, DynamicAllocation: &v1beta2.DynamicAllocation{ Enabled: true, - InitialExecutors: util.Int32Ptr(8), - MinExecutors: util.Int32Ptr(2), + InitialExecutors: ptr.To[int32](8), + MinExecutors: ptr.To[int32](2), }, BatchSchedulerOptions: &v1beta2.BatchSchedulerConfiguration{ - Queue: util.StringPtr("root.default"), + Queue: ptr.To("root.default"), }, }, }, @@ -135,8 +135,8 @@ func TestSchedule(t *testing.T) { NodeSelector: map[string]string{"key": "value"}, Driver: v1beta2.DriverSpec{ SparkPodSpec: 
v1beta2.SparkPodSpec{ - Cores: util.Int32Ptr(1), - Memory: util.StringPtr("1g"), + Cores: ptr.To[int32](1), + Memory: ptr.To("1g"), NodeSelector: map[string]string{"key": "newvalue", "key2": "value2"}, Tolerations: []corev1.Toleration{ { @@ -149,10 +149,10 @@ func TestSchedule(t *testing.T) { }, }, Executor: v1beta2.ExecutorSpec{ - Instances: util.Int32Ptr(1), + Instances: ptr.To[int32](1), SparkPodSpec: v1beta2.SparkPodSpec{ - Cores: util.Int32Ptr(1), - Memory: util.StringPtr("1g"), + Cores: ptr.To[int32](1), + Memory: ptr.To("1g"), Affinity: &corev1.Affinity{ NodeAffinity: &corev1.NodeAffinity{ RequiredDuringSchedulingIgnoredDuringExecution: &corev1.NodeSelector{ @@ -229,15 +229,15 @@ func TestSchedule(t *testing.T) { Type: v1beta2.SparkApplicationTypePython, Driver: v1beta2.DriverSpec{ SparkPodSpec: v1beta2.SparkPodSpec{ - Cores: util.Int32Ptr(1), - Memory: util.StringPtr("512m"), + Cores: ptr.To[int32](1), + Memory: ptr.To("512m"), }, }, Executor: v1beta2.ExecutorSpec{ - Instances: util.Int32Ptr(2), + Instances: ptr.To[int32](2), SparkPodSpec: v1beta2.SparkPodSpec{ - Cores: util.Int32Ptr(1), - Memory: util.StringPtr("512m"), + Cores: ptr.To[int32](1), + Memory: ptr.To("512m"), }, }, SparkConf: map[string]string{ @@ -272,15 +272,15 @@ func TestSchedule(t *testing.T) { Type: v1beta2.SparkApplicationTypePython, Driver: v1beta2.DriverSpec{ SparkPodSpec: v1beta2.SparkPodSpec{ - Cores: util.Int32Ptr(1), - Memory: util.StringPtr("512m"), + Cores: ptr.To[int32](1), + Memory: ptr.To("512m"), }, }, Executor: v1beta2.ExecutorSpec{ - Instances: util.Int32Ptr(2), + Instances: ptr.To[int32](2), SparkPodSpec: v1beta2.SparkPodSpec{ - Cores: util.Int32Ptr(1), - Memory: util.StringPtr("512m"), + Cores: ptr.To[int32](1), + Memory: ptr.To("512m"), }, }, SparkConf: map[string]string{ @@ -316,15 +316,15 @@ func TestSchedule(t *testing.T) { Type: v1beta2.SparkApplicationTypePython, Driver: v1beta2.DriverSpec{ SparkPodSpec: v1beta2.SparkPodSpec{ - Cores: util.Int32Ptr(1), - Memory: util.StringPtr("512m"), + Cores: ptr.To[int32](1), + Memory: ptr.To("512m"), }, }, Executor: v1beta2.ExecutorSpec{ - Instances: util.Int32Ptr(2), + Instances: ptr.To[int32](2), SparkPodSpec: v1beta2.SparkPodSpec{ - Cores: util.Int32Ptr(1), - Memory: util.StringPtr("512m"), + Cores: ptr.To[int32](1), + Memory: ptr.To("512m"), }, }, SparkConf: map[string]string{ diff --git a/internal/webhook/scheduledsparkapplication_defaulter.go b/internal/webhook/scheduledsparkapplication_defaulter.go index e6c20e0ed4..319d2f2782 100644 --- a/internal/webhook/scheduledsparkapplication_defaulter.go +++ b/internal/webhook/scheduledsparkapplication_defaulter.go @@ -20,6 +20,7 @@ import ( "context" "k8s.io/apimachinery/pkg/runtime" + "sigs.k8s.io/controller-runtime/pkg/log" "sigs.k8s.io/controller-runtime/pkg/webhook/admission" "github.com/kubeflow/spark-operator/v2/api/v1beta2" @@ -40,10 +41,11 @@ var _ admission.CustomDefaulter = &ScheduledSparkApplicationDefaulter{} // Default implements admission.CustomDefaulter. 
func (d *ScheduledSparkApplicationDefaulter) Default(ctx context.Context, obj runtime.Object) error { - app, ok := obj.(*v1beta2.ScheduledSparkApplication) - if !ok { + if _, ok := obj.(*v1beta2.ScheduledSparkApplication); !ok { return nil } - logger.Info("Defaulting ScheduledSparkApplication", "name", app.Name, "namespace", app.Namespace) + + logger := log.FromContext(ctx) + logger.Info("Mutating ScheduledSparkApplication") return nil } diff --git a/internal/webhook/scheduledsparkapplication_validator.go b/internal/webhook/scheduledsparkapplication_validator.go index 219381cca5..4ddc0c5798 100644 --- a/internal/webhook/scheduledsparkapplication_validator.go +++ b/internal/webhook/scheduledsparkapplication_validator.go @@ -18,8 +18,12 @@ package webhook import ( "context" + "fmt" + "strings" "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/util/validation" + "sigs.k8s.io/controller-runtime/pkg/log" "sigs.k8s.io/controller-runtime/pkg/webhook/admission" "github.com/kubeflow/spark-operator/v2/api/v1beta2" @@ -44,7 +48,12 @@ func (v *ScheduledSparkApplicationValidator) ValidateCreate(ctx context.Context, if !ok { return nil, nil } - logger.Info("Validating SchedulingSparkApplication create", "name", app.Name, "namespace", app.Namespace) + logger := log.FromContext(ctx) + logger.Info("Validating ScheduledSparkApplication create") + // Validate metadata.name early to prevent downstream Service creation failures + if err := v.validateName(app.Name); err != nil { + return nil, err + } if err := v.validate(app); err != nil { return nil, err } @@ -57,7 +66,12 @@ func (v *ScheduledSparkApplicationValidator) ValidateUpdate(ctx context.Context, if !ok { return nil, nil } - logger.Info("Validating SchedulingSparkApplication update", "name", newApp.Name, "namespace", newApp.Namespace) + logger := log.FromContext(ctx) + logger.Info("Validating ScheduledSparkApplication update") + // Name is immutable in Kubernetes, but validate anyway for safety in case of admission reconcilers + if err := v.validateName(newApp.Name); err != nil { + return nil, err + } if err := v.validate(newApp); err != nil { return nil, err } @@ -66,11 +80,11 @@ func (v *ScheduledSparkApplicationValidator) ValidateUpdate(ctx context.Context, // ValidateDelete implements admission.CustomValidator. func (v *ScheduledSparkApplicationValidator) ValidateDelete(ctx context.Context, obj runtime.Object) (warnings admission.Warnings, err error) { - app, ok := obj.(*v1beta2.ScheduledSparkApplication) - if !ok { + if _, ok := obj.(*v1beta2.ScheduledSparkApplication); !ok { return nil, nil } - logger.Info("Validating ScheduledSparkApplication delete", "name", app.Name, "namespace", app.Namespace) + logger := log.FromContext(ctx) + logger.Info("Validating ScheduledSparkApplication delete") return nil, nil } @@ -78,3 +92,13 @@ func (v *ScheduledSparkApplicationValidator) validate(_ *v1beta2.ScheduledSparkA // TODO: implement validate logic return nil } + +// validateName ensures the ScheduledSparkApplication metadata.name, when combined with suffixes, +// results in a valid DNS-1035 label for Kubernetes Service names. This prevents failures later +// when creating SparkApplication resources that require DNS-1035 compliant names. 
+func (v *ScheduledSparkApplicationValidator) validateName(name string) error { + if errs := validation.IsDNS1035Label(name); len(errs) > 0 { + return fmt.Errorf("invalid ScheduledSparkApplication name %q: %s", name, strings.Join(errs, ", ")) + } + return nil +} diff --git a/internal/webhook/scheduledsparkapplication_validator_test.go b/internal/webhook/scheduledsparkapplication_validator_test.go new file mode 100644 index 0000000000..b1bc995b14 --- /dev/null +++ b/internal/webhook/scheduledsparkapplication_validator_test.go @@ -0,0 +1,176 @@ +/* +Copyright 2025 The Kubeflow authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + https://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package webhook + +import ( + "context" + "strings" + "testing" + + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + "github.com/kubeflow/spark-operator/v2/api/v1beta2" +) + +func TestScheduledSparkApplicationValidatorValidateCreate(t *testing.T) { + validator := NewScheduledSparkApplicationValidator() + + t.Run("returns nil for unrelated object types", func(t *testing.T) { + warnings, err := validator.ValidateCreate(context.Background(), &v1beta2.SparkApplication{}) + if err != nil { + t.Fatalf("expected no error, got %v", err) + } + if len(warnings) != 0 { + t.Fatalf("expected no warnings, got %v", warnings) + } + }) + + t.Run("accepts ScheduledSparkApplication instances", func(t *testing.T) { + app := &v1beta2.ScheduledSparkApplication{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-app", + Namespace: "default", + }, + } + warnings, err := validator.ValidateCreate(context.Background(), app) + if err != nil { + t.Fatalf("expected no error, got %v", err) + } + if len(warnings) != 0 { + t.Fatalf("expected no warnings, got %v", warnings) + } + }) +} + +func TestScheduledSparkApplicationValidatorValidateUpdate(t *testing.T) { + validator := NewScheduledSparkApplicationValidator() + + t.Run("returns nil for unrelated object types", func(t *testing.T) { + warnings, err := validator.ValidateUpdate( + context.Background(), + &v1beta2.ScheduledSparkApplication{}, + &v1beta2.SparkApplication{}, + ) + if err != nil { + t.Fatalf("expected no error, got %v", err) + } + if len(warnings) != 0 { + t.Fatalf("expected no warnings, got %v", warnings) + } + }) + + t.Run("accepts ScheduledSparkApplication instances", func(t *testing.T) { + oldApp := &v1beta2.ScheduledSparkApplication{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-app", + Namespace: "default", + }, + } + newApp := &v1beta2.ScheduledSparkApplication{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-app", + Namespace: "default", + }, + } + warnings, err := validator.ValidateUpdate(context.Background(), oldApp, newApp) + if err != nil { + t.Fatalf("expected no error, got %v", err) + } + if len(warnings) != 0 { + t.Fatalf("expected no warnings, got %v", warnings) + } + }) +} + +func TestScheduledSparkApplicationValidatorValidateDelete(t *testing.T) { + validator := NewScheduledSparkApplicationValidator() + + t.Run("returns nil for unrelated object types", func(t *testing.T) { + warnings, err := 
validator.ValidateDelete(context.Background(), &v1beta2.SparkApplication{}) + if err != nil { + t.Fatalf("expected no error, got %v", err) + } + if len(warnings) != 0 { + t.Fatalf("expected no warnings, got %v", warnings) + } + }) + + t.Run("accepts ScheduledSparkApplication instances", func(t *testing.T) { + warnings, err := validator.ValidateDelete(context.Background(), &v1beta2.ScheduledSparkApplication{}) + if err != nil { + t.Fatalf("expected no error, got %v", err) + } + if len(warnings) != 0 { + t.Fatalf("expected no warnings, got %v", warnings) + } + }) +} + +func TestScheduledSparkApplicationValidatorValidateName(t *testing.T) { + validator := NewScheduledSparkApplicationValidator() + + tests := []struct { + name string + appName string + wantError bool + }{ + // Valid names + {"valid simple name", "test-app", false}, + {"valid name with numbers", "test-app-123", false}, + {"valid single letter", "a", false}, + {"valid name ending with number", "my-app-1", false}, + {"valid name with multiple hyphens", "my-test-app-123", false}, + {"valid 63 char name", strings.Repeat("a", 63), false}, + {"valid name with hyphens in middle", "a-b-c-d-e", false}, + + // Invalid names + {"name starting with number", "123test-app", true}, + {"name with uppercase", "Test-App", true}, + {"name with uppercase at start", "TestApp", true}, + {"name with uppercase in middle", "test-App", true}, + {"name starting with hyphen", "-test-app", true}, + {"name ending with hyphen", "test-app-", true}, + {"name with consecutive hyphens", "test--app", false}, // Kubernetes validation allows consecutive hyphens + {"empty name", "", true}, + {"name too long", strings.Repeat("a", 64), true}, + {"name with special characters", "test@app", true}, + {"name with underscore", "test_app", true}, + {"name with spaces", "test app", true}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + app := &v1beta2.ScheduledSparkApplication{ + ObjectMeta: metav1.ObjectMeta{ + Name: tt.appName, + Namespace: "default", + }, + } + + _, err := validator.ValidateCreate(context.Background(), app) + hasError := err != nil + + if hasError != tt.wantError { + t.Errorf("validateName(%q) = error %v, wantError %v, got error: %v", tt.appName, hasError, tt.wantError, err) + } + + if hasError && err.Error() == "" { + t.Errorf("validateName(%q) should return a non-empty error message, got: %v", tt.appName, err) + } + }) + } +} diff --git a/internal/webhook/sparkapplication_defaulter.go b/internal/webhook/sparkapplication_defaulter.go index 6dcd2f5b6c..7cea60d4b6 100644 --- a/internal/webhook/sparkapplication_defaulter.go +++ b/internal/webhook/sparkapplication_defaulter.go @@ -20,10 +20,11 @@ import ( "context" "k8s.io/apimachinery/pkg/runtime" + "sigs.k8s.io/controller-runtime/pkg/log" "sigs.k8s.io/controller-runtime/pkg/webhook/admission" "github.com/kubeflow/spark-operator/v2/api/v1beta2" - "github.com/kubeflow/spark-operator/v2/pkg/common" + operatorscheme "github.com/kubeflow/spark-operator/v2/pkg/scheme" "github.com/kubeflow/spark-operator/v2/pkg/util" ) @@ -53,53 +54,8 @@ func (d *SparkApplicationDefaulter) Default(ctx context.Context, obj runtime.Obj return nil } - logger.Info("Defaulting SparkApplication", "name", app.Name, "namespace", app.Namespace, "state", util.GetApplicationState(app)) - defaultSparkApplication(app) + logger := log.FromContext(ctx) + logger.Info("Mutating SparkApplication", "state", util.GetApplicationState(app)) + operatorscheme.WebhookScheme.Default(app) return nil } - -// defaultSparkApplication 
sets default values for certain fields of a SparkApplication. -func defaultSparkApplication(app *v1beta2.SparkApplication) { - if app.Spec.Mode == "" { - app.Spec.Mode = v1beta2.DeployModeCluster - } - - if app.Spec.RestartPolicy.Type == "" { - app.Spec.RestartPolicy.Type = v1beta2.RestartPolicyNever - } - - if app.Spec.RestartPolicy.Type != v1beta2.RestartPolicyNever { - if app.Spec.RestartPolicy.OnSubmissionFailureRetryInterval == nil { - app.Spec.RestartPolicy.OnSubmissionFailureRetryInterval = util.Int64Ptr(5) - } - if app.Spec.RestartPolicy.OnFailureRetryInterval == nil { - app.Spec.RestartPolicy.OnFailureRetryInterval = util.Int64Ptr(5) - } - } - - defaultDriverSpec(app) - defaultExecutorSpec(app) -} - -func defaultDriverSpec(app *v1beta2.SparkApplication) { -} - -func defaultExecutorSpec(app *v1beta2.SparkApplication) { - - isDynamicAllocationEnabled := util.IsDynamicAllocationEnabled(app) - - if app.Spec.Executor.Instances == nil && - app.Spec.SparkConf[common.SparkExecutorInstances] == "" && - !isDynamicAllocationEnabled { - app.Spec.Executor.Instances = util.Int32Ptr(1) - } - - // Set default for ShuffleTrackingEnabled to true if DynamicAllocation.enabled is true and - // DynamicAllocation.ShuffleTrackingEnabled is nil. - if isDynamicAllocationEnabled && - app.Spec.DynamicAllocation != nil && - app.Spec.DynamicAllocation.ShuffleTrackingEnabled == nil { - app.Spec.DynamicAllocation.ShuffleTrackingEnabled = util.BoolPtr(true) - } - -} diff --git a/internal/webhook/sparkapplication_validator.go b/internal/webhook/sparkapplication_validator.go index 6f44f94b2d..da6a453a70 100644 --- a/internal/webhook/sparkapplication_validator.go +++ b/internal/webhook/sparkapplication_validator.go @@ -19,11 +19,14 @@ package webhook import ( "context" "fmt" + "strings" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/equality" "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/util/validation" "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/log" "sigs.k8s.io/controller-runtime/pkg/webhook/admission" "github.com/kubeflow/spark-operator/v2/api/v1beta2" @@ -57,7 +60,13 @@ func (v *SparkApplicationValidator) ValidateCreate(ctx context.Context, obj runt if !ok { return nil, nil } - logger.Info("Validating SparkApplication create", "name", app.Name, "namespace", app.Namespace, "state", util.GetApplicationState(app)) + logger := log.FromContext(ctx) + logger.Info("Validating SparkApplication create", "state", util.GetApplicationState(app)) + + // Validate metadata.name early to prevent downstream Service creation failures + if err := v.validateName(app.Name); err != nil { + return nil, err + } if err := v.validateSpec(ctx, app); err != nil { return nil, err } @@ -83,7 +92,13 @@ func (v *SparkApplicationValidator) ValidateUpdate(ctx context.Context, oldObj r return nil, nil } - logger.Info("Validating SparkApplication update", "name", newApp.Name, "namespace", newApp.Namespace) + logger := log.FromContext(ctx) + logger.Info("Validating SparkApplication update", "state", util.GetApplicationState(newApp)) + + // Name is immutable in Kubernetes, but validate anyway for safety in case of admission reconcilers + if err := v.validateName(newApp.Name); err != nil { + return nil, err + } // Skip validating when spec does not change. 
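The validator hunks above drop the package-level logger in favour of request-scoped logging (the update handler's spec-equality short-circuit continues below). A minimal sketch of the log.FromContext pattern, assuming only controller-runtime's log package and a funcr-backed logr.Logger; the request ID is made up:

package main

import (
	"context"
	"fmt"

	"github.com/go-logr/logr/funcr"
	"sigs.k8s.io/controller-runtime/pkg/log"
)

func main() {
	// Any logr.Logger works here; funcr just prints to stdout to keep the sketch self-contained.
	base := funcr.New(func(prefix, args string) { fmt.Println(prefix, args) }, funcr.Options{})

	// controller-runtime attaches a per-request logger to the admission context;
	// here it is done by hand so the example runs on its own.
	ctx := log.IntoContext(context.Background(), base.WithValues("requestID", "abc-123"))

	// Handlers recover that logger instead of reaching for a package-level one,
	// so every line automatically carries the request's key/value pairs.
	logger := log.FromContext(ctx)
	logger.Info("Validating SparkApplication create", "state", "SUBMITTED")
}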
if equality.Semantic.DeepEqual(oldApp.Spec, newApp.Spec) { @@ -110,13 +125,12 @@ func (v *SparkApplicationValidator) ValidateDelete(ctx context.Context, obj runt if !ok { return nil, nil } - logger.Info("Validating SparkApplication delete", "name", app.Name, "namespace", app.Namespace, "state", util.GetApplicationState(app)) + logger := log.FromContext(ctx) + logger.Info("Validating SparkApplication delete", "state", util.GetApplicationState(app)) return nil, nil } -func (v *SparkApplicationValidator) validateSpec(_ context.Context, app *v1beta2.SparkApplication) error { - logger.V(1).Info("Validating SparkApplication spec", "name", app.Name, "namespace", app.Namespace, "state", util.GetApplicationState(app)) - +func (v *SparkApplicationValidator) validateSpec(ctx context.Context, app *v1beta2.SparkApplication) error { if err := v.validateSparkVersion(app); err != nil { return err } @@ -148,6 +162,16 @@ func (v *SparkApplicationValidator) validateSpec(_ context.Context, app *v1beta2 return nil } +// validateName ensures the SparkApplication metadata.name is a valid DNS-1035 label +// This prevents failures later when creating related resources like Services which +// require DNS-1035 compliant names. +func (v *SparkApplicationValidator) validateName(name string) error { + if errs := validation.IsDNS1035Label(name); len(errs) > 0 { + return fmt.Errorf("invalid SparkApplication name %q: %s", name, strings.Join(errs, ", ")) + } + return nil +} + func (v *SparkApplicationValidator) validateSparkVersion(app *v1beta2.SparkApplication) error { // The pod template feature requires Spark version 3.0.0 or higher. if app.Spec.Driver.Template != nil || app.Spec.Executor.Template != nil { @@ -159,8 +183,6 @@ func (v *SparkApplicationValidator) validateSparkVersion(app *v1beta2.SparkAppli } func (v *SparkApplicationValidator) validateResourceUsage(ctx context.Context, app *v1beta2.SparkApplication) error { - logger.V(1).Info("Validating SparkApplication resource usage", "name", app.Name, "namespace", app.Namespace, "state", util.GetApplicationState(app)) - requests, err := getResourceList(app) if err != nil { return fmt.Errorf("failed to calculate resource quests: %v", err) diff --git a/internal/webhook/sparkapplication_validator_test.go b/internal/webhook/sparkapplication_validator_test.go new file mode 100644 index 0000000000..ef7ccacb14 --- /dev/null +++ b/internal/webhook/sparkapplication_validator_test.go @@ -0,0 +1,327 @@ +/* +Copyright 2025 The Kubeflow authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + https://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package webhook + +import ( + "context" + "strings" + "testing" + + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/utils/ptr" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/client/fake" + + "github.com/kubeflow/spark-operator/v2/api/v1beta2" +) + +func TestSparkApplicationValidatorValidateCreate_NodeSelectorConflict(t *testing.T) { + validator := newTestValidator(t, false) + + app := newSparkApplication() + app.Spec.NodeSelector = map[string]string{"role": "shared"} + app.Spec.Driver.NodeSelector = map[string]string{"role": "driver"} + + if _, err := validator.ValidateCreate(context.Background(), app); err == nil || !strings.Contains(err.Error(), "node selector cannot be defined") { + t.Fatalf("expected node selector validation error, got %v", err) + } +} + +func TestSparkApplicationValidatorValidateCreate_Success(t *testing.T) { + validator := newTestValidator(t, false) + + if _, err := validator.ValidateCreate(context.Background(), newSparkApplication()); err != nil { + t.Fatalf("expected success, got %v", err) + } +} + +func TestSparkApplicationValidatorValidateCreate_DriverIngressDuplicatePort(t *testing.T) { + validator := newTestValidator(t, false) + + app := newSparkApplication() + app.Spec.DriverIngressOptions = []v1beta2.DriverIngressConfiguration{ + { + ServicePort: ptr.To[int32](4040), + IngressURLFormat: "http://spark-a", + }, + { + ServicePort: ptr.To[int32](4040), + IngressURLFormat: "http://spark-b", + }, + } + + if _, err := validator.ValidateCreate(context.Background(), app); err == nil || !strings.Contains(err.Error(), "duplicate ServicePort") { + t.Fatalf("expected duplicate service port error, got %v", err) + } +} + +func TestSparkApplicationValidatorValidateCreate_PodTemplateRequiresSpark3(t *testing.T) { + validator := newTestValidator(t, false) + + app := newSparkApplication() + app.Spec.SparkVersion = "2.4.0" + app.Spec.Driver.Template = &corev1.PodTemplateSpec{} + + if _, err := validator.ValidateCreate(context.Background(), app); err == nil || !strings.Contains(err.Error(), "requires Spark version 3.0.0 or higher") { + t.Fatalf("expected spark version validation error, got %v", err) + } +} + +func TestSparkApplicationValidatorValidateCreate_ResourceQuotaSatisfied(t *testing.T) { + quota := &corev1.ResourceQuota{ + ObjectMeta: metav1.ObjectMeta{ + Name: "ample", + Namespace: "default", + }, + Spec: corev1.ResourceQuotaSpec{ + Hard: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("20"), + corev1.ResourceRequestsCPU: resource.MustParse("20"), + corev1.ResourceLimitsCPU: resource.MustParse("20"), + }, + }, + Status: corev1.ResourceQuotaStatus{ + Hard: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("20"), + corev1.ResourceRequestsCPU: resource.MustParse("20"), + corev1.ResourceLimitsCPU: resource.MustParse("20"), + }, + Used: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("0"), + corev1.ResourceRequestsCPU: resource.MustParse("0"), + corev1.ResourceLimitsCPU: resource.MustParse("0"), + }, + }, + } + + validator := newTestValidator(t, true, quota) + + if _, err := validator.ValidateCreate(context.Background(), newSparkApplication()); err != nil { + t.Fatalf("expected quota satisfied, got %v", err) + } +} + +func TestSparkApplicationValidatorValidateUpdate_SameSpecSkipsValidation(t *testing.T) { + validator := newTestValidator(t, true) + + base := newSparkApplication() + 
base.Spec.NodeSelector = map[string]string{"role": "shared"} + base.Spec.Driver.NodeSelector = map[string]string{"role": "driver"} + + oldApp := base.DeepCopy() + newApp := base.DeepCopy() + + if _, err := validator.ValidateUpdate(context.Background(), oldApp, newApp); err != nil { + t.Fatalf("expected no error when spec unchanged, got %v", err) + } +} + +func TestSparkApplicationValidatorValidateUpdate_SpecChangedTriggersValidation(t *testing.T) { + validator := newTestValidator(t, false) + + oldApp := newSparkApplication() + newApp := oldApp.DeepCopy() + newApp.Spec.NodeSelector = map[string]string{"role": "shared"} + newApp.Spec.Driver.NodeSelector = map[string]string{"role": "driver"} + + if _, err := validator.ValidateUpdate(context.Background(), oldApp, newApp); err == nil || !strings.Contains(err.Error(), "node selector cannot be defined") { + t.Fatalf("expected node selector validation error, got %v", err) + } +} + +func TestSparkApplicationValidatorValidateUpdate_SuccessWithSpecChange(t *testing.T) { + quota := &corev1.ResourceQuota{ + ObjectMeta: metav1.ObjectMeta{ + Name: "ample", + Namespace: "default", + }, + Spec: corev1.ResourceQuotaSpec{ + Hard: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("20"), + corev1.ResourceRequestsCPU: resource.MustParse("20"), + corev1.ResourceLimitsCPU: resource.MustParse("20"), + }, + }, + Status: corev1.ResourceQuotaStatus{ + Hard: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("20"), + corev1.ResourceRequestsCPU: resource.MustParse("20"), + corev1.ResourceLimitsCPU: resource.MustParse("20"), + }, + Used: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("1"), + corev1.ResourceRequestsCPU: resource.MustParse("1"), + corev1.ResourceLimitsCPU: resource.MustParse("1"), + }, + }, + } + + validator := newTestValidator(t, true, quota) + + oldApp := newSparkApplication() + newApp := oldApp.DeepCopy() + newApp.Spec.Arguments = []string{"--foo"} + + if _, err := validator.ValidateUpdate(context.Background(), oldApp, newApp); err != nil { + t.Fatalf("expected successful update validation, got %v", err) + } +} + +func TestSparkApplicationValidatorValidateCreate_ResourceQuotaExceeded(t *testing.T) { + quota := &corev1.ResourceQuota{ + ObjectMeta: metav1.ObjectMeta{ + Name: "strict", + Namespace: "default", + }, + Spec: corev1.ResourceQuotaSpec{ + Hard: corev1.ResourceList{ + corev1.ResourceLimitsCPU: resource.MustParse("1"), + }, + }, + Status: corev1.ResourceQuotaStatus{ + Hard: corev1.ResourceList{ + corev1.ResourceLimitsCPU: resource.MustParse("1"), + }, + Used: corev1.ResourceList{ + corev1.ResourceLimitsCPU: resource.MustParse("0"), + }, + }, + } + + validator := newTestValidator(t, true, quota) + + if _, err := validator.ValidateCreate(context.Background(), newSparkApplication()); err == nil || !strings.Contains(err.Error(), "failed to validate resource quota") { + t.Fatalf("expected resource quota validation error, got %v", err) + } +} + +func TestSparkApplicationValidatorValidateDelete_Success(t *testing.T) { + validator := newTestValidator(t, false) + + if _, err := validator.ValidateDelete(context.Background(), newSparkApplication()); err != nil { + t.Fatalf("expected successful delete validation, got %v", err) + } +} + +func newTestValidator(t *testing.T, enforceQuota bool, objs ...client.Object) *SparkApplicationValidator { + t.Helper() + + scheme := newTestScheme(t) + + builder := fake.NewClientBuilder().WithScheme(scheme) + if len(objs) > 0 { + builder = builder.WithObjects(objs...) 
+ } + + return NewSparkApplicationValidator(builder.Build(), enforceQuota) +} + +func newTestScheme(t *testing.T) *runtime.Scheme { + t.Helper() + + scheme := runtime.NewScheme() + if err := corev1.AddToScheme(scheme); err != nil { + t.Fatalf("failed to add corev1 to scheme: %v", err) + } + if err := v1beta2.AddToScheme(scheme); err != nil { + t.Fatalf("failed to add v1beta2 to scheme: %v", err) + } + return scheme +} + +func TestSparkApplicationValidatorValidateName(t *testing.T) { + validator := newTestValidator(t, false) + + tests := []struct { + name string + appName string + wantError bool + }{ + // Valid names + {"valid simple name", "test-app", false}, + {"valid name with numbers", "test-app-123", false}, + {"valid single letter", "a", false}, + {"valid name ending with number", "my-app-1", false}, + {"valid name with multiple hyphens", "my-test-app-123", false}, + {"valid 63 char name", strings.Repeat("a", 63), false}, + {"valid name with hyphens in middle", "a-b-c-d-e", false}, + + // Invalid names + {"name starting with number", "123test-app", true}, + {"name with uppercase", "Test-App", true}, + {"name with uppercase at start", "TestApp", true}, + {"name with uppercase in middle", "test-App", true}, + {"name starting with hyphen", "-test-app", true}, + {"name ending with hyphen", "test-app-", true}, + {"name with consecutive hyphens", "test--app", false}, // Kubernetes validation allows consecutive hyphens + {"empty name", "", true}, + {"name too long", strings.Repeat("a", 64), true}, + {"name with special characters", "test@app", true}, + {"name with underscore", "test_app", true}, + {"name with spaces", "test app", true}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + app := newSparkApplication() + app.Name = tt.appName + + _, err := validator.ValidateCreate(context.Background(), app) + hasError := err != nil + + if hasError != tt.wantError { + t.Errorf("validateName(%q) = error %v, wantError %v, got error: %v", tt.appName, hasError, tt.wantError, err) + } + + if hasError && err.Error() == "" { + t.Errorf("validateName(%q) should return a non-empty error message, got: %v", tt.appName, err) + } + }) + } +} + +func newSparkApplication() *v1beta2.SparkApplication { + mainFile := "local:///app.py" + return &v1beta2.SparkApplication{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-app", + Namespace: "default", + }, + Spec: v1beta2.SparkApplicationSpec{ + Type: v1beta2.SparkApplicationTypeScala, + SparkVersion: "3.5.0", + Mode: v1beta2.DeployModeCluster, + MainApplicationFile: &mainFile, + Driver: v1beta2.DriverSpec{ + SparkPodSpec: v1beta2.SparkPodSpec{ + Cores: ptr.To[int32](1), + Memory: ptr.To("1g"), + }, + }, + Executor: v1beta2.ExecutorSpec{ + SparkPodSpec: v1beta2.SparkPodSpec{ + Cores: ptr.To[int32](1), + Memory: ptr.To("1g"), + }, + Instances: ptr.To[int32](1), + }, + }, + } +} diff --git a/internal/webhook/sparkpod_defaulter.go b/internal/webhook/sparkpod_defaulter.go index 3cd5eef0fe..b7642acc75 100644 --- a/internal/webhook/sparkpod_defaulter.go +++ b/internal/webhook/sparkpod_defaulter.go @@ -19,6 +19,8 @@ package webhook import ( "context" "fmt" + "reflect" + "slices" "strings" corev1 "k8s.io/api/core/v1" @@ -27,6 +29,7 @@ import ( "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/log" "sigs.k8s.io/controller-runtime/pkg/webhook/admission" "github.com/kubeflow/spark-operator/v2/api/v1beta2" @@ -73,6 +76,7 @@ func (d *SparkPodDefaulter) 
Default(ctx context.Context, obj runtime.Object) err return nil } + logger := log.FromContext(ctx) namespace := pod.Namespace if !d.isSparkJobNamespace(namespace) { return nil @@ -88,9 +92,8 @@ func (d *SparkPodDefaulter) Default(ctx context.Context, obj runtime.Object) err return fmt.Errorf("failed to get SparkApplication %s/%s: %v", namespace, appName, err) } - logger.Info("Mutating Spark pod", "name", pod.Name, "namespace", namespace, "phase", pod.Status.Phase) + logger.Info("Mutating Pod", "phase", pod.Status.Phase) if err := mutateSparkPod(pod, app); err != nil { - logger.Info("Denying Spark pod", "name", pod.Name, "namespace", namespace, "errorMessage", err.Error()) return fmt.Errorf("failed to mutate Spark pod: %v", err) } @@ -126,7 +129,6 @@ func addMemoryLimit(pod *corev1.Pod, app *v1beta2.SparkApplication) error { // Apply the memory limit to the container's resources pod.Spec.Containers[i].Resources.Limits[corev1.ResourceMemory] = limitQuantity - logger.V(1).Info("Added memory limit to Spark container in pod", "name", pod.Name, "namespace", pod.Namespace, "memoryLimit", limitQuantity.String()) return nil } @@ -180,7 +182,11 @@ func addOwnerReference(pod *corev1.Pod, app *v1beta2.SparkApplication) error { return nil } ownerReference := util.GetOwnerReference(app) - pod.OwnerReferences = append(pod.OwnerReferences, ownerReference) + if !slices.ContainsFunc(pod.OwnerReferences, func(r metav1.OwnerReference) bool { + return reflect.DeepEqual(r, ownerReference) + }) { + pod.OwnerReferences = append(pod.OwnerReferences, ownerReference) + } return nil } @@ -330,11 +336,12 @@ func addGeneralConfigMaps(pod *corev1.Pod, app *v1beta2.SparkApplication) error configMaps = app.Spec.Executor.ConfigMaps } + logger := log.FromContext(context.TODO()) for _, namePath := range configMaps { volumeName := namePath.Name + "-vol" if len(volumeName) > maxNameLength { volumeName = volumeName[0:maxNameLength] - logger.Info(fmt.Sprintf("ConfigMap volume name is too long. Truncating to length %d. Result: %s.", maxNameLength, volumeName)) + logger.Info("ConfigMap volume name is too long. Truncating", "result", volumeName) } if err := addConfigMapVolume(pod, namePath.Name, volumeName); err != nil { return err @@ -626,6 +633,8 @@ func addGPU(pod *corev1.Pod, app *v1beta2.SparkApplication) error { if gpu == nil { return nil } + + logger := log.FromContext(context.TODO()) if gpu.Name == "" { logger.V(1).Info(fmt.Sprintf("Please specify GPU resource name, such as: nvidia.com/gpu, amd.com/gpu etc. Current gpu spec: %+v", gpu)) return nil diff --git a/internal/webhook/sparkpod_defaulter_test.go b/internal/webhook/sparkpod_defaulter_test.go index 64dfb96d30..eb41efbdbb 100644 --- a/internal/webhook/sparkpod_defaulter_test.go +++ b/internal/webhook/sparkpod_defaulter_test.go @@ -73,6 +73,15 @@ func TestPatchSparkPod_OwnerReference(t *testing.T) { t.Fatal(err) } assert.Len(t, modifiedPod.OwnerReferences, 2) + + // Test patching OwnerReference be idempotent. 
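The sparkpod_defaulter.go hunk above makes addOwnerReference idempotent via slices.ContainsFunc; the diff's own idempotency test continues below. A minimal sketch of that guard outside the webhook, using a hypothetical owner reference:

package main

import (
	"fmt"
	"reflect"
	"slices"

	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// appendOwnerReference adds ref only if an identical reference is not already
// present, mirroring the guard added in the defaulter hunk above.
func appendOwnerReference(refs []metav1.OwnerReference, ref metav1.OwnerReference) []metav1.OwnerReference {
	if slices.ContainsFunc(refs, func(r metav1.OwnerReference) bool {
		return reflect.DeepEqual(r, ref)
	}) {
		return refs
	}
	return append(refs, ref)
}

func main() {
	ref := metav1.OwnerReference{APIVersion: "sparkoperator.k8s.io/v1beta2", Kind: "SparkApplication", Name: "spark-pi"}

	refs := appendOwnerReference(nil, ref)
	refs = appendOwnerReference(refs, ref) // second call is a no-op

	fmt.Println(len(refs)) // 1
}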
+ oldOwnerReferences := modifiedPod.DeepCopy().OwnerReferences + pod.OwnerReferences = modifiedPod.DeepCopy().OwnerReferences + modifiedPod, err = getModifiedPod(pod, app) + if err != nil { + t.Fatal(err) + } + assert.Equal(t, modifiedPod.OwnerReferences, oldOwnerReferences) } func TestPatchSparkPod_Local_Volumes(t *testing.T) { diff --git a/internal/webhook/suite_test.go b/internal/webhook/suite_test.go index 6720f85d25..3c7c900bc1 100644 --- a/internal/webhook/suite_test.go +++ b/internal/webhook/suite_test.go @@ -72,7 +72,7 @@ var _ = BeforeSuite(func() { // Note that you must have the required binaries setup under the bin directory to perform // the tests directly. When we run make test it will be setup and used automatically. BinaryAssetsDirectory: filepath.Join("..", "..", "bin", "k8s", - fmt.Sprintf("1.32.0-%s-%s", runtime.GOOS, runtime.GOARCH)), + fmt.Sprintf("1.33.0-%s-%s", runtime.GOOS, runtime.GOARCH)), } var err error diff --git a/internal/webhook/webhook.go b/internal/webhook/webhook.go index 8a2088f7a6..2f8145007f 100644 --- a/internal/webhook/webhook.go +++ b/internal/webhook/webhook.go @@ -17,11 +17,9 @@ limitations under the License. package webhook import ( - ctrl "sigs.k8s.io/controller-runtime" -) - -var ( - logger = ctrl.Log.WithName("") + "github.com/go-logr/logr" + "k8s.io/klog/v2" + "sigs.k8s.io/controller-runtime/pkg/webhook/admission" ) type Options struct { @@ -35,3 +33,17 @@ type Options struct { WebhookMetricsBindAddress string EnableResourceQuotaEnforcement bool } + +// LogConstructor is used to customize the loggers used in webhooks. +func LogConstructor(logger logr.Logger, req *admission.Request) logr.Logger { + if req == nil { + return logger + } + + return logger.WithValues( + req.Kind.Kind, + klog.KRef(req.Namespace, req.Name), + "requestID", + req.UID, + ) +} diff --git a/pkg/certificate/suite_test.go b/pkg/certificate/suite_test.go index 71dbf91941..4559b1699a 100644 --- a/pkg/certificate/suite_test.go +++ b/pkg/certificate/suite_test.go @@ -63,7 +63,7 @@ var _ = BeforeSuite(func() { // Note that you must have the required binaries setup under the bin directory to perform // the tests directly. When we run make test it will be setup and used automatically. BinaryAssetsDirectory: filepath.Join("..", "..", "bin", "k8s", - fmt.Sprintf("1.32.0-%s-%s", runtime.GOOS, runtime.GOARCH)), + fmt.Sprintf("1.33.0-%s-%s", runtime.GOOS, runtime.GOARCH)), } var err error diff --git a/pkg/common/kueue.go b/pkg/common/kueue.go new file mode 100644 index 0000000000..75323dacd5 --- /dev/null +++ b/pkg/common/kueue.go @@ -0,0 +1,21 @@ +/* +Copyright 2024 The Kubeflow authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + https://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package common + +const ( + KueueLabelPrefix = "kueue.x-k8s.io/" +) diff --git a/pkg/features/features.go b/pkg/features/features.go new file mode 100644 index 0000000000..70c8812b5b --- /dev/null +++ b/pkg/features/features.go @@ -0,0 +1,103 @@ +/* +Copyright 2025 The Kubeflow authors. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + https://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package features + +import ( + "fmt" + "testing" + + "k8s.io/apimachinery/pkg/util/runtime" + utilfeature "k8s.io/apiserver/pkg/util/feature" + "k8s.io/component-base/featuregate" + featuregatetesting "k8s.io/component-base/featuregate/testing" +) + +const ( + // PartialRestart enables skipping reconcile for webhook-patched executor fields. + // When enabled, changes to executor's PriorityClassName, NodeSelector, Tolerations, + // Affinity, and SchedulerName will not trigger application restart since these fields + // are applied by the mutating webhook when new pods are created. + // + // owner: @Kevinz857 + // alpha: v2.5.0 + PartialRestart featuregate.Feature = "PartialRestart" + + // LoadSparkDefaults enables loading of Spark default properties file (e.g. `${SPARK_CONF_DIR}/spark-defaults.conf` or `${SPARK_HOME}/conf/spark-defaults.conf`). + // When enabled, operator will add `--load-spark-defaults` flag to spark-submit command (available since Spark 4.0.0). + // See https://github.com/kubeflow/spark-operator/issues/2795. + // + // owner: @ChenYi015 + // alpha: v2.5.0 + LoadSparkDefaults featuregate.Feature = "LoadSparkDefaults" +) + +// To add a new feature gate, follow these steps: +// +// 1. Define a new feature gate constant: +// +// const ( +// // MyFeature enables some new functionality. +// // +// // owner: @your-github-username +// // alpha: v2.x.0 +// MyFeature featuregate.Feature = "MyFeature" +// ) +// +// 2. Add the feature gate to defaultFeatureGates map: +// +// var defaultFeatureGates = map[featuregate.Feature]featuregate.FeatureSpec{ +// MyFeature: {Default: false, PreRelease: featuregate.Alpha}, +// } +// +// 3. Use the feature gate in your code: +// +// if features.Enabled(features.MyFeature) { +// // feature-specific code +// } + +func init() { + runtime.Must(utilfeature.DefaultMutableFeatureGate.Add(defaultFeatureGates)) +} + +// defaultFeatureGates consists of all known spark-operator-specific feature keys. +// To add a new feature, define a key for it above and add it here. The features will be +// available throughout spark-operator binaries. +// +// Entries are separated from each other with blank lines to avoid sweeping gofmt changes +// when adding or removing one entry. +var defaultFeatureGates = map[featuregate.Feature]featuregate.FeatureSpec{ + PartialRestart: {Default: false, PreRelease: featuregate.Alpha}, + + LoadSparkDefaults: {Default: false, PreRelease: featuregate.Alpha}, +} + +// SetFeatureGateDuringTest sets the specified feature gate to the specified value during a test. 
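A small, self-contained sketch of the featuregate mechanics the new package builds on, using a private gate and a made-up gate name so it does not touch the global utilfeature.DefaultMutableFeatureGate that the diff registers against (SetFeatureGateDuringTest itself follows below):

package main

import (
	"fmt"

	"k8s.io/component-base/featuregate"
)

// DemoFeature is a hypothetical gate, not one of the operator's.
const DemoFeature featuregate.Feature = "DemoFeature"

func main() {
	// A private gate keeps the sketch self-contained; the package above instead
	// registers its gates on utilfeature.DefaultMutableFeatureGate in init().
	gate := featuregate.NewFeatureGate()
	if err := gate.Add(map[featuregate.Feature]featuregate.FeatureSpec{
		DemoFeature: {Default: false, PreRelease: featuregate.Alpha},
	}); err != nil {
		panic(err)
	}

	fmt.Println(gate.Enabled(DemoFeature)) // false by default

	// Gates are flipped with "Name=true" strings, e.g. from a --feature-gates flag.
	if err := gate.Set(fmt.Sprintf("%s=true", DemoFeature)); err != nil {
		panic(err)
	}
	fmt.Println(gate.Enabled(DemoFeature)) // true
}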
+func SetFeatureGateDuringTest(tb testing.TB, f featuregate.Feature, value bool) { + featuregatetesting.SetFeatureGateDuringTest(tb, utilfeature.DefaultFeatureGate, f, value) +} + +// Enabled is helper for `utilfeature.DefaultFeatureGate.Enabled()` +func Enabled(f featuregate.Feature) bool { + return utilfeature.DefaultFeatureGate.Enabled(f) +} + +// SetEnable helper function that can be used to set the enabled value of a feature gate, +// it should only be used in integration test pending the merge of +// https://github.com/kubernetes/kubernetes/pull/118346 +func SetEnable(f featuregate.Feature, v bool) error { + return utilfeature.DefaultMutableFeatureGate.Set(fmt.Sprintf("%s=%v", f, v)) +} diff --git a/pkg/features/features_test.go b/pkg/features/features_test.go new file mode 100644 index 0000000000..56c3d9997c --- /dev/null +++ b/pkg/features/features_test.go @@ -0,0 +1,82 @@ +/* +Copyright 2025 The Kubeflow authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + https://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package features + +import ( + "testing" + + "github.com/stretchr/testify/assert" + utilfeature "k8s.io/apiserver/pkg/util/feature" + "k8s.io/component-base/featuregate" +) + +func TestDefaultFeatureGatesRegistered(t *testing.T) { + // Verify that defaultFeatureGates is registered with the global feature gate. + // This test ensures the init() function ran successfully. 
+ assert.NotNil(t, utilfeature.DefaultFeatureGate) +} + +func TestEnabledWithRegisteredFeature(t *testing.T) { + // Register a test feature for this test + testFeature := featuregate.Feature("TestFeatureForEnabled") + err := utilfeature.DefaultMutableFeatureGate.Add(map[featuregate.Feature]featuregate.FeatureSpec{ + testFeature: {Default: false, PreRelease: featuregate.Alpha}, + }) + assert.NoError(t, err) + + // Test that Enabled returns false for a disabled feature + assert.False(t, Enabled(testFeature)) +} + +func TestSetEnableWithRegisteredFeature(t *testing.T) { + // Register a test feature for this test + testFeature := featuregate.Feature("TestFeatureForSetEnable") + err := utilfeature.DefaultMutableFeatureGate.Add(map[featuregate.Feature]featuregate.FeatureSpec{ + testFeature: {Default: false, PreRelease: featuregate.Alpha}, + }) + assert.NoError(t, err) + + // Test SetEnable + err = SetEnable(testFeature, true) + assert.NoError(t, err) + assert.True(t, Enabled(testFeature)) + + // Disable the feature + err = SetEnable(testFeature, false) + assert.NoError(t, err) + assert.False(t, Enabled(testFeature)) +} + +func TestSetFeatureGateDuringTestHelper(t *testing.T) { + // Register a test feature for this test + testFeature := featuregate.Feature("TestFeatureForDuringTest") + err := utilfeature.DefaultMutableFeatureGate.Add(map[featuregate.Feature]featuregate.FeatureSpec{ + testFeature: {Default: false, PreRelease: featuregate.Alpha}, + }) + assert.NoError(t, err) + + // Verify the feature is disabled by default + assert.False(t, Enabled(testFeature)) + + // Enable the feature gate during the test + SetFeatureGateDuringTest(t, testFeature, true) + + // Verify the feature is now enabled + assert.True(t, Enabled(testFeature)) + + // After the test, the feature gate will be automatically restored to its original value +} diff --git a/pkg/scheme/scheme.go b/pkg/scheme/scheme.go new file mode 100644 index 0000000000..03242af026 --- /dev/null +++ b/pkg/scheme/scheme.go @@ -0,0 +1,45 @@ +/* +Copyright 2025 The Kubeflow authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + https://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +// Code generated by lister-gen. DO NOT EDIT. + +package scheme + +import ( + "k8s.io/apimachinery/pkg/runtime" + utilruntime "k8s.io/apimachinery/pkg/util/runtime" + clientgoscheme "k8s.io/client-go/kubernetes/scheme" + schedulingv1alpha1 "sigs.k8s.io/scheduler-plugins/apis/scheduling/v1alpha1" + + "github.com/kubeflow/spark-operator/v2/api/v1alpha1" + "github.com/kubeflow/spark-operator/v2/api/v1beta2" +) + +// Scheme holds the aggregated Kubernetes's schemes and extended schemes. 
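The defaulter webhook above now delegates to operatorscheme.WebhookScheme.Default(app). How the operator's own defaulting functions get registered is not shown in this diff; the sketch below only illustrates the underlying runtime.Scheme mechanism, with a hand-written Pod defaulter standing in (the scheme variables themselves follow below):

package main

import (
	"fmt"

	corev1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/runtime"
	clientgoscheme "k8s.io/client-go/kubernetes/scheme"
)

func main() {
	scheme := runtime.NewScheme()
	if err := clientgoscheme.AddToScheme(scheme); err != nil {
		panic(err)
	}

	// Register a defaulting function for Pods; Scheme.Default will invoke it for
	// any *corev1.Pod it is handed. Generated defaulters hook in the same way.
	scheme.AddTypeDefaultingFunc(&corev1.Pod{}, func(obj interface{}) {
		pod := obj.(*corev1.Pod)
		if pod.Spec.RestartPolicy == "" {
			pod.Spec.RestartPolicy = corev1.RestartPolicyAlways
		}
	})

	pod := &corev1.Pod{}
	scheme.Default(pod)
	fmt.Println(pod.Spec.RestartPolicy) // Always
}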
+var ControllerScheme = runtime.NewScheme() +var WebhookScheme = runtime.NewScheme() + +func init() { + utilruntime.Must(clientgoscheme.AddToScheme(ControllerScheme)) + utilruntime.Must(schedulingv1alpha1.AddToScheme(ControllerScheme)) + + utilruntime.Must(v1alpha1.AddToScheme(ControllerScheme)) + utilruntime.Must(v1beta2.AddToScheme(ControllerScheme)) + // +kubebuilder:scaffold:scheme + + utilruntime.Must(clientgoscheme.AddToScheme(WebhookScheme)) + utilruntime.Must(v1beta2.AddToScheme(WebhookScheme)) + // +kubebuilder:scaffold:scheme +} diff --git a/pkg/util/sparkapplication.go b/pkg/util/sparkapplication.go index f7b8b5de54..7908bbef6e 100644 --- a/pkg/util/sparkapplication.go +++ b/pkg/util/sparkapplication.go @@ -28,6 +28,7 @@ import ( networkingv1 "k8s.io/api/networking/v1" "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/utils/ptr" "github.com/kubeflow/spark-operator/v2/api/v1beta2" "github.com/kubeflow/spark-operator/v2/pkg/common" @@ -281,8 +282,8 @@ func GetOwnerReference(app *v1beta2.SparkApplication) metav1.OwnerReference { Kind: reflect.TypeOf(v1beta2.SparkApplication{}).Name(), Name: app.Name, UID: app.UID, - Controller: BoolPtr(true), - BlockOwnerDeletion: BoolPtr(true), + Controller: ptr.To(true), + BlockOwnerDeletion: ptr.To(true), } } diff --git a/pkg/util/sparkapplication_test.go b/pkg/util/sparkapplication_test.go index 3e6b00cf96..5e4dd62b14 100644 --- a/pkg/util/sparkapplication_test.go +++ b/pkg/util/sparkapplication_test.go @@ -24,6 +24,7 @@ import ( corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/utils/ptr" "github.com/kubeflow/spark-operator/v2/api/v1beta2" "github.com/kubeflow/spark-operator/v2/pkg/common" @@ -143,7 +144,7 @@ var _ = Describe("IsExpired", func() { Namespace: "test-namespace", }, Spec: v1beta2.SparkApplicationSpec{ - TimeToLiveSeconds: util.Int64Ptr(3600), + TimeToLiveSeconds: ptr.To[int64](3600), }, } @@ -160,7 +161,7 @@ var _ = Describe("IsExpired", func() { Namespace: "test-namespace", }, Spec: v1beta2.SparkApplicationSpec{ - TimeToLiveSeconds: util.Int64Ptr(3600), + TimeToLiveSeconds: ptr.To[int64](3600), }, Status: v1beta2.SparkApplicationStatus{ TerminationTime: metav1.NewTime(now.Add(-30 * time.Minute)), @@ -180,7 +181,7 @@ var _ = Describe("IsExpired", func() { Namespace: "test-namespace", }, Spec: v1beta2.SparkApplicationSpec{ - TimeToLiveSeconds: util.Int64Ptr(3600), + TimeToLiveSeconds: ptr.To[int64](3600), }, Status: v1beta2.SparkApplicationStatus{ TerminationTime: metav1.NewTime(now.Add(-2 * time.Hour)), diff --git a/pkg/util/util.go b/pkg/util/util.go index 0ac87d4d83..df0083b2c8 100644 --- a/pkg/util/util.go +++ b/pkg/util/util.go @@ -24,8 +24,11 @@ import ( "strings" "golang.org/x/mod/semver" + "k8s.io/klog/v2" + "sigs.k8s.io/controller-runtime/pkg/reconcile" "sigs.k8s.io/yaml" + "github.com/go-logr/logr" "github.com/kubeflow/spark-operator/v2/pkg/common" ) @@ -59,17 +62,6 @@ func GetMasterURL() (string, error) { return fmt.Sprintf("k8s://https://%s:%s", kubernetesServiceHost, kubernetesServicePort), nil } -// Helper functions to check and remove a string from a slice of strings. 
-// ContainsString checks if a given string is present in a slice -func ContainsString(slice []string, s string) bool { - for _, item := range slice { - if item == s { - return true - } - } - return false -} - // RemoveString removes a given string from a slice, if present func RemoveString(slice []string, s string) (result []string) { for _, item := range slice { @@ -80,22 +72,6 @@ func RemoveString(slice []string, s string) (result []string) { return result } -func BoolPtr(b bool) *bool { - return &b -} - -func Int32Ptr(n int32) *int32 { - return &n -} - -func Int64Ptr(n int64) *int64 { - return &n -} - -func StringPtr(s string) *string { - return &s -} - // CompareSemanticVersion compares two semantic versions. func CompareSemanticVersion(v1, v2 string) int { // Add 'v' prefix if needed @@ -160,3 +136,21 @@ func SetIfNotExists[K comparable, V any](m map[K]V, key K, value V) { m[key] = value } } + +// NewLogConstructor returns a log constructor for the given kind. +func NewLogConstructor(logger logr.Logger, kind string) func(*reconcile.Request) logr.Logger { + // Use the lowercase of kind as controller name, as it will show up in the metrics, + // and thus should be a prometheus compatible name(underscores and alphanumeric characters only). + name := strings.ToLower(kind) + + // Use a custom log constructor. + logConstructor := func(req *reconcile.Request) logr.Logger { + logger := logger.WithValues("controller", name) + if req != nil { + logger = logger.WithValues(kind, klog.KRef(req.Namespace, req.Name)) + } + return logger + } + + return logConstructor +} diff --git a/pkg/util/util_test.go b/pkg/util/util_test.go index 5a7b6f14d9..a6b63a6757 100644 --- a/pkg/util/util_test.go +++ b/pkg/util/util_test.go @@ -23,6 +23,7 @@ import ( . "github.com/onsi/gomega" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/utils/ptr" "github.com/kubeflow/spark-operator/v2/pkg/common" "github.com/kubeflow/spark-operator/v2/pkg/util" @@ -68,22 +69,6 @@ var _ = Describe("GetMasterURL", func() { }) }) -var _ = Describe("ContainsString", func() { - slice := []string{"a", "b", "c"} - - Context("When the string is in the slice", func() { - It("Should return true", func() { - Expect(util.ContainsString(slice, "b")).To(BeTrue()) - }) - }) - - Context("When the string is not in the slice", func() { - It("Should return false", func() { - Expect(util.ContainsString(slice, "d")).To(BeFalse()) - }) - }) -}) - var _ = Describe("RemoveString", func() { Context("When the string is in the slice", func() { slice := []string{"a", "b", "c"} @@ -107,28 +92,28 @@ var _ = Describe("RemoveString", func() { var _ = Describe("BoolPtr", func() { It("Should return a pointer to the given bool value", func() { b := true - Expect(util.BoolPtr(b)).To(Equal(&b)) + Expect(ptr.To(b)).To(Equal(&b)) }) }) var _ = Describe("Int32Ptr", func() { It("Should return a pointer to the given int32 value", func() { i := int32(42) - Expect(util.Int32Ptr(i)).To(Equal(&i)) + Expect(ptr.To[int32](i)).To(Equal(&i)) }) }) var _ = Describe("Int64Ptr", func() { It("Should return a pointer to the given int64 value", func() { i := int64(42) - Expect(util.Int64Ptr(i)).To(Equal(&i)) + Expect(ptr.To[int64](i)).To(Equal(&i)) }) }) var _ = Describe("StringPtr", func() { It("Should return a pointer to the given string value", func() { s := "hello" - Expect(util.StringPtr(s)).To(Equal(&s)) + Expect(ptr.To(s)).To(Equal(&s)) }) }) diff --git a/test/e2e/bad_examples/fail-application.yaml b/test/e2e/bad_examples/fail-application.yaml 
index b488f97c09..939af0657b 100644 --- a/test/e2e/bad_examples/fail-application.yaml +++ b/test/e2e/bad_examples/fail-application.yaml @@ -21,11 +21,11 @@ metadata: spec: type: Scala mode: cluster - image: spark:4.0.0 + image: spark:4.0.1 imagePullPolicy: IfNotPresent mainClass: non-existent mainApplicationFile: local:///non-existent.jar - sparkVersion: 4.0.0 + sparkVersion: 4.0.1 restartPolicy: type: OnFailure onFailureRetries: 3 diff --git a/test/e2e/bad_examples/fail-submission.yaml b/test/e2e/bad_examples/fail-submission.yaml index f19e7d6c89..ec8ee13407 100644 --- a/test/e2e/bad_examples/fail-submission.yaml +++ b/test/e2e/bad_examples/fail-submission.yaml @@ -21,11 +21,11 @@ metadata: spec: type: Scala mode: cluster - image: spark:4.0.0 + image: spark:4.0.1 imagePullPolicy: IfNotPresent mainClass: dummy mainApplicationFile: local:///dummy.jar - sparkVersion: 4.0.0 + sparkVersion: 4.0.1 restartPolicy: type: OnFailure onSubmissionFailureRetries: 3 diff --git a/test/e2e/sparkapplication_test.go b/test/e2e/sparkapplication_test.go index bab5b8c94f..3c52de5f45 100644 --- a/test/e2e/sparkapplication_test.go +++ b/test/e2e/sparkapplication_test.go @@ -33,6 +33,7 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" "k8s.io/apimachinery/pkg/util/yaml" + clientretry "k8s.io/client-go/util/retry" "k8s.io/utils/ptr" "github.com/kubeflow/spark-operator/v2/api/v1beta2" @@ -440,10 +441,15 @@ var _ = Describe("Example SparkApplication", func() { By("Suspending Spark Application") Eventually(func(g Gomega) { - app := &v1beta2.SparkApplication{} - Expect(k8sClient.Get(ctx, key, app)).To(Succeed()) - app.Spec.Suspend = ptr.To(true) - Expect(k8sClient.Update(ctx, app)).To(Succeed()) + err := clientretry.RetryOnConflict(clientretry.DefaultRetry, func() error { + app := &v1beta2.SparkApplication{} + if err := k8sClient.Get(ctx, key, app); err != nil { + return err + } + app.Spec.Suspend = ptr.To(true) + return k8sClient.Update(ctx, app) + }) + g.Expect(err).NotTo(HaveOccurred()) }).WithTimeout(5 * time.Second).Should(Succeed()) By("Waiting for SparkApplication to Suspended") @@ -455,10 +461,15 @@ var _ = Describe("Example SparkApplication", func() { By("Resuming for SparkApplication") Eventually(func(g Gomega) { - app := &v1beta2.SparkApplication{} - Expect(k8sClient.Get(ctx, key, app)).To(Succeed()) - app.Spec.Suspend = ptr.To(false) - Expect(k8sClient.Update(ctx, app)).To(Succeed()) + err := clientretry.RetryOnConflict(clientretry.DefaultRetry, func() error { + app := &v1beta2.SparkApplication{} + if err := k8sClient.Get(ctx, key, app); err != nil { + return err + } + app.Spec.Suspend = ptr.To(false) + return k8sClient.Update(ctx, app) + }) + g.Expect(err).NotTo(HaveOccurred()) }).WithTimeout(5 * time.Second).Should(Succeed()) By("Waiting for SparkApplication to Running") diff --git a/test/e2e/suit_test.go b/test/e2e/suite_test.go similarity index 98% rename from test/e2e/suit_test.go rename to test/e2e/suite_test.go index 41f5d604ab..6fd1ead9ca 100644 --- a/test/e2e/suit_test.go +++ b/test/e2e/suite_test.go @@ -41,6 +41,7 @@ import ( "k8s.io/client-go/kubernetes" "k8s.io/client-go/kubernetes/scheme" "k8s.io/client-go/rest" + "k8s.io/utils/ptr" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/envtest" logf "sigs.k8s.io/controller-runtime/pkg/log" @@ -48,7 +49,6 @@ import ( "github.com/kubeflow/spark-operator/v2/api/v1alpha1" "github.com/kubeflow/spark-operator/v2/api/v1beta2" - "github.com/kubeflow/spark-operator/v2/pkg/util" // 
+kubebuilder:scaffold:imports ) @@ -94,8 +94,8 @@ var _ = BeforeSuite(func() { // Note that you must have the required binaries setup under the bin directory to perform // the tests directly. When we run make test it will be setup and used automatically. BinaryAssetsDirectory: filepath.Join("..", "..", "bin", "k8s", - fmt.Sprintf("1.32.0-%s-%s", runtime.GOOS, runtime.GOARCH)), - UseExistingCluster: util.BoolPtr(true), + fmt.Sprintf("1.33.0-%s-%s", runtime.GOOS, runtime.GOARCH)), + UseExistingCluster: ptr.To(true), } cfg, err = testEnv.Start()
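The e2e hunks above wrap the Suspend/Resume updates in clientretry.RetryOnConflict so a stale resourceVersion no longer fails the Eventually block outright. A self-contained sketch of that retry behaviour, simulating the conflict instead of talking to a cluster:

package main

import (
	"errors"
	"fmt"

	apierrors "k8s.io/apimachinery/pkg/api/errors"
	"k8s.io/apimachinery/pkg/runtime/schema"
	"k8s.io/client-go/util/retry"
)

func main() {
	attempts := 0

	// Simulate an Update that hits a resourceVersion conflict twice before succeeding;
	// in the e2e test above the closure re-Gets the SparkApplication and retries the Update.
	err := retry.RetryOnConflict(retry.DefaultRetry, func() error {
		attempts++
		if attempts < 3 {
			return apierrors.NewConflict(
				schema.GroupResource{Group: "sparkoperator.k8s.io", Resource: "sparkapplications"},
				"spark-pi",
				errors.New("the object has been modified"),
			)
		}
		return nil
	})

	fmt.Println(attempts, err) // 3 <nil>
}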