From ac9b257a7a249c4b4c20b04d4c95ff8354c5b4e0 Mon Sep 17 00:00:00 2001 From: Matt Prahl Date: Wed, 29 Jan 2025 12:26:52 -0500 Subject: [PATCH] fix(CI): Use the correct image registry for replacements in integration tests (#11564) * Use the correct image registry for replacements in integration tests The image registry was changed to GitHub Container Registry in the 2.4 release. Signed-off-by: mprahl * Print the pod logs when the pods fail to start in integration tests Signed-off-by: mprahl * Fix the sample compilation in the API server container build Signed-off-by: mprahl * Show the output when building the container images in CI Signed-off-by: mprahl --------- Signed-off-by: mprahl --- .github/resources/manifests/argo/kustomization.yaml | 6 +++--- .../resources/manifests/tekton/kustomization.yaml | 6 +++--- .github/resources/scripts/build-images.sh | 10 +++++----- .../scripts/kfp-readiness/wait_for_pods.py | 13 +++++++++++++ backend/Dockerfile | 4 ++-- 5 files changed, 26 insertions(+), 13 deletions(-) diff --git a/.github/resources/manifests/argo/kustomization.yaml b/.github/resources/manifests/argo/kustomization.yaml index cd2c6bdb0b3..825e2695db8 100644 --- a/.github/resources/manifests/argo/kustomization.yaml +++ b/.github/resources/manifests/argo/kustomization.yaml @@ -5,13 +5,13 @@ resources: - ../../../../manifests/kustomize/env/platform-agnostic images: -- name: gcr.io/ml-pipeline/api-server +- name: ghcr.io/kubeflow/kfp-api-server newName: kind-registry:5000/apiserver newTag: latest -- name: gcr.io/ml-pipeline/persistenceagent +- name: ghcr.io/kubeflow/kfp-persistence-agent newName: kind-registry:5000/persistenceagent newTag: latest -- name: gcr.io/ml-pipeline/scheduledworkflow +- name: ghcr.io/kubeflow/kfp-scheduled-workflow-controller newName: kind-registry:5000/scheduledworkflow newTag: latest diff --git a/.github/resources/manifests/tekton/kustomization.yaml b/.github/resources/manifests/tekton/kustomization.yaml index a86686a70b1..391a26b9367 100644 --- a/.github/resources/manifests/tekton/kustomization.yaml +++ b/.github/resources/manifests/tekton/kustomization.yaml @@ -14,13 +14,13 @@ resources: # when application is deleted. images: -- name: gcr.io/ml-pipeline/api-server +- name: ghcr.io/kubeflow/kfp-api-server newName: kind-registry:5000/apiserver newTag: latest -- name: gcr.io/ml-pipeline/persistenceagent +- name: ghcr.io/kubeflow/kfp-persistence-agent newName: kind-registry:5000/persistenceagent newTag: latest -- name: gcr.io/ml-pipeline/scheduledworkflow +- name: ghcr.io/kubeflow/kfp-scheduled-workflow-controller newName: kind-registry:5000/scheduledworkflow newTag: latest - name: '*/aipipeline/tekton-exithandler-controller' diff --git a/.github/resources/scripts/build-images.sh b/.github/resources/scripts/build-images.sh index a70d295c291..7cb06b3a037 100755 --- a/.github/resources/scripts/build-images.sh +++ b/.github/resources/scripts/build-images.sh @@ -25,35 +25,35 @@ EXIT_CODE=0 docker system prune -a -f -docker build -q -t "${REGISTRY}/apiserver:${TAG}" -f backend/Dockerfile . && docker push "${REGISTRY}/apiserver:${TAG}" || EXIT_CODE=$? +docker build --progress=plain -t "${REGISTRY}/apiserver:${TAG}" -f backend/Dockerfile . && docker push "${REGISTRY}/apiserver:${TAG}" || EXIT_CODE=$? if [[ $EXIT_CODE -ne 0 ]] then echo "Failed to build apiserver image." exit $EXIT_CODE fi -docker build -q -t "${REGISTRY}/persistenceagent:${TAG}" -f backend/Dockerfile.persistenceagent . && docker push "${REGISTRY}/persistenceagent:${TAG}" || EXIT_CODE=$? +docker build --progress=plain -t "${REGISTRY}/persistenceagent:${TAG}" -f backend/Dockerfile.persistenceagent . && docker push "${REGISTRY}/persistenceagent:${TAG}" || EXIT_CODE=$? if [[ $EXIT_CODE -ne 0 ]] then echo "Failed to build persistenceagent image." exit $EXIT_CODE fi -docker build -q -t "${REGISTRY}/scheduledworkflow:${TAG}" -f backend/Dockerfile.scheduledworkflow . && docker push "${REGISTRY}/scheduledworkflow:${TAG}" || EXIT_CODE=$? +docker build --progress=plain -t "${REGISTRY}/scheduledworkflow:${TAG}" -f backend/Dockerfile.scheduledworkflow . && docker push "${REGISTRY}/scheduledworkflow:${TAG}" || EXIT_CODE=$? if [[ $EXIT_CODE -ne 0 ]] then echo "Failed to build scheduledworkflow image." exit $EXIT_CODE fi -docker build -q -t "${REGISTRY}/driver:${TAG}" -f backend/Dockerfile.driver . && docker push "${REGISTRY}/driver:${TAG}" || EXIT_CODE=$? +docker build --progress=plain -t "${REGISTRY}/driver:${TAG}" -f backend/Dockerfile.driver . && docker push "${REGISTRY}/driver:${TAG}" || EXIT_CODE=$? if [[ $EXIT_CODE -ne 0 ]] then echo "Failed to build driver image." exit $EXIT_CODE fi -docker build -q -t "${REGISTRY}/launcher:${TAG}" -f backend/Dockerfile.launcher . && docker push "${REGISTRY}/launcher:${TAG}" || EXIT_CODE=$? +docker build --progress=plain -t "${REGISTRY}/launcher:${TAG}" -f backend/Dockerfile.launcher . && docker push "${REGISTRY}/launcher:${TAG}" || EXIT_CODE=$? if [[ $EXIT_CODE -ne 0 ]] then echo "Failed to build launcher image." diff --git a/.github/resources/scripts/kfp-readiness/wait_for_pods.py b/.github/resources/scripts/kfp-readiness/wait_for_pods.py index ebc7546a300..cc405bcbe21 100644 --- a/.github/resources/scripts/kfp-readiness/wait_for_pods.py +++ b/.github/resources/scripts/kfp-readiness/wait_for_pods.py @@ -13,6 +13,17 @@ config.load_kube_config() v1 = client.CoreV1Api() +def log_pods(): + pods = v1.list_namespaced_pod(namespace=namespace) + + for pod in pods.items: + try: + logging.info( + f"---- Pod {namespace}/{pod.metadata.name} logs ----\n" + + v1.read_namespaced_pod_log(pod.metadata.name, namespace) + ) + except client.exceptions.ApiException: + continue def get_pod_statuses(): pods = v1.list_namespaced_pod(namespace=namespace) @@ -74,6 +85,8 @@ def check_pods(calm_time=10, timeout=600, retries_after_ready=5): logging.info(f"Pods are still stabilizing. Retrying in {calm_time} seconds...") time.sleep(calm_time) else: + log_pods() + raise Exception("Pods did not stabilize within the timeout period.") logging.info("Final pod statuses:") diff --git a/backend/Dockerfile b/backend/Dockerfile index 22d917aa24d..082f910305f 100644 --- a/backend/Dockerfile +++ b/backend/Dockerfile @@ -54,9 +54,9 @@ COPY backend/src/apiserver/config/sample_config.json /samples/ # Compiling the preloaded samples. # The default image is replaced with the GCR-hosted python image. RUN set -e; \ - < /samples/sample_config.json jq .[].file --raw-output | while read pipeline_yaml; do \ + < /samples/sample_config.json jq ".pipelines[].file" --raw-output | while read pipeline_yaml; do \ pipeline_py="${pipeline_yaml%.yaml}"; \ - python3 "$pipeline_py"; \ + echo "Compiling: \"$pipeline_py\"" && python3 "$pipeline_py" && echo -n "Output: " && ls "$pipeline_py.yaml"; \ done # 3. Start api web server