Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .ciux
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ dependencies:
- image: gitlab-registry.in2p3.fr/astrolabsoftware/fink/fink-deps-science-ztf:latest
labels:
build: "science"
- package: github.com/k8s-school/ktbx@v1.1.4-rc7
- package: github.com/k8s-school/ktbx@v1.1.6-rc5
labels:
itest: "optional"
- package: github.com/astrolabsoftware/finkctl/v3@v3.1.3-rc3
Expand Down
146 changes: 107 additions & 39 deletions .github/workflows/e2e-common.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,11 @@ on:
required: false
type: string
default: "v0.20.0"
skip_artifacts:
description: 'Skip artifact upload/download (for self-hosted runners)'
required: false
type: boolean
default: false
secrets:
registry_username:
required: true
Expand All @@ -24,16 +29,17 @@ env:
CIUX_VERSION: v0.0.7-rc1
GHA_BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
SUFFIX: ${{ inputs.suffix }}
STORAGE: ${{ inputs.storage }}
MONITORING_OPT: "-m"
# Override the self-hosted runner value
POD_NAMESPACE: default
SHARED_IMAGE_PATH: "/tmp/fink-ci-image.tar"
jobs:
build:
name: Build image
runs-on: ${{ fromJSON(inputs.runner) }}
outputs:
image: ${{ steps.export.outputs.IMAGE }}
ciux_image_url: ${{ steps.export.outputs.CIUX_IMAGE_URL }}
skip_artifacts: ${{ steps.export.outputs.skip_artifacts }}
steps:
- name: Checkout code
uses: actions/checkout@v4
Expand All @@ -51,18 +57,33 @@ jobs:
- name: Export fink-broker image
id: export
run: |
mkdir -p artifacts
# Cannot use CIUXCONFIG because it may not have been created yet
$(ciux get image --check $PWD --suffix "${{ env.SUFFIX }}" --env)
if [ $CIUX_BUILD = true ]; then
if [ "${{ inputs.skip_artifacts }}" = "true" ]; then
# For self-hosted: save image to shared location to persist between jobs
SHARED_IMAGE_PATH="${{ env.SHARED_IMAGE_PATH }}"
echo "Removing existing shared image file $SHARED_IMAGE_PATH"
rm -f "$SHARED_IMAGE_PATH"
echo "Saving image $CIUX_IMAGE_URL to $SHARED_IMAGE_PATH"
docker save "$CIUX_IMAGE_URL" -o "$SHARED_IMAGE_PATH"
echo "CIUX_IMAGE_URL=$CIUX_IMAGE_URL" >> "$GITHUB_OUTPUT"
echo "SHARED_IMAGE_PATH=$SHARED_IMAGE_PATH" >> "$GITHUB_OUTPUT"
else
# For GitHub runners: use artifacts
mkdir -p artifacts
echo "Export $CIUX_IMAGE_URL to Github artifact store"
docker save "$CIUX_IMAGE_URL" > artifacts/image.tar
echo "CIUX_IMAGE_URL=$CIUX_IMAGE_URL" >> "$GITHUB_OUTPUT"
fi
echo "skip_artifacts=${{ inputs.skip_artifacts }}" >> "$GITHUB_OUTPUT"
else
echo "Using existing image $CIUX_IMAGE_URL"
touch artifacts/empty
echo "skip_artifacts=true" >> "$GITHUB_OUTPUT"
echo "CIUX_IMAGE_URL=$CIUX_IMAGE_URL" >> "$GITHUB_OUTPUT"
fi
echo "IMAGE=$CIUX_IMAGE_URL" >> "$GITHUB_OUTPUT"
- uses: actions/upload-artifact@v4
if: ${{ steps.export.outputs.skip_artifacts != 'true' }}
with:
name: docker-artifact
path: artifacts
Expand All @@ -73,8 +94,8 @@ jobs:
name: Run integration tests
runs-on: ${{ fromJSON(inputs.runner) }}
outputs:
new_image: ${{ steps.promote.outputs.NEW_IMAGE }}
promoted_image: ${{ steps.promote.outputs.PROMOTED_IMAGE }}
ciux_build: ${{ steps.promote.outputs.CIUX_BUILD }}
ciux_promoted_image_url: ${{ steps.promote.outputs.CIUX_PROMOTED_IMAGE_URL }}
needs: build
steps:
- name: Checkout code
Expand Down Expand Up @@ -109,14 +130,30 @@ jobs:
run: |
# v0.20.0 does not work on self-hosted runners
./e2e/prereq-install.sh -k "${{ inputs.kind_version }}" ${{ env.MONITORING_OPT}}

- name: Download image
uses: actions/download-artifact@v4
if: ${{ needs.build.outputs.skip_artifacts != 'true' }}
with:
name: docker-artifact
path: artifacts
- name: Load container image inside kind
run: |
if [ -f artifacts/image.tar ]; then
if [ "${{ inputs.skip_artifacts }}" = "true" ]; then
# For self-hosted: load from shared location
SHARED_IMAGE_PATH="${{ env.SHARED_IMAGE_PATH }}"
if [ -f "$SHARED_IMAGE_PATH" ]; then
echo "Loading image ${{ needs.build.outputs.ciux_image_url }} from $SHARED_IMAGE_PATH"
docker load -i "$SHARED_IMAGE_PATH"
cluster_name=$(ciux get clustername $PWD)
kind load docker-image "${{ needs.build.outputs.ciux_image_url }}" --name "$cluster_name"
node=$(kubectl get nodes --selector=node-role.kubernetes.io/control-plane -o jsonpath='{.items[0].metadata.name}')
docker exec -- $node crictl image
else
echo "Error: shared image file $SHARED_IMAGE_PATH not found"
exit 1
fi
elif [ -f artifacts/image.tar ]; then
echo "Loading image from archive"
cluster_name=$(ciux get clustername $PWD)
kind load image-archive artifacts/image.tar --name "$cluster_name"
Expand All @@ -135,12 +172,17 @@ jobs:
- name: Check results
run: |
./e2e/check-results.sh
- name: Delete cluster
  # `always()` makes this teardown run even when an earlier step failed.
  if: always()
  run: |
    # Ask ciux for the cluster name of this checkout, then remove that kind cluster.
    kind_cluster=$(ciux get clustername $PWD)
    kind delete cluster --name "$kind_cluster"
- name: Promote fink-broker image
id: promote
run: |
. .ciux.d/ciux_itest.sh
echo "PROMOTED_IMAGE=$CIUX_PROMOTED_IMAGE_URL" >> "$GITHUB_OUTPUT"
echo "NEW_IMAGE=$CIUX_BUILD" >> "$GITHUB_OUTPUT"
echo "CIUX_PROMOTED_IMAGE_URL=$CIUX_PROMOTED_IMAGE_URL" >> "$GITHUB_OUTPUT"
echo "CIUX_BUILD=$CIUX_BUILD" >> "$GITHUB_OUTPUT"
image-analysis:
name: Analyze image
runs-on: ${{ fromJSON(inputs.runner) }}
Expand All @@ -152,22 +194,33 @@ jobs:
uses: actions/checkout@v3
- name: Download image
  uses: actions/download-artifact@v4
  # Job outputs are strings: a non-empty 'false' is truthy, so negating the
  # output with `!` would skip this step even when an artifact was uploaded.
  # Compare against the literal 'true', as the upload step in the build job does.
  if: ${{ needs.build.outputs.skip_artifacts != 'true' }}
  with:
    name: docker-artifact
    path: artifacts
- name: Load image in local registry
run: |
if [ -f artifacts/image.tar ]; then
echo "Loading image ${{ needs.build.outputs.image }} from archive"
if [ "${{ inputs.skip_artifacts }}" = "true" ]; then
# For self-hosted: load from shared location
SHARED_IMAGE_PATH="${{ env.SHARED_IMAGE_PATH }}"
if [ -f "$SHARED_IMAGE_PATH" ]; then
echo "Loading image ${{ needs.build.outputs.ciux_image_url }} from $SHARED_IMAGE_PATH"
docker load -i "$SHARED_IMAGE_PATH"
else
echo "Error: shared image file $SHARED_IMAGE_PATH not found"
exit 1
fi
elif [ -f artifacts/image.tar ]; then
echo "Loading image ${{ needs.build.outputs.ciux_image_url }} from archive"
docker load --input artifacts/image.tar
else
echo "Using existing image ${{ needs.build.outputs.image }}"
echo "Using existing image ${{ needs.build.outputs.ciux_image_url }}"
fi
- name: Scan fink-broker image
uses: anchore/scan-action@v6
id: scan
with:
image: "${{ needs.build.outputs.image }}"
image: "${{ needs.build.outputs.ciux_image_url }}"
fail-build: false
- name: Display SARIF report
run: |
Expand All @@ -178,59 +231,74 @@ jobs:
sarif_file: ${{ steps.scan.outputs.sarif }}
push:
env:
NEW_IMAGE: ${{ needs.integration-tests.outputs.new_image }}
IMAGE: ${{ needs.build.outputs.image }}
PROMOTED_IMAGE: ${{ needs.integration-tests.outputs.promoted_image }}
CIUX_BUILD: ${{ needs.integration-tests.outputs.ciux_build }}
CIUX_IMAGE_URL: ${{ needs.build.outputs.ciux_image_url }}
CIUX_PROMOTED_IMAGE_URL: ${{ needs.integration-tests.outputs.ciux_promoted_image_url }}
name: Push fink-broker image to IN2P3 registry
runs-on: ${{ fromJSON(inputs.runner) }}
needs: [build, integration-tests]
steps:
- name: Download image
  uses: actions/download-artifact@v4
  # Job outputs are strings: a non-empty 'false' is truthy, so negating the
  # output with `!` would skip this step even when an artifact was uploaded,
  # and the following load step would then find no artifacts/image.tar.
  # Compare against the literal 'true' instead.
  if: ${{ needs.build.outputs.skip_artifacts != 'true' }}
  with:
    name: docker-artifact
    path: artifacts
- name: Load image in local registry
run: |
if [ $NEW_IMAGE = true ]; then
# GHA setup
if [ -f artifacts/image.tar ]; then
echo "Loading image "$IMAGE" from archive"
if [ $CIUX_BUILD = true ]; then
if [ "${{ inputs.skip_artifacts }}" = "true" ]; then
# For self-hosted: load from shared location
SHARED_IMAGE_PATH="${{ env.SHARED_IMAGE_PATH }}"
if [ -f "$SHARED_IMAGE_PATH" ]; then
echo "Loading image $CIUX_IMAGE_URL from $SHARED_IMAGE_PATH"
docker load -i "$SHARED_IMAGE_PATH"
else
echo "Error: shared image file $SHARED_IMAGE_PATH not found"
exit 1
fi
elif [ -f artifacts/image.tar ]; then
# GHA setup
echo "Loading image $CIUX_IMAGE_URL from archive"
docker load --input artifacts/image.tar
# Self-hosted runners, new image is stored in the local registry
else
echo "Error: no image found"
echo "Error: no image source available"
exit 1
fi
else
echo "Using existing image $IMAGE"
echo "Using existing image $CIUX_IMAGE_URL"
fi
- name: Login to DockerHub
uses: docker/login-action@v2
with:
registry: gitlab-registry.in2p3.fr
username: ${{ secrets.registry_username }}
password: ${{ secrets.registry_token }}
- name: Checkout code
uses: actions/checkout@v4
with:
fetch-depth: 0
- uses: actions/setup-go@v4
with:
go-version: '1.21.4'
- name: Install ciux
run: go install github.com/k8s-school/ciux@"${{ env.CIUX_VERSION }}"
- name: Push image to official registry
run: |
if [ $NEW_IMAGE = true ]; then
echo "Push image $PROMOTED_IMAGE"
docker tag "$IMAGE" "$PROMOTED_IMAGE"
docker push "$PROMOTED_IMAGE"
else
if which skopeo; then
echo "skopeo is already installed"
else
echo "Install skopeo"
sudo apt-get update -y
sudo apt-get install -y skopeo
fi
echo "Add image tag $PROMOTED_IMAGE to $IMAGE"
skopeo copy docker://$IMAGE docker://$PROMOTED_IMAGE
fi
./push-image.sh
- uses: act10ns/slack@v1
with:
webhook-url: ${{ secrets.slack_webhook_url }}
status: ${{ job.status }}
if: always()
- name: Cleanup shared image file
  # Runs regardless of the outcome of the previous steps of this job.
  if: always()
  run: |
    # Nothing to clean unless the image was exchanged through the shared file
    # (skip_artifacts mode) instead of the artifact store.
    [ "${{ inputs.skip_artifacts }}" = "true" ] || exit 0
    shared_image="${{ env.SHARED_IMAGE_PATH }}"
    # Stay silent when the file is already gone.
    [ -f "$shared_image" ] || exit 0
    echo "Removing shared image file $shared_image"
    rm -f "$shared_image"

1 change: 1 addition & 0 deletions .github/workflows/e2e-k8s-ztf-science-self-hosted.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ jobs:
with:
suffix: ""
runner: "['self-hosted']"
skip_artifacts: true
secrets:
registry_username: ${{ secrets.REGISTRY_USERNAME }}
registry_token: ${{ secrets.REGISTRY_TOKEN }}
Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
/secret.yaml
/e2e-report.yaml
/.ciux.d
/.claude
/\#TODO.org\#
Expand Down
3 changes: 0 additions & 3 deletions TODO.931

This file was deleted.

2 changes: 0 additions & 2 deletions TODO.latest

This file was deleted.

3 changes: 0 additions & 3 deletions build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -77,9 +77,6 @@ fi

ciux ignite --selector "$SELECTOR" $DIR --suffix "$suffix"


# TODO improve and use
# . $DIR/.ciux.d/ciuxconfig.sh
CIUXCONFIG=$(ciux get configpath --selector $SELECTOR $DIR)
echo "Sourcing ciux config from $CIUXCONFIG"
. $CIUXCONFIG
Expand Down
64 changes: 46 additions & 18 deletions e2e/check-results.sh
Original file line number Diff line number Diff line change
Expand Up @@ -64,38 +64,65 @@ else
expected_topics="16"
fi

# Wait for topics to be created, and check if fink-broker has not crashed in the meantime
# display logs of failed pods if any, and of running pods if no topics after 10 attempts (~10 minutes)
count=0
max_attempts=10
selector="spark-app-name"
err_msg=""
while ! finkctl wait topics --expected "$expected_topics" --timeout 60s -v1 > /dev/null
do
echo "INFO: Waiting for expected topics: $expected_topics"
echo "INFO: Waiting for expected topics: $expected_topics, attempt: $((count+1))/$max_attempts"
sleep 5
echo "INFO: List pods in spark namespace:"
kubectl get pods -n spark
if [ $(kubectl get pods -n spark -l app.kubernetes.io/instance=fink-broker --field-selector=status.phase!=Running | wc -l) -ge 1 ];
then
echo "ERROR: fink-broker has crashed" 1>&2
# Useful for debugging on github actions
echo "ERROR: enabling interactive access for debugging purpose" 1>&2
kubectl get pods -n spark -l app.kubernetes.io/instance=fink-broker
sleep 7200
exit 1

# Look for pods matching $selector that are in the Failed phase. If any exist,
# dump their logs (and the logs of the pods still running), record an error
# message in err_msg and leave the wait loop.
crashed_pods=$(kubectl get pods -n spark -l "$selector" --field-selector=status.phase=Failed -o name)
if [ -n "$crashed_pods" ]; then
    echo "ERROR: crashed pods found: $crashed_pods" 1>&2
    # $crashed_pods is intentionally unquoted: one pod name per word.
    for pod in $crashed_pods
    do
        echo "--- Logs for crashed Pod: $pod ---"
        kubectl logs "$pod" -n spark
    done
    running_pods=$(kubectl get pods -n spark -l "$selector" --field-selector=status.phase=Running -o name)
    if [ -n "$running_pods" ]; then
        echo "INFO: logs of running pods:"
        for pod in $running_pods; do
            echo "--- Logs for running Pod: $pod ---"
            kubectl logs "$pod" -n spark --tail -1
        done
    fi
    # Only store the message here; a redirection on a plain assignment has no
    # effect, and the message is written to stderr once the loop has ended.
    err_msg="ERROR: fink-broker has crashed"
    # echo "ERROR: enabling interactive access for debugging purpose" 1>&2
    # sleep 7200
    break
fi

count=$((count+1))
if [ $count -eq 10 ]; then
echo "ERROR: Timeout waiting for topics to be created" 1>&2
kubectl logs -l sparkoperator.k8s.io/launched-by-spark-operator=true --tail -1
echo "PODS"
kubectl get pods -A
echo "FINK KAFKA TOPICS"
finkctl get topics
sleep 7200
exit 1
if [ $count -eq $max_attempts ]; then
pods=$(kubectl get pods -n spark -l $selector -o name)
for pod in $pods
do
echo "--- Logs for Pod: $pod ---"
kubectl logs "$pod" -n spark --tail -1
done
err_msg="ERROR: fink-broker did not produce expected results after ~10 minutes"
# echo "ERROR: enabling interactive access for debugging purpose" 1>&2
# sleep 7200
break
fi
done
finkctl get topics

if [ -n "$err_msg" ]; then
echo "$err_msg" 1>&2
exit 1
fi

if $monitoring;
then
echo "Checking prometheus exporter is enabled in fink-broker"
if kubectl exec -it -n spark fink-broker-stream2raw-driver -- curl http://localhost:8090/metrics | grep jvm > /dev/null
then
echo "Prometheus exporter is enabled"
Expand All @@ -119,4 +146,5 @@ then

fi


echo "INFO: Fink-broker is running and all topics are created"
Loading
Loading