From cdff9d15ba8db4539a7ee2c6adab6d28c6460679 Mon Sep 17 00:00:00 2001
From: Helber Belmiro <helber.belmiro@gmail.com>
Date: Mon, 8 Dec 2025 18:10:42 -0300
Subject: [PATCH 1/4] Added Argo Workflows logs to the HTML report for failed
 tests

Signed-off-by: Helber Belmiro <helber.belmiro@gmail.com>
---
 .github/actions/test-and-report/action.yml    |  34 +++
 .../scripts/enhance-html-report-with-logs.sh  | 222 ++++++++++++++++++
 .github/workflows/e2e-test-frontend.yml       |   5 +-
 .github/workflows/integration-tests-v1.yml    |   5 +-
 ...kfp-kubernetes-native-migration-tests.yaml |   7 +-
 .github/workflows/kfp-sdk-client-tests.yml    |   5 +-
 .github/workflows/kfp-webhooks.yml            |   5 +-
 .../legacy-v2-api-integration-tests.yml       |   5 +-
 backend/test/end2end/pipeline_e2e_test.go     |  18 +-
 backend/test/testutil/test_utils.go           |  62 +++++
 backend/test/v2/api/integration_suite_test.go |   9 +
 11 files changed, 369 insertions(+), 8 deletions(-)
 create mode 100755 .github/resources/scripts/enhance-html-report-with-logs.sh
diff --git a/.github/actions/test-and-report/action.yml b/.github/actions/test-and-report/action.yml
index 46243143183..d5dc24463d3 100644
--- a/.github/actions/test-and-report/action.yml
+++ b/.github/actions/test-and-report/action.yml
@@ -115,6 +115,40 @@ runs:
         junit2html ${{ inputs.test_directory }}/reports/junit.xml ${{ inputs.test_directory }}/reports/test-report.html
       continue-on-error: true
 
+    - name: Install MinIO Client for log collection
+      id: install-mc
+      if: ${{ steps.run-tests.outcome != 'success' }}
+      shell: bash
+      run: |
+        MC_PATH="$HOME/.local/bin/minio-mc"
+        if [ -f "$MC_PATH" ]; then
+          echo "MinIO client already installed"
+        else
+          echo "Installing MinIO client..."
+          curl -fsSLO --retry 3 https://dl.min.io/client/mc/release/linux-amd64/mc  # -f: an HTTP error fails this step instead of saving an error page as "mc"
+          chmod +x mc
+          mkdir -p "$HOME/.local/bin"
+          mv mc "$MC_PATH"
+        fi
+        echo "MC_PATH=$MC_PATH" >> "$GITHUB_ENV"
+      continue-on-error: true
+
+    - name: Enhance HTML report with workflow logs for failed tests
+      id: enhance-report
+      if: ${{ steps.run-tests.outcome != 'success' && steps.install-mc.outcome == 'success' }}
+      shell: bash
+      run: |
+        MAPPING_FILE="${{ inputs.test_directory }}/reports/test-workflow-mapping.txt"
+        HTML_REPORT="${{ inputs.test_directory }}/reports/test-report.html"
+        if [[ -f "$MAPPING_FILE" && -f "$HTML_REPORT" ]]; then
+          ./.github/resources/scripts/enhance-html-report-with-logs.sh \
+            --mapping-file "$MAPPING_FILE" \
+            --html-report "$HTML_REPORT"
+        else
+          echo "Skipping: mapping file or HTML report not found"
+        fi
+      continue-on-error: true
+
     - name: Configure report name
       id: name_gen
       shell: bash
diff --git a/.github/resources/scripts/enhance-html-report-with-logs.sh b/.github/resources/scripts/enhance-html-report-with-logs.sh
new file mode 100755
index 00000000000..5b0063b93b4
--- /dev/null
+++ b/.github/resources/scripts/enhance-html-report-with-logs.sh
@@ -0,0 +1,222 @@
+#!/usr/bin/env bash
+
+set -e
+
+NAMESPACE="kubeflow"
+MAPPING_FILE=""
+HTML_REPORT=""
+
+while [[ "$#" -gt 0 ]]; do
+    case $1 in
+        --mapping-file) MAPPING_FILE="$2"; shift ;;
+        --html-report) HTML_REPORT="$2"; shift ;;
+        *) echo "Unknown parameter passed: $1"; exit 1 ;;
+    esac
+    shift
+done
+
+if [[ -z "$MAPPING_FILE" || -z "$HTML_REPORT" ]]; then
+    echo "Usage: $0 --mapping-file <path> --html-report <path>"
+    exit 1
+fi
+
+if [[ ! -f "$MAPPING_FILE" ]]; then
+    echo "No test-workflow mapping file found at: $MAPPING_FILE"
+    echo "No failed tests with workflow associations to process."
+    exit 0
+fi
+
+if [[ ! -f "$HTML_REPORT" ]]; then
+    echo "HTML report not found at: $HTML_REPORT"
+    exit 1
+fi
+
+MC_CMD="${MC_PATH:-minio-mc}"
+if ! command -v "$MC_CMD" &>/dev/null; then
+    MC_CMD="mc"
+    if ! command -v "$MC_CMD" &>/dev/null; then
+        echo "ERROR: MinIO client is not installed."
+        exit 1
+    fi
+fi
+
+echo "Using MinIO client: $MC_CMD"
+
+if ! kubectl get namespace "$NAMESPACE" &>/dev/null; then
+    echo "Namespace '$NAMESPACE' does not exist."
+    exit 1
+fi
+
+echo "Checking if Argo Workflows log archiving is enabled..."
+ARTIFACT_REPO=$(kubectl get configmap workflow-controller-configmap -n "$NAMESPACE" -o jsonpath='{.data.artifactRepository}' 2>/dev/null || true)
+if [[ -z "$ARTIFACT_REPO" ]]; then
+    echo "WARNING: Could not read workflow-controller-configmap. Skipping log enhancement."
+    exit 0
+fi
+if ! echo "$ARTIFACT_REPO" | grep -q "archiveLogs: true"; then
+    echo "WARNING: Log archiving is NOT enabled. Skipping log enhancement."
+    exit 0
+fi
+
+BUCKET="mlpipeline"
+LOGS_PREFIX="private-artifacts/${NAMESPACE}"
+
+ACCESS_KEY=$(kubectl get secret mlpipeline-minio-artifact -n "${NAMESPACE}" -o jsonpath='{.data.accesskey}' | base64 -d)
+SECRET_KEY=$(kubectl get secret mlpipeline-minio-artifact -n "${NAMESPACE}" -o jsonpath='{.data.secretkey}' | base64 -d)
+
+kubectl port-forward svc/minio-service -n "${NAMESPACE}" 9000:9000 &
+PF_PID=$!
+sleep 3
+
+cleanup() {
+    kill $PF_PID 2>/dev/null || true
+}
+trap cleanup EXIT
+
+$MC_CMD alias set kfp-minio "http://localhost:9000" "${ACCESS_KEY}" "${SECRET_KEY}" --api S3v4 2>/dev/null || {
+    echo "Failed to configure object storage client."
+    exit 1
+}
+
+
+get_logs_for_workflow() {
+    local wf_name="$1"
+    local logs=""
+
+    LOG_FILES=$($MC_CMD find "kfp-minio/${BUCKET}/${LOGS_PREFIX}/${wf_name}/" --name "*.log" 2>/dev/null || true)
+
+    if [[ -z "${LOG_FILES}" ]]; then
+        LOG_FILES=$($MC_CMD ls --recursive "kfp-minio/${BUCKET}/${LOGS_PREFIX}/${wf_name}/" 2>/dev/null | awk '{print $NF}' || true)
+    fi
+
+    if [[ -z "${LOG_FILES}" ]]; then
+        echo "No log files found for workflow ${wf_name}"
+        return
+    fi
+
+    for LOG_PATH in ${LOG_FILES}; do
+        local step_name
+        step_name=$(basename "$(dirname "$LOG_PATH")")
+        local content
+        content=$($MC_CMD cat "${LOG_PATH}" 2>/dev/null || echo "Could not read log file.")
+        logs+="<div class='step-log'><h4>Step: ${step_name}</h4><pre>${content}</pre></div>"
+    done
+
+    echo "$logs"
+}
+
+LOGS_HTML="<style>
+.workflow-logs-section {
+    margin-top: 40px;
+    padding: 20px;
+    background-color: #f8f9fa;
+    border: 1px solid #dee2e6;
+    border-radius: 8px;
+}
+.workflow-logs-section h2 {
+    color: #dc3545;
+    border-bottom: 2px solid #dc3545;
+    padding-bottom: 10px;
+    margin-bottom: 20px;
+}
+.test-workflow-logs {
+    margin-bottom: 30px;
+    padding: 15px;
+    background-color: #fff;
+    border: 1px solid #ccc;
+    border-radius: 4px;
+}
+.test-workflow-logs h3 {
+    color: #495057;
+    margin-top: 0;
+}
+.workflow-name {
+    font-weight: bold;
+    color: #0066cc;
+    margin-bottom: 10px;
+}
+.step-log {
+    margin: 10px 0;
+    padding: 10px;
+    background-color: #f1f1f1;
+    border-left: 3px solid #007bff;
+}
+.step-log h4 {
+    margin: 0 0 10px 0;
+    color: #333;
+}
+.step-log pre {
+    margin: 0;
+    padding: 10px;
+    background-color: #282c34;
+    color: #abb2bf;
+    overflow-x: auto;
+    font-size: 12px;
+    line-height: 1.4;
+    border-radius: 4px;
+    max-height: 500px;
+    overflow-y: auto;
+}
+</style>
+<div class='workflow-logs-section'>
+<h2>Workflow Logs for Failed Tests</h2>"
+
+HAS_LOGS=false
+
+echo "=== Processing test-workflow mapping file ==="
+echo "Mapping file contents:"
+cat "$MAPPING_FILE"
+echo "=== End mapping file ==="
+
+while IFS='|' read -r test_name workflow_names; do
+    [[ -z "$test_name" || -z "$workflow_names" ]] && continue
+
+    echo "Processing test: $test_name with workflows: $workflow_names"
+
+    LOGS_HTML+="<div class='test-workflow-logs'>"
+    LOGS_HTML+="<h3>Test: ${test_name}</h3>"
+
+    IFS=',' read -ra WORKFLOW_ARRAY <<< "$workflow_names"
+    for wf_name in "${WORKFLOW_ARRAY[@]}"; do
+        echo "  Processing workflow: $wf_name"
+
+        LOGS_HTML+="<div class='workflow-name'>Workflow: ${wf_name}</div>"
+
+        echo "  Searching for logs at: kfp-minio/${BUCKET}/${LOGS_PREFIX}/${wf_name}/"
+        echo "  Listing bucket contents for this workflow:"
+        $MC_CMD ls "kfp-minio/${BUCKET}/${LOGS_PREFIX}/${wf_name}/" 2>&1 | head -20 || echo "  No directory found for workflow"
+        
+        wf_logs=$(get_logs_for_workflow "$wf_name")
+        if [[ -n "$wf_logs" ]]; then
+            LOGS_HTML+="$wf_logs"
+            HAS_LOGS=true
+            echo "  Found logs for workflow $wf_name"
+        else
+            LOGS_HTML+="<p>No logs found in object storage for workflow ${wf_name}</p>"
+            echo "  No logs found for workflow $wf_name"
+        fi
+    done
+
+    LOGS_HTML+="</div>"
+done < "$MAPPING_FILE"
+
+LOGS_HTML+="</div>"
+
+if [[ "$HAS_LOGS" == "true" ]]; then
+    # Write logs to a temp file to avoid ARG_MAX limit with sed
+    TEMP_LOGS=$(mktemp)
+    echo "$LOGS_HTML" > "$TEMP_LOGS"
+    
+    # Insert the logs before </body> using a temp file approach
+    TEMP_HTML=$(mktemp)
+    # Remove </body></html> from end, append logs, then add closing tags back
+    sed 's|</body>||; s|</html>||' "$HTML_REPORT" > "$TEMP_HTML"
+    cat "$TEMP_LOGS" >> "$TEMP_HTML"
+    echo "</body></html>" >> "$TEMP_HTML"
+    mv "$TEMP_HTML" "$HTML_REPORT"
+    rm -f "$TEMP_LOGS"
+    
+    echo "HTML report enhanced with workflow logs for failed tests."
+else
+    echo "No workflow logs found to add to the HTML report."
+fi
diff --git a/.github/workflows/e2e-test-frontend.yml b/.github/workflows/e2e-test-frontend.yml
index cd415c4826b..0958e4f45e2 100644
--- a/.github/workflows/e2e-test-frontend.yml
+++ b/.github/workflows/e2e-test-frontend.yml
@@ -71,7 +71,10 @@ jobs:
         if: ${{ steps.deploy.outcome != 'success' || steps.forward-frontend-port.outcome != 'success' || steps.tests.outcome != 'success' }}
         run: |
           ./.github/resources/scripts/collect-logs.sh --ns kubeflow --output /tmp/tmp_pod_log.txt
-          exit 1
+
+      - name: Mark workflow as failed
+        if: ${{ steps.deploy.outcome != 'success' || steps.forward-frontend-port.outcome != 'success' || steps.tests.outcome != 'success' }}
+        run: exit 1
 
       - name: Collect test results
         if: always()
diff --git a/.github/workflows/integration-tests-v1.yml b/.github/workflows/integration-tests-v1.yml
index 54115d80f69..0b0800bf69f 100644
--- a/.github/workflows/integration-tests-v1.yml
+++ b/.github/workflows/integration-tests-v1.yml
@@ -86,7 +86,10 @@ jobs:
         if: ${{ steps.forward-mysql-port.outcome != 'success' || steps.integration-tests.outcome != 'success' || steps.initialization-tests.outcome != 'success' }}
         run: |
           ./.github/resources/scripts/collect-logs.sh --ns kubeflow --output /tmp/tmp_pod_log.txt
-          exit 1
+
+      - name: Mark workflow as failed
+        if: ${{ steps.forward-mysql-port.outcome != 'success' || steps.integration-tests.outcome != 'success' || steps.initialization-tests.outcome != 'success' }}
+        run: exit 1
 
       - name: Collect test results
         if: always()
diff --git a/.github/workflows/kfp-kubernetes-native-migration-tests.yaml b/.github/workflows/kfp-kubernetes-native-migration-tests.yaml
index cbbd60e036a..fc46ba89a2e 100644
--- a/.github/workflows/kfp-kubernetes-native-migration-tests.yaml
+++ b/.github/workflows/kfp-kubernetes-native-migration-tests.yaml
@@ -179,7 +179,12 @@ jobs:
           steps.switch-to-k8s-mode.outcome != 'success' || steps.re-forward-api-port.outcome != 'success' || steps.run-k8s-mode-tests.outcome != 'success' }}
         run: |
           ./.github/resources/scripts/collect-logs.sh --ns kubeflow --output /tmp/tmp_pod_log.txt
-          exit 1
+
+      - name: Mark workflow as failed
+        if:
+          ${{ steps.deploy.outcome != 'success' || steps.run-db-mode-tests.outcome != 'success' ||
+          steps.switch-to-k8s-mode.outcome != 'success' || steps.re-forward-api-port.outcome != 'success' || steps.run-k8s-mode-tests.outcome != 'success' }}
+        run: exit 1
 
       - name: Collect test results
         if: always()
diff --git a/.github/workflows/kfp-sdk-client-tests.yml b/.github/workflows/kfp-sdk-client-tests.yml
index 451ff28356e..1d0879600b8 100644
--- a/.github/workflows/kfp-sdk-client-tests.yml
+++ b/.github/workflows/kfp-sdk-client-tests.yml
@@ -111,7 +111,10 @@ jobs:
         if: ${{ steps.deploy.outcome != 'success' || steps.forward-api-port.outcome != 'success' || steps.tests.outcome != 'success'}}
         run: |
           ./.github/resources/scripts/collect-logs.sh --ns kubeflow --output /tmp/tmp_pod_log.txt
-          exit 1
+
+      - name: Mark workflow as failed
+        if: ${{ steps.deploy.outcome != 'success' || steps.forward-api-port.outcome != 'success' || steps.tests.outcome != 'success'}}
+        run: exit 1
 
       - name: Collect test results
         if: always()
diff --git a/.github/workflows/kfp-webhooks.yml b/.github/workflows/kfp-webhooks.yml
index 151157206ea..daee7fcad45 100644
--- a/.github/workflows/kfp-webhooks.yml
+++ b/.github/workflows/kfp-webhooks.yml
@@ -57,7 +57,10 @@ jobs:
         if: ${{ steps.create-kfp-cluster.outcome != 'success' || steps.deploy.outcome != 'success' || steps.tests.outcome != 'success' }}
         run: |
           ./.github/resources/scripts/collect-logs.sh --ns kubeflow --output /tmp/tmp_pod_log.txt
-          exit 1
+
+      - name: Mark workflow as failed
+        if: ${{ steps.create-kfp-cluster.outcome != 'success' || steps.deploy.outcome != 'success' || steps.tests.outcome != 'success' }}
+        run: exit 1
 
       - name: Collect test results
         if: always()
diff --git a/.github/workflows/legacy-v2-api-integration-tests.yml b/.github/workflows/legacy-v2-api-integration-tests.yml
index e28e78dda0a..06a38d42b71 100644
--- a/.github/workflows/legacy-v2-api-integration-tests.yml
+++ b/.github/workflows/legacy-v2-api-integration-tests.yml
@@ -97,7 +97,10 @@ jobs:
         if: ${{ steps.deploy.outcome != 'success' || steps.forward-mlmd-port.outcome != 'success' || steps.tests.outcome != 'success' }}
         run: |
           ./.github/resources/scripts/collect-logs.sh --ns kubeflow --output /tmp/tmp_pod_log.txt
-          exit 1
+
+      - name: Mark workflow as failed
+        if: ${{ steps.deploy.outcome != 'success' || steps.forward-mlmd-port.outcome != 'success' || steps.tests.outcome != 'success' }}
+        run: exit 1
 
       - name: Collect test results
         if: always()
diff --git a/backend/test/end2end/pipeline_e2e_test.go b/backend/test/end2end/pipeline_e2e_test.go
index ed0a0536ca5..d13adc1cb65 100644
--- a/backend/test/end2end/pipeline_e2e_test.go
+++ b/backend/test/end2end/pipeline_e2e_test.go
@@ -16,6 +16,7 @@ package end2end
 
 import (
 	"fmt"
+	"os"
 	"path/filepath"
 	"strconv"
 	"strings"
@@ -94,10 +95,21 @@ var _ = Describe("Upload and Verify Pipeline Run >", Label(FullRegression), func
 	})
 
 	AfterEach(func() {
-
-		// Delete pipelines created during the test
 		logger.Log("################### Global Cleanup after each test #####################")
 
+		// Capture workflow mapping for failed tests before cleanup deletes the workflows
+		if CurrentSpecReport().Failed() && len(testContext.PipelineRun.CreatedRunIds) > 0 {
+			currentDir, err := os.Getwd()
+			if err == nil {
+				testutil.WriteTestWorkflowMapping(
+					GinkgoT().Name(),
+					testContext.PipelineRun.CreatedRunIds,
+					testutil.GetNamespace(),
+					filepath.Join(currentDir, testReportDirectory, "test-workflow-mapping.txt"),
+				)
+			}
+		}
+
 		logger.Log("Deleting %d run(s)", len(testContext.PipelineRun.CreatedRunIds))
 		for _, runID := range testContext.PipelineRun.CreatedRunIds {
 			runID := runID
@@ -138,6 +150,8 @@ var _ = Describe("Upload and Verify Pipeline Run >", Label(FullRegression), func
 		for _, pipelineFile := range pipelineFiles {
 			It(fmt.Sprintf("Upload %s pipeline", pipelineFile), FlakeAttempts(2), func() {
 				validatePipelineRunSuccess(pipelineFile, pipelineDir, testContext)
+				// TEMPORARY: Force failure to test workflow logs in HTML report
+				Fail("Intentional failure to test workflow logs in HTML report")
 			})
 		}
 	})
diff --git a/backend/test/testutil/test_utils.go b/backend/test/testutil/test_utils.go
index f08f2318fdc..90f4be47fa0 100644
--- a/backend/test/testutil/test_utils.go
+++ b/backend/test/testutil/test_utils.go
@@ -22,6 +22,7 @@ import (
 	"math/rand"
 	"net/http"
 	"os"
+	"os/exec"
 	"path/filepath"
 	"regexp"
 	"strings"
@@ -88,6 +89,67 @@ func WriteLogFile(specReport types.SpecReport, testName, logDirectory string) {
 	}
 }
 
+// GetWorkflowNameByRunID retrieves the Argo Workflow name for a given pipeline run ID
+// by querying the Kubernetes API using the pipeline/runid label.
+func GetWorkflowNameByRunID(namespace string, runID string) string {
+	cmd := exec.Command("kubectl", "get", "workflows", "-n", namespace,
+		"-l", fmt.Sprintf("pipeline/runid=%s", runID),
+		"-o", "jsonpath={.items[0].metadata.name}")
+	output, err := cmd.Output()
+	if err != nil {
+		logger.Log("Failed to get workflow for run ID %s: %v", runID, err)
+		return ""
+	}
+	workflowName := strings.TrimSpace(string(output))
+	if workflowName == "" {
+		logger.Log("No workflow found for run ID %s", runID)
+	}
+	return workflowName
+}
+
+// WriteTestWorkflowMapping appends one "TEST_NAME|WORKFLOW_NAME1,WORKFLOW_NAME2,..." line to
+// mappingFilePath for a failed test, resolving each run ID to its Argo Workflow name in namespace.
+// It is best-effort: failures are logged, never returned; nothing is written if no workflow resolves.
+func WriteTestWorkflowMapping(testName string, runIDs []string, namespace string, mappingFilePath string) {
+	if len(runIDs) == 0 {
+		return
+	}
+
+	var workflowNames []string
+	for _, runID := range runIDs {
+		wfName := GetWorkflowNameByRunID(namespace, runID)
+		if wfName != "" {
+			workflowNames = append(workflowNames, wfName)
+		}
+	}
+
+	if len(workflowNames) == 0 {
+		logger.Log("No workflows found for run IDs %v, skipping mapping", runIDs)
+		return
+	}
+
+	if err := os.MkdirAll(filepath.Dir(mappingFilePath), 0755); err != nil {
+		logger.Log("Failed to create mapping directory due to: %s", err.Error())
+		return
+	}
+	file, err := os.OpenFile(mappingFilePath, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644)
+	if err != nil {
+		logger.Log("Failed to open mapping file due to: %s", err.Error())
+		return
+	}
+	defer func() {
+		if closeErr := file.Close(); closeErr != nil {
+			logger.Log("Failed to close mapping file: %s", closeErr.Error())
+		}
+	}()
+	entry := fmt.Sprintf("%s|%s\n", testName, strings.Join(workflowNames, ","))
+	if _, err := file.WriteString(entry); err != nil {
+		logger.Log("Failed to write to mapping file due to: %s", err.Error())
+		return // do not report success below when the write failed
+	}
+	logger.Log("Wrote test-workflow mapping: %s -> %v", testName, workflowNames)
+}
+
 // GetNamespace - Get Namespace based on the deployment mode
 func GetNamespace() string {
 	if *config.KubeflowMode || *config.MultiUserMode {
diff --git a/backend/test/v2/api/integration_suite_test.go b/backend/test/v2/api/integration_suite_test.go
index 8e7345ef52c..aa891992b66 100644
--- a/backend/test/v2/api/integration_suite_test.go
+++ b/backend/test/v2/api/integration_suite_test.go
@@ -193,6 +193,15 @@ var _ = ReportAfterEach(func(specReport types.SpecReport) {
 		currentDir, err := os.Getwd()
 		Expect(err).NotTo(HaveOccurred(), "Failed to get current directory")
 		testutil.WriteLogFile(specReport, GinkgoT().Name(), filepath.Join(currentDir, testLogsDirectory))
+
+		if len(testContext.PipelineRun.CreatedRunIds) > 0 {
+			testutil.WriteTestWorkflowMapping(
+				GinkgoT().Name(),
+				testContext.PipelineRun.CreatedRunIds,
+				testutil.GetNamespace(),
+				filepath.Join(currentDir, testReportDirectory, "test-workflow-mapping.txt"),
+			)
+		}
 	} else {
 		log.Printf("Test passed")
 	}

From d08715f8edfac4a938fb264cbcdcd86445fca882 Mon Sep 17 00:00:00 2001
From: Helber Belmiro <helber.belmiro@gmail.com>
Date: Sat, 13 Dec 2025 09:35:20 -0300
Subject: [PATCH 2/4] Removed css

Signed-off-by: Helber Belmiro <helber.belmiro@gmail.com>
---
 .github/actions/test-and-report/action.yml    |  6 +-
 ...-with-logs.sh => collect-workflow-logs.sh} | 83 ++++---------------
 2 files changed, 20 insertions(+), 69 deletions(-)
 rename .github/resources/scripts/{enhance-html-report-with-logs.sh => collect-workflow-logs.sh} (73%)

diff --git a/.github/actions/test-and-report/action.yml b/.github/actions/test-and-report/action.yml
index d5dc24463d3..b7b5af1cf6d 100644
--- a/.github/actions/test-and-report/action.yml
+++ b/.github/actions/test-and-report/action.yml
@@ -133,15 +133,15 @@ runs:
         echo "MC_PATH=$MC_PATH" >> "$GITHUB_ENV"
       continue-on-error: true
 
-    - name: Enhance HTML report with workflow logs for failed tests
-      id: enhance-report
+    - name: Collect workflow logs for failed tests
+      id: collect-workflow-logs
       if: ${{ steps.run-tests.outcome != 'success' && steps.install-mc.outcome == 'success' }}
       shell: bash
       run: |
         MAPPING_FILE="${{ inputs.test_directory }}/reports/test-workflow-mapping.txt"
         HTML_REPORT="${{ inputs.test_directory }}/reports/test-report.html"
         if [[ -f "$MAPPING_FILE" && -f "$HTML_REPORT" ]]; then
-          ./.github/resources/scripts/enhance-html-report-with-logs.sh \
+          ./.github/resources/scripts/collect-workflow-logs.sh \
             --mapping-file "$MAPPING_FILE" \
             --html-report "$HTML_REPORT"
         else
diff --git a/.github/resources/scripts/enhance-html-report-with-logs.sh b/.github/resources/scripts/collect-workflow-logs.sh
similarity index 73%
rename from .github/resources/scripts/enhance-html-report-with-logs.sh
rename to .github/resources/scripts/collect-workflow-logs.sh
index 5b0063b93b4..705751c1485 100755
--- a/.github/resources/scripts/enhance-html-report-with-logs.sh
+++ b/.github/resources/scripts/collect-workflow-logs.sh
@@ -50,12 +50,12 @@ fi
 echo "Checking if Argo Workflows log archiving is enabled..."
 ARTIFACT_REPO=$(kubectl get configmap workflow-controller-configmap -n "$NAMESPACE" -o jsonpath='{.data.artifactRepository}' 2>/dev/null || true)
 if [[ -z "$ARTIFACT_REPO" ]]; then
-    echo "WARNING: Could not read workflow-controller-configmap. Skipping log enhancement."
-    exit 0
+    echo "ERROR: Could not read workflow-controller-configmap."
+    exit 1
 fi
 if ! echo "$ARTIFACT_REPO" | grep -q "archiveLogs: true"; then
-    echo "WARNING: Log archiving is NOT enabled. Skipping log enhancement."
-    exit 0
+    echo "ERROR: Log archiving is NOT enabled."
+    exit 1
 fi
 
 BUCKET="mlpipeline"
@@ -99,67 +99,15 @@ get_logs_for_workflow() {
         step_name=$(basename "$(dirname "$LOG_PATH")")
         local content
         content=$($MC_CMD cat "${LOG_PATH}" 2>/dev/null || echo "Could not read log file.")
-        logs+="<div class='step-log'><h4>Step: ${step_name}</h4><pre>${content}</pre></div>"
+        logs+="<details><summary>Step: ${step_name}</summary><pre>${content}</pre></details>"
     done
 
     echo "$logs"
 }
 
-LOGS_HTML="<style>
-.workflow-logs-section {
-    margin-top: 40px;
-    padding: 20px;
-    background-color: #f8f9fa;
-    border: 1px solid #dee2e6;
-    border-radius: 8px;
-}
-.workflow-logs-section h2 {
-    color: #dc3545;
-    border-bottom: 2px solid #dc3545;
-    padding-bottom: 10px;
-    margin-bottom: 20px;
-}
-.test-workflow-logs {
-    margin-bottom: 30px;
-    padding: 15px;
-    background-color: #fff;
-    border: 1px solid #ccc;
-    border-radius: 4px;
-}
-.test-workflow-logs h3 {
-    color: #495057;
-    margin-top: 0;
-}
-.workflow-name {
-    font-weight: bold;
-    color: #0066cc;
-    margin-bottom: 10px;
-}
-.step-log {
-    margin: 10px 0;
-    padding: 10px;
-    background-color: #f1f1f1;
-    border-left: 3px solid #007bff;
-}
-.step-log h4 {
-    margin: 0 0 10px 0;
-    color: #333;
-}
-.step-log pre {
-    margin: 0;
-    padding: 10px;
-    background-color: #282c34;
-    color: #abb2bf;
-    overflow-x: auto;
-    font-size: 12px;
-    line-height: 1.4;
-    border-radius: 4px;
-    max-height: 500px;
-    overflow-y: auto;
-}
-</style>
-<div class='workflow-logs-section'>
-<h2>Workflow Logs for Failed Tests</h2>"
+LOGS_HTML="<section id='workflow-logs'>
+<h2>Workflow logs for failed tests</h2>
+<p>Collected from archived Argo Workflows logs in object storage.</p>"
 
 HAS_LOGS=false
 
@@ -173,14 +121,15 @@ while IFS='|' read -r test_name workflow_names; do
 
     echo "Processing test: $test_name with workflows: $workflow_names"
 
-    LOGS_HTML+="<div class='test-workflow-logs'>"
-    LOGS_HTML+="<h3>Test: ${test_name}</h3>"
+    LOGS_HTML+="<details>"
+    LOGS_HTML+="<summary>Test: ${test_name}</summary>"
 
     IFS=',' read -ra WORKFLOW_ARRAY <<< "$workflow_names"
     for wf_name in "${WORKFLOW_ARRAY[@]}"; do
         echo "  Processing workflow: $wf_name"
 
-        LOGS_HTML+="<div class='workflow-name'>Workflow: ${wf_name}</div>"
+        LOGS_HTML+="<details>"
+        LOGS_HTML+="<summary>Workflow: ${wf_name}</summary>"
 
         echo "  Searching for logs at: kfp-minio/${BUCKET}/${LOGS_PREFIX}/${wf_name}/"
         echo "  Listing bucket contents for this workflow:"
@@ -192,15 +141,17 @@ while IFS='|' read -r test_name workflow_names; do
             HAS_LOGS=true
             echo "  Found logs for workflow $wf_name"
         else
-            LOGS_HTML+="<p>No logs found in object storage for workflow ${wf_name}</p>"
+            LOGS_HTML+="<p>No logs found in object storage for workflow ${wf_name}.</p>"
             echo "  No logs found for workflow $wf_name"
         fi
+
+        LOGS_HTML+="</details>"
     done
 
-    LOGS_HTML+="</div>"
+    LOGS_HTML+="</details>"
 done < "$MAPPING_FILE"
 
-LOGS_HTML+="</div>"
+LOGS_HTML+="</section>"
 
 if [[ "$HAS_LOGS" == "true" ]]; then
     # Write logs to a temp file to avoid ARG_MAX limit with sed

From f1343b5f640fc740d05600300fbac7cdbbd34a24 Mon Sep 17 00:00:00 2001
From: Helber Belmiro <helber.belmiro@gmail.com>
Date: Sat, 13 Dec 2025 12:04:18 -0300
Subject: [PATCH 3/4] Add script to augment JUnit XML with Argo Workflows logs
 for failed tests and update workflow steps

Signed-off-by: Helber Belmiro <helber.belmiro@gmail.com>
---
 .github/actions/test-and-report/action.yml    |  25 +-
 .../augment-junit-xml-with-workflow-logs.py   | 322 ++++++++++++++++++
 2 files changed, 338 insertions(+), 9 deletions(-)
 create mode 100644 .github/resources/scripts/augment-junit-xml-with-workflow-logs.py

diff --git a/.github/actions/test-and-report/action.yml b/.github/actions/test-and-report/action.yml
index b7b5af1cf6d..d97f61af93e 100644
--- a/.github/actions/test-and-report/action.yml
+++ b/.github/actions/test-and-report/action.yml
@@ -107,12 +107,11 @@ runs:
         fi
         ./.github/resources/scripts/collect-logs.sh --ns $NAMESPACE --output /tmp/tmp_pod_log.txt
 
-    - name: Install Junit2Html plugin and generate report
+    - name: Install Junit2Html plugin
       if: (!cancelled()) && steps.collect-logs.outcome != 'failure'
       shell: bash
       run: |
         pip install junit2html
-        junit2html ${{ inputs.test_directory }}/reports/junit.xml ${{ inputs.test_directory }}/reports/test-report.html
       continue-on-error: true
 
     - name: Install MinIO Client for log collection
@@ -133,22 +132,30 @@ runs:
         echo "MC_PATH=$MC_PATH" >> "$GITHUB_ENV"
       continue-on-error: true
 
-    - name: Collect workflow logs for failed tests
-      id: collect-workflow-logs
+    - name: Augment junit.xml with workflow logs
+      id: augment-junit-xml
       if: ${{ steps.run-tests.outcome != 'success' && steps.install-mc.outcome == 'success' }}
       shell: bash
       run: |
         MAPPING_FILE="${{ inputs.test_directory }}/reports/test-workflow-mapping.txt"
-        HTML_REPORT="${{ inputs.test_directory }}/reports/test-report.html"
-        if [[ -f "$MAPPING_FILE" && -f "$HTML_REPORT" ]]; then
-          ./.github/resources/scripts/collect-workflow-logs.sh \
+        JUNIT_XML="${{ inputs.test_directory }}/reports/junit.xml"
+        if [[ -f "$MAPPING_FILE" && -f "$JUNIT_XML" ]]; then
+          python3 ./.github/resources/scripts/augment-junit-xml-with-workflow-logs.py \
+            --junit-xml "$JUNIT_XML" \
             --mapping-file "$MAPPING_FILE" \
-            --html-report "$HTML_REPORT"
+            --namespace "${{ inputs.default_namespace }}"
         else
-          echo "Skipping: mapping file or HTML report not found"
+          echo "Skipping: mapping file or junit.xml not found"
         fi
       continue-on-error: true
 
+    - name: Generate HTML report
+      if: (!cancelled()) && steps.collect-logs.outcome != 'failure'
+      shell: bash
+      run: |
+        junit2html ${{ inputs.test_directory }}/reports/junit.xml ${{ inputs.test_directory }}/reports/test-report.html
+      continue-on-error: true
+
     - name: Configure report name
       id: name_gen
       shell: bash
diff --git a/.github/resources/scripts/augment-junit-xml-with-workflow-logs.py b/.github/resources/scripts/augment-junit-xml-with-workflow-logs.py
new file mode 100644
index 00000000000..78249175db6
--- /dev/null
+++ b/.github/resources/scripts/augment-junit-xml-with-workflow-logs.py
@@ -0,0 +1,322 @@
+#!/usr/bin/env python3
+
+import argparse
+import os
+import re
+import signal
+import subprocess
+import sys
+import tempfile
+import time
+import xml.etree.ElementTree as ET
+
+
+def _run(cmd: list[str], *, check: bool = True, capture: bool = True, text: bool = True) -> subprocess.CompletedProcess:
+    """Run ``cmd`` through ``subprocess.run`` and return the completed process.
+
+    stdout and stderr are piped when ``capture`` is true; otherwise no redirection is requested.
+    """
+    stream = subprocess.PIPE if capture else None
+    options = {"check": check, "stdout": stream, "stderr": stream, "text": text}
+    return subprocess.run(cmd, **options)
+
+
+def _get_artifact_repo(namespace: str) -> str:
+    """Return the ``artifactRepository`` entry of the Argo workflow-controller ConfigMap.
+
+    Reads ``workflow-controller-configmap`` in ``namespace`` through ``kubectl``;
+    yields an empty string when kubectl prints nothing.
+    """
+    query = [
+        "kubectl", "get", "configmap", "workflow-controller-configmap",
+        "-n", namespace,
+        "-o", "jsonpath={.data.artifactRepository}",
+    ]
+    result = _run(query)
+    text = result.stdout
+    return text if text else ""
+
+
+def _get_minio_creds(namespace: str) -> tuple[str, str]:
+    """Fetch the MinIO access and secret keys from the cluster.
+
+    Both values are read with ``kubectl`` from the ``mlpipeline-minio-artifact``
+    secret in ``namespace`` and base64-decoded in-process, so no external
+    ``base64`` binary has to be spawned.
+
+    Returns:
+        An ``(access_key, secret_key)`` tuple of decoded, stripped strings.
+
+    Raises:
+        subprocess.CalledProcessError: if a ``kubectl`` call fails.
+        ValueError: if a stored value is not valid base64.
+    """
+    import base64  # local import keeps this helper self-contained
+
+    def _secret_field(field: str) -> str:
+        encoded = _run(
+            [
+                "kubectl", "get", "secret", "mlpipeline-minio-artifact",
+                "-n", namespace,
+                "-o", f"jsonpath={{.data.{field}}}",
+            ]
+        ).stdout.strip()
+        # kubectl prints the secret's data field still base64-encoded.
+        return base64.b64decode(encoded).decode("utf-8").strip()
+
+    return _secret_field("accesskey"), _secret_field("secretkey")
+
+
+class PortForward:
+    """Run ``kubectl port-forward`` to ``svc/minio-service`` for the duration of a ``with`` block.
+
+    Entering waits for kubectl's "Forwarding from" banner and raises ``RuntimeError`` if it never appears.
+    """
+
+    def __init__(self, namespace: str, local_port: int = 9000, remote_port: int = 9000):
+        self._namespace = namespace
+        self._local_port = local_port
+        self._remote_port = remote_port
+        self._proc: subprocess.Popen[str] | None = None
+
+    def __enter__(self):
+        self._proc = subprocess.Popen(
+            [
+                "kubectl",
+                "port-forward", "-n", self._namespace, "svc/minio-service",
+                f"{self._local_port}:{self._remote_port}",
+            ],
+            stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True,
+        )
+        # Wait for kubectl's "Forwarding from" banner. readline() blocks, so the deadline
+        # is only re-checked after each line of output.
+        deadline = time.time() + 10
+        out = ""
+        while time.time() < deadline and self._proc.poll() is None:
+            line = self._proc.stdout.readline()
+            if not line:  # EOF: kubectl closed its output
+                break
+            out += line
+            if "Forwarding from" in line:
+                return self
+        # __exit__ is not called when __enter__ raises, so stop the child here instead of
+        # leaking it, then drain its remaining output so the error shows why kubectl failed.
+        if self._proc.poll() is None:
+            self._proc.kill()
+        out += self._proc.communicate()[0] or ""
+        raise RuntimeError(f"kubectl port-forward did not start successfully. Output:\n{out}")
+
+    def __exit__(self, exc_type, exc, tb):
+        """Stop the port-forward: SIGINT first, then SIGKILL if it has not exited within 3 seconds."""
+        if self._proc is None or self._proc.poll() is not None:
+            return
+        self._proc.send_signal(signal.SIGINT)
+        try:
+            self._proc.wait(timeout=3)
+        except subprocess.TimeoutExpired:
+            self._proc.kill()
+            self._proc.wait()
+
+
+def _mc_alias_set(mc: str, endpoint: str, access_key: str, secret_key: str) -> None:
+    _run([mc, "alias", "set", "kfp-minio", endpoint, access_key, secret_key, "--api", "S3v4"])  # registers the "kfp-minio" alias; _run's check=True default raises CalledProcessError on failure
+
+
+def _mc_find_logs(mc: str, bucket: str, prefix: str) -> list[str]:
+    """List ``*.log`` objects below ``kfp-minio/<bucket>/<prefix>``.
+
+    ``mc find`` is tried first; when it reports nothing, ``mc ls --recursive`` is parsed instead.
+    Every returned entry is a full alias path (``kfp-minio/...``) that can be given to ``mc cat``;
+    the list is empty when neither command finds a log file.
+    """
+    target = f"kfp-minio/{bucket}/{prefix}"
+    # `mc find` prints full alias paths, one per line.
+    cp = _run([mc, "find", target, "--name", "*.log"], check=False)
+    if cp.returncode == 0 and cp.stdout:
+        found = [ln.strip() for ln in cp.stdout.splitlines() if ln.strip()]
+        if found:
+            return found
+    cp = _run([mc, "ls", "--recursive", target], check=False)
+    if cp.returncode != 0 or not cp.stdout:
+        return []
+    # `mc ls --recursive` prints "<date> <time> <size> <key>" with the key relative to the
+    # listed "directory" (everything up to the last "/" of the target). Re-anchor each key
+    # there; a bare relative key would be treated by `mc cat` as a local file.
+    base = target.rsplit("/", 1)[0]
+    keys = [ln.split()[-1] for ln in cp.stdout.splitlines() if ln.split()]
+    return [f"{base}/{k}" for k in keys if k.endswith(".log")]
+
+
+def _tail_bytes(s: str, max_bytes: int) -> str:
+    """Return ``s`` unchanged if its UTF-8 size fits ``max_bytes``, else a notice plus its last bytes."""
+    b = s.encode("utf-8", errors="replace")
+    if len(b) <= max_bytes:
+        return s
+    keep = max(max_bytes, 0)  # b[-0:] would be the whole buffer, so slice from an explicit start
+    tail = b[len(b) - keep:]
+    header = f"[truncated: showing last {keep} bytes of {len(b)}]\n"
+    return header + tail.decode("utf-8", errors="replace")
+
+
+def _mc_cat(mc: str, path: str) -> str:
+    """Return the object's content via ``mc cat``, or a bracketed failure note if mc exits non-zero."""
+    result = _run([mc, "cat", path], check=False)
+    if result.returncode == 0:
+        return result.stdout or ""
+    return f"[mc cat failed for {path}] {(result.stderr or '').strip()}\n"
+
+
+def _read_mapping(mapping_file: str) -> dict[str, list[str]]:
+    mapping: dict[str, list[str]] = {}
+    with open(mapping_file, "r", encoding="utf-8", errors="replace") as f:
+        for line in f:
+            line = line.strip()
+            if not line or "|" not in line:  # skip blank lines and lines without a "|" separator
+                continue
+            test_name, workflows = line.split("|", 1)  # line format: <test name>|<wf1>,<wf2>,...
+            wf_list = [w.strip() for w in workflows.split(",") if w.strip()]  # drop empty workflow names
+            if wf_list:
+                mapping.setdefault(test_name, []).extend(wf_list)  # repeated test names accumulate workflows
+    return mapping  # test name -> workflow names, in the order they were read
+
+
+def _is_failed_testcase(tc: ET.Element) -> bool:  # True when the element has a direct <failure> or <error> child
+    return tc.find("failure") is not None or tc.find("error") is not None
+
+
+def _match_test_name(testcase_name: str, mapping: dict[str, list[str]]) -> str | None:
+    if testcase_name in mapping:
+        return testcase_name
+    # Fallback: normalize whitespace and try substring match.
+    norm = re.sub(r"\s+", " ", testcase_name).strip()
+    if norm in mapping:
+        return norm
+    # Returns the first mapping key (in insertion order) that contains, or is contained in, the
+    # raw or normalized name. Empty strings are substrings of everything, so they never match.
+    hits = (k for k in mapping if k and norm and (k in testcase_name or testcase_name in k or k in norm or norm in k))
+    return next(hits, None)
+
+
+def _append_system_out(tc: ET.Element, text_to_append: str) -> None:
+    """Append text to the testcase's ``<system-out>`` child (created if missing), newline-separated."""
+    so = tc.find("system-out")
+    if so is None:
+        so = ET.SubElement(tc, "system-out")
+    old = so.text or ""
+    # XML 1.0 forbids most control characters (e.g. ANSI escapes in logs); ElementTree would
+    # write them verbatim and the resulting junit.xml would no longer parse. Use U+FFFD instead.
+    clean = re.sub(r"[^\t\n\r\x20-\ud7ff\ue000-\ufffd\U00010000-\U0010ffff]", "\ufffd", text_to_append)
+    so.text = (old + "\n" if old and not old.endswith("\n") else old) + clean
+
+
+def main() -> int:  # CLI entry point; the return value is used as the process exit status
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--junit-xml", required=True)
+    parser.add_argument("--mapping-file", required=True)
+    parser.add_argument("--namespace", default="kubeflow")
+    parser.add_argument("--bucket", default="mlpipeline")
+    parser.add_argument("--logs-prefix", default=None)  # None -> derived from the namespace below
+    parser.add_argument("--max-bytes-per-workflow", type=int, default=200_000)
+    parser.add_argument("--max-bytes-per-step", type=int, default=80_000)
+    args = parser.parse_args()
+
+    junit_xml = args.junit_xml
+    mapping_file = args.mapping_file
+    namespace = args.namespace
+    bucket = args.bucket
+    logs_prefix = args.logs_prefix or f"private-artifacts/{namespace}"
+
+    if not os.path.isfile(junit_xml):
+        print(f"ERROR: junit.xml not found at {junit_xml}", file=sys.stderr)
+        return 2  # exit code 2: a required input file is missing
+    if not os.path.isfile(mapping_file):
+        print(f"ERROR: mapping file not found at {mapping_file}", file=sys.stderr)
+        return 2
+
+    mapping = _read_mapping(mapping_file)
+    if not mapping:
+        print("No mappings found; nothing to do.")
+        return 0  # an empty mapping is not an error
+
+    artifact_repo = _get_artifact_repo(namespace)
+    if "archiveLogs: true" not in artifact_repo:  # plain substring check on the ConfigMap text
+        print("ERROR: Argo Workflows log archiving is not enabled (archiveLogs: true not found).", file=sys.stderr)
+        return 1  # exit code 1: the environment cannot provide logs
+
+    # Resolve the MinIO client: a non-empty MC_PATH wins, then `mc`, then `minio-mc` on PATH.
+    import shutil  # noqa: WPS433
+
+    mc = os.environ.get("MC_PATH") if os.environ.get("MC_PATH") else shutil.which("mc") or shutil.which("minio-mc")
+    if not mc:
+        print("ERROR: MinIO client not found. Set MC_PATH or ensure mc is on PATH.", file=sys.stderr)
+        return 1
+
+    access_key, secret_key = _get_minio_creds(namespace)
+
+    with PortForward(namespace):  # uses PortForward's default ports (9000 -> 9000)
+        _mc_alias_set(mc, "http://localhost:9000", access_key, secret_key)
+
+        tree = ET.parse(junit_xml)
+        root = tree.getroot()
+
+        # The ".//testcase" search finds testcases at any depth, so both a <testsuite> and a <testsuites> root work.
+        testcases = root.findall(".//testcase")
+        modified = 0  # number of testcases that received a log section
+
+        for tc in testcases:
+            name = tc.get("name") or ""
+            if not name or not _is_failed_testcase(tc):  # only named, failed/errored testcases are augmented
+                continue
+            key = _match_test_name(name, mapping)
+            if not key:
+                continue
+
+            workflows = mapping.get(key, [])
+            if not workflows:
+                continue
+
+            out_lines: list[str] = []
+            out_lines.append("===== Argo Workflows archived logs (tailed) =====")
+            for wf in workflows:
+                out_lines.append(f"--- Workflow: {wf} ---")
+                wf_prefix = f"{logs_prefix}/{wf}/"  # object prefix searched for this workflow's logs
+                log_paths = _mc_find_logs(mc, bucket, wf_prefix)
+                if not log_paths:
+                    out_lines.append(f"[no *.log files found under s3://{bucket}/{wf_prefix}]")
+                    continue
+
+                bytes_budget = args.max_bytes_per_workflow  # remaining byte allowance for this workflow
+                for p in log_paths:
+                    step_name = os.path.basename(os.path.dirname(p))  # name of the directory holding the log file
+                    out_lines.append(f"[step: {step_name}]")
+                    content = _mc_cat(mc, p)
+                    content = _tail_bytes(content, min(args.max_bytes_per_step, bytes_budget))  # per-step cap, never above the remaining budget
+                    out_lines.append(content.rstrip("\n"))
+                    out_lines.append("")
+                    bytes_budget -= len(content.encode("utf-8", errors="replace"))  # any truncation header from _tail_bytes is counted too
+                    if bytes_budget <= 0:
+                        out_lines.append(f"[truncated: workflow {wf} exceeded max bytes budget]")
+                        break  # remaining log files of this workflow are skipped
+
+            out_lines.append("===== End Argo Workflows logs =====")
+            out = "\n".join(out_lines) + "\n"
+            _append_system_out(tc, out)
+            modified += 1
+
+        if modified == 0:
+            print("No failing testcases matched mapping entries; junit.xml unchanged.")
+            return 0  # returning inside the with-block still runs PortForward.__exit__
+
+        # Write to a temp file (created in junit.xml's directory when the path has one), then swap it in with os.replace.
+        with tempfile.NamedTemporaryFile("wb", delete=False, dir=os.path.dirname(junit_xml) or None) as tmp:
+            tmp_path = tmp.name
+            tree.write(tmp, encoding="utf-8", xml_declaration=True)
+        os.replace(tmp_path, junit_xml)
+        print(f"Updated junit.xml: appended workflow logs to {modified} failing testcase(s).")
+        return 0
+
+
+if __name__ == "__main__":  # run only when executed as a script, not when imported
+    sys.exit(main())  # main()'s return value becomes the exit status
+
+

From ecf1fe154a76789d705558d8d3c3aca76aa9a90d Mon Sep 17 00:00:00 2001
From: Helber Belmiro <helber.belmiro@gmail.com>
Date: Sat, 13 Dec 2025 13:35:03 -0300
Subject: [PATCH 4/4] refactor(scripts): Replace Python type hints with
 `typing` annotations for compatibility

Signed-off-by: Helber Belmiro <helber.belmiro@gmail.com>
---
 .../scripts/augment-junit-xml-with-workflow-logs.py   | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/.github/resources/scripts/augment-junit-xml-with-workflow-logs.py b/.github/resources/scripts/augment-junit-xml-with-workflow-logs.py
index 78249175db6..076b01b93d2 100644
--- a/.github/resources/scripts/augment-junit-xml-with-workflow-logs.py
+++ b/.github/resources/scripts/augment-junit-xml-with-workflow-logs.py
@@ -9,6 +9,7 @@
 import tempfile
 import time
 import xml.etree.ElementTree as ET
+from typing import Dict, List, Optional
 
 
 def _run(cmd: list[str], *, check: bool = True, capture: bool = True, text: bool = True) -> subprocess.CompletedProcess:
@@ -72,9 +73,9 @@ def __init__(self, namespace: str, local_port: int = 9000, remote_port: int = 90
         self._namespace = namespace
         self._local_port = local_port
         self._remote_port = remote_port
-        self._proc: subprocess.Popen[str] | None = None
+        self._proc: Optional[subprocess.Popen] = None
 
-    def __enter__(self):
+    def __enter__(self) -> "PortForward":
         self._proc = subprocess.Popen(
             [
                 "kubectl",
@@ -167,7 +168,7 @@ def _mc_cat(mc: str, path: str) -> str:
 
 
 def _read_mapping(mapping_file: str) -> dict[str, list[str]]:
-    mapping: dict[str, list[str]] = {}
+    mapping: Dict[str, List[str]] = {}
     with open(mapping_file, "r", encoding="utf-8", errors="replace") as f:
         for line in f:
             line = line.strip()
@@ -184,7 +185,7 @@ def _is_failed_testcase(tc: ET.Element) -> bool:
     return tc.find("failure") is not None or tc.find("error") is not None
 
 
-def _match_test_name(testcase_name: str, mapping: dict[str, list[str]]) -> str | None:
+def _match_test_name(testcase_name: str, mapping: Dict[str, List[str]]) -> Optional[str]:
     if testcase_name in mapping:
         return testcase_name
     # Fallback: normalize whitespace and try substring match.
@@ -275,7 +276,7 @@ def main() -> int:
             if not workflows:
                 continue
 
-            out_lines: list[str] = []
+            out_lines: List[str] = []
             out_lines.append("===== Argo Workflows archived logs (tailed) =====")
             for wf in workflows:
                 out_lines.append(f"--- Workflow: {wf} ---")