From cbb42f0977abd29679ddec73a516e910259db73e Mon Sep 17 00:00:00 2001 From: Mitch Denny Date: Sat, 16 May 2026 00:12:32 +1000 Subject: [PATCH 1/7] Mirror deployment E2E summary in CLI E2E recording comment MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Update the CLI E2E recording PR comment to surface a passed/failed breakdown in the same style as the Deployment E2E Tests comment, e.g. "✅ **CLI E2E Tests passed** — 12 passed, 0 failed". Counts are derived from the TRX outcomes that are already parsed, adding explicit pass and unknown tallies alongside the existing fail count. Recording upload failures stay surfaced but no longer drive the headline status, since they are unrelated to test correctness. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../workflows/cli-e2e-recording-comment.yml | 37 ++++++++++++++----- 1 file changed, 28 insertions(+), 9 deletions(-) diff --git a/.github/workflows/cli-e2e-recording-comment.yml b/.github/workflows/cli-e2e-recording-comment.yml index c9a26af0191..1651ad7d8cd 100644 --- a/.github/workflows/cli-e2e-recording-comment.yml +++ b/.github/workflows/cli-e2e-recording-comment.yml @@ -231,7 +231,9 @@ jobs: UPLOAD_COUNT=0 FAIL_COUNT=0 TOTAL_COUNT=0 + TEST_PASS_COUNT=0 TEST_FAIL_COUNT=0 + TEST_UNKNOWN_COUNT=0 # Arrays to track failed test recordings separately FAILED_TESTS_BODY="" @@ -255,11 +257,13 @@ jobs: TEST_OUTCOME=$(jq -r --arg name "$filename" '.[$name] // "Unknown"' test_outcomes.json) if [ "$TEST_OUTCOME" = "Passed" ]; then STATUS_EMOJI="✅" + TEST_PASS_COUNT=$((TEST_PASS_COUNT + 1)) elif [ "$TEST_OUTCOME" = "Failed" ]; then STATUS_EMOJI="❌" TEST_FAIL_COUNT=$((TEST_FAIL_COUNT + 1)) else STATUS_EMOJI="❔" + TEST_UNKNOWN_COUNT=$((TEST_UNKNOWN_COUNT + 1)) fi # Upload to asciinema with retry logic for transient failures @@ -302,18 +306,33 @@ jobs: fi done - echo "Uploaded $UPLOAD_COUNT recordings, $FAIL_COUNT upload failures, $TEST_FAIL_COUNT test failures" + echo "Uploaded $UPLOAD_COUNT recordings, $FAIL_COUNT upload failures, $TEST_PASS_COUNT passed, $TEST_FAIL_COUNT failed, $TEST_UNKNOWN_COUNT unknown" - # Build comment with summary outside collapsible and table inside + # Build the summary line in the same style as the deployment E2E comment: + # " **CLI E2E Tests ** — X passed, Y failed[, Z unknown]" + # Status reflects test outcomes; recording-upload failures are a secondary concern + # surfaced in the table rather than the headline status. if [ "$TEST_FAIL_COUNT" -gt 0 ]; then SUMMARY_EMOJI="❌" - SUMMARY_TEXT="${TEST_FAIL_COUNT} test(s) failed, ${UPLOAD_COUNT} recordings uploaded" - elif [ "$FAIL_COUNT" -gt 0 ]; then - SUMMARY_EMOJI="⚠️" - SUMMARY_TEXT="${UPLOAD_COUNT}/${TOTAL_COUNT} recordings uploaded, ${FAIL_COUNT} upload(s) failed" - else + SUMMARY_STATUS="failed" + elif [ "$TEST_PASS_COUNT" -gt 0 ] && [ "$TEST_UNKNOWN_COUNT" -eq 0 ]; then + SUMMARY_EMOJI="✅" + SUMMARY_STATUS="passed" + elif [ "$TEST_PASS_COUNT" -eq 0 ] && [ "$TEST_FAIL_COUNT" -eq 0 ]; then + # No TRX outcomes available — fall back to a recording-only summary SUMMARY_EMOJI="🎬" - SUMMARY_TEXT="${UPLOAD_COUNT} recordings uploaded" + SUMMARY_STATUS="completed" + else + SUMMARY_EMOJI="❓" + SUMMARY_STATUS="unknown" + fi + + SUMMARY_TEXT="${TEST_PASS_COUNT} passed, ${TEST_FAIL_COUNT} failed" + if [ "$TEST_UNKNOWN_COUNT" -gt 0 ]; then + SUMMARY_TEXT="${SUMMARY_TEXT}, ${TEST_UNKNOWN_COUNT} unknown" + fi + if [ "$FAIL_COUNT" -gt 0 ]; then + SUMMARY_TEXT="${SUMMARY_TEXT} (${UPLOAD_COUNT}/${TOTAL_COUNT} recordings uploaded, ${FAIL_COUNT} upload(s) failed)" fi # Build the failed tests section (shown outside the collapsible) @@ -326,7 +345,7 @@ jobs: fi COMMENT_BODY="${COMMENT_MARKER} - ${SUMMARY_EMOJI} **CLI E2E Test Recordings** — ${SUMMARY_TEXT} (commit \`${SHORT_SHA}\`) + ${SUMMARY_EMOJI} **CLI E2E Tests ${SUMMARY_STATUS}** — ${SUMMARY_TEXT} (commit \`${SHORT_SHA}\`) ${FAILED_SECTION}
View all recordings From 8bb670d6bc9e79d3b75c9b60f2f2c07c256f881b Mon Sep 17 00:00:00 2001 From: Mitch Denny Date: Sat, 16 May 2026 10:12:55 +1000 Subject: [PATCH 2/7] Embed per-row outcome in CLI E2E recording link label MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Surface the test outcome on the recording link itself, not only in the table's Status column, so a URL pulled out of the table still tells a reader whether it represents a pass or a failure. Failed recordings get "❌ ▶️ View failure recording", passes get "✅ ▶️ View recording", and unknowns keep "❔". Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .github/workflows/cli-e2e-recording-comment.yml | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/.github/workflows/cli-e2e-recording-comment.yml b/.github/workflows/cli-e2e-recording-comment.yml index 1651ad7d8cd..d4fea7df56c 100644 --- a/.github/workflows/cli-e2e-recording-comment.yml +++ b/.github/workflows/cli-e2e-recording-comment.yml @@ -254,15 +254,20 @@ jobs: # Look up test outcome from TRX data. # .cast files are named after the test method name (via [CallerMemberName] in CreateTestTerminal), # so the filename matches the method name key in the outcomes JSON. + # Per-link label carries the outcome too, so a recording URL copied out of + # the table still tells reviewers whether it represents a pass or a failure. TEST_OUTCOME=$(jq -r --arg name "$filename" '.[$name] // "Unknown"' test_outcomes.json) if [ "$TEST_OUTCOME" = "Passed" ]; then STATUS_EMOJI="✅" + LINK_LABEL="✅ ▶️ View recording" TEST_PASS_COUNT=$((TEST_PASS_COUNT + 1)) elif [ "$TEST_OUTCOME" = "Failed" ]; then STATUS_EMOJI="❌" + LINK_LABEL="❌ ▶️ View failure recording" TEST_FAIL_COUNT=$((TEST_FAIL_COUNT + 1)) else STATUS_EMOJI="❔" + LINK_LABEL="❔ ▶️ View recording" TEST_UNKNOWN_COUNT=$((TEST_UNKNOWN_COUNT + 1)) fi @@ -283,14 +288,14 @@ jobs: if [ -n "$ASCIINEMA_URL" ]; then TABLE_BODY="${TABLE_BODY} - | ${STATUS_EMOJI} | ${safe_filename} | [▶️ View Recording](${ASCIINEMA_URL}) |" + | ${STATUS_EMOJI} | ${safe_filename} | [${LINK_LABEL}](${ASCIINEMA_URL}) |" echo "Uploaded: $ASCIINEMA_URL" UPLOAD_COUNT=$((UPLOAD_COUNT + 1)) # Track failed tests for the prominent section if [ "$TEST_OUTCOME" = "Failed" ]; then FAILED_TESTS_BODY="${FAILED_TESTS_BODY} - - ❌ **${safe_filename}** — [▶️ View Recording](${ASCIINEMA_URL})" + - ❌ **${safe_filename}** — [${LINK_LABEL}](${ASCIINEMA_URL})" fi else TABLE_BODY="${TABLE_BODY} From 56a4aa27e3ea0eec6a80b1977029cacaa5a5c5ea Mon Sep 17 00:00:00 2001 From: Mitch Denny Date: Sat, 16 May 2026 10:14:46 +1000 Subject: [PATCH 3/7] Allow workflow_dispatch to override target PR for testing Adds a pr_number input to the manual trigger so the workflow can be exercised against a chosen PR's artifacts without depending on the open head-SHA lookup, which is needed to test changes to this workflow before they reach main. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../workflows/cli-e2e-recording-comment.yml | 27 +++++++++++++------ 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/.github/workflows/cli-e2e-recording-comment.yml b/.github/workflows/cli-e2e-recording-comment.yml index d4fea7df56c..b917e365909 100644 --- a/.github/workflows/cli-e2e-recording-comment.yml +++ b/.github/workflows/cli-e2e-recording-comment.yml @@ -14,6 +14,10 @@ on: description: 'Workflow run ID to download artifacts from' required: true type: number + pr_number: + description: 'Optional PR number to comment on (skips the head-SHA lookup; useful for testing against merged PRs)' + required: false + type: number jobs: add-recording-comment: @@ -50,14 +54,21 @@ jobs: }); headSha = run.data.head_sha; - // Find PR by head SHA - const prs = await github.rest.pulls.list({ - owner: context.repo.owner, - repo: context.repo.repo, - state: 'open', - head: `${context.repo.owner}:${run.data.head_branch}` - }); - prNumber = prs.data.length > 0 ? prs.data[0].number : null; + // Allow explicit PR override so we can dry-run against a merged PR's + // artifacts without depending on the open-PR head-SHA lookup below. + const overridePr = context.payload.inputs.pr_number; + if (overridePr) { + prNumber = Number(overridePr); + } else { + // Find PR by head SHA + const prs = await github.rest.pulls.list({ + owner: context.repo.owner, + repo: context.repo.repo, + state: 'open', + head: `${context.repo.owner}:${run.data.head_branch}` + }); + prNumber = prs.data.length > 0 ? prs.data[0].number : null; + } } else { // Triggered by workflow_run runId = context.payload.workflow_run.id; From 9302841e44c1f417f136dacd9e6a4e0250f59f46 Mon Sep 17 00:00:00 2001 From: Mitch Denny Date: Sat, 16 May 2026 10:21:17 +1000 Subject: [PATCH 4/7] Fix TRX outcome parser The previous yq query used jq-style 'if type == "!!seq"' which the Go yq lexer rejects, so test_outcomes.json was always empty and every recording showed as Unknown. The step had continue-on-error: true, so the failure stayed silent until the new pass/fail summary surfaced it. Convert TRX to JSON with yq -p xml -o json, then do all reshaping in jq where it handles single-vs-array UnitTestResult shapes correctly. yq also emits attributes as '+@' (newer) or '@' (older), so the lookup tries both forms. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../workflows/cli-e2e-recording-comment.yml | 46 +++++++++++++------ 1 file changed, 32 insertions(+), 14 deletions(-) diff --git a/.github/workflows/cli-e2e-recording-comment.yml b/.github/workflows/cli-e2e-recording-comment.yml index b917e365909..b4228667e41 100644 --- a/.github/workflows/cli-e2e-recording-comment.yml +++ b/.github/workflows/cli-e2e-recording-comment.yml @@ -175,28 +175,46 @@ jobs: continue-on-error: true shell: bash run: | - # Parse TRX (XML) files to extract test method outcomes using yq (pre-installed on ubuntu-latest). - # Produces a JSON map of testMethodName -> outcome for the bash comment step to consume. - # When the same test appears in multiple TRX files (e.g. retries), "Failed" wins over other outcomes. + # Parse TRX (XML) files to extract test method outcomes using yq + jq + # (both pre-installed on ubuntu-latest). Produces a JSON map of + # testMethodName -> outcome for the bash comment step to consume. + # When the same test appears in multiple TRX files (e.g. retries), + # "Failed" wins over other outcomes. if compgen -G trx_files/*.trx > /dev/null 2>&1; then echo "Parsing TRX files with yq..." - # yq can read XML natively; extract testName+outcome from each UnitTestResult. - # -s merges all files, producing a combined JSON array of results. - # Prefer "Failed" over other outcomes when duplicates exist. - yq -p xml -o json '.TestRun.Results.UnitTestResult | (if type == "!!seq" then .[] else . end) | {(."@testName" // ."+@testName"): (."@outcome" // ."+@outcome")}' trx_files/*.trx \ + # Convert each TRX to JSON with yq (Go yq's expression language is + # limited, so do all reshaping in jq). The resulting documents look like: + # { "TestRun": { "Results": { "UnitTestResult": } } } + # Attribute keys are exposed by yq as "+@" (newer yq) or + # "@" (older yq), and a single result is emitted as an object + # rather than an array, so jq must handle both shapes. + yq -p xml -o json '.' trx_files/*.trx \ | jq -s ' - reduce (.[] | to_entries[]) as {$key, $value} ({}; - # Extract simple method name (last segment after dot) for .cast file matching - ($key | split(".") | last) as $method | - # Prefer "Failed" over any other outcome - if .[$method] == "Failed" then . else .[$method] = $value end | - if .[$key] == "Failed" then . else .[$key] = $value end + def attr(o; k): o["+@" + k] // o["@" + k]; + reduce ( + .[] + | .TestRun.Results.UnitTestResult + | (if type == "array" then .[] else . end) + ) as $r ({}; + attr($r; "testName") as $name | + attr($r; "outcome") as $outcome | + if $name == null or $outcome == null then . + else + ($name | split(".") | last) as $method | + # Prefer "Failed" over any other outcome + (if .[$method] == "Failed" then . else .[$method] = $outcome end) + | (if .[$name] == "Failed" then . else .[$name] = $outcome end) + end ) ' > test_outcomes.json OUTCOME_COUNT=$(jq 'length' test_outcomes.json) echo "Parsed $OUTCOME_COUNT test outcome(s)" - echo "has_outcomes=true" >> "$GITHUB_OUTPUT" + if [ "$OUTCOME_COUNT" -gt 0 ]; then + echo "has_outcomes=true" >> "$GITHUB_OUTPUT" + else + echo "has_outcomes=false" >> "$GITHUB_OUTPUT" + fi else echo "No TRX files found" echo '{}' > test_outcomes.json From 8aef4a380afc4987e8a85155a34ccbf05421c122 Mon Sep 17 00:00:00 2001 From: Mitch Denny Date: Sat, 16 May 2026 12:31:16 +1000 Subject: [PATCH 5/7] Match cast files against TRX theory tests by bare method name MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Theory test recordings (e.g. recordings produced from [Theory] methods) are saved with their [CallerMemberName] bare method name, while TRX reports them with parameter data appended: 'Namespace.Class.Method(toolchain: "pnpm")'. The previous parser only keyed by the last dotted segment, which preserved those parens and so never matched the .cast filename, leaving theory recordings reported as 'Unknown'. Strip the parameter suffix and also key the outcome map by the bare method name and the no-parameter FQN. 'Failed' still wins over other outcomes when the same key is seen multiple times (retries, multiple theory cases), so a single failing case still surfaces as ❌ in the comment. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .github/workflows/cli-e2e-recording-comment.yml | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/.github/workflows/cli-e2e-recording-comment.yml b/.github/workflows/cli-e2e-recording-comment.yml index b4228667e41..5a174050e55 100644 --- a/.github/workflows/cli-e2e-recording-comment.yml +++ b/.github/workflows/cli-e2e-recording-comment.yml @@ -191,6 +191,16 @@ jobs: yq -p xml -o json '.' trx_files/*.trx \ | jq -s ' def attr(o; k): o["+@" + k] // o["@" + k]; + # Best-effort: also key by the bare method name (with theory + # parameter data stripped) so a .cast file named after the + # CallerMemberName matches a TRX entry like + # "Namespace.Class.Method(toolchain: "pnpm")". + def bare_method(name): + (name | split(".") | last) | sub("\\(.*$"; ""); + def fqn_no_params(name): + name | sub("\\(.*$"; ""); + def merge(map; key; outcome): + if map[key] == "Failed" then map else map + {(key): outcome} end; reduce ( .[] | .TestRun.Results.UnitTestResult @@ -200,10 +210,9 @@ jobs: attr($r; "outcome") as $outcome | if $name == null or $outcome == null then . else - ($name | split(".") | last) as $method | - # Prefer "Failed" over any other outcome - (if .[$method] == "Failed" then . else .[$method] = $outcome end) - | (if .[$name] == "Failed" then . else .[$name] = $outcome end) + merge(.; bare_method($name); $outcome) + | merge(.; fqn_no_params($name); $outcome) + | merge(.; $name; $outcome) end ) ' > test_outcomes.json From 89ecd073439a85f0425db688e2318be468e1626f Mon Sep 17 00:00:00 2001 From: Mitch Denny Date: Sat, 16 May 2026 12:48:00 +1000 Subject: [PATCH 6/7] Thread CallerMemberName through OtelLogs core helper Both [Fact]s delegate to a private helper that calls CreateDockerTestTerminal, so [CallerMemberName] resolved to the helper name. That produced a single .cast file named after the private method with no matching TRX entry, which the comment workflow reported as 'Unknown'. Pass the public test name through explicitly so each Fact gets its own recording and matches TRX. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- tests/Aspire.Cli.EndToEnd.Tests/OtelLogsTests.cs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/Aspire.Cli.EndToEnd.Tests/OtelLogsTests.cs b/tests/Aspire.Cli.EndToEnd.Tests/OtelLogsTests.cs index 119178a649d..a0351924f3d 100644 --- a/tests/Aspire.Cli.EndToEnd.Tests/OtelLogsTests.cs +++ b/tests/Aspire.Cli.EndToEnd.Tests/OtelLogsTests.cs @@ -1,6 +1,7 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. +using System.Runtime.CompilerServices; using Aspire.Cli.EndToEnd.Tests.Helpers; using Aspire.Cli.Tests.Utils; using Hex1b.Automation; @@ -24,14 +25,14 @@ public Task OtelLogsReturnsStructuredLogsFromStarterApp() public Task OtelLogsReturnsStructuredLogsFromStarterAppIsolated() => OtelLogsReturnsStructuredLogsFromStarterAppCore(isolated: true); - private async Task OtelLogsReturnsStructuredLogsFromStarterAppCore(bool isolated) + private async Task OtelLogsReturnsStructuredLogsFromStarterAppCore(bool isolated, [CallerMemberName] string testName = "") { var repoRoot = CliE2ETestHelpers.GetRepoRoot(); var strategy = CliInstallStrategy.Detect(output.WriteLine); using var workspace = TemporaryWorkspace.Create(output); - using var terminal = CliE2ETestHelpers.CreateDockerTestTerminal(repoRoot, strategy, output, mountDockerSocket: true, workspace: workspace); + using var terminal = CliE2ETestHelpers.CreateDockerTestTerminal(repoRoot, strategy, output, mountDockerSocket: true, workspace: workspace, testName: testName); var pendingRun = terminal.RunAsync(TestContext.Current.CancellationToken); From 090f97078420a545c30bf5c153491598c9141fde Mon Sep 17 00:00:00 2001 From: Mitch Denny Date: Sat, 16 May 2026 13:02:36 +1000 Subject: [PATCH 7/7] Replace zero pass/fail counts with recording count in fallback summary When no TRX outcomes can be matched to any recording the headline used to render '0 passed, 0 failed', which reads as 'no tests ran' rather than 'outcome data unavailable'. Switch the fallback wording to ' recording(s), outcomes unavailable' so the headline reflects what actually happened. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../workflows/cli-e2e-recording-comment.yml | 20 +++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/.github/workflows/cli-e2e-recording-comment.yml b/.github/workflows/cli-e2e-recording-comment.yml index 5a174050e55..5c76728b02e 100644 --- a/.github/workflows/cli-e2e-recording-comment.yml +++ b/.github/workflows/cli-e2e-recording-comment.yml @@ -355,6 +355,11 @@ jobs: # " **CLI E2E Tests ** — X passed, Y failed[, Z unknown]" # Status reflects test outcomes; recording-upload failures are a secondary concern # surfaced in the table rather than the headline status. + # Choose headline emoji + status word from the tallied outcomes. + # We never let unknowns suppress a real failure, but we do flag + # unknowns explicitly when the rest of the run was clean so + # reviewers don't read a misleading 'passed'. + FALLBACK_TEXT="" if [ "$TEST_FAIL_COUNT" -gt 0 ]; then SUMMARY_EMOJI="❌" SUMMARY_STATUS="failed" @@ -362,17 +367,24 @@ jobs: SUMMARY_EMOJI="✅" SUMMARY_STATUS="passed" elif [ "$TEST_PASS_COUNT" -eq 0 ] && [ "$TEST_FAIL_COUNT" -eq 0 ]; then - # No TRX outcomes available — fall back to a recording-only summary + # No TRX outcomes matched any recording — describe the run by + # recording count rather than zero pass/fail counts, which would + # read as 'no tests ran' instead of 'outcome data unavailable'. SUMMARY_EMOJI="🎬" SUMMARY_STATUS="completed" + FALLBACK_TEXT="${TOTAL_COUNT} recording(s), outcomes unavailable" else SUMMARY_EMOJI="❓" SUMMARY_STATUS="unknown" fi - SUMMARY_TEXT="${TEST_PASS_COUNT} passed, ${TEST_FAIL_COUNT} failed" - if [ "$TEST_UNKNOWN_COUNT" -gt 0 ]; then - SUMMARY_TEXT="${SUMMARY_TEXT}, ${TEST_UNKNOWN_COUNT} unknown" + if [ -n "$FALLBACK_TEXT" ]; then + SUMMARY_TEXT="$FALLBACK_TEXT" + else + SUMMARY_TEXT="${TEST_PASS_COUNT} passed, ${TEST_FAIL_COUNT} failed" + if [ "$TEST_UNKNOWN_COUNT" -gt 0 ]; then + SUMMARY_TEXT="${SUMMARY_TEXT}, ${TEST_UNKNOWN_COUNT} unknown" + fi fi if [ "$FAIL_COUNT" -gt 0 ]; then SUMMARY_TEXT="${SUMMARY_TEXT} (${UPLOAD_COUNT}/${TOTAL_COUNT} recordings uploaded, ${FAIL_COUNT} upload(s) failed)"