From 77331c50bc3de8e99b49bd88dbc18509d9f83c21 Mon Sep 17 00:00:00 2001
From: dacharyc <dc@dacharycarey.com>
Date: Sun, 15 Mar 2026 00:19:03 -0400
Subject: [PATCH 1/7] Add 'rendering-strategy', 'tabbed-content-serialization',
 'section-header-quality'

---
 README.md                                     |  27 +-
 package-lock.json                             | 135 +++-
 package.json                                  |   1 +
 .../section-header-quality.ts                 | 295 +++++++-
 .../tabbed-content-serialization.ts           | 229 +++++-
 src/checks/index.ts                           |   1 +
 .../page-size/content-start-position.ts       |  12 +-
 src/checks/page-size/page-size-html.ts        |  14 +-
 src/checks/page-size/rendering-strategy.ts    | 182 +++++
 src/helpers/detect-rendering.ts               | 115 +++
 src/helpers/detect-tabs.ts                    | 354 ++++++++++
 src/helpers/fetch-page.ts                     |  23 +
 src/helpers/index.ts                          |   5 +
 src/runner.ts                                 |   1 +
 src/types.ts                                  |   9 +
 test/integration/check-pipeline.test.ts       |  52 ++
 test/integration/cli.test.ts                  |   2 +-
 test/unit/checks/rendering-strategy.test.ts   | 251 +++++++
 .../checks/section-header-quality.test.ts     | 594 ++++++++++++++++
 .../tabbed-content-serialization.test.ts      | 385 +++++++++++
 test/unit/helpers/detect-rendering.test.ts    | 165 +++++
 test/unit/helpers/detect-tabs.test.ts         | 654 ++++++++++++++++++
 test/unit/helpers/fetch-page.test.ts          | 102 +++
 test/unit/runner.test.ts                      |   2 +-
 24 files changed, 3562 insertions(+), 48 deletions(-)
 create mode 100644 src/checks/page-size/rendering-strategy.ts
 create mode 100644 src/helpers/detect-rendering.ts
 create mode 100644 src/helpers/detect-tabs.ts
 create mode 100644 src/helpers/fetch-page.ts
 create mode 100644 test/unit/checks/rendering-strategy.test.ts
 create mode 100644 test/unit/checks/section-header-quality.test.ts
 create mode 100644 test/unit/checks/tabbed-content-serialization.test.ts
 create mode 100644 test/unit/helpers/detect-rendering.test.ts
 create mode 100644 test/unit/helpers/detect-tabs.test.ts
 create mode 100644 test/unit/helpers/fetch-page.test.ts

diff --git a/README.md b/README.md
index 38d6f71..33297f2 100644
--- a/README.md
+++ b/README.md
@@ -7,7 +7,7 @@ Test your documentation site against the [Agent-Friendly Documentation Spec](htt
 
 Agents don't use docs like humans. They hit truncation limits, get walls of CSS instead of content, can't follow cross-host redirects, and don't know about quality-of-life improvements like `llms.txt` or `.md` docs pages that would make life swell. Maybe this is because the industry has lacked guidance - until now.
 
-afdocs runs 21 checks across 8 categories to evaluate how well your docs serve agent consumers. 16 are fully implemented; the rest return `skip` until completed.
+afdocs runs 22 checks across 8 categories to evaluate how well your docs serve agent consumers. 19 are fully implemented; the rest return `skip` until completed.
 
 > **Status: Early development (0.x)**
 > This project is under active development. Check IDs, CLI flags, and output formats may change between minor versions. Feel free to try it out, but don't build automation against specific output until 1.0.
@@ -43,7 +43,7 @@ Authentication
   ✓ auth-gate-detection: All 50 sampled pages are publicly accessible
 
 Summary
-  9 passed, 3 failed, 9 skipped (21 total)
+  9 passed, 3 failed, 10 skipped (22 total)
 ```
 
 ## Install
@@ -144,7 +144,7 @@ describe('agent-friendliness', () => {
 
 ## Checks
 
-21 checks across 8 categories. Checks marked with \* are not yet implemented and return `skip`.
+22 checks across 8 categories. Checks marked with \* are not yet implemented and return `skip`.
 
 ### Category 1: llms.txt
 
@@ -165,19 +165,20 @@ describe('agent-friendliness', () => {
 
 ### Category 3: Page Size and Truncation Risk
 
-| Check                    | Description                                      |
-| ------------------------ | ------------------------------------------------ |
-| `page-size-markdown`     | Character count when served as markdown          |
-| `page-size-html`         | Character count of HTML and post-conversion size |
-| `content-start-position` | How far into the response actual content begins  |
+| Check                    | Description                                                     |
+| ------------------------ | --------------------------------------------------------------- |
+| `rendering-strategy`     | Whether pages contain server-rendered content or are SPA shells |
+| `page-size-markdown`     | Character count when served as markdown                         |
+| `page-size-html`         | Character count of HTML and post-conversion size                |
+| `content-start-position` | How far into the response actual content begins                 |
 
 ### Category 4: Content Structure
 
-| Check                             | Description                                        |
-| --------------------------------- | -------------------------------------------------- |
-| `tabbed-content-serialization` \* | Whether tabbed content creates oversized output    |
-| `section-header-quality` \*       | Whether headers in tabbed sections include context |
-| `markdown-code-fence-validity`    | Whether markdown has unclosed code fences          |
+| Check                          | Description                                        |
+| ------------------------------ | -------------------------------------------------- |
+| `tabbed-content-serialization` | Whether tabbed content creates oversized output    |
+| `section-header-quality`       | Whether headers in tabbed sections include context |
+| `markdown-code-fence-validity` | Whether markdown has unclosed code fences          |
 
 ### Category 5: URL Stability and Redirects
 
diff --git a/package-lock.json b/package-lock.json
index 7cbc6aa..d780a0d 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -11,6 +11,7 @@
       "dependencies": {
         "chalk": "^5.4.1",
         "commander": "^13.1.0",
+        "node-html-parser": "^7.1.0",
         "turndown": "^7.2.2",
         "yaml": "^2.7.0"
       },
@@ -32,7 +33,7 @@
         "vitest": "^4.0.18"
       },
       "engines": {
-        "node": ">=20"
+        "node": ">=22"
       }
     },
     "node_modules/@babel/helper-string-parser": {
@@ -1807,6 +1808,12 @@
         "node": "20 || >=22"
       }
     },
+    "node_modules/boolbase": {
+      "version": "1.0.0",
+      "resolved": "https://registry.npmjs.org/boolbase/-/boolbase-1.0.0.tgz",
+      "integrity": "sha512-JZOSA7Mo9sNGB8+UjSgzdLtokWAky1zbztM3WRLCbZ70/3cTANmQmOdR7y2g+J0e2WXywy1yS468tY+IruqEww==",
+      "license": "ISC"
+    },
     "node_modules/brace-expansion": {
       "version": "5.0.2",
       "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-5.0.2.tgz",
@@ -2042,6 +2049,34 @@
         "node": ">= 8"
       }
     },
+    "node_modules/css-select": {
+      "version": "5.2.2",
+      "resolved": "https://registry.npmjs.org/css-select/-/css-select-5.2.2.tgz",
+      "integrity": "sha512-TizTzUddG/xYLA3NXodFM0fSbNizXjOKhqiQQwvhlspadZokn1KDy0NZFS0wuEubIYAV5/c1/lAr0TaaFXEXzw==",
+      "license": "BSD-2-Clause",
+      "dependencies": {
+        "boolbase": "^1.0.0",
+        "css-what": "^6.1.0",
+        "domhandler": "^5.0.2",
+        "domutils": "^3.0.1",
+        "nth-check": "^2.0.1"
+      },
+      "funding": {
+        "url": "https://github.com/sponsors/fb55"
+      }
+    },
+    "node_modules/css-what": {
+      "version": "6.2.2",
+      "resolved": "https://registry.npmjs.org/css-what/-/css-what-6.2.2.tgz",
+      "integrity": "sha512-u/O3vwbptzhMs3L1fQE82ZSLHQQfto5gyZzwteVIEyeaY5Fc7R4dapF/BvRoSYFeqfBk4m0V1Vafq5Pjv25wvA==",
+      "license": "BSD-2-Clause",
+      "engines": {
+        "node": ">= 6"
+      },
+      "funding": {
+        "url": "https://github.com/sponsors/fb55"
+      }
+    },
     "node_modules/debug": {
       "version": "4.4.3",
       "resolved": "https://registry.npmjs.org/debug/-/debug-4.4.3.tgz",
@@ -2067,6 +2102,61 @@
       "dev": true,
       "license": "MIT"
     },
+    "node_modules/dom-serializer": {
+      "version": "2.0.0",
+      "resolved": "https://registry.npmjs.org/dom-serializer/-/dom-serializer-2.0.0.tgz",
+      "integrity": "sha512-wIkAryiqt/nV5EQKqQpo3SToSOV9J0DnbJqwK7Wv/Trc92zIAYZ4FlMu+JPFW1DfGFt81ZTCGgDEabffXeLyJg==",
+      "license": "MIT",
+      "dependencies": {
+        "domelementtype": "^2.3.0",
+        "domhandler": "^5.0.2",
+        "entities": "^4.2.0"
+      },
+      "funding": {
+        "url": "https://github.com/cheeriojs/dom-serializer?sponsor=1"
+      }
+    },
+    "node_modules/domelementtype": {
+      "version": "2.3.0",
+      "resolved": "https://registry.npmjs.org/domelementtype/-/domelementtype-2.3.0.tgz",
+      "integrity": "sha512-OLETBj6w0OsagBwdXnPdN0cnMfF9opN69co+7ZrbfPGrdpPVNBUj02spi6B1N7wChLQiPn4CSH/zJvXw56gmHw==",
+      "funding": [
+        {
+          "type": "github",
+          "url": "https://github.com/sponsors/fb55"
+        }
+      ],
+      "license": "BSD-2-Clause"
+    },
+    "node_modules/domhandler": {
+      "version": "5.0.3",
+      "resolved": "https://registry.npmjs.org/domhandler/-/domhandler-5.0.3.tgz",
+      "integrity": "sha512-cgwlv/1iFQiFnU96XXgROh8xTeetsnJiDsTc7TYCLFd9+/WNkIqPTxiM/8pSd8VIrhXGTf1Ny1q1hquVqDJB5w==",
+      "license": "BSD-2-Clause",
+      "dependencies": {
+        "domelementtype": "^2.3.0"
+      },
+      "engines": {
+        "node": ">= 4"
+      },
+      "funding": {
+        "url": "https://github.com/fb55/domhandler?sponsor=1"
+      }
+    },
+    "node_modules/domutils": {
+      "version": "3.2.2",
+      "resolved": "https://registry.npmjs.org/domutils/-/domutils-3.2.2.tgz",
+      "integrity": "sha512-6kZKyUajlDuqlHKVX1w7gyslj9MPIXzIFiz/rGu35uC1wMi+kMhQwGhl4lt9unC9Vb9INnY9Z3/ZA3+FhASLaw==",
+      "license": "BSD-2-Clause",
+      "dependencies": {
+        "dom-serializer": "^2.0.0",
+        "domelementtype": "^2.3.0",
+        "domhandler": "^5.0.3"
+      },
+      "funding": {
+        "url": "https://github.com/fb55/domutils?sponsor=1"
+      }
+    },
     "node_modules/emoji-regex": {
       "version": "8.0.0",
       "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-8.0.0.tgz",
@@ -2074,6 +2164,18 @@
       "dev": true,
       "license": "MIT"
     },
+    "node_modules/entities": {
+      "version": "4.5.0",
+      "resolved": "https://registry.npmjs.org/entities/-/entities-4.5.0.tgz",
+      "integrity": "sha512-V0hjH4dGPh9Ao5p0MoRY6BVqtwCjhz6vI5LT8AJ55H+4g9/4vbHx1I54fS0XuclLhDHArPQCiMjDxjaL8fPxhw==",
+      "license": "BSD-2-Clause",
+      "engines": {
+        "node": ">=0.12"
+      },
+      "funding": {
+        "url": "https://github.com/fb55/entities?sponsor=1"
+      }
+    },
     "node_modules/environment": {
       "version": "1.1.0",
       "resolved": "https://registry.npmjs.org/environment/-/environment-1.1.0.tgz",
@@ -2513,6 +2615,15 @@
         "node": ">=8"
       }
     },
+    "node_modules/he": {
+      "version": "1.2.0",
+      "resolved": "https://registry.npmjs.org/he/-/he-1.2.0.tgz",
+      "integrity": "sha512-F/1DnUGPopORZi0ni+CvrCgHQ5FyEAHRLSApuYWMmrbSwoN2Mn/7k+Gl38gJnR7yyDZk6WLXwiGod1JOWNDKGw==",
+      "license": "MIT",
+      "bin": {
+        "he": "bin/he"
+      }
+    },
     "node_modules/headers-polyfill": {
       "version": "4.0.3",
       "resolved": "https://registry.npmjs.org/headers-polyfill/-/headers-polyfill-4.0.3.tgz",
@@ -3166,6 +3277,28 @@
       "dev": true,
       "license": "MIT"
     },
+    "node_modules/node-html-parser": {
+      "version": "7.1.0",
+      "resolved": "https://registry.npmjs.org/node-html-parser/-/node-html-parser-7.1.0.tgz",
+      "integrity": "sha512-iJo8b2uYGT40Y8BTyy5ufL6IVbN8rbm/1QK2xffXU/1a/v3AAa0d1YAoqBNYqaS4R/HajkWIpIfdE6KcyFh1AQ==",
+      "license": "MIT",
+      "dependencies": {
+        "css-select": "^5.1.0",
+        "he": "1.2.0"
+      }
+    },
+    "node_modules/nth-check": {
+      "version": "2.1.1",
+      "resolved": "https://registry.npmjs.org/nth-check/-/nth-check-2.1.1.tgz",
+      "integrity": "sha512-lqjrjmaOoAnWfMmBPL+XNnynZh2+swxiX3WUE0s4yEHI6m+AwrK2UZOimIRl3X/4QctVqS8AiZjFqyOGrMXb/w==",
+      "license": "BSD-2-Clause",
+      "dependencies": {
+        "boolbase": "^1.0.0"
+      },
+      "funding": {
+        "url": "https://github.com/fb55/nth-check?sponsor=1"
+      }
+    },
     "node_modules/obug": {
       "version": "2.1.1",
       "resolved": "https://registry.npmjs.org/obug/-/obug-2.1.1.tgz",
diff --git a/package.json b/package.json
index b9b2626..a1cd25d 100644
--- a/package.json
+++ b/package.json
@@ -60,6 +60,7 @@
   "dependencies": {
     "chalk": "^5.4.1",
     "commander": "^13.1.0",
+    "node-html-parser": "^7.1.0",
     "turndown": "^7.2.2",
     "yaml": "^2.7.0"
   },
diff --git a/src/checks/content-structure/section-header-quality.ts b/src/checks/content-structure/section-header-quality.ts
index 3010ab0..82484ec 100644
--- a/src/checks/content-structure/section-header-quality.ts
+++ b/src/checks/content-structure/section-header-quality.ts
@@ -1,12 +1,291 @@
+import { parse } from 'node-html-parser';
 import { registerCheck } from '../registry.js';
-import type { CheckContext, CheckResult } from '../../types.js';
+import type { CheckContext, CheckResult, CheckStatus } from '../../types.js';
+import type { DetectedTabGroup } from '../../helpers/detect-tabs.js';
+
+interface TabbedPageResult {
+  url: string;
+  tabGroups: DetectedTabGroup[];
+  totalTabbedChars: number;
+  status: CheckStatus;
+  error?: string;
+}
+
+interface GroupHeaderAnalysis {
+  url: string;
+  framework: string;
+  totalHeaders: number;
+  genericHeaders: number;
+  contextualHeaders: number;
+  hasGenericMajority: boolean;
+  hasCrossGroupGeneric: boolean;
+}
+
+const MD_HEADING_RE = /^#{1,6}\s+(.+)$/gm;
+
+/**
+ * Extract header text from content that may be HTML, markdown, or a mix (MDX).
+ * Always runs both passes: HTML heading elements first, then markdown-style headings appended.
+ */
+function extractHeaders(content: string): string[] {
+  const headers: string[] = [];
+
+  // HTML headers: trimmed text of every h1-h6 element; empty headings are dropped
+  const root = parse(content);
+  const htmlHeaders = root.querySelectorAll('h1, h2, h3, h4, h5, h6');
+  for (const h of htmlHeaders) {
+    const text = h.textContent.trim();
+    if (text.length > 0) headers.push(text);
+  }
+
+  // Markdown headers (## Heading), matched on the raw string. NOTE(review): "# ..." lines inside code samples match too
+  let match;
+  while ((match = MD_HEADING_RE.exec(content)) !== null) {
+    const text = match[1].trim();
+    if (text.length > 0) headers.push(text);
+  }
+
+  return headers;
+}
+
+async function check(ctx: CheckContext): Promise<CheckResult> {
+  const id = 'section-header-quality';
+  const category = 'content-structure';
+
+  const tabResult = ctx.previousResults.get('tabbed-content-serialization');
+
+  if (!tabResult || tabResult.status === 'skip') {
+    return {
+      id,
+      category,
+      status: 'skip',
+      message: 'Skipped: tabbed-content-serialization did not run',
+    };
+  }
+
+  const tabbedPages = (tabResult.details?.tabbedPages as TabbedPageResult[] | undefined) ?? [];
+  const pagesWithGroups = tabbedPages.filter((p) => p.tabGroups && p.tabGroups.length > 0);
+
+  if (pagesWithGroups.length === 0) {
+    return {
+      id,
+      category,
+      status: 'pass',
+      message: 'No tabbed content found; header quality check not applicable',
+    };
+  }
+
+  const analyses: GroupHeaderAnalysis[] = [];
+  // Track unique headers per analysis for cross-group pass
+  const analysisHeaderSets: Set<string>[] = [];
+
+  for (const page of pagesWithGroups) {
+    for (const group of page.tabGroups) {
+      if (group.panels.length < 2) continue;
+
+      // Extract headers from each panel
+      const panelHeaders: Array<{ label: string | null; headers: string[] }> = group.panels.map(
+        (panel) => ({
+          label: panel.label,
+          headers: extractHeaders(panel.html),
+        }),
+      );
+
+      // Count how many times each header text appears across panels
+      const headerCounts = new Map<string, number>();
+      const uniqueHeaders = new Set<string>();
+      for (const ph of panelHeaders) {
+        for (const h of ph.headers) {
+          const lower = h.toLowerCase();
+          headerCounts.set(lower, (headerCounts.get(lower) ?? 0) + 1);
+          uniqueHeaders.add(lower);
+        }
+      }
+
+      const allHeaders = panelHeaders.flatMap((ph) => ph.headers);
+      let genericCount = 0;
+      let contextualCount = 0;
+
+      for (const ph of panelHeaders) {
+        for (const h of ph.headers) {
+          const lower = h.toLowerCase();
+          const appearsInMultiple = (headerCounts.get(lower) ?? 0) >= 2;
+
+          // A header is contextual if it includes the panel label or is unique
+          const includesLabel = ph.label != null && lower.includes(ph.label.toLowerCase());
+
+          if (includesLabel || !appearsInMultiple) {
+            contextualCount++;
+          } else {
+            genericCount++;
+          }
+        }
+      }
+
+      const totalHeaders = allHeaders.length;
+      const hasGenericMajority = totalHeaders > 0 && genericCount > totalHeaders / 2;
+
+      analysisHeaderSets.push(uniqueHeaders);
+      analyses.push({
+        url: page.url,
+        framework: group.framework,
+        totalHeaders,
+        genericHeaders: genericCount,
+        contextualHeaders: contextualCount,
+        hasGenericMajority,
+        hasCrossGroupGeneric: false,
+      });
+    }
+  }
+
+  // Cross-group analysis: detect identical headers repeated across separate tab groups
+  // on the same page without variant context (e.g. "Build a MongoDB Search Query"
+  // appearing in 7 driver-specific tab groups).
+  let crossGroupGenericGroupCount = 0;
+  let crossGroupTotalGroupCount = 0;
+  const crossGroupRepeatedHeaders: Array<{ url: string; header: string; groupCount: number }> = [];
+
+  for (const page of pagesWithGroups) {
+    if (page.tabGroups.length < 2) continue;
+
+    // Collect all panel labels and unique headers per group
+    const allLabels = new Set<string>();
+    const perGroup: Set<string>[] = [];
+    for (const group of page.tabGroups) {
+      const headers = new Set<string>();
+      for (const panel of group.panels) {
+        if (panel.label) allLabels.add(panel.label.toLowerCase());
+        for (const h of extractHeaders(panel.html)) headers.add(h.toLowerCase());
+      }
+      perGroup.push(headers);
+    }
+
+    // Count how many groups each header appears in
+    const headerGroupCount = new Map<string, number>();
+    for (const hs of perGroup) {
+      for (const h of hs) headerGroupCount.set(h, (headerGroupCount.get(h) ?? 0) + 1);
+    }
+
+    // A header is cross-group generic if it appears in 2+ groups and doesn't
+    // include any panel label (i.e. lacks variant context)
+    const crossGenericSet = new Set<string>();
+    for (const [header, count] of headerGroupCount) {
+      if (count >= 2 && ![...allLabels].some((l) => header.includes(l))) {
+        crossGenericSet.add(header);
+        crossGroupRepeatedHeaders.push({ url: page.url, header, groupCount: count });
+      }
+    }
+
+    // Count groups affected by cross-group generic headers
+    for (const hs of perGroup) {
+      if (hs.size === 0) continue;
+      crossGroupTotalGroupCount++;
+      if ([...hs].some((h) => crossGenericSet.has(h))) crossGroupGenericGroupCount++;
+    }
+
+    // Update individual analyses with cross-group flag
+    if (crossGenericSet.size > 0) {
+      for (let i = 0; i < analyses.length; i++) {
+        if (analyses[i].url !== page.url) continue;
+        if ([...analysisHeaderSets[i]].some((h) => crossGenericSet.has(h))) {
+          analyses[i].hasCrossGroupGeneric = true;
+        }
+      }
+    }
+  }
+
+  if (analyses.length === 0 && crossGroupTotalGroupCount === 0) {
+    return {
+      id,
+      category,
+      status: 'pass',
+      message: 'Tab groups have fewer than 2 panels; header quality check not applicable',
+    };
+  }
+
+  const groupsWithGenericMajority = analyses.filter((a) => a.hasGenericMajority).length;
+  const groupsWithHeaders = analyses.filter((a) => a.totalHeaders > 0).length;
+
+  // If no tab panels contain any section headers, we can't evaluate quality
+  if (groupsWithHeaders === 0 && crossGroupTotalGroupCount === 0) {
+    return {
+      id,
+      category,
+      status: 'skip',
+      message: `${pagesWithGroups.length} page(s) with tabs found, but no section headers inside tab panels to evaluate`,
+    };
+  }
+
+  // Identify affected pages: pages where any group has within-group or cross-group issues
+  const pagesWithWithinGroupIssues = new Set(
+    analyses.filter((a) => a.hasGenericMajority).map((a) => a.url),
+  );
+  const pagesWithCrossGroupIssues = new Set(crossGroupRepeatedHeaders.map((h) => h.url));
+  const affectedPages = new Set([...pagesWithWithinGroupIssues, ...pagesWithCrossGroupIssues]);
+
+  // Count pages where we actually found headers to evaluate
+  const pagesWithHeaders = new Set(analyses.filter((a) => a.totalHeaders > 0).map((a) => a.url));
+
+  // Scoring: use group-level ratios for fine-grained thresholds
+  // Within-group: ratio of groups-with-headers that have majority-generic
+  let withinStatus: CheckStatus = 'pass';
+  if (groupsWithHeaders > 0) {
+    const wRatio = groupsWithGenericMajority / groupsWithHeaders;
+    if (wRatio > 0.5) withinStatus = 'fail';
+    else if (wRatio > 0.25) withinStatus = 'warn';
+  }
+
+  // Cross-group: ratio of groups on multi-group pages that have cross-group generics
+  let crossGroupStatus: CheckStatus = 'pass';
+  if (crossGroupTotalGroupCount > 0) {
+    const cRatio = crossGroupGenericGroupCount / crossGroupTotalGroupCount;
+    if (cRatio > 0.5) crossGroupStatus = 'fail';
+    else if (cRatio > 0.25) crossGroupStatus = 'warn';
+  }
+
+  // Combined status: worst of both
+  const statusRank: Record<CheckStatus, number> = { pass: 0, skip: 0, warn: 1, fail: 2, error: 2 };
+  const status: CheckStatus =
+    statusRank[crossGroupStatus] > statusRank[withinStatus] ? crossGroupStatus : withinStatus;
+
+  // Build a page-oriented message for docs teams
+  let message: string;
+  if (affectedPages.size === 0) {
+    message = `${pagesWithHeaders.size} page(s) with tab headers checked; headers include variant context`;
+  } else {
+    // Find the most-repeated cross-group header for a concrete example
+    const worstHeader =
+      crossGroupRepeatedHeaders.length > 0
+        ? [...crossGroupRepeatedHeaders].sort((a, b) => b.groupCount - a.groupCount)[0]
+        : null;
+
+    const pageSummary =
+      `${affectedPages.size} of ${pagesWithHeaders.size} page(s) with tab headers ` +
+      `don't distinguish between variants`;
+
+    if (worstHeader) {
+      message = `${pageSummary} (e.g. "${worstHeader.header}" repeats across ${worstHeader.groupCount} tab groups)`;
+    } else {
+      message = pageSummary;
+    }
+  }
 
-async function check(_ctx: CheckContext): Promise<CheckResult> {
   return {
-    id: 'section-header-quality',
-    category: 'content-structure',
-    status: 'skip',
-    message: 'Not yet implemented',
+    id,
+    category,
+    status,
+    message,
+    details: {
+      pagesWithTabs: pagesWithGroups.length,
+      pagesAffected: affectedPages.size,
+      totalGroupsAnalyzed: analyses.length,
+      groupsWithHeaders,
+      groupsWithGenericMajority,
+      crossGroupGenericGroupCount,
+      crossGroupTotalGroupCount,
+      crossGroupRepeatedHeaders,
+      analyses,
+    },
   };
 }
 
@@ -14,6 +293,8 @@ registerCheck({
   id: 'section-header-quality',
   category: 'content-structure',
   description: 'Whether headers in tabbed sections include variant context',
-  dependsOn: ['tabbed-content-serialization'],
+  // No hard dependency: we read from previousResults if available,
+  // but the check handles missing data gracefully (returns skip).
+  dependsOn: [],
   run: check,
 });
diff --git a/src/checks/content-structure/tabbed-content-serialization.ts b/src/checks/content-structure/tabbed-content-serialization.ts
index 6e91f67..3126ef0 100644
--- a/src/checks/content-structure/tabbed-content-serialization.ts
+++ b/src/checks/content-structure/tabbed-content-serialization.ts
@@ -1,12 +1,229 @@
 import { registerCheck } from '../registry.js';
-import type { CheckContext, CheckResult } from '../../types.js';
+import { discoverAndSamplePages } from '../../helpers/get-page-urls.js';
+import { htmlToMarkdown } from '../../helpers/html-to-markdown.js';
+import { fetchPage } from '../../helpers/fetch-page.js';
+import { detectTabGroups } from '../../helpers/detect-tabs.js';
+import { toMdUrls } from '../../helpers/to-md-urls.js';
+import type { CheckContext, CheckResult, CheckStatus } from '../../types.js';
+import type { DetectedTabGroup } from '../../helpers/detect-tabs.js';
+
+interface TabbedPageResult {
+  url: string;
+  tabGroups: DetectedTabGroup[];
+  totalTabbedChars: number;
+  status: CheckStatus;
+  source?: 'html' | 'md-fallback' | 'markdown';
+  error?: string;
+}
+
+function sizeStatus(chars: number): CheckStatus { // inclusive thresholds: <=50K pass, <=100K warn, above that fail
+  if (chars <= 50_000) return 'pass';
+  if (chars <= 100_000) return 'warn';
+  return 'fail';
+}
+
+function worstStatus(statuses: CheckStatus[]): CheckStatus { // reduce many statuses to one: fail beats warn beats pass
+  if (statuses.includes('fail')) return 'fail';
+  if (statuses.includes('warn')) return 'warn';
+  return 'pass'; // also the result for an empty list or a list holding only other statuses
+}
+
+function formatSize(chars: number): string {
+  if (chars >= 1000) return `${Math.round(chars / 1000)}K`; // nearest thousand: 1499 -> "1K", 1500 -> "2K"
+  return String(chars);
+}
+
+/**
+ * Try each candidate from toMdUrls in order; returns the first non-empty text/markdown or text/plain body, else null.
+ */
+async function tryMdFallback(ctx: CheckContext, pageUrl: string): Promise<string | null> {
+  const candidates = toMdUrls(pageUrl);
+  for (const mdUrl of candidates) {
+    try {
+      const response = await ctx.http.fetch(mdUrl);
+      if (!response.ok) continue;
+      const contentType = response.headers.get('content-type') ?? '';
+      if (!contentType.includes('text/markdown') && !contentType.includes('text/plain')) continue;
+      const body = await response.text();
+      // Sanity check: non-empty and not starting with "<!" (e.g. a doctype or HTML comment)
+      if (body.length > 0 && !body.trimStart().startsWith('<!')) return body;
+    } catch {
+      // Best-effort: a candidate that throws is skipped and the next one is tried
+    }
+  }
+  return null;
+}
+
+/**
+ * Check whether the rendering-strategy check flagged a specific URL as an SPA shell.
+ * True only if its pageResults has an entry for this exact URL with status 'fail'; false if no results exist.
+ */
+function isSpaShell(ctx: CheckContext, url: string): boolean {
+  const renderResult = ctx.previousResults.get('rendering-strategy');
+  if (!renderResult?.details?.pageResults) return false;
+  const pageResults = renderResult.details.pageResults as Array<{ url: string; status: string }>;
+  const match = pageResults.find((r) => r.url === url);
+  return match?.status === 'fail';
+}
+
+async function analyzePage(ctx: CheckContext, url: string): Promise<TabbedPageResult> {
+  const page = await fetchPage(ctx, url);
+
+  // Non-HTML (markdown) responses: run tab detection on the body as-is
+  if (!page.isHtml) {
+    const tabGroups = detectTabGroups(page.body);
+    if (tabGroups.length === 0) {
+      return { url, tabGroups: [], totalTabbedChars: 0, status: 'pass', source: 'markdown' };
+    }
+    // Markdown is not converted, so the size is the summed raw length of each group's htmlSlice
+    let totalTabbedChars = 0;
+    for (const group of tabGroups) {
+      totalTabbedChars += group.htmlSlice.length;
+    }
+    return {
+      url,
+      tabGroups,
+      totalTabbedChars,
+      status: sizeStatus(totalTabbedChars),
+      source: 'markdown',
+    };
+  }
+
+  // HTML response: detect tabs in the HTML and measure each group after htmlToMarkdown conversion
+  const tabGroups = detectTabGroups(page.body);
+  if (tabGroups.length > 0) {
+    let totalTabbedChars = 0;
+    for (const group of tabGroups) {
+      const md = htmlToMarkdown(group.htmlSlice);
+      totalTabbedChars += md.length;
+    }
+    return {
+      url,
+      tabGroups,
+      totalTabbedChars,
+      status: sizeStatus(totalTabbedChars),
+      source: 'html',
+    };
+  }
+
+  // No tabs found in HTML. If rendering-strategy flagged this as an SPA shell,
+  // fetch the page's .md URL candidates as a fallback so tab content can still
+  // be analyzed; those bodies are measured raw, like the markdown branch above.
+  if (isSpaShell(ctx, url)) {
+    const mdBody = await tryMdFallback(ctx, url);
+    if (mdBody) {
+      const mdTabGroups = detectTabGroups(mdBody);
+      if (mdTabGroups.length > 0) {
+        let totalTabbedChars = 0;
+        for (const group of mdTabGroups) {
+          totalTabbedChars += group.htmlSlice.length;
+        }
+        return {
+          url,
+          tabGroups: mdTabGroups,
+          totalTabbedChars,
+          status: sizeStatus(totalTabbedChars),
+          source: 'md-fallback',
+        };
+      }
+    }
+  }
+
+  return { url, tabGroups: [], totalTabbedChars: 0, status: 'pass', source: 'html' }; // no tabs found; source stays 'html' even if the .md fallback was tried
+}
+
+async function check(ctx: CheckContext): Promise<CheckResult> {
+  const id = 'tabbed-content-serialization';
+  const category = 'content-structure';
+
+  const {
+    urls: pageUrls,
+    totalPages,
+    sampled: wasSampled,
+    warnings,
+  } = await discoverAndSamplePages(ctx);
+
+  const results: TabbedPageResult[] = [];
+  const concurrency = ctx.options.maxConcurrency;
+
+  for (let i = 0; i < pageUrls.length; i += concurrency) {
+    const batch = pageUrls.slice(i, i + concurrency);
+    const batchResults = await Promise.all(
+      batch.map(async (url): Promise<TabbedPageResult> => {
+        try {
+          return await analyzePage(ctx, url);
+        } catch (err) {
+          return {
+            url,
+            tabGroups: [],
+            totalTabbedChars: 0,
+            status: 'fail',
+            error: err instanceof Error ? err.message : String(err),
+          };
+        }
+      }),
+    );
+    results.push(...batchResults);
+  }
+
+  const successful = results.filter((r) => !r.error);
+  const fetchErrors = results.filter((r) => r.error).length;
+
+  if (successful.length === 0) {
+    const suffix = fetchErrors > 0 ? `; ${fetchErrors} failed to fetch` : '';
+    return {
+      id,
+      category,
+      status: 'fail',
+      message: `Could not fetch any pages to analyze${suffix}`,
+      details: {
+        totalPages,
+        testedPages: results.length,
+        sampled: wasSampled,
+        fetchErrors,
+        tabbedPages: results,
+        discoveryWarnings: warnings,
+      },
+    };
+  }
+
+  const pagesWithTabs = successful.filter((r) => r.tabGroups.length > 0);
+  const totalGroupsFound = successful.reduce((sum, r) => sum + r.tabGroups.length, 0);
+  const overallStatus = worstStatus(successful.map((r) => r.status));
+  const pageLabel = wasSampled ? 'sampled pages' : 'pages';
+
+  let message: string;
+  if (totalGroupsFound === 0) {
+    message = `No tabbed content detected across ${successful.length} ${pageLabel}`;
+  } else if (overallStatus === 'pass') {
+    message = `${totalGroupsFound} tab group(s) across ${pagesWithTabs.length} of ${successful.length} ${pageLabel}; all serialize under 50K chars`;
+  } else if (overallStatus === 'warn') {
+    const worst = Math.max(...successful.map((r) => r.totalTabbedChars));
+    message = `${totalGroupsFound} tab group(s) found; worst page serializes to ${formatSize(worst)} chars (50K–100K)`;
+  } else {
+    const worst = Math.max(...successful.map((r) => r.totalTabbedChars));
+    message = `${totalGroupsFound} tab group(s) found; worst page serializes to ${formatSize(worst)} chars (over 100K)`;
+  }
+
+  if (fetchErrors > 0) {
+    message += `; ${fetchErrors} failed to fetch`;
+  }
 
-async function check(_ctx: CheckContext): Promise<CheckResult> {
   return {
-    id: 'tabbed-content-serialization',
-    category: 'content-structure',
-    status: 'skip',
-    message: 'Not yet implemented',
+    id,
+    category,
+    status: overallStatus,
+    message,
+    details: {
+      totalPages,
+      testedPages: results.length,
+      sampled: wasSampled,
+      pagesWithTabs: pagesWithTabs.length,
+      totalGroupsFound,
+      fetchErrors,
+      tabbedPages: results,
+      discoveryWarnings: warnings,
+    },
   };
 }
 
diff --git a/src/checks/index.ts b/src/checks/index.ts
index c8e0221..ecbfe0a 100644
--- a/src/checks/index.ts
+++ b/src/checks/index.ts
@@ -12,6 +12,7 @@ import './markdown-availability/markdown-url-support.js';
 import './markdown-availability/content-negotiation.js';
 
 // Category 3: Page Size
+import './page-size/rendering-strategy.js';
 import './page-size/page-size-markdown.js';
 import './page-size/page-size-html.js';
 import './page-size/content-start-position.js';
diff --git a/src/checks/page-size/content-start-position.ts b/src/checks/page-size/content-start-position.ts
index 64c55a2..f3dc584 100644
--- a/src/checks/page-size/content-start-position.ts
+++ b/src/checks/page-size/content-start-position.ts
@@ -1,7 +1,7 @@
 import { registerCheck } from '../registry.js';
-import { looksLikeHtml } from '../../helpers/detect-markdown.js';
 import { discoverAndSamplePages } from '../../helpers/get-page-urls.js';
 import { htmlToMarkdown } from '../../helpers/html-to-markdown.js';
+import { fetchPage } from '../../helpers/fetch-page.js';
 import type { CheckContext, CheckResult, CheckStatus } from '../../types.js';
 
 interface PagePositionResult {
@@ -184,14 +184,8 @@ async function check(ctx: CheckContext): Promise<CheckResult> {
     const batchResults = await Promise.all(
       batch.map(async (url): Promise<PagePositionResult> => {
         try {
-          const response = await ctx.http.fetch(url);
-          const body = await response.text();
-          const contentType = response.headers.get('content-type') ?? '';
-          const isMarkdownType =
-            contentType.includes('text/markdown') || contentType.includes('text/plain');
-          const isHtml =
-            !isMarkdownType && (contentType.includes('text/html') || looksLikeHtml(body));
-          const markdown = isHtml ? htmlToMarkdown(body) : body;
+          const page = await fetchPage(ctx, url);
+          const markdown = page.isHtml ? htmlToMarkdown(page.body) : page.body;
           const totalChars = markdown.length;
           const contentStartChar = findContentStart(markdown);
           const contentStartPercent =
diff --git a/src/checks/page-size/page-size-html.ts b/src/checks/page-size/page-size-html.ts
index dff37bf..65ea027 100644
--- a/src/checks/page-size/page-size-html.ts
+++ b/src/checks/page-size/page-size-html.ts
@@ -1,7 +1,7 @@
 import { registerCheck } from '../registry.js';
-import { looksLikeHtml } from '../../helpers/detect-markdown.js';
 import { discoverAndSamplePages } from '../../helpers/get-page-urls.js';
 import { htmlToMarkdown } from '../../helpers/html-to-markdown.js';
+import { fetchPage } from '../../helpers/fetch-page.js';
 import type { CheckContext, CheckResult, CheckStatus } from '../../types.js';
 
 interface PageSizeResult {
@@ -50,18 +50,12 @@ async function check(ctx: CheckContext): Promise<CheckResult> {
     const batchResults = await Promise.all(
       batch.map(async (url): Promise<PageSizeResult> => {
         try {
-          const response = await ctx.http.fetch(url);
-          const body = await response.text();
-          const contentType = response.headers.get('content-type') ?? '';
-          const isMarkdownType =
-            contentType.includes('text/markdown') || contentType.includes('text/plain');
-          const isHtml =
-            !isMarkdownType && (contentType.includes('text/html') || looksLikeHtml(body));
+          const page = await fetchPage(ctx, url);
 
           // Skip conversion if the response is already markdown
-          const html = isHtml ? body : '';
+          const html = page.isHtml ? page.body : '';
           const htmlChars = html.length;
-          const converted = isHtml ? htmlToMarkdown(body) : body;
+          const converted = page.isHtml ? htmlToMarkdown(page.body) : page.body;
           const convertedChars = converted.length;
           const ratio = htmlChars > 0 ? Math.round((1 - convertedChars / htmlChars) * 100) : 0;
 
diff --git a/src/checks/page-size/rendering-strategy.ts b/src/checks/page-size/rendering-strategy.ts
new file mode 100644
index 0000000..be5624a
--- /dev/null
+++ b/src/checks/page-size/rendering-strategy.ts
@@ -0,0 +1,182 @@
+import { registerCheck } from '../registry.js';
+import { discoverAndSamplePages } from '../../helpers/get-page-urls.js';
+import { fetchPage } from '../../helpers/fetch-page.js';
+import { analyzeRendering, type RenderingAnalysis } from '../../helpers/detect-rendering.js';
+import type { CheckContext, CheckResult, CheckStatus } from '../../types.js';
+
+interface PageRenderingResult {
+  url: string;
+  status: CheckStatus;
+  analysis: RenderingAnalysis;
+  error?: string;
+}
+
+/** Grade one analyzed page: no SPA marker or real content passes; sparse warns; empty fails. */
+function pageStatus(analysis: RenderingAnalysis): CheckStatus {
+  const { hasSpaMarkers, hasContent, contentHeadings, contentParagraphs, codeBlocks } = analysis;
+
+  // A page without framework markers, or one with enough content signals, is fine.
+  if (!hasSpaMarkers || hasContent) {
+    return 'pass';
+  }
+
+  // SPA markers present but only sparse content is borderline; a marker with
+  // no heading, paragraph, or code signal at all is treated as an empty shell.
+  const hasSomeStructure = contentHeadings >= 1 || contentParagraphs >= 2 || codeBlocks >= 1;
+
+  return hasSomeStructure ? 'warn' : 'fail';
+}
+
+/** Collapse per-page statuses into the most severe one: fail, then warn, else pass. */
+function worstStatus(statuses: CheckStatus[]): CheckStatus {
+  const mostSevere = (['fail', 'warn'] as const).find((level) => statuses.includes(level));
+  return mostSevere ?? 'pass';
+}
+
+async function check(ctx: CheckContext): Promise<CheckResult> {
+  const id = 'rendering-strategy';
+  const category = 'page-size';
+  // Samples pages, analyzes each HTML response for SPA-shell signals, and aggregates a result.
+  const {
+    urls: pageUrls,
+    totalPages,
+    sampled: wasSampled,
+    warnings,
+  } = await discoverAndSamplePages(ctx);
+
+  const results: PageRenderingResult[] = [];
+  const concurrency = ctx.options.maxConcurrency;
+  // Fetch and grade pages in batches of at most `concurrency` parallel requests.
+  for (let i = 0; i < pageUrls.length; i += concurrency) {
+    const batch = pageUrls.slice(i, i + concurrency);
+    const batchResults = await Promise.all(
+      batch.map(async (url): Promise<PageRenderingResult> => {
+        try {
+          const page = await fetchPage(ctx, url);
+
+          // Only analyze HTML responses — markdown responses are inherently accessible
+          if (!page.isHtml) {
+            return {
+              url,
+              status: 'pass',
+              analysis: {
+                hasContent: true,
+                hasSpaMarkers: false,
+                spaMarker: null,
+                contentHeadings: 0,
+                contentParagraphs: 0,
+                codeBlocks: 0,
+                hasMainContent: false,
+                visibleTextLength: page.body.length,
+                htmlLength: 0,
+              },
+            };
+          }
+
+          const analysis = analyzeRendering(page.body);
+          return { url, status: pageStatus(analysis), analysis };
+        } catch (err) {
+          return {
+            url,
+            status: 'fail', // results carrying `error` are filtered out of grading below
+            analysis: {
+              hasContent: false,
+              hasSpaMarkers: false,
+              spaMarker: null,
+              contentHeadings: 0,
+              contentParagraphs: 0,
+              codeBlocks: 0,
+              hasMainContent: false,
+              visibleTextLength: 0,
+              htmlLength: 0,
+            },
+            error: err instanceof Error ? err.message : String(err),
+          };
+        }
+      }),
+    );
+    results.push(...batchResults);
+  }
+  // Separate pages that were analyzed from pages whose fetch threw.
+  const successful = results.filter((r) => !r.error);
+  const fetchErrors = results.filter((r) => r.error).length;
+
+  if (successful.length === 0) {
+    const suffix = fetchErrors > 0 ? `; ${fetchErrors} failed to fetch` : '';
+    return {
+      id,
+      category,
+      status: 'fail',
+      message: `Could not fetch any pages to analyze${suffix}`,
+      details: {
+        totalPages,
+        testedPages: results.length,
+        sampled: wasSampled,
+        fetchErrors,
+        pageResults: results,
+        discoveryWarnings: warnings,
+      },
+    };
+  }
+
+  const spaShells = successful.filter((r) => r.status === 'fail');
+  const sparse = successful.filter((r) => r.status === 'warn');
+  const ok = successful.filter((r) => r.status === 'pass');
+  const overallStatus = worstStatus(successful.map((r) => r.status));
+  const pageLabel = wasSampled ? 'sampled pages' : 'pages';
+  // The hint strips the attribute syntax from the marker, e.g. id="__next" -> __next.
+  // Identify the framework from the first failing page for the message
+  const firstShell = spaShells[0];
+  const frameworkHint = firstShell?.analysis.spaMarker
+    ? ` (${firstShell.analysis.spaMarker.replace('id="', '').replace('"', '')} detected)`
+    : '';
+
+  let message: string;
+  if (overallStatus === 'pass') {
+    message = `All ${successful.length} ${pageLabel} contain server-rendered content`;
+  } else if (spaShells.length > 0) {
+    message =
+      `${spaShells.length} of ${successful.length} ${pageLabel} appear to be ` +
+      `client-side rendered SPA shells${frameworkHint}; ` +
+      `agents using HTTP fetches will see no content`;
+    if (sparse.length > 0) {
+      message += `; ${sparse.length} more have page structure but little substantive content`;
+    }
+  } else {
+    message =
+      `${sparse.length} of ${successful.length} ${pageLabel} have server-rendered ` +
+      `page structure but little substantive content; agents will see headings ` +
+      `and navigation but not the page's actual documentation`;
+  }
+
+  if (fetchErrors > 0) {
+    message += `; ${fetchErrors} failed to fetch`;
+  }
+
+  return {
+    id,
+    category,
+    status: overallStatus,
+    message,
+    details: {
+      totalPages,
+      testedPages: results.length, // includes pages whose fetch failed
+      sampled: wasSampled,
+      serverRendered: ok.length,
+      sparseContent: sparse.length,
+      spaShells: spaShells.length,
+      fetchErrors,
+      pageResults: results,
+      discoveryWarnings: warnings,
+    },
+  };
+}
+
+registerCheck({
+  id: 'rendering-strategy',
+  category: 'page-size',
+  description:
+    'Whether pages contain server-rendered content or are client-side rendered SPA shells',
+  dependsOn: [],
+  run: check,
+});
diff --git a/src/helpers/detect-rendering.ts b/src/helpers/detect-rendering.ts
new file mode 100644
index 0000000..3710086
--- /dev/null
+++ b/src/helpers/detect-rendering.ts
@@ -0,0 +1,115 @@
+import { parse } from 'node-html-parser';
+
+const SPA_MARKERS = ['id="___gatsby"', 'id="__next"', 'id="__nuxt"', 'id="root"'];
+
+export interface RenderingAnalysis {
+  /** Whether enough content signals were found; always true when no SPA marker is present. */
+  hasContent: boolean;
+  /** Whether known SPA framework markers were found. */
+  hasSpaMarkers: boolean;
+  /** The first entry of SPA_MARKERS found in the HTML, or null if none matched. */
+  spaMarker: string | null;
+  /** Number of h1–h6 headings whose trimmed text is longer than 3 characters. */
+  contentHeadings: number;
+  /** Number of <p> elements whose trimmed text is longer than 30 characters. */
+  contentParagraphs: number;
+  /** Number of <pre> and <code> elements matched (the selector is 'pre, code'). */
+  codeBlocks: number;
+  /** Whether <main> / [role="main"] holds at least 2 prose paragraphs or 1 <pre>/<code>. */
+  hasMainContent: boolean;
+  /** Whitespace-collapsed body text length after removing script/style/noscript/svg. */
+  visibleTextLength: number;
+  /** Total HTML length. */
+  htmlLength: number;
+}
+
+/**
+ * Analyze whether an HTML page contains server-rendered content or is
+ * a client-side-rendered SPA shell.
+ *
+ * Rather than a text-ratio heuristic, this counts concrete content signals
+ * (headings, prose paragraphs, code elements, a populated main region).
+ * A page with no SPA marker is always reported as having content; a page
+ * with a marker must reach one of the signal thresholds computed below.
+ */
+export function analyzeRendering(html: string): RenderingAnalysis {
+  const htmlLength = html.length;
+
+  // Literal substring match on the raw HTML; the first marker in list order wins
+  let spaMarker: string | null = null;
+  for (const marker of SPA_MARKERS) {
+    if (html.includes(marker)) {
+      spaMarker = marker;
+      break;
+    }
+  }
+  const hasSpaMarkers = spaMarker !== null;
+
+  // Parse, then drop script/style/noscript/svg from the tree before measuring
+  const root = parse(html);
+  const body = root.querySelector('body') ?? root;
+
+  for (const el of body.querySelectorAll('script, style, noscript, svg')) {
+    el.remove();
+  }
+
+  // Visible text, with whitespace runs collapsed to single spaces
+  const visibleText = body.textContent.replace(/\s+/g, ' ').trim();
+  const visibleTextLength = visibleText.length;
+
+  // Content signals: headings with substantive text
+  const headings = body.querySelectorAll('h1, h2, h3, h4, h5, h6');
+  let contentHeadings = 0;
+  for (const h of headings) {
+    const text = h.textContent.trim();
+    // Skip very short headings that are likely nav labels
+    if (text.length > 3) contentHeadings++;
+  }
+
+  // Content signals: paragraphs with prose
+  const paragraphs = body.querySelectorAll('p');
+  let contentParagraphs = 0;
+  for (const p of paragraphs) {
+    const text = p.textContent.trim();
+    if (text.length > 30) contentParagraphs++;
+  }
+
+  // Content signals: <pre> and <code> elements (the selector matches both tags)
+  const codeBlocks = body.querySelectorAll('pre, code').length;
+
+  // Content signals: main content region with substantive content inside it.
+  // An SPA shell can have a <main> element with just a page title and breadcrumbs,
+  // so we check for real content (paragraphs, code) inside <main> specifically.
+  const main = body.querySelector('main, [role="main"]');
+  let hasMainContent = false;
+  if (main) {
+    const mainParas = main.querySelectorAll('p');
+    let mainParagraphs = 0;
+    for (const p of mainParas) {
+      if (p.textContent.trim().length > 30) mainParagraphs++;
+    }
+    const mainCode = main.querySelectorAll('pre, code').length;
+    hasMainContent = mainParagraphs >= 2 || mainCode >= 1;
+  }
+
+  // Determine if the page has real content
+  // A page has content if it has enough content signals, regardless of text ratio
+  const hasContent =
+    contentHeadings >= 3 ||
+    contentParagraphs >= 5 ||
+    (hasMainContent && contentHeadings >= 1) ||
+    codeBlocks >= 3 ||
+    !hasSpaMarkers; // No SPA markers = traditional server-rendered, assume content
+
+  return {
+    hasContent,
+    hasSpaMarkers,
+    spaMarker,
+    contentHeadings,
+    contentParagraphs,
+    codeBlocks,
+    hasMainContent,
+    visibleTextLength,
+    htmlLength,
+  };
+}
diff --git a/src/helpers/detect-tabs.ts b/src/helpers/detect-tabs.ts
new file mode 100644
index 0000000..8af0e55
--- /dev/null
+++ b/src/helpers/detect-tabs.ts
@@ -0,0 +1,354 @@
+import { parse, type HTMLElement } from 'node-html-parser';
+
+export interface TabPanel {
+  label: string | null; // tab label text, or null when no label could be paired with the panel
+  html: string; // markup of the panel; detectors use '' when a tab has no panel at its index
+}
+
+export interface DetectedTabGroup {
+  framework: string; // identifier of the detector that produced the group, e.g. 'sphinx'
+  tabCount: number; // number of tabs the detector attributes to this group
+  htmlSlice: string; // markup of the whole group (container outerHTML or raw <Tabs> block)
+  panels: TabPanel[]; // one entry per tab/panel position
+}
+
+type Detector = (
+  root: HTMLElement,
+  claimed: Set<HTMLElement>,
+  source?: string,
+) => DetectedTabGroup[];
+
+/** True when `ancestor` is found on `node`'s parent chain; `node` itself is not checked. */
+function isDescendantOf(node: HTMLElement, ancestor: HTMLElement): boolean {
+  // Climb parent links until the chain ends or the candidate ancestor is reached.
+  for (let cursor = node.parentNode; cursor; cursor = cursor.parentNode) {
+    if (cursor === ancestor) return true;
+  }
+  return false;
+}
+
+/** True when `node` is itself a claimed container or sits anywhere beneath one. */
+function isInsideClaimed(node: HTMLElement, claimed: Set<HTMLElement>): boolean {
+  const coveredBy = (container: HTMLElement): boolean =>
+    container === node || isDescendantOf(node, container);
+  return [...claimed].some(coveredBy);
+}
+
+/** Trimmed text of `el`, ignoring <style> descendants; `el` itself is not modified. */
+function textOf(el: HTMLElement): string {
+  // Some component libraries (e.g. LeafyGreen) embed <style> elements inside
+  // tab buttons, so work on a throwaway copy with those removed.
+  const copy = el.clone() as HTMLElement;
+  copy.querySelectorAll('style').forEach((styleEl) => styleEl.remove());
+
+  return copy.textContent.trim();
+}
+
+/**
+ * Walk up from `start` (exclusive) to the nearest ancestor that has
+ * `[role="tabpanel"]` descendants and return it with those panels. Some
+ * component libraries (e.g. LeafyGreen) don't nest panels under the tablist,
+ * so the tablist element alone is not enough to find them. Returns null if an
+ * ancestor is inside a claimed container or no ancestor within `maxDepth` matches.
+ */
+function findContainerWithPanels(
+  start: HTMLElement,
+  claimed: Set<HTMLElement>,
+  maxDepth = 4,
+): { container: HTMLElement; panels: HTMLElement[] } | null {
+  let current = start.parentNode as HTMLElement | null;
+  for (let depth = 0; current && depth < maxDepth; depth++) {
+    if (isInsideClaimed(current, claimed)) return null;
+    const panels = current.querySelectorAll('[role="tabpanel"]') as unknown as HTMLElement[];
+    if (panels.length > 0) return { container: current, panels: [...panels] };
+    current = current.parentNode as HTMLElement | null;
+  }
+  return null;
+}
+
+const docusaurus: Detector = (root, claimed) => {
+  const groups: DetectedTabGroup[] = [];
+  // Docusaurus uses role="tablist" with tabs__item class children
+  const tablists = root.querySelectorAll('[role="tablist"]');
+  for (const tablist of tablists) {
+    if (isInsideClaimed(tablist as HTMLElement, claimed)) continue;
+    const tabs = tablist.querySelectorAll('.tabs__item');
+    if (tabs.length === 0) continue; // not a Docusaurus tablist; left for later detectors
+
+    // Find the wrapping container (parent of both tablist and tabpanels).
+    // Some component libraries put tablist and tabpanels as siblings rather
+    // than direct parent/child, so we walk up until we find panels.
+    const found = findContainerWithPanels(tablist as HTMLElement, claimed);
+    if (!found) continue;
+    const { container, panels } = found;
+
+    const labels = tabs.map((t) => textOf(t as HTMLElement));
+    const panelData: TabPanel[] = panels.map((p, i) => ({
+      label: labels[i] ?? null, // paired with tabs by position; null past the last tab
+      html: p.outerHTML,
+    }));
+
+    claimed.add(container); // later detectors skip anything at or below this container
+    groups.push({
+      framework: 'docusaurus',
+      tabCount: tabs.length, // counts tab buttons; `panels` may have a different length
+      htmlSlice: container.outerHTML,
+      panels: panelData,
+    });
+  }
+  return groups;
+};
+
+const mkdocs: Detector = (root, claimed) => {
+  const groups: DetectedTabGroup[] = [];
+  const containers = root.querySelectorAll('.tabbed-set'); // each match yields at most one group
+  for (const container of containers) {
+    const el = container as HTMLElement;
+    if (isInsideClaimed(el, claimed)) continue;
+
+    const labels = el.querySelectorAll('.tabbed-labels label, .tabbed-labels > *');
+    const panels = el.querySelectorAll('.tabbed-content > .tabbed-block, .tabbed-content > *');
+
+    if (labels.length === 0 && panels.length === 0) continue; // nothing tab-like inside
+
+    const panelData: TabPanel[] = [];
+    const count = Math.max(labels.length, panels.length); // pad whichever list is shorter
+    for (let i = 0; i < count; i++) {
+      panelData.push({
+        label: labels[i] ? textOf(labels[i] as HTMLElement) : null,
+        html: panels[i] ? (panels[i] as HTMLElement).outerHTML : '',
+      });
+    }
+
+    claimed.add(el); // later detectors skip anything at or below this container
+    groups.push({
+      framework: 'mkdocs',
+      tabCount: count,
+      htmlSlice: el.outerHTML,
+      panels: panelData,
+    });
+  }
+  return groups;
+};
+
+const sphinx: Detector = (root, claimed) => {
+  const groups: DetectedTabGroup[] = [];
+  const containers = root.querySelectorAll('.sphinx-tabs'); // each match yields at most one group
+  for (const container of containers) {
+    const el = container as HTMLElement;
+    if (isInsideClaimed(el, claimed)) continue;
+
+    const tabs = el.querySelectorAll('.sphinx-tabs-tab');
+    const panels = el.querySelectorAll('.sphinx-tabs-panel');
+    if (tabs.length === 0 && panels.length === 0) continue; // nothing tab-like inside
+
+    const panelData: TabPanel[] = [];
+    const count = Math.max(tabs.length, panels.length); // pad whichever list is shorter
+    for (let i = 0; i < count; i++) {
+      panelData.push({
+        label: tabs[i] ? textOf(tabs[i] as HTMLElement) : null,
+        html: panels[i] ? (panels[i] as HTMLElement).outerHTML : '',
+      });
+    }
+
+    claimed.add(el); // later detectors skip anything at or below this container
+    groups.push({
+      framework: 'sphinx',
+      tabCount: count,
+      htmlSlice: el.outerHTML,
+      panels: panelData,
+    });
+  }
+  return groups;
+};
+
+const msLearn: Detector = (root, claimed) => {
+  const groups: DetectedTabGroup[] = [];
+  const containers = root.querySelectorAll('.tabGroup'); // each match yields at most one group
+  for (const container of containers) {
+    const el = container as HTMLElement;
+    if (isInsideClaimed(el, claimed)) continue;
+
+    const tabs = el.querySelectorAll('[role="tab"][data-tab]');
+    const panels = el.querySelectorAll('[role="tabpanel"], .tabPanel');
+    if (tabs.length === 0 || panels.length === 0) continue; // needs both tabs and panels
+
+    const panelData: TabPanel[] = [];
+    const count = Math.max(tabs.length, panels.length); // pad whichever list is shorter
+    for (let i = 0; i < count; i++) {
+      panelData.push({
+        label: tabs[i] ? textOf(tabs[i] as HTMLElement) : null,
+        html: panels[i] ? (panels[i] as HTMLElement).outerHTML : '',
+      });
+    }
+
+    claimed.add(el); // later detectors skip anything at or below this container
+    groups.push({
+      framework: 'microsoft-learn',
+      tabCount: count,
+      htmlSlice: el.outerHTML,
+      panels: panelData,
+    });
+  }
+  return groups;
+};
+
+/** Fallback detector for any unclaimed `[role="tablist"]` with tab panels nearby. */
+const genericAria: Detector = (root, claimed) => {
+  const groups: DetectedTabGroup[] = [];
+
+  for (const node of root.querySelectorAll('[role="tablist"]')) {
+    const tablist = node as HTMLElement;
+    if (isInsideClaimed(tablist, claimed)) continue;
+
+    // Tabs without panels are typically site navigation rather than content
+    // tab groups, and the serialization check only measures panel content,
+    // so a tablist with no panel-bearing ancestor is ignored.
+    const found = findContainerWithPanels(tablist, claimed);
+    if (!found) continue;
+
+    // `found.panels` is never empty, so no separate emptiness check is needed.
+    const { container, panels } = found;
+    const tabs = tablist.querySelectorAll('[role="tab"]');
+
+    // Tab count is the authority: the container may also hold panels that
+    // belong to sibling groups, so only the first `tabs.length` panels are
+    // attributed here. With no tabs at all, every panel found is used.
+    const count = tabs.length > 0 ? tabs.length : panels.length;
+    const panelData: TabPanel[] = Array.from({ length: count }, (_, i) => {
+      const tab = tabs[i];
+      const panel = panels[i];
+      return {
+        label: tab ? textOf(tab as HTMLElement) : null,
+        html: panel ? panel.outerHTML : '',
+      };
+    });
+
+    claimed.add(container);
+    groups.push({
+      framework: 'generic-aria',
+      tabCount: count,
+      htmlSlice: container.outerHTML,
+      panels: panelData,
+    });
+  }
+
+  return groups;
+};
+
+/**
+ * Find all top-level `<Tabs>...</Tabs>` blocks in raw source text, honoring
+ * nesting. Returns each block's text including its outer tags. A regex scan
+ * is used rather than node-html-parser because the DOM parser can't reliably
+ * handle `</Tabs>` followed by markdown text followed by `<Tabs>` — it merges
+ * them into one element. An opener with no matching closer yields no block,
+ * but scanning resumes right after it so later groups are still found.
+ */
+function findTabsBlocks(source: string): string[] {
+  const blocks: string[] = [];
+  const openRe = /<Tabs\b[^>]*>/gi;
+  // Matches either an opening or a closing Tabs tag. The same case-insensitive,
+  // word-bounded pattern is used for the nested scan as for the outer opener,
+  // so `<tabs>` nests correctly and `<TabsFoo>` is not mistaken for an opener.
+  const tagRe = /<Tabs\b[^>]*>|<\/Tabs>/gi;
+
+  let open: RegExpExecArray | null;
+  while ((open = openRe.exec(source)) !== null) {
+    // Start the nested scan just past the opener and track depth until the
+    // matching closer brings it back to zero.
+    tagRe.lastIndex = openRe.lastIndex;
+    let depth = 1;
+    let tag: RegExpExecArray | null;
+    while (depth > 0 && (tag = tagRe.exec(source)) !== null) {
+      depth += tag[0].startsWith('</') ? -1 : 1;
+    }
+    if (depth !== 0) continue; // unclosed: leave openRe just past this opener
+
+    // `tagRe.lastIndex` now sits right after the matching closing tag.
+    const endIdx = tagRe.lastIndex;
+    blocks.push(source.substring(open.index, endIdx));
+    // Advance the outer regex past this block to avoid re-entering it.
+    openRe.lastIndex = endIdx;
+  }
+  return blocks;
+}
+
+const TAB_OPEN_RE = /<(Tab|TabItem)\b([^>]*)>/gi;
+const ATTR_RE = /(?:^|\s)(?:name|label|value)\s*=\s*"([^"]*)"/i; // whole attribute names only
+
+/**
+ * MDX-style tabs: `<Tabs>` container with `<Tab name="...">` or
+ * `<TabItem label="...">` children. Used by MongoDB docs, Docusaurus MDX, and others.
+ *
+ * Uses regex-based block finding instead of DOM parsing because
+ * node-html-parser can't reliably parse `</Tabs>` + markdown + `<Tabs>`
+ * as separate elements in mixed markdown/HTML content.
+ */
+const mdxTabs: Detector = (_root, _claimed, source?: string) => {
+  if (!source) return [];
+  const groups: DetectedTabGroup[] = [];
+  const blocks = findTabsBlocks(source);
+
+  for (const block of blocks) {
+    // Find direct <Tab>/<TabItem> children (depth 1 inside this block).
+    // We track nesting to skip tabs inside nested <Tabs> groups.
+    const inner = block.substring(block.indexOf('>') + 1, block.lastIndexOf('</'));
+    const panels: TabPanel[] = [];
+
+    TAB_OPEN_RE.lastIndex = 0;
+    let tabMatch: RegExpExecArray | null;
+    while ((tabMatch = TAB_OPEN_RE.exec(inner)) !== null) {
+      // Nesting depth = <Tabs> openers minus </Tabs> closers seen before this tag
+      const before = inner.substring(0, tabMatch.index);
+      const opensInBefore = (before.match(/<Tabs\b/gi) || []).length;
+      const closesInBefore = (before.match(/<\/Tabs>/gi) || []).length;
+      const depth = opensInBefore - closesInBefore;
+
+      if (depth > 0) continue; // This <Tab> belongs to a nested <Tabs>
+
+      const attrs = tabMatch[2];
+      const labelMatch = attrs.match(ATTR_RE);
+      const label = labelMatch ? labelMatch[1] : null;
+
+      // NOTE(review): takes the first closing tag after the opener, so it is not nesting-aware
+      const closeTag = `</${tabMatch[1]}>`;
+      const closeIdx = inner.indexOf(closeTag, tabMatch.index + tabMatch[0].length);
+      const tabContent =
+        closeIdx !== -1
+          ? inner.substring(tabMatch.index, closeIdx + closeTag.length)
+          : inner.substring(tabMatch.index);
+
+      panels.push({ label, html: tabContent });
+    }
+
+    if (panels.length === 0) continue;
+
+    groups.push({
+      framework: 'mdx',
+      tabCount: panels.length,
+      htmlSlice: block,
+      panels,
+    });
+  }
+  return groups;
+};
+
+const frameworkDetectors: Detector[] = [docusaurus, mkdocs, sphinx, msLearn, mdxTabs];
+
+/**
+ * Detect tab groups in `html`. Framework-specific detectors run first, in
+ * list order, sharing one `claimed` set; the generic ARIA detector runs last
+ * so it only reports tablists outside every container claimed before it.
+ */
+export function detectTabGroups(html: string): DetectedTabGroup[] {
+  const root = parse(html);
+  const claimed = new Set<HTMLElement>();
+  const groups: DetectedTabGroup[] = [];
+
+  // The raw string is passed along as well for detectors that scan source text.
+  for (const detect of frameworkDetectors) {
+    groups.push(...detect(root, claimed, html));
+  }
+  groups.push(...genericAria(root, claimed));
+  return groups;
+}
diff --git a/src/helpers/fetch-page.ts b/src/helpers/fetch-page.ts
new file mode 100644
index 0000000..33cc1ad
--- /dev/null
+++ b/src/helpers/fetch-page.ts
@@ -0,0 +1,23 @@
+import { looksLikeHtml } from './detect-markdown.js';
+import type { CheckContext, FetchedPage } from '../types.js';
+
+/**
+ * Fetch a page URL and return its body, content-type header, and whether it
+ * should be treated as HTML. Results are memoized on `ctx.htmlCache` so
+ * multiple checks in the same run don't re-request the same URL.
+ */
+export async function fetchPage(ctx: CheckContext, url: string): Promise<FetchedPage> {
+  const hit = ctx.htmlCache.get(url);
+  if (hit) return hit;
+
+  const response = await ctx.http.fetch(url);
+  const body = await response.text();
+  const contentType = response.headers.get('content-type') ?? '';
+  // An explicit markdown/plain-text type wins; otherwise trust a text/html
+  // type, and fall back to sniffing the body when the header is neither.
+  const declaresMarkdown = ['text/markdown', 'text/plain'].some((t) => contentType.includes(t));
+  const isHtml = !declaresMarkdown && (contentType.includes('text/html') || looksLikeHtml(body));
+  const page: FetchedPage = { url, body, contentType, isHtml };
+  ctx.htmlCache.set(url, page);
+  return page;
+}
diff --git a/src/helpers/index.ts b/src/helpers/index.ts
index 4cede77..8e5533f 100644
--- a/src/helpers/index.ts
+++ b/src/helpers/index.ts
@@ -10,3 +10,8 @@ export {
 export type { PageUrlResult, SampledPages } from './get-page-urls.js';
 export { toMdUrls, isNonPageUrl } from './to-md-urls.js';
 export { htmlToMarkdown } from './html-to-markdown.js';
+export { fetchPage } from './fetch-page.js';
+export { detectTabGroups } from './detect-tabs.js';
+export type { DetectedTabGroup, TabPanel } from './detect-tabs.js';
+export { analyzeRendering } from './detect-rendering.js';
+export type { RenderingAnalysis } from './detect-rendering.js';
diff --git a/src/runner.ts b/src/runner.ts
index 90ba01e..ae2490a 100644
--- a/src/runner.ts
+++ b/src/runner.ts
@@ -48,6 +48,7 @@ export function createContext(baseUrl: string, options?: Partial<RunnerOptions>)
     }),
     options: merged,
     pageCache: new Map(),
+    htmlCache: new Map(),
   };
 }
 
diff --git a/src/types.ts b/src/types.ts
index 6c7cc83..d494f28 100644
--- a/src/types.ts
+++ b/src/types.ts
@@ -19,6 +19,13 @@ export interface CachedPage {
   };
 }
 
+export interface FetchedPage {
+  url: string; // URL the page was requested with (also its cache key)
+  body: string; // response body text
+  contentType: string; // Content-Type header value, or '' when the header is absent
+  isHtml: boolean; // whether fetchPage classified the response as HTML
+}
+
 export interface CheckContext {
   /** The base URL being checked (as provided by the user). */
   baseUrl: string;
@@ -32,6 +39,8 @@ export interface CheckContext {
   options: CheckOptions;
   /** Cached page content, keyed by original page URL. */
   pageCache: Map<string, CachedPage>;
+  /** Cached raw HTML fetches, keyed by URL. Shared across checks within a single run. */
+  htmlCache: Map<string, FetchedPage>;
   /** Cached sampled pages result, shared across checks within a single run. */
   _sampledPages?: SampledPages;
 }
diff --git a/test/integration/check-pipeline.test.ts b/test/integration/check-pipeline.test.ts
index 1d7ac65..506557d 100644
--- a/test/integration/check-pipeline.test.ts
+++ b/test/integration/check-pipeline.test.ts
@@ -535,6 +535,58 @@ describe('check pipeline: content-negotiation respects md-url cache', () => {
   });
 });
 
+describe('check pipeline: HTML fetch cache shared across checks', () => {
+  it('page-size-html and tabbed-content-serialization share fetched HTML', async () => {
+    let fetchCount = 0;
+    const pageHtml =
+      '<html><body><h1>Guide</h1><div class="sphinx-tabs"><div class="sphinx-tabs-tab">Python</div><div class="sphinx-tabs-panel"><pre>print("hi")</pre></div></div></body></html>';
+
+    server.use(
+      http.get('http://pipe-htmlcache.local/llms.txt', () =>
+        HttpResponse.text(
+          '# Docs\n## Links\n- [Guide](http://pipe-htmlcache.local/docs/guide): Guide\n',
+        ),
+      ),
+      http.get(
+        'http://pipe-htmlcache.local/docs/llms.txt',
+        () => new HttpResponse(null, { status: 404 }),
+      ),
+      http.get('http://pipe-htmlcache.local/docs/guide', () => {
+        fetchCount++;
+        return new HttpResponse(pageHtml, {
+          status: 200,
+          headers: { 'Content-Type': 'text/html' },
+        });
+      }),
+      // page-size-html probes for .md and 404-test URLs
+      http.get(
+        'http://pipe-htmlcache.local/docs/guide.md',
+        () => new HttpResponse(null, { status: 404 }),
+      ),
+      http.get(
+        'http://pipe-htmlcache.local/docs/guide-afdocs-nonexistent-8f3a',
+        () => new HttpResponse('Not Found', { status: 404 }),
+      ),
+    );
+
+    const report = await runChecks('http://pipe-htmlcache.local', {
+      checkIds: ['llms-txt-exists', 'page-size-html', 'tabbed-content-serialization'],
+      requestDelay: 0,
+    });
+
+    const htmlResult = report.results.find((r) => r.id === 'page-size-html')!;
+    const tabResult = report.results.find((r) => r.id === 'tabbed-content-serialization')!;
+
+    // Both checks should succeed
+    expect(htmlResult.status).not.toBe('error');
+    expect(tabResult.status).not.toBe('error');
+    expect(tabResult.details?.totalGroupsFound).toBe(1);
+
+    // The page should only be fetched once, not twice
+    expect(fetchCount).toBe(1);
+  });
+});
+
 describe('check pipeline: independent checks share sampling', () => {
   it('cache-header-hygiene and auth-gate-detection test the same pages', async () => {
     const pages = [];
diff --git a/test/integration/cli.test.ts b/test/integration/cli.test.ts
index 5450c50..00fa104 100644
--- a/test/integration/cli.test.ts
+++ b/test/integration/cli.test.ts
@@ -71,6 +71,6 @@ describe('CLI', () => {
     // Should have results from multiple categories
     const categories = new Set(report.results.map((r) => r.category));
     expect(categories.size).toBeGreaterThan(1);
-    expect(report.summary.total).toBe(21);
+    expect(report.summary.total).toBe(22);
   });
 });
diff --git a/test/unit/checks/rendering-strategy.test.ts b/test/unit/checks/rendering-strategy.test.ts
new file mode 100644
index 0000000..fe526a9
--- /dev/null
+++ b/test/unit/checks/rendering-strategy.test.ts
@@ -0,0 +1,251 @@
+import { describe, it, expect, beforeAll } from 'vitest';
+import { http, HttpResponse } from 'msw';
+import { setupServer } from 'msw/node';
+import { createContext } from '../../../src/runner.js';
+import { getCheck } from '../../../src/checks/registry.js';
+import '../../../src/checks/index.js';
+import type { DiscoveredFile } from '../../../src/types.js';
+
+const server = setupServer();
+
+beforeAll(() => {
+  server.listen({ onUnhandledRequest: 'bypass' }); // requests without a registered handler are let through untouched
+  return () => server.close(); // vitest runs the returned function as this hook's teardown
+});
+
+describe('rendering-strategy', () => {
+  const check = getCheck('rendering-strategy')!;
+
+  function makeCtx(domain: string, content: string) {
+    const ctx = createContext(`http://${domain}`, { requestDelay: 0 }); // zero delay between requests
+    const discovered: DiscoveredFile[] = [
+      { url: `http://${domain}/llms.txt`, content, status: 200, redirected: false },
+    ];
+    ctx.previousResults.set('llms-txt-exists', {
+      id: 'llms-txt-exists',
+      category: 'llms-txt',
+      status: 'pass', // pre-seeded as if llms.txt discovery had already succeeded
+      message: 'Found',
+      details: { discoveredFiles: discovered },
+    });
+    return ctx;
+  }
+
+  function llmsTxt(domain: string, paths: string[]): string {
+    const links = paths.map((p, i) => `- [Page ${i}](http://${domain}${p}): Page ${i}`).join('\n');
+    return `# Docs\n> Summary\n## Links\n${links}\n`; // title, summary quote, then one "Links" section with a link per path
+  }
+
+  it('passes for markdown (non-HTML) responses', async () => {
+    const domain = 'rs-md-pass.local';
+    const md = '# Guide\n\nThis is markdown content about the API.\n';
+    server.use(
+      http.get(
+        `http://${domain}/docs/page1`,
+        () =>
+          new HttpResponse(md, {
+            status: 200,
+            headers: { 'Content-Type': 'text/markdown; charset=utf-8' }, // served as markdown rather than HTML
+          }),
+      ),
+    );
+
+    const result = await check.run(makeCtx(domain, llmsTxt(domain, ['/docs/page1'])));
+    expect(result.status).toBe('pass');
+    const pageResults = result.details?.pageResults as Array<{
+      analysis: {
+        hasContent: boolean;
+        hasSpaMarkers: boolean;
+        spaMarker: string | null;
+        visibleTextLength: number;
+        htmlLength: number;
+      };
+    }>;
+    const a = pageResults[0].analysis;
+    expect(a.hasContent).toBe(true);
+    expect(a.hasSpaMarkers).toBe(false);
+    expect(a.spaMarker).toBeNull();
+    expect(a.visibleTextLength).toBe(md.length); // the full markdown body is expected to count as visible text
+    expect(a.htmlLength).toBe(0); // expected to stay 0 for a non-HTML response
+  });
+
+  it('passes for all pages server-rendered', async () => {
+    const domain = 'rs-all-pass.local';
+    const htmlPage = (title: string) =>
+      `<html><body><h1>${title}</h1><p>Some real content here for the page.</p></body></html>`;
+
+    server.use(
+      http.get(
+        `http://${domain}/docs/page1`,
+        () =>
+          new HttpResponse(htmlPage('Page One'), {
+            status: 200,
+            headers: { 'Content-Type': 'text/html' },
+          }),
+      ),
+      http.get(
+        `http://${domain}/docs/page2`,
+        () =>
+          new HttpResponse(htmlPage('Page Two'), {
+            status: 200,
+            headers: { 'Content-Type': 'text/html' },
+          }),
+      ),
+    );
+
+    const result = await check.run(
+      makeCtx(domain, llmsTxt(domain, ['/docs/page1', '/docs/page2'])),
+    );
+    expect(result.status).toBe('pass'); // both pages carry a heading and a paragraph in the served HTML
+    expect(result.message).toContain('All 2 pages contain server-rendered content');
+  });
+
+  it('fails for SPA shell with framework hint', async () => {
+    const domain = 'rs-spa-fail.local';
+    // Gatsby SPA shell: has the marker, no real content
+    const spaHtml =
+      '<html><body><div id="___gatsby"><div id="gatsby-focus-wrapper"></div></div>' +
+      '<script src="/app.js"></script></body></html>';
+
+    server.use(
+      http.get(
+        `http://${domain}/docs/page1`,
+        () =>
+          new HttpResponse(spaHtml, {
+            status: 200,
+            headers: { 'Content-Type': 'text/html' },
+          }),
+      ),
+    );
+
+    const result = await check.run(makeCtx(domain, llmsTxt(domain, ['/docs/page1'])));
+    expect(result.status).toBe('fail');
+    expect(result.message).toContain('client-side rendered SPA shells');
+    expect(result.message).toContain('___gatsby'); // the matched marker id is expected in the message
+  });
+
+  it('warns for sparse content', async () => {
+    const domain = 'rs-sparse-warn.local';
+    // Has SPA marker + one heading (triggers warn path in pageStatus):
+    // hasSpaMarkers=true, hasContent=false (only 1 heading, not >=3),
+    // but contentHeadings >= 1 => warn
+    const sparseHtml =
+      '<html><body><div id="__next">' + '<h1>Getting Started Guide</h1>' + '</div></body></html>';
+
+    server.use(
+      http.get(
+        `http://${domain}/docs/page1`,
+        () =>
+          new HttpResponse(sparseHtml, {
+            status: 200,
+            headers: { 'Content-Type': 'text/html' },
+          }),
+      ),
+    );
+
+    const result = await check.run(makeCtx(domain, llmsTxt(domain, ['/docs/page1'])));
+    expect(result.status).toBe('warn'); // a lone heading inside the mount point: warn rather than fail
+    expect(result.message).toContain('little substantive content');
+    expect(result.message).toContain('headings');
+  });
+
+  it('reports mixed SPA shells and sparse content', async () => {
+    const domain = 'rs-mixed.local';
+    // Page 1: full SPA shell (fail) with Gatsby marker
+    const spaHtml =
+      '<html><body><div id="___gatsby"></div>' + '<script src="/app.js"></script></body></html>';
+    // Page 2: sparse content (warn) with __next marker + one heading
+    const sparseHtml =
+      '<html><body><div id="__next">' + '<h1>Installation Guide</h1>' + '</div></body></html>';
+
+    server.use(
+      http.get(
+        `http://${domain}/docs/page1`,
+        () =>
+          new HttpResponse(spaHtml, {
+            status: 200,
+            headers: { 'Content-Type': 'text/html' },
+          }),
+      ),
+      http.get(
+        `http://${domain}/docs/page2`,
+        () =>
+          new HttpResponse(sparseHtml, {
+            status: 200,
+            headers: { 'Content-Type': 'text/html' },
+          }),
+      ),
+    );
+
+    const result = await check.run(
+      makeCtx(domain, llmsTxt(domain, ['/docs/page1', '/docs/page2'])),
+    );
+    expect(result.status).toBe('fail'); // one shell page plus one sparse page: overall fail is expected
+    expect(result.message).toContain('1 of 2 pages appear to be client-side rendered SPA shells');
+    expect(result.message).toContain('1 more have page structure but little substantive content');
+  });
+
+  it('appends fetch error count to message', async () => {
+    const domain = 'rs-fetch-err.local';
+    const goodHtml =
+      '<html><body><h1>Working Page</h1><p>Content is here and accessible.</p></body></html>';
+
+    server.use(
+      http.get(
+        `http://${domain}/docs/good`,
+        () =>
+          new HttpResponse(goodHtml, {
+            status: 200,
+            headers: { 'Content-Type': 'text/html' },
+          }),
+      ),
+      http.get(`http://${domain}/docs/broken`, () => HttpResponse.error()), // network-level failure, no HTTP status
+    );
+
+    const result = await check.run(
+      makeCtx(domain, llmsTxt(domain, ['/docs/good', '/docs/broken'])),
+    );
+    expect(result.message).toContain('; 1 failed to fetch');
+    expect(result.details?.fetchErrors).toBe(1); // only the /docs/broken request is expected to be counted
+  });
+
+  it('fails when all pages fail to fetch', async () => {
+    const domain = 'rs-all-err.local';
+
+    server.use(
+      http.get(`http://${domain}/docs/page1`, () => HttpResponse.error()),
+      http.get(`http://${domain}/docs/page2`, () => HttpResponse.error()),
+    );
+
+    const result = await check.run(
+      makeCtx(domain, llmsTxt(domain, ['/docs/page1', '/docs/page2'])),
+    );
+    expect(result.status).toBe('fail');
+    expect(result.message).toContain('Could not fetch any pages');
+    expect(result.message).toContain('; 2 failed to fetch'); // the error-count suffix is expected on this message too
+  });
+
+  it('fails when SPA shell but no framework marker', async () => {
+    const domain = 'rs-generic-root.local';
+    // id="root" is a generic SPA marker with no content
+    const spaHtml =
+      '<html><body><div id="root"></div>' + '<script src="/bundle.js"></script></body></html>';
+
+    server.use(
+      http.get(
+        `http://${domain}/docs/page1`,
+        () =>
+          new HttpResponse(spaHtml, {
+            status: 200,
+            headers: { 'Content-Type': 'text/html' },
+          }),
+      ),
+    );
+
+    const result = await check.run(makeCtx(domain, llmsTxt(domain, ['/docs/page1'])));
+    expect(result.status).toBe('fail'); // body holds only an empty mount point and a script tag
+    expect(result.message).toContain('client-side rendered SPA shells');
+    // "root" extracted from id="root" via the replace logic
+    expect(result.message).toContain('root detected');
+  });
+});
diff --git a/test/unit/checks/section-header-quality.test.ts b/test/unit/checks/section-header-quality.test.ts
new file mode 100644
index 0000000..535e96e
--- /dev/null
+++ b/test/unit/checks/section-header-quality.test.ts
@@ -0,0 +1,594 @@
+import { describe, it, expect } from 'vitest';
+import { createContext } from '../../../src/runner.js';
+import { getCheck } from '../../../src/checks/registry.js';
+import '../../../src/checks/index.js';
+
+describe('section-header-quality', () => {
+  const check = getCheck('section-header-quality')!;
+
+  function makeCtx(tabbedResult?: {
+    status: string; // cast to 'pass' | 'warn' | 'fail' when stored below
+    tabbedPages: Array<{
+      url: string;
+      tabGroups: Array<{
+        framework: string;
+        tabCount: number;
+        htmlSlice: string;
+        panels: Array<{ label: string | null; html: string }>;
+      }>;
+      totalTabbedChars: number;
+      status: string;
+    }>;
+  }) {
+    const ctx = createContext('http://test.local', { requestDelay: 0 });
+
+    if (tabbedResult) {
+      ctx.previousResults.set('tabbed-content-serialization', {
+        id: 'tabbed-content-serialization',
+        category: 'content-structure',
+        status: tabbedResult.status as 'pass' | 'warn' | 'fail',
+        message: 'test',
+        details: { tabbedPages: tabbedResult.tabbedPages }, // only tabbedPages is forwarded into details
+      });
+    }
+
+    return ctx; // no tabbed-content-serialization entry is set when tabbedResult is omitted
+  }
+
+  it('skips when tabbed-content-serialization did not run', async () => {
+    const ctx = createContext('http://test.local', { requestDelay: 0 }); // built directly: no previous result is seeded
+    const result = await check.run(ctx);
+    expect(result.status).toBe('skip');
+  });
+
+  it('passes when no tabbed content was found', async () => {
+    const result = await check.run(
+      makeCtx({
+        status: 'pass',
+        tabbedPages: [
+          { url: 'http://test.local/page', tabGroups: [], totalTabbedChars: 0, status: 'pass' },
+        ],
+      }),
+    );
+    expect(result.status).toBe('pass'); // a page with zero tab groups is expected to pass, not skip
+    expect(result.message).toContain('not applicable');
+  });
+
+  it('passes when headers include variant context', async () => {
+    const result = await check.run(
+      makeCtx({
+        status: 'pass',
+        tabbedPages: [
+          {
+            url: 'http://test.local/page',
+            tabGroups: [
+              {
+                framework: 'sphinx',
+                tabCount: 2,
+                htmlSlice: '<div></div>',
+                panels: [
+                  {
+                    label: 'Python',
+                    html: '<div><h2>Python Installation</h2><p>pip install foo</p></div>',
+                  },
+                  {
+                    label: 'Node',
+                    html: '<div><h2>Node Installation</h2><p>npm install foo</p></div>',
+                  },
+                ],
+              },
+            ],
+            totalTabbedChars: 100,
+            status: 'pass',
+          },
+        ],
+      }),
+    );
+    expect(result.status).toBe('pass');
+    expect(result.details?.groupsWithGenericMajority).toBe(0); // each header text contains its own panel label
+  });
+
+  it('fails when majority of headers are generic across panels', async () => {
+    const result = await check.run(
+      makeCtx({
+        status: 'pass',
+        tabbedPages: [
+          {
+            url: 'http://test.local/page',
+            tabGroups: [
+              {
+                framework: 'sphinx',
+                tabCount: 2,
+                htmlSlice: '<div></div>',
+                panels: [
+                  {
+                    label: 'Python',
+                    html: '<div><h2>Installation</h2><h3>Configuration</h3><h3>Usage</h3></div>',
+                  },
+                  {
+                    label: 'Node',
+                    html: '<div><h2>Installation</h2><h3>Configuration</h3><h3>Usage</h3></div>',
+                  },
+                ],
+              },
+            ],
+            totalTabbedChars: 100,
+            status: 'pass',
+          },
+        ],
+      }),
+    );
+    expect(result.status).toBe('fail');
+    expect(result.details?.groupsWithGenericMajority).toBe(1); // both panels carry the same three label-free headers
+    expect(result.details?.pagesAffected).toBe(1);
+    expect(result.message).toContain("don't distinguish between variants");
+  });
+
+  it('passes when headers are unique to each panel', async () => {
+    const result = await check.run(
+      makeCtx({
+        status: 'pass',
+        tabbedPages: [
+          {
+            url: 'http://test.local/page',
+            tabGroups: [
+              {
+                framework: 'mkdocs',
+                tabCount: 2,
+                htmlSlice: '<div></div>',
+                panels: [
+                  {
+                    label: 'Docker',
+                    html: '<div><h2>Dockerfile Setup</h2><h3>Docker Compose</h3></div>',
+                  },
+                  {
+                    label: 'Kubernetes',
+                    html: '<div><h2>Helm Chart</h2><h3>kubectl apply</h3></div>',
+                  },
+                ],
+              },
+            ],
+            totalTabbedChars: 100,
+            status: 'pass',
+          },
+        ],
+      }),
+    );
+    expect(result.status).toBe('pass'); // no header text is shared between the two panels
+  });
+
+  it('warns when 25-50% of groups have generic majority', async () => {
+    // 2 groups: 1 generic, 1 contextual = 50% → warn
+    const result = await check.run(
+      makeCtx({
+        status: 'pass',
+        tabbedPages: [
+          {
+            url: 'http://test.local/page',
+            tabGroups: [
+              {
+                framework: 'sphinx', // group 1: identical label-free headers in both panels
+                tabCount: 2,
+                htmlSlice: '<div></div>',
+                panels: [
+                  { label: 'Python', html: '<div><h2>Installation</h2><h3>Setup</h3></div>' },
+                  { label: 'Node', html: '<div><h2>Installation</h2><h3>Setup</h3></div>' },
+                ],
+              },
+              {
+                framework: 'sphinx', // group 2: each header names its panel label
+                tabCount: 2,
+                htmlSlice: '<div></div>',
+                panels: [
+                  { label: 'Python', html: '<div><h2>Python Guide</h2></div>' },
+                  { label: 'Node', html: '<div><h2>Node Guide</h2></div>' },
+                ],
+              },
+            ],
+            totalTabbedChars: 100,
+            status: 'pass',
+          },
+        ],
+      }),
+    );
+    expect(result.status).toBe('warn');
+  });
+
+  it('handles tab groups with fewer than 2 panels', async () => {
+    const result = await check.run(
+      makeCtx({
+        status: 'pass',
+        tabbedPages: [
+          {
+            url: 'http://test.local/page',
+            tabGroups: [
+              {
+                framework: 'sphinx',
+                tabCount: 1, // single-panel group
+                htmlSlice: '<div></div>',
+                panels: [{ label: 'Only', html: '<div><h2>Installation</h2></div>' }],
+              },
+            ],
+            totalTabbedChars: 50,
+            status: 'pass',
+          },
+        ],
+      }),
+    );
+    expect(result.status).toBe('pass'); // expected to pass, with the reason given in the message
+    expect(result.message).toContain('fewer than 2 panels');
+  });
+
+  it('handles panels with no headers', async () => {
+    const result = await check.run(
+      makeCtx({
+        status: 'pass',
+        tabbedPages: [
+          {
+            url: 'http://test.local/page',
+            tabGroups: [
+              {
+                framework: 'mkdocs',
+                tabCount: 2,
+                htmlSlice: '<div></div>',
+                panels: [
+                  { label: 'A', html: '<div><p>Just text</p></div>' }, // paragraph only, no heading tags
+                  { label: 'B', html: '<div><p>More text</p></div>' }, // paragraph only, no heading tags
+                ],
+              },
+            ],
+            totalTabbedChars: 50,
+            status: 'pass',
+          },
+        ],
+      }),
+    );
+    // No headers inside panels → can't evaluate, skip
+    expect(result.status).toBe('skip');
+    expect(result.message).toContain('no section headers inside tab panels');
+  });
+
+  it('detects generic markdown headers in MDX panels', async () => {
+    const result = await check.run(
+      makeCtx({
+        status: 'fail',
+        tabbedPages: [
+          {
+            url: 'http://test.local/page',
+            tabGroups: [
+              {
+                framework: 'mdx', // panel bodies below use markdown "##" headers, not HTML heading tags
+                tabCount: 2,
+                htmlSlice: '<Tabs></Tabs>',
+                panels: [
+                  {
+                    label: 'Python',
+                    html: '<Tab name="Python">\n\n## Installation\n\n## Configuration\n\n## Usage\n\n</Tab>',
+                  },
+                  {
+                    label: 'Node',
+                    html: '<Tab name="Node">\n\n## Installation\n\n## Configuration\n\n## Usage\n\n</Tab>',
+                  },
+                ],
+              },
+            ],
+            totalTabbedChars: 200,
+            status: 'fail',
+          },
+        ],
+      }),
+    );
+    expect(result.status).toBe('fail');
+    expect(result.details?.groupsWithGenericMajority).toBe(1); // the same three label-free headers appear in both panels
+  });
+
+  it('fails when identical headers repeat across multiple tab groups without variant context', async () => {
+    // 3 tab groups on same page, each with 2 panels, all sharing "Build a Search Query"
+    const result = await check.run(
+      makeCtx({
+        status: 'pass',
+        tabbedPages: [
+          {
+            url: 'http://test.local/tutorial',
+            tabGroups: [
+              {
+                framework: 'mdx',
+                tabCount: 2,
+                htmlSlice: '<div></div>',
+                panels: [
+                  {
+                    label: 'Python',
+                    html: '<div><h2>Build a Search Query</h2><p>python code</p></div>',
+                  },
+                  {
+                    label: 'Node',
+                    html: '<div><h2>Build a Search Query</h2><p>node code</p></div>',
+                  },
+                ],
+              },
+              {
+                framework: 'mdx',
+                tabCount: 2,
+                htmlSlice: '<div></div>',
+                panels: [
+                  {
+                    label: 'Python',
+                    html: '<div><h2>Build a Search Query</h2><p>more python</p></div>',
+                  },
+                  {
+                    label: 'Node',
+                    html: '<div><h2>Build a Search Query</h2><p>more node</p></div>',
+                  },
+                ],
+              },
+              {
+                framework: 'mdx',
+                tabCount: 2,
+                htmlSlice: '<div></div>',
+                panels: [
+                  {
+                    label: 'Python',
+                    html: '<div><h2>Build a Search Query</h2><p>even more</p></div>',
+                  },
+                  {
+                    label: 'Node',
+                    html: '<div><h2>Build a Search Query</h2><p>even more</p></div>',
+                  },
+                ],
+              },
+            ],
+            totalTabbedChars: 300,
+            status: 'pass',
+          },
+        ],
+      }),
+    );
+    expect(result.status).toBe('fail');
+    expect(result.details?.pagesAffected).toBe(1);
+    expect(result.details?.crossGroupGenericGroupCount).toBe(3);
+    expect(result.message).toContain('build a search query'); // header text is expected in lowercase
+    expect(result.message).toContain('repeats across 3 tab groups');
+    expect(result.details?.crossGroupRepeatedHeaders).toEqual(
+      expect.arrayContaining([
+        expect.objectContaining({ header: 'build a search query', groupCount: 3 }),
+      ]),
+    );
+  });
+
+  it('passes cross-group when headers include panel labels', async () => {
+    const result = await check.run(
+      makeCtx({
+        status: 'pass',
+        tabbedPages: [
+          {
+            url: 'http://test.local/tutorial',
+            tabGroups: [
+              {
+                framework: 'mdx',
+                tabCount: 2,
+                htmlSlice: '<div></div>',
+                panels: [
+                  { label: 'Python', html: '<div><h2>Python Setup</h2></div>' },
+                  { label: 'Node', html: '<div><h2>Node Setup</h2></div>' },
+                ],
+              },
+              {
+                framework: 'mdx',
+                tabCount: 2,
+                htmlSlice: '<div></div>',
+                panels: [
+                  { label: 'Python', html: '<div><h2>Python Config</h2></div>' },
+                  { label: 'Node', html: '<div><h2>Node Config</h2></div>' },
+                ],
+              },
+            ],
+            totalTabbedChars: 200,
+            status: 'pass',
+          },
+        ],
+      }),
+    );
+    expect(result.status).toBe('pass');
+    expect(result.details?.pagesAffected).toBe(0);
+    expect(result.details?.crossGroupGenericGroupCount).toBe(0); // no header text repeats between the two groups
+    expect(result.message).toContain('headers include variant context');
+  });
+
+  it('skips cross-group analysis for pages with only one tab group', async () => {
+    const result = await check.run(
+      makeCtx({
+        status: 'pass',
+        tabbedPages: [
+          {
+            url: 'http://test.local/page',
+            tabGroups: [
+              {
+                framework: 'sphinx',
+                tabCount: 2,
+                htmlSlice: '<div></div>',
+                panels: [
+                  { label: 'Python', html: '<div><h2>Python Installation</h2></div>' },
+                  { label: 'Node', html: '<div><h2>Node Installation</h2></div>' },
+                ],
+              },
+            ],
+            totalTabbedChars: 100,
+            status: 'pass',
+          },
+        ],
+      }),
+    );
+    expect(result.status).toBe('pass');
+    expect(result.details?.crossGroupTotalGroupCount).toBe(0); // the lone group is expected to be left out of the cross-group total
+  });
+
+  it('warns when cross-group generic ratio is between 25-50%', async () => {
+    // 4 tab groups on one page. Groups 1 and 2 share "Getting Started" (cross-group generic).
+    // Groups 3 and 4 have unique headers. crossGroupGenericGroupCount = 2, total = 4, ratio = 0.5 → warn.
+    // Within-group: each group has label-contextual headers so within-group passes.
+    const result = await check.run(
+      makeCtx({
+        status: 'pass',
+        tabbedPages: [
+          {
+            url: 'http://test.local/tutorial',
+            tabGroups: [
+              {
+                framework: 'mdx',
+                tabCount: 2,
+                htmlSlice: '<div></div>',
+                panels: [
+                  {
+                    label: 'Python',
+                    html: '<div><h2>Getting Started</h2><h3>Python Basics</h3></div>',
+                  },
+                  {
+                    label: 'Node',
+                    html: '<div><h2>Getting Started</h2><h3>Node Basics</h3></div>',
+                  },
+                ],
+              },
+              {
+                framework: 'mdx',
+                tabCount: 2,
+                htmlSlice: '<div></div>',
+                panels: [
+                  {
+                    label: 'Python',
+                    html: '<div><h2>Getting Started</h2><h3>Python Advanced</h3></div>',
+                  },
+                  {
+                    label: 'Node',
+                    html: '<div><h2>Getting Started</h2><h3>Node Advanced</h3></div>',
+                  },
+                ],
+              },
+              {
+                framework: 'mdx',
+                tabCount: 2,
+                htmlSlice: '<div></div>',
+                panels: [
+                  { label: 'Python', html: '<div><h2>Python Config</h2></div>' },
+                  { label: 'Node', html: '<div><h2>Node Config</h2></div>' },
+                ],
+              },
+              {
+                framework: 'mdx',
+                tabCount: 2,
+                htmlSlice: '<div></div>',
+                panels: [
+                  { label: 'Python', html: '<div><h2>Python Deployment</h2></div>' },
+                  { label: 'Node', html: '<div><h2>Node Deployment</h2></div>' },
+                ],
+              },
+            ],
+            totalTabbedChars: 400,
+            status: 'pass',
+          },
+        ],
+      }),
+    );
+    expect(result.status).toBe('warn');
+    expect(result.details?.crossGroupGenericGroupCount).toBe(2); // groups 1 and 2, which share "Getting Started"
+    expect(result.details?.crossGroupTotalGroupCount).toBe(4); // all four groups on the page
+  });
+
+  it('sorts cross-group repeated headers by group count to find worst', async () => {
+    // 4 tab groups. "Overview" appears in 3 groups, "Setup" appears in 2 groups.
+    // The message should mention "overview" (the worst offender) not "setup".
+    const result = await check.run(
+      makeCtx({
+        status: 'pass',
+        tabbedPages: [
+          {
+            url: 'http://test.local/guide',
+            tabGroups: [
+              {
+                framework: 'mdx',
+                tabCount: 2,
+                htmlSlice: '<div></div>',
+                panels: [
+                  { label: 'Python', html: '<div><h2>Overview</h2><h3>Setup</h3></div>' },
+                  { label: 'Node', html: '<div><h2>Overview</h2><h3>Setup</h3></div>' },
+                ],
+              },
+              {
+                framework: 'mdx',
+                tabCount: 2,
+                htmlSlice: '<div></div>',
+                panels: [
+                  { label: 'Python', html: '<div><h2>Overview</h2></div>' },
+                  { label: 'Node', html: '<div><h2>Overview</h2></div>' },
+                ],
+              },
+              {
+                framework: 'mdx',
+                tabCount: 2,
+                htmlSlice: '<div></div>',
+                panels: [
+                  { label: 'Python', html: '<div><h2>Overview</h2><h3>Setup</h3></div>' },
+                  { label: 'Node', html: '<div><h2>Overview</h2><h3>Setup</h3></div>' },
+                ],
+              },
+              {
+                framework: 'mdx',
+                tabCount: 2,
+                htmlSlice: '<div></div>',
+                panels: [
+                  { label: 'Python', html: '<div><h2>Python Testing</h2></div>' },
+                  { label: 'Node', html: '<div><h2>Node Testing</h2></div>' },
+                ],
+              },
+            ],
+            totalTabbedChars: 400,
+            status: 'pass',
+          },
+        ],
+      }),
+    );
+    expect(result.status).toBe('fail');
+    expect(result.message).toContain('overview'); // NOTE(review): absence of "setup" in the message is not asserted
+    expect(result.message).toContain('repeats across');
+    // "overview" appears in 3 groups, "setup" in 2 — worst should be "overview"
+    expect(result.details?.crossGroupRepeatedHeaders).toEqual(
+      expect.arrayContaining([
+        expect.objectContaining({ header: 'overview', groupCount: 3 }),
+        expect.objectContaining({ header: 'setup', groupCount: 2 }), // arrayContaining checks membership, not order
+      ]),
+    );
+  });
+
+  it('detects contextual markdown headers in MDX panels', async () => {
+    const result = await check.run(
+      makeCtx({
+        status: 'pass',
+        tabbedPages: [
+          {
+            url: 'http://test.local/page',
+            tabGroups: [
+              {
+                framework: 'mdx', // markdown "##" headers below each include their panel label
+                tabCount: 2,
+                htmlSlice: '<Tabs></Tabs>',
+                panels: [
+                  {
+                    label: 'Python',
+                    html: '<Tab name="Python">\n\n## Python Installation\n\n## Python Configuration\n\n</Tab>',
+                  },
+                  {
+                    label: 'Node',
+                    html: '<Tab name="Node">\n\n## Node Installation\n\n## Node Configuration\n\n</Tab>',
+                  },
+                ],
+              },
+            ],
+            totalTabbedChars: 200,
+            status: 'pass',
+          },
+        ],
+      }),
+    );
+    expect(result.status).toBe('pass');
+    expect(result.details?.groupsWithGenericMajority).toBe(0);
+  });
+});
diff --git a/test/unit/checks/tabbed-content-serialization.test.ts b/test/unit/checks/tabbed-content-serialization.test.ts
new file mode 100644
index 0000000..b16a6ea
--- /dev/null
+++ b/test/unit/checks/tabbed-content-serialization.test.ts
@@ -0,0 +1,385 @@
+import { describe, it, expect, beforeAll } from 'vitest';
+import { http, HttpResponse } from 'msw';
+import { setupServer } from 'msw/node';
+import { createContext } from '../../../src/runner.js';
+import { getCheck } from '../../../src/checks/registry.js';
+import '../../../src/checks/index.js';
+import type { DiscoveredFile } from '../../../src/types.js';
+
+const server = setupServer();
+
+beforeAll(() => {
+  server.listen({ onUnhandledRequest: 'bypass' }); // requests without a registered handler are let through untouched
+  return () => server.close(); // the returned function is run by vitest as this hook's teardown
+});
+
+describe('tabbed-content-serialization', () => {
+  const check = getCheck('tabbed-content-serialization')!;
+
+  function makeCtx(content?: string, opts?: Record<string, unknown>) {
+    const ctx = createContext('http://test.local', { requestDelay: 0, ...opts }); // opts may override requestDelay
+
+    if (content) {
+      const discovered: DiscoveredFile[] = [
+        { url: 'http://test.local/llms.txt', content, status: 200, redirected: false },
+      ];
+      ctx.previousResults.set('llms-txt-exists', {
+        id: 'llms-txt-exists',
+        category: 'llms-txt',
+        status: 'pass',
+        message: 'Found',
+        details: { discoveredFiles: discovered },
+      }); // seeded as a passing llms-txt-exists result that carries `content` as the llms.txt body
+    } else {
+      ctx.previousResults.set('llms-txt-exists', {
+        id: 'llms-txt-exists',
+        category: 'llms-txt',
+        status: 'fail',
+        message: 'No llms.txt found',
+        details: { discoveredFiles: [] },
+      }); // missing or empty content: seeded as a failed lookup with no discovered files
+    }
+
+    return ctx;
+  }
+
+  it('passes when page has no tabbed content', async () => {
+    server.use(
+      http.get(
+        'http://tcs-notabs.local/docs/page1',
+        () =>
+          new HttpResponse('<html><body><h1>Hello</h1><p>No tabs here.</p></body></html>', {
+            status: 200,
+            headers: { 'Content-Type': 'text/html' },
+          }),
+      ),
+    );
+
+    const content = `# Docs\n> Summary\n## Links\n- [Page 1](http://tcs-notabs.local/docs/page1): First\n`;
+    const result = await check.run(makeCtx(content)); // content is seeded as the llms.txt body; its only link is the mocked page
+    expect(result.status).toBe('pass');
+    expect(result.message).toContain('No tabbed content');
+  });
+
+  it('passes when tabbed content serializes under threshold', async () => {
+    const tabHtml = `
+      <div class="sphinx-tabs">
+        <div class="sphinx-tabs-tab">Python</div>
+        <div class="sphinx-tabs-tab">JS</div>
+        <div class="sphinx-tabs-panel"><pre>print("hi")</pre></div>
+        <div class="sphinx-tabs-panel"><pre>console.log("hi")</pre></div>
+      </div>
+    `;
+    server.use(
+      http.get(
+        'http://tcs-small.local/docs/page1',
+        () =>
+          new HttpResponse(`<html><body>${tabHtml}</body></html>`, {
+            status: 200,
+            headers: { 'Content-Type': 'text/html' },
+          }),
+      ),
+    );
+
+    const content = `# Docs\n> Summary\n## Links\n- [Page 1](http://tcs-small.local/docs/page1): First\n`;
+    const result = await check.run(makeCtx(content));
+    expect(result.status).toBe('pass');
+    expect(result.details?.totalGroupsFound).toBe(1); // the single sphinx-tabs container in tabHtml
+    expect(result.details?.pagesWithTabs).toBe(1); // only one page is linked from the llms.txt content
+  });
+
+  it('fails when tabbed content exceeds 100K chars', async () => {
+    // Two panels of 60,000 characters each (120K of text in total), above the 100K fail limit
+    const bigPanel = '<p>' + 'x'.repeat(60_000) + '</p>';
+    const tabHtml = `
+      <div class="sphinx-tabs">
+        <div class="sphinx-tabs-tab">A</div>
+        <div class="sphinx-tabs-tab">B</div>
+        <div class="sphinx-tabs-panel">${bigPanel}</div>
+        <div class="sphinx-tabs-panel">${bigPanel}</div>
+      </div>
+    `;
+    server.use(
+      http.get(
+        'http://tcs-big.local/docs/page1',
+        () =>
+          new HttpResponse(`<html><body>${tabHtml}</body></html>`, {
+            status: 200,
+            headers: { 'Content-Type': 'text/html' },
+          }),
+      ),
+    );
+
+    const content = `# Docs\n> Summary\n## Links\n- [Page 1](http://tcs-big.local/docs/page1): First\n`;
+    const result = await check.run(makeCtx(content));
+    expect(result.status).toBe('fail');
+    expect(result.message).toContain('over 100K');
+  });
+
+  it('handles fetch errors gracefully', async () => {
+    server.use(http.get('http://tcs-err.local/docs/page1', () => HttpResponse.error())); // network-level failure, not an HTTP status
+
+    const content = `# Docs\n> Summary\n## Links\n- [Page 1](http://tcs-err.local/docs/page1): First\n`;
+    const result = await check.run(makeCtx(content));
+    expect(result.status).toBe('fail');
+    expect(result.details?.fetchErrors).toBe(1); // the only linked page is the one that errors
+  });
+
+  it('skips conversion for markdown responses', async () => {
+    server.use(
+      http.get(
+        'http://tcs-md.local/docs/page1',
+        () =>
+          new HttpResponse('# Hello\n\nNo tabs in markdown.', {
+            status: 200,
+            headers: { 'Content-Type': 'text/markdown' },
+          }),
+      ),
+    );
+
+    const content = `# Docs\n> Summary\n## Links\n- [Page 1](http://tcs-md.local/docs/page1): First\n`;
+    const result = await check.run(makeCtx(content));
+    expect(result.status).toBe('pass');
+    const tabbedPages = result.details?.tabbedPages as Array<{ totalTabbedChars: number }>;
+    expect(tabbedPages[0].totalTabbedChars).toBe(0); // the page is still listed, with nothing counted as tabbed
+  });
+
+  it('stores tabbedPages in details for downstream checks', async () => {
+    const tabHtml = `
+      <div class="tabbed-set">
+        <div class="tabbed-labels"><label>Go</label></div>
+        <div class="tabbed-content"><div class="tabbed-block"><pre>fmt.Println</pre></div></div>
+      </div>
+    `;
+    server.use(
+      http.get(
+        'http://tcs-details.local/docs/page1',
+        () =>
+          new HttpResponse(`<html><body>${tabHtml}</body></html>`, {
+            status: 200,
+            headers: { 'Content-Type': 'text/html' },
+          }),
+      ),
+    );
+
+    const content = `# Docs\n> Summary\n## Links\n- [Page 1](http://tcs-details.local/docs/page1): First\n`;
+    const result = await check.run(makeCtx(content));
+    expect(result.details?.tabbedPages).toBeDefined();
+    const tabbedPages = result.details?.tabbedPages as Array<{
+      url: string;
+      tabGroups: Array<{ framework: string }>;
+    }>;
+    expect(tabbedPages[0].tabGroups[0].framework).toBe('mkdocs'); // the tabbed-set markup above is reported as mkdocs
+  });
+
+  it('detects MDX tabs in markdown responses', async () => {
+    const mdContent = `# Guide\n\n<Tabs>\n<Tab name="Python">\n\npip install foo\n\n</Tab>\n<Tab name="Node">\n\nnpm install foo\n\n</Tab>\n</Tabs>\n`;
+    server.use(
+      http.get(
+        'http://tcs-mdx.local/docs/page1',
+        () =>
+          new HttpResponse(mdContent, {
+            status: 200,
+            headers: { 'Content-Type': 'text/markdown' },
+          }),
+      ),
+    );
+
+    const content = `# Docs\n> Summary\n## Links\n- [Page 1](http://tcs-mdx.local/docs/page1): First\n`;
+    const result = await check.run(makeCtx(content));
+    expect(result.status).toBe('pass');
+    const tabbedPages = result.details?.tabbedPages as Array<{
+      tabGroups: Array<{ framework: string }>;
+      source: string;
+    }>;
+    expect(tabbedPages[0].tabGroups).toHaveLength(1); // mdContent holds one <Tabs> block with two <Tab> panels
+    expect(tabbedPages[0].tabGroups[0].framework).toBe('mdx');
+    expect(tabbedPages[0].source).toBe('markdown'); // the page itself was served as text/markdown
+  });
+
+  it('falls back to .md URL when HTML is an SPA shell', async () => {
+    // SPA shell: 15,000 chars of inline CSS plus an empty Gatsby mount node, no readable text
+    const spaHtml =
+      '<html><head><style>' +
+      'x'.repeat(15_000) +
+      '</style></head><body><div id="___gatsby"></div></body></html>';
+    const mdContent = `# Tutorial\n\n<Tabs>\n<Tab name="Atlas">Atlas content</Tab>\n<Tab name="Local">Local content</Tab>\n</Tabs>\n`;
+
+    server.use(
+      http.get(
+        'http://tcs-spa.local/docs/page1',
+        () =>
+          new HttpResponse(spaHtml, {
+            status: 200,
+            headers: { 'Content-Type': 'text/html' },
+          }),
+      ),
+      http.get(
+        'http://tcs-spa.local/docs/page1.md',
+        () =>
+          new HttpResponse(mdContent, {
+            status: 200,
+            headers: { 'Content-Type': 'text/markdown' },
+          }),
+      ),
+    );
+
+    const content = `# Docs\n> Summary\n## Links\n- [Page 1](http://tcs-spa.local/docs/page1): First\n`;
+    const ctx = makeCtx(content);
+    // Seed a failed rendering-strategy result for this URL, as if it had been flagged as an SPA shell
+    ctx.previousResults.set('rendering-strategy', {
+      id: 'rendering-strategy',
+      category: 'page-size',
+      status: 'fail',
+      message: 'SPA shell detected',
+      details: {
+        pageResults: [{ url: 'http://tcs-spa.local/docs/page1', status: 'fail' }],
+      },
+    });
+    const result = await check.run(ctx);
+    const tabbedPages = result.details?.tabbedPages as Array<{
+      tabGroups: Array<{ framework: string }>;
+      source: string;
+    }>;
+    expect(tabbedPages[0].tabGroups).toHaveLength(1);
+    expect(tabbedPages[0].tabGroups[0].framework).toBe('mdx');
+    expect(tabbedPages[0].source).toBe('md-fallback'); // the tabs came from the mocked page1.md, not the HTML shell
+  });
+
+  it('warns when tabbed content is between 50K-100K chars', async () => {
+    // Two panels of 37,000 characters each (~74K of text in total), inside the 50K–100K warn band
+    const panelContent = '<p>' + 'w'.repeat(37_000) + '</p>';
+    const tabHtml = `
+      <div class="sphinx-tabs">
+        <div class="sphinx-tabs-tab">Alpha</div>
+        <div class="sphinx-tabs-tab">Beta</div>
+        <div class="sphinx-tabs-panel">${panelContent}</div>
+        <div class="sphinx-tabs-panel">${panelContent}</div>
+      </div>
+    `;
+    server.use(
+      http.get(
+        'http://tcs-warn.local/docs/page1',
+        () =>
+          new HttpResponse(`<html><body>${tabHtml}</body></html>`, {
+            status: 200,
+            headers: { 'Content-Type': 'text/html' },
+          }),
+      ),
+    );
+
+    const content = `# Docs\n> Summary\n## Links\n- [Page 1](http://tcs-warn.local/docs/page1): First\n`;
+    const result = await check.run(makeCtx(content));
+    expect(result.status).toBe('warn');
+    expect(result.message).toContain('50K–100K');
+  });
+
+  it('includes fetch errors in message when tabs are found', async () => {
+    const tabHtml = `
+      <div class="sphinx-tabs">
+        <div class="sphinx-tabs-tab">Python</div>
+        <div class="sphinx-tabs-tab">JS</div>
+        <div class="sphinx-tabs-panel"><pre>print("hi")</pre></div>
+        <div class="sphinx-tabs-panel"><pre>console.log("hi")</pre></div>
+      </div>
+    `;
+    server.use(
+      http.get(
+        'http://tcs-partial1.local/docs/page1',
+        () =>
+          new HttpResponse(`<html><body>${tabHtml}</body></html>`, {
+            status: 200,
+            headers: { 'Content-Type': 'text/html' },
+          }),
+      ),
+      http.get('http://tcs-partial2.local/docs/page2', () => HttpResponse.error()), // second linked page fails at the network level
+    );
+
+    const content = `# Docs\n> Summary\n## Links\n- [Page 1](http://tcs-partial1.local/docs/page1): First\n- [Page 2](http://tcs-partial2.local/docs/page2): Second\n`;
+    const result = await check.run(makeCtx(content));
+    expect(result.message).toContain('1 failed to fetch');
+    expect(result.details?.fetchErrors).toBe(1);
+    // The failed page must not hide the tab groups found on the page that did load
+    expect(result.details?.totalGroupsFound).toBeGreaterThan(0);
+  });
+
+  it('SPA shell falls through when tryMdFallback returns null (all candidates fail)', async () => {
+    const spaHtml =
+      '<html><head><style>' +
+      'x'.repeat(15_000) +
+      '</style></head><body><div id="___gatsby"></div></body></html>';
+
+    server.use(
+      http.get(
+        'http://tcs-spa-notabs.local/docs/page1',
+        () =>
+          new HttpResponse(spaHtml, {
+            status: 200,
+            headers: { 'Content-Type': 'text/html' },
+          }),
+      ),
+      // both mocked .md candidates (page1.md and page1/index.md) return 404 so tryMdFallback returns null
+      http.get(
+        'http://tcs-spa-notabs.local/docs/page1.md',
+        () => new HttpResponse('Not found', { status: 404 }),
+      ),
+      http.get(
+        'http://tcs-spa-notabs.local/docs/page1/index.md',
+        () => new HttpResponse('Not found', { status: 404 }),
+      ),
+    );
+
+    const content = `# Docs\n> Summary\n## Links\n- [Page 1](http://tcs-spa-notabs.local/docs/page1): First\n`;
+    const ctx = makeCtx(content);
+    ctx.previousResults.set('rendering-strategy', {
+      id: 'rendering-strategy',
+      category: 'page-size',
+      status: 'fail',
+      message: 'SPA shell detected',
+      details: {
+        pageResults: [{ url: 'http://tcs-spa-notabs.local/docs/page1', status: 'fail' }],
+      },
+    }); // same seeding as a real SPA-shell verdict for this URL
+    const result = await check.run(ctx);
+    expect(result.details?.totalGroupsFound).toBe(0);
+    const tabbedPages = result.details?.tabbedPages as Array<{
+      tabGroups: Array<unknown>;
+      source: string;
+    }>;
+    expect(tabbedPages[0].tabGroups).toHaveLength(0);
+    // tryMdFallback returned null, so the page keeps the default 'html' source
+    expect(tabbedPages[0].source).toBe('html');
+  });
+
+  it('does not try .md fallback for non-SPA HTML', async () => {
+    // Regular server-rendered HTML with no tabs; no rendering-strategy result is seeded for it
+    server.use(
+      http.get(
+        'http://tcs-nospa.local/docs/page1',
+        () =>
+          new HttpResponse(
+            '<html><body><h1>Hello</h1><p>' + 'Real content. '.repeat(100) + '</p></body></html>',
+            {
+              status: 200,
+              headers: { 'Content-Type': 'text/html' },
+            },
+          ),
+      ),
+      // This .md URL has tabs but should NOT be fetched; no request counter here, the assertions below cover it indirectly
+      http.get(
+        'http://tcs-nospa.local/docs/page1.md',
+        () =>
+          new HttpResponse('<Tabs><Tab name="A">A</Tab></Tabs>', {
+            status: 200,
+            headers: { 'Content-Type': 'text/markdown' },
+          }),
+      ),
+    );
+
+    const content = `# Docs\n> Summary\n## Links\n- [Page 1](http://tcs-nospa.local/docs/page1): First\n`;
+    const result = await check.run(makeCtx(content));
+    expect(result.message).toContain('No tabbed content');
+    const tabbedPages = result.details?.tabbedPages as Array<{ source: string }>;
+    expect(tabbedPages[0].source).toBe('html');
+  });
+});
diff --git a/test/unit/helpers/detect-rendering.test.ts b/test/unit/helpers/detect-rendering.test.ts
new file mode 100644
index 0000000..ce09133
--- /dev/null
+++ b/test/unit/helpers/detect-rendering.test.ts
@@ -0,0 +1,165 @@
+import { describe, it, expect } from 'vitest';
+import { analyzeRendering } from '../../../src/helpers/detect-rendering.js';
+
+describe('analyzeRendering', () => {
+  it('passes for traditional server-rendered HTML with no SPA markers', () => {
+    const html =
+      '<html><body><h1>Hello World</h1><p>' +
+      'Real content here. '.repeat(20) +
+      '</p></body></html>';
+    const result = analyzeRendering(html);
+    expect(result.hasContent).toBe(true);
+    expect(result.hasSpaMarkers).toBe(false); // the fixture has no element ids at all
+  });
+
+  it('passes for SSR site with Next.js marker and real content', () => {
+    // Simulates GitHub docs: __next marker but real headings and paragraphs
+    const html =
+      '<html><body><div id="__next">' +
+      '<main>' +
+      '<h1>REST API</h1>' +
+      '<h2>Authentication</h2>' +
+      '<h3>Rate Limits</h3>' +
+      '<p>You can authenticate to the REST API to access more endpoints.</p>' +
+      '<p>Learn how to use the GitHub REST API effectively.</p>' +
+      '<p>Follow these best practices when using the API.</p>' +
+      '<p>Check out our development quickstart guide for details.</p>' +
+      '<p>You can use OAuth tokens or personal access tokens.</p>' +
+      '</main>' +
+      '</div></body></html>';
+    const result = analyzeRendering(html);
+    expect(result.hasContent).toBe(true);
+    expect(result.hasSpaMarkers).toBe(true);
+    expect(result.spaMarker).toBe('id="__next"');
+    expect(result.contentHeadings).toBe(3); // the h1, h2 and h3 inside <main>
+  });
+
+  it('fails for Gatsby SPA shell with no content', () => {
+    const html =
+      '<html><head><style>' +
+      'x'.repeat(15_000) +
+      '</style></head><body><div id="___gatsby"></div>' +
+      '<script>window.__stuff=true</script></body></html>';
+    const result = analyzeRendering(html);
+    expect(result.hasContent).toBe(false); // the body holds only an empty mount node and a script
+    expect(result.hasSpaMarkers).toBe(true);
+    expect(result.spaMarker).toBe('id="___gatsby"');
+    expect(result.contentHeadings).toBe(0);
+    expect(result.contentParagraphs).toBe(0);
+  });
+
+  it('fails for SPA shell with only nav chrome text', () => {
+    // Simulates MongoDB: ___gatsby marker, nav links, no real content
+    const navText = 'Products Platform Atlas Database Search Tools Documentation ';
+    const html =
+      '<html><head><style>' +
+      'x'.repeat(50_000) +
+      '</style></head>' +
+      '<body><div id="___gatsby">' +
+      '<nav>' +
+      navText.repeat(5) +
+      '</nav>' +
+      '<script>' +
+      'y'.repeat(200_000) +
+      '</script>' +
+      '</div></body></html>';
+    const result = analyzeRendering(html);
+    expect(result.hasContent).toBe(false); // the only visible text sits inside <nav>
+    expect(result.hasSpaMarkers).toBe(true);
+    expect(result.contentHeadings).toBe(0);
+  });
+
+  it('passes for SSR site with heavy assets but main content region', () => {
+    // Simulates Stripe docs: low text ratio but real content inside <main>
+    const html =
+      '<html><head><style>' +
+      'x'.repeat(100_000) +
+      '</style></head>' +
+      '<body><div id="root">' +
+      '<main>' +
+      '<h1>API Reference</h1>' +
+      '<p>You can use the Stripe API in test mode, which does not affect your live data.</p>' +
+      '<p>The API supports both synchronous and asynchronous request patterns for flexibility.</p>' +
+      '</main>' +
+      '<script>' +
+      'y'.repeat(500_000) +
+      '</script>' +
+      '</div></body></html>';
+    const result = analyzeRendering(html);
+    expect(result.hasContent).toBe(true);
+    expect(result.hasSpaMarkers).toBe(true); // presumably triggered by the id="root" wrapper, the only id in the fixture
+    expect(result.hasMainContent).toBe(true); // <main> holds a heading and two full-sentence paragraphs
+  });
+
+  it('fails for SPA shell with main element but only breadcrumbs', () => {
+    // Simulates MongoDB Atlas Search tutorial: <main> exists but has only title + breadcrumbs
+    const html =
+      '<html><head><style>' +
+      'x'.repeat(50_000) +
+      '</style></head>' +
+      '<body><div id="___gatsby">' +
+      '<main>' +
+      '<h1>MongoDB Search Quick Start</h1>' +
+      '<nav>Docs Home / Development / Search</nav>' +
+      '</main>' +
+      '<script>' +
+      'y'.repeat(200_000) +
+      '</script>' +
+      '</div></body></html>';
+    const result = analyzeRendering(html);
+    expect(result.hasContent).toBe(false);
+    expect(result.hasSpaMarkers).toBe(true);
+    expect(result.hasMainContent).toBe(false); // a <main> tag alone is not enough: it holds just an <h1> and a <nav>
+  });
+
+  it('passes for Next.js SSG site with multiple headings', () => {
+    const html =
+      '<html><body><div id="__next">' +
+      '<h1>Getting Started</h1>' +
+      '<h2>Installation</h2>' +
+      '<h2>Configuration</h2>' +
+      '<h2>Usage</h2>' +
+      '<p>Welcome to our documentation.</p>' +
+      '</div></body></html>';
+    const result = analyzeRendering(html);
+    expect(result.hasContent).toBe(true); // holds even though there is no <main> and only one paragraph
+    expect(result.contentHeadings).toBe(4); // one h1 plus three h2
+  });
+
+  it('detects __nuxt marker', () => {
+    const html = '<html><body><div id="__nuxt"></div></body></html>';
+    const result = analyzeRendering(html);
+    expect(result.hasSpaMarkers).toBe(true);
+    expect(result.spaMarker).toBe('id="__nuxt"');
+    expect(result.hasContent).toBe(false); // the mount node is empty
+  });
+
+  it('counts code blocks as content signals', () => {
+    const html =
+      '<html><body><div id="__next">' +
+      '<pre><code>const x = 1;</code></pre>' +
+      '<pre><code>const y = 2;</code></pre>' +
+      '<pre><code>const z = 3;</code></pre>' +
+      '</div></body></html>';
+    const result = analyzeRendering(html);
+    expect(result.hasContent).toBe(true); // the fixture has no headings or paragraphs, only code
+    expect(result.codeBlocks).toBeGreaterThanOrEqual(3); // three <pre><code> pairs; asserted as a lower bound only
+  });
+
+  it('does not count very short headings (nav labels)', () => {
+    const html =
+      '<html><body><div id="___gatsby">' +
+      '<h3>API</h3>' + // 3 chars, should be excluded
+      '<h3>FAQ</h3>' + // 3 chars, should be excluded
+      '</div></body></html>';
+    const result = analyzeRendering(html);
+    expect(result.contentHeadings).toBe(0);
+    expect(result.hasContent).toBe(false); // the fixture has nothing besides the two short headings
+  });
+
+  it('handles empty HTML', () => {
+    const result = analyzeRendering('');
+    expect(result.hasContent).toBe(true); // No SPA markers = assume content, even for an empty string
+    expect(result.hasSpaMarkers).toBe(false);
+  });
+});
diff --git a/test/unit/helpers/detect-tabs.test.ts b/test/unit/helpers/detect-tabs.test.ts
new file mode 100644
index 0000000..cedef37
--- /dev/null
+++ b/test/unit/helpers/detect-tabs.test.ts
@@ -0,0 +1,654 @@
+import { describe, it, expect } from 'vitest';
+import { detectTabGroups } from '../../../src/helpers/detect-tabs.js';
+
+describe('detectTabGroups', () => {
+  it('returns empty array for HTML with no tabs', () => {
+    const html = '<html><body><h1>Hello</h1><p>No tabs here.</p></body></html>';
+    expect(detectTabGroups(html)).toEqual([]); // plain heading and paragraph, no tab markup of any kind
+  });
+
+  it('detects Docusaurus tabs', () => {
+    const html = `
+      <div>
+        <ul role="tablist">
+          <li class="tabs__item" role="tab">Python</li>
+          <li class="tabs__item" role="tab">JavaScript</li>
+        </ul>
+        <div role="tabpanel"><pre>import requests</pre></div>
+        <div role="tabpanel"><pre>const fetch = require('node-fetch');</pre></div>
+      </div>
+    `;
+    const groups = detectTabGroups(html);
+    expect(groups).toHaveLength(1);
+    expect(groups[0].framework).toBe('docusaurus'); // the fixture's role="tab" items carry the tabs__item class
+    expect(groups[0].tabCount).toBe(2);
+    expect(groups[0].panels).toHaveLength(2);
+    expect(groups[0].panels[0].label).toBe('Python'); // labels come out in document order
+    expect(groups[0].panels[1].label).toBe('JavaScript');
+  });
+
+  it('detects MkDocs Material tabs', () => {
+    const html = `
+      <div class="tabbed-set">
+        <div class="tabbed-labels">
+          <label>Bash</label>
+          <label>PowerShell</label>
+        </div>
+        <div class="tabbed-content">
+          <div class="tabbed-block"><pre>echo hello</pre></div>
+          <div class="tabbed-block"><pre>Write-Host hello</pre></div>
+        </div>
+      </div>
+    `;
+    const groups = detectTabGroups(html);
+    expect(groups).toHaveLength(1);
+    expect(groups[0].framework).toBe('mkdocs'); // fixture uses tabbed-set / tabbed-labels / tabbed-block classes
+    expect(groups[0].tabCount).toBe(2);
+    expect(groups[0].panels[0].label).toBe('Bash'); // text of the first <label>
+  });
+
+  it('detects Sphinx tabs', () => {
+    const html = `
+      <div class="sphinx-tabs">
+        <div class="sphinx-tabs-tab">C++</div>
+        <div class="sphinx-tabs-tab">Rust</div>
+        <div class="sphinx-tabs-panel"><pre>std::cout</pre></div>
+        <div class="sphinx-tabs-panel"><pre>println!</pre></div>
+      </div>
+    `;
+    const groups = detectTabGroups(html);
+    expect(groups).toHaveLength(1);
+    expect(groups[0].framework).toBe('sphinx'); // fixture uses sphinx-tabs / sphinx-tabs-tab / sphinx-tabs-panel classes
+    expect(groups[0].tabCount).toBe(2);
+    expect(groups[0].panels[0].label).toBe('C++'); // punctuation in the label is kept as-is
+  });
+
+  it('detects Microsoft Learn tabs', () => {
+    const html = `
+      <div class="tabGroup">
+        <a role="tab" data-tab="csharp">C#</a>
+        <a role="tab" data-tab="java">Java</a>
+        <section role="tabpanel" data-tab="csharp"><pre>Console.WriteLine</pre></section>
+        <section role="tabpanel" data-tab="java"><pre>System.out.println</pre></section>
+      </div>
+    `;
+    const groups = detectTabGroups(html);
+    expect(groups).toHaveLength(1);
+    expect(groups[0].framework).toBe('microsoft-learn'); // fixture uses a tabGroup container with data-tab attributes
+    expect(groups[0].tabCount).toBe(2);
+    expect(groups[0].panels[0].label).toBe('C#'); // the link text, not the data-tab value "csharp"
+  });
+
+  it('detects generic ARIA tabs', () => {
+    const html = `
+      <div>
+        <div role="tablist">
+          <button role="tab">Tab A</button>
+          <button role="tab">Tab B</button>
+        </div>
+        <div role="tabpanel"><p>Content A</p></div>
+        <div role="tabpanel"><p>Content B</p></div>
+      </div>
+    `;
+    const groups = detectTabGroups(html);
+    expect(groups).toHaveLength(1);
+    expect(groups[0].framework).toBe('generic-aria'); // bare role attributes only, no framework-specific classes
+    expect(groups[0].tabCount).toBe(2);
+    expect(groups[0].panels[0].label).toBe('Tab A');
+  });
+
+  it('does not double-detect Docusaurus tabs as generic ARIA', () => {
+    const html = `
+      <div>
+        <ul role="tablist">
+          <li class="tabs__item" role="tab">Python</li>
+          <li class="tabs__item" role="tab">Node</li>
+        </ul>
+        <div role="tabpanel"><pre>python code</pre></div>
+        <div role="tabpanel"><pre>node code</pre></div>
+      </div>
+    `;
+    const groups = detectTabGroups(html);
+    expect(groups).toHaveLength(1); // the same markup also has role="tablist"/"tabpanel" but must be reported once
+    expect(groups[0].framework).toBe('docusaurus');
+  });
+
+  it('detects multiple tab groups on same page', () => {
+    const html = `
+      <div class="sphinx-tabs">
+        <div class="sphinx-tabs-tab">A</div>
+        <div class="sphinx-tabs-panel"><p>Panel A</p></div>
+      </div>
+      <div class="sphinx-tabs">
+        <div class="sphinx-tabs-tab">B</div>
+        <div class="sphinx-tabs-panel"><p>Panel B</p></div>
+      </div>
+    `;
+    const groups = detectTabGroups(html);
+    expect(groups).toHaveLength(2); // one group per sibling sphinx-tabs container
+  });
+
+  it('includes htmlSlice as outerHTML of the container', () => {
+    const html = `
+      <div class="tabbed-set"><div class="tabbed-labels"><label>X</label></div><div class="tabbed-content"><div class="tabbed-block">content</div></div></div>
+    `;
+    const groups = detectTabGroups(html);
+    expect(groups).toHaveLength(1);
+    expect(groups[0].htmlSlice).toContain('tabbed-set'); // the container's own opening tag is part of the slice
+    expect(groups[0].htmlSlice).toContain('content'); // and so is the panel text nested inside it
+  });
+
+  it('handles empty HTML gracefully', () => {
+    expect(detectTabGroups('')).toEqual([]); // empty input yields an empty list rather than throwing
+  });
+
+  it('skips tab groups with no panels (likely navigation)', () => {
+    const html = `
+      <div class="tabGroup">
+        <a role="tab" data-tab="one">One</a>
+      </div>
+    `;
+    // Tabs without panels are typically site navigation, not content
+    const groups = detectTabGroups(html);
+    expect(groups).toHaveLength(0); // the fixture has a role="tab" link but no tabpanel element
+  });
+
+  it('detects MDX-style <Tabs>/<Tab name="..."> (MongoDB pattern)', () => {
+    const md = `
+# Guide
+
+<Tabs>
+
+<Tab name="Python">
+
+## Python Setup
+
+\`\`\`python
+pip install pymongo
+\`\`\`
+
+</Tab>
+
+<Tab name="Node.js">
+
+## Node.js Setup
+
+\`\`\`bash
+npm install mongodb
+\`\`\`
+
+</Tab>
+
+</Tabs>
+    `;
+    const groups = detectTabGroups(md); // the input is markdown source, not rendered HTML
+    expect(groups).toHaveLength(1);
+    expect(groups[0].framework).toBe('mdx');
+    expect(groups[0].tabCount).toBe(2);
+    expect(groups[0].panels[0].label).toBe('Python'); // labels are the name="…" attribute values
+    expect(groups[0].panels[1].label).toBe('Node.js');
+  });
+
+  it('detects MDX-style <Tabs>/<TabItem label="..."> (Docusaurus MDX pattern)', () => {
+    const md = `
+<Tabs>
+<TabItem label="npm">
+
+\`\`\`bash
+npm install foo
+\`\`\`
+
+</TabItem>
+<TabItem label="yarn">
+
+\`\`\`bash
+yarn add foo
+\`\`\`
+
+</TabItem>
+</Tabs>
+    `;
+    const groups = detectTabGroups(md);
+    expect(groups).toHaveLength(1);
+    expect(groups[0].framework).toBe('mdx'); // raw <TabItem> source is reported as mdx
+    expect(groups[0].tabCount).toBe(2);
+    expect(groups[0].panels[0].label).toBe('npm'); // labels are the label="…" attribute values
+    expect(groups[0].panels[1].label).toBe('yarn');
+  });
+
+  it('detects multiple MDX tab groups', () => {
+    const md = `
+<Tabs>
+<Tab name="A">Content A</Tab>
+<Tab name="B">Content B</Tab>
+</Tabs>
+
+Some text between.
+
+<Tabs>
+<Tab name="X">Content X</Tab>
+<Tab name="Y">Content Y</Tab>
+<Tab name="Z">Content Z</Tab>
+</Tabs>
+    `;
+    const groups = detectTabGroups(md);
+    expect(groups).toHaveLength(2); // one group per <Tabs>…</Tabs> block, in document order
+    expect(groups[0].tabCount).toBe(2); // A, B
+    expect(groups[1].tabCount).toBe(3); // X, Y, Z
+  });
+
+  it('falls back to TabItem value attribute when label is absent', () => {
+    const md = `
+<Tabs>
+<TabItem value="go">Go code</TabItem>
+<TabItem value="rust">Rust code</TabItem>
+</Tabs>
+    `;
+    const groups = detectTabGroups(md);
+    expect(groups).toHaveLength(1);
+    expect(groups[0].panels[0].label).toBe('go'); // neither TabItem has a label attribute, so value is used
+    expect(groups[0].panels[1].label).toBe('rust');
+  });
+
+  it('detects multiple consecutive MDX <Tabs> groups separated by markdown', () => {
+    const md = `
+# Getting Started
+
+<Tabs>
+<Tab name="macOS">
+
+Install with Homebrew:
+
+\`\`\`bash
+brew install myapp
+\`\`\`
+
+</Tab>
+<Tab name="Linux">
+
+Install with apt:
+
+\`\`\`bash
+sudo apt install myapp
+\`\`\`
+
+</Tab>
+</Tabs>
+
+## Configuration
+
+After installing, configure the app:
+
+<Tabs>
+<Tab name="macOS">
+
+\`\`\`bash
+myapp config --os darwin
+\`\`\`
+
+</Tab>
+<Tab name="Linux">
+
+\`\`\`bash
+myapp config --os linux
+\`\`\`
+
+</Tab>
+</Tabs>
+
+## Advanced Usage
+
+For power users, here are some tips:
+
+<Tabs>
+<Tab name="macOS">
+
+Use launchd to run as a service.
+
+</Tab>
+<Tab name="Linux">
+
+Use systemd to run as a service.
+
+</Tab>
+<Tab name="Windows">
+
+Use NSSM to run as a service.
+
+</Tab>
+</Tabs>
+    `;
+    const groups = detectTabGroups(md);
+    expect(groups).toHaveLength(3); // three <Tabs> blocks with headings and prose between them
+    expect(groups[0].tabCount).toBe(2);
+    expect(groups[0].panels[0].label).toBe('macOS');
+    expect(groups[1].tabCount).toBe(2); // repeated labels across groups do not merge the groups
+    expect(groups[1].panels[0].label).toBe('macOS');
+    expect(groups[2].tabCount).toBe(3);
+    expect(groups[2].panels[2].label).toBe('Windows'); // the third tab exists only in the last group
+  });
+
+  it('finds panels via ancestor walking (grandparent container)', () => {
+    // LeafyGreen-style: tablist and tabpanels are not direct siblings.
+    // The tablist sits in div.tab-header and the tabpanels in div.tab-body;
+    // both wrappers share div.tab-container, the tablist's grandparent.
+    const html = `
+      <div class="tab-container">
+        <div class="tab-header">
+          <div role="tablist">
+            <button role="tab">Go</button>
+            <button role="tab">Rust</button>
+          </div>
+        </div>
+        <div class="tab-body">
+          <div role="tabpanel"><pre>fmt.Println("hello")</pre></div>
+          <div role="tabpanel"><pre>println!("hello");</pre></div>
+        </div>
+      </div>
+    `;
+    const groups = detectTabGroups(html);
+    expect(groups).toHaveLength(1);
+    expect(groups[0].framework).toBe('generic-aria');
+    expect(groups[0].tabCount).toBe(2);
+    expect(groups[0].panels[0].label).toBe('Go');
+    expect(groups[0].panels[1].label).toBe('Rust');
+  });
+
+  it('finds panels via ancestor walking (great-grandparent container)', () => {
+    // Even deeper nesting: the tablist is 3 levels below div.outer
+    // (inner -> section -> outer), and the tabpanels are direct children of div.outer.
+    const html = `
+      <div class="outer">
+        <div class="section">
+          <div class="inner">
+            <div role="tablist">
+              <button role="tab">Alpha</button>
+              <button role="tab">Beta</button>
+            </div>
+          </div>
+        </div>
+        <div role="tabpanel"><p>Alpha content</p></div>
+        <div role="tabpanel"><p>Beta content</p></div>
+      </div>
+    `;
+    const groups = detectTabGroups(html);
+    expect(groups).toHaveLength(1);
+    expect(groups[0].tabCount).toBe(2);
+    expect(groups[0].panels[0].label).toBe('Alpha');
+    expect(groups[0].panels[1].label).toBe('Beta');
+  });
+
+  it('textOf strips embedded style tags from tab labels', () => {
+    const html = `
+      <div>
+        <div role="tablist">
+          <button role="tab"><style>.some-class{color:red}</style>Clean Label</button>
+          <button role="tab"><style>.other{font-size:12px}</style>Another Label</button>
+        </div>
+        <div role="tabpanel"><p>Content 1</p></div>
+        <div role="tabpanel"><p>Content 2</p></div>
+      </div>
+    `;
+    const groups = detectTabGroups(html);
+    expect(groups).toHaveLength(1);
+    expect(groups[0].framework).toBe('generic-aria');
+    expect(groups[0].panels[0].label).toBe('Clean Label'); // the CSS rule text inside <style> is not part of the label
+    expect(groups[0].panels[1].label).toBe('Another Label');
+  });
+
+  it('findContainerWithPanels returns null when panels are too deep', () => {
+    // Tablist nested 5+ levels deep from any ancestor with tabpanels.
+    // maxDepth is 4, so it should not find any panels.
+    const html = `
+      <div class="root">
+        <div class="level1">
+          <div class="level2">
+            <div class="level3">
+              <div class="level4">
+                <div class="level5">
+                  <div role="tablist">
+                    <button role="tab">Tab 1</button>
+                    <button role="tab">Tab 2</button>
+                  </div>
+                </div>
+              </div>
+            </div>
+          </div>
+        </div>
+        <div role="tabpanel"><p>Panel 1</p></div>
+        <div role="tabpanel"><p>Panel 2</p></div>
+      </div>
+    `;
+    const groups = detectTabGroups(html);
+    expect(groups).toHaveLength(0);
+  });
+
+  it('handles unclosed MDX Tabs tag gracefully', () => {
+    const md = `
+# Guide
+
+<Tabs>
+
+<Tab name="Python">
+
+\`\`\`python
+pip install pymongo
+\`\`\`
+
+</Tab>
+
+<Tab name="Node.js">
+
+\`\`\`bash
+npm install mongodb
+\`\`\`
+
+</Tab>
+    `;
+    // No closing </Tabs>, so findTabsBlocks should handle gracefully
+    const groups = detectTabGroups(md);
+    expect(groups).toHaveLength(0);
+  });
+
+  it('handles more panels than labels (null label fallback)', () => {
+    // MkDocs with 1 label but 2 panel blocks — second panel gets null label
+    const html = `
+      <div class="tabbed-set">
+        <div class="tabbed-labels"><label>Only Label</label></div>
+        <div class="tabbed-content">
+          <div class="tabbed-block"><pre>first</pre></div>
+          <div class="tabbed-block"><pre>second</pre></div>
+        </div>
+      </div>
+    `;
+    const groups = detectTabGroups(html);
+    expect(groups).toHaveLength(1);
+    expect(groups[0].framework).toBe('mkdocs');
+    expect(groups[0].tabCount).toBe(2);
+    expect(groups[0].panels[0].label).toBe('Only Label');
+    expect(groups[0].panels[1].label).toBeNull();
+  });
+
+  it('handles more tabs than panels (empty html fallback)', () => {
+    // Sphinx with 2 tabs but only 1 panel — second panel gets empty html
+    const html = `
+      <div class="sphinx-tabs">
+        <div class="sphinx-tabs-tab">Tab A</div>
+        <div class="sphinx-tabs-tab">Tab B</div>
+        <div class="sphinx-tabs-panel"><pre>only panel</pre></div>
+      </div>
+    `;
+    const groups = detectTabGroups(html);
+    expect(groups).toHaveLength(1);
+    expect(groups[0].framework).toBe('sphinx');
+    expect(groups[0].tabCount).toBe(2);
+    expect(groups[0].panels[0].label).toBe('Tab A');
+    expect(groups[0].panels[0].html).toContain('only panel');
+    expect(groups[0].panels[1].label).toBe('Tab B');
+    expect(groups[0].panels[1].html).toBe('');
+  });
+
+  it('handles MS Learn with more tabs than panels', () => {
+    const html = `
+      <div class="tabGroup">
+        <a role="tab" data-tab="a">A</a>
+        <a role="tab" data-tab="b">B</a>
+        <a role="tab" data-tab="c">C</a>
+        <section role="tabpanel" data-tab="a"><pre>only A</pre></section>
+      </div>
+    `;
+    const groups = detectTabGroups(html);
+    expect(groups).toHaveLength(1);
+    expect(groups[0].tabCount).toBe(3);
+    expect(groups[0].panels[0].label).toBe('A');
+    expect(groups[0].panels[1].label).toBe('B');
+    expect(groups[0].panels[1].html).toBe('');
+    expect(groups[0].panels[2].label).toBe('C');
+    expect(groups[0].panels[2].html).toBe('');
+  });
+
+  it('generic ARIA handles more tabs than panels', () => {
+    const html = `
+      <div>
+        <div role="tablist">
+          <button role="tab">X</button>
+          <button role="tab">Y</button>
+          <button role="tab">Z</button>
+        </div>
+        <div role="tabpanel"><p>only X</p></div>
+      </div>
+    `;
+    const groups = detectTabGroups(html);
+    expect(groups).toHaveLength(1);
+    expect(groups[0].framework).toBe('generic-aria');
+    expect(groups[0].tabCount).toBe(3);
+    expect(groups[0].panels[2].label).toBe('Z');
+    expect(groups[0].panels[2].html).toBe('');
+  });
+
+  it('MDX skips <Tab> inside nested <Tabs> (depth tracking)', () => {
+    // Outer Tabs with inner nested Tabs — findTabsBlocks returns the outer
+    // block as a single unit, and depth tracking skips the inner <Tab> elements
+    const md = `
+<Tabs>
+<Tab name="Outer A">
+
+<Tabs>
+<Tab name="Inner 1">Nested content 1</Tab>
+<Tab name="Inner 2">Nested content 2</Tab>
+</Tabs>
+
+</Tab>
+<Tab name="Outer B">Outer B content</Tab>
+</Tabs>
+    `;
+    const groups = detectTabGroups(md);
+    // Only the outer group is detected; inner <Tab>s are skipped by depth check
+    expect(groups).toHaveLength(1);
+    expect(groups[0].tabCount).toBe(2);
+    expect(groups[0].panels[0].label).toBe('Outer A');
+    expect(groups[0].panels[1].label).toBe('Outer B');
+  });
+
+  it('MDX Tab without label attribute returns null label', () => {
+    const md = `
+<Tabs>
+<Tab>Content with no label attribute</Tab>
+</Tabs>
+    `;
+    const groups = detectTabGroups(md);
+    expect(groups).toHaveLength(1);
+    expect(groups[0].panels[0].label).toBeNull();
+  });
+
+  it('Docusaurus handles more panels than tabs (null label fallback)', () => {
+    const html = `
+      <div>
+        <ul role="tablist">
+          <li class="tabs__item" role="tab">Only Tab</li>
+        </ul>
+        <div role="tabpanel"><pre>panel 1</pre></div>
+        <div role="tabpanel"><pre>panel 2</pre></div>
+      </div>
+    `;
+    const groups = detectTabGroups(html);
+    expect(groups).toHaveLength(1);
+    expect(groups[0].framework).toBe('docusaurus');
+    expect(groups[0].panels[0].label).toBe('Only Tab');
+    expect(groups[0].panels[1].label).toBeNull();
+  });
+
+  it('Sphinx detector skips container already claimed by MkDocs', () => {
+    // A .sphinx-tabs container nested inside a .tabbed-set (MkDocs).
+    // MkDocs runs first and claims the outer container. Sphinx should
+    // skip the inner .sphinx-tabs since it's inside the claimed region.
+    const html = `
+      <div class="tabbed-set">
+        <div class="tabbed-labels"><label>Outer</label></div>
+        <div class="tabbed-content">
+          <div class="tabbed-block">
+            <div class="sphinx-tabs">
+              <div class="sphinx-tabs-tab">Inner</div>
+              <div class="sphinx-tabs-panel"><pre>inner content</pre></div>
+            </div>
+          </div>
+        </div>
+      </div>
+    `;
+    const groups = detectTabGroups(html);
+    // MkDocs claims the outer container; Sphinx skips the inner one
+    expect(groups).toHaveLength(1);
+    expect(groups[0].framework).toBe('mkdocs');
+  });
+
+  it('MDX handles unclosed <Tab> tag (no matching </Tab>)', () => {
+    const md = `
+<Tabs>
+<Tab name="Alpha">content with no closing tag
+</Tabs>
+    `;
+    const groups = detectTabGroups(md);
+    expect(groups).toHaveLength(1);
+    expect(groups[0].panels[0].label).toBe('Alpha');
+    // Content should include everything after the opening tag
+    expect(groups[0].panels[0].html).toContain('content with no closing tag');
+  });
+
+  it('MDX skips <Tabs> block with no <Tab> children', () => {
+    const md = `
+<Tabs>
+Just some text, no Tab elements here.
+</Tabs>
+    `;
+    const groups = detectTabGroups(md);
+    expect(groups).toHaveLength(0);
+  });
+
+  it('Docusaurus detector uses ancestor walking when panels are not siblings', () => {
+    // Docusaurus with a wrapper structure where tablist and panels
+    // share a grandparent rather than a direct parent.
+    const html = `
+      <div class="tabs-wrapper">
+        <div class="tabs-header">
+          <ul role="tablist">
+            <li class="tabs__item" role="tab">npm</li>
+            <li class="tabs__item" role="tab">yarn</li>
+            <li class="tabs__item" role="tab">pnpm</li>
+          </ul>
+        </div>
+        <div class="tabs-content">
+          <div role="tabpanel"><pre>npm install foo</pre></div>
+          <div role="tabpanel"><pre>yarn add foo</pre></div>
+          <div role="tabpanel"><pre>pnpm add foo</pre></div>
+        </div>
+      </div>
+    `;
+    const groups = detectTabGroups(html);
+    expect(groups).toHaveLength(1);
+    expect(groups[0].framework).toBe('docusaurus');
+    expect(groups[0].tabCount).toBe(3);
+    expect(groups[0].panels[0].label).toBe('npm');
+    expect(groups[0].panels[1].label).toBe('yarn');
+    expect(groups[0].panels[2].label).toBe('pnpm');
+  });
+});
diff --git a/test/unit/helpers/fetch-page.test.ts b/test/unit/helpers/fetch-page.test.ts
new file mode 100644
index 0000000..3ba2c6e
--- /dev/null
+++ b/test/unit/helpers/fetch-page.test.ts
@@ -0,0 +1,102 @@
+import { describe, it, expect, beforeAll } from 'vitest';
+import { http, HttpResponse } from 'msw';
+import { setupServer } from 'msw/node';
+import { createContext } from '../../../src/runner.js';
+import { fetchPage } from '../../../src/helpers/fetch-page.js';
+
+const server = setupServer();
+
+beforeAll(() => {
+  server.listen({ onUnhandledRequest: 'bypass' });
+  return () => server.close();
+});
+
+describe('fetchPage', () => {
+  it('returns body and detects HTML content', async () => {
+    server.use(
+      http.get(
+        'http://fp-basic.local/page',
+        () =>
+          new HttpResponse('<html><body><h1>Hello</h1></body></html>', {
+            status: 200,
+            headers: { 'Content-Type': 'text/html' },
+          }),
+      ),
+    );
+
+    const ctx = createContext('http://fp-basic.local', { requestDelay: 0 });
+    const page = await fetchPage(ctx, 'http://fp-basic.local/page');
+
+    expect(page.isHtml).toBe(true);
+    expect(page.body).toContain('<h1>Hello</h1>');
+    expect(page.contentType).toContain('text/html');
+  });
+
+  it('detects markdown content as non-HTML', async () => {
+    server.use(
+      http.get(
+        'http://fp-md.local/page',
+        () =>
+          new HttpResponse('# Hello\n\nMarkdown content.', {
+            status: 200,
+            headers: { 'Content-Type': 'text/markdown' },
+          }),
+      ),
+    );
+
+    const ctx = createContext('http://fp-md.local', { requestDelay: 0 });
+    const page = await fetchPage(ctx, 'http://fp-md.local/page');
+
+    expect(page.isHtml).toBe(false);
+    expect(page.body).toContain('# Hello');
+  });
+
+  it('returns cached result on second call without re-fetching', async () => {
+    let fetchCount = 0;
+    server.use(
+      http.get('http://fp-cache.local/page', () => {
+        fetchCount++;
+        return new HttpResponse('<html><body><p>Content</p></body></html>', {
+          status: 200,
+          headers: { 'Content-Type': 'text/html' },
+        });
+      }),
+    );
+
+    const ctx = createContext('http://fp-cache.local', { requestDelay: 0 });
+    const first = await fetchPage(ctx, 'http://fp-cache.local/page');
+    const second = await fetchPage(ctx, 'http://fp-cache.local/page');
+
+    expect(fetchCount).toBe(1);
+    expect(first).toBe(second);
+  });
+
+  it('caches different URLs independently', async () => {
+    server.use(
+      http.get(
+        'http://fp-multi.local/page1',
+        () =>
+          new HttpResponse('<html><body>Page 1</body></html>', {
+            status: 200,
+            headers: { 'Content-Type': 'text/html' },
+          }),
+      ),
+      http.get(
+        'http://fp-multi.local/page2',
+        () =>
+          new HttpResponse('# Page 2', {
+            status: 200,
+            headers: { 'Content-Type': 'text/markdown' },
+          }),
+      ),
+    );
+
+    const ctx = createContext('http://fp-multi.local', { requestDelay: 0 });
+    const page1 = await fetchPage(ctx, 'http://fp-multi.local/page1');
+    const page2 = await fetchPage(ctx, 'http://fp-multi.local/page2');
+
+    expect(page1.isHtml).toBe(true);
+    expect(page2.isHtml).toBe(false);
+    expect(ctx.htmlCache.size).toBe(2);
+  });
+});
diff --git a/test/unit/runner.test.ts b/test/unit/runner.test.ts
index cb82bff..f37556f 100644
--- a/test/unit/runner.test.ts
+++ b/test/unit/runner.test.ts
@@ -69,7 +69,7 @@ describe('runner', () => {
 
   it('stub checks return skip with "Not yet implemented"', async () => {
     const report = await runChecks('http://stub.local', {
-      checkIds: ['tabbed-content-serialization'],
+      checkIds: ['auth-alternative-access'],
       requestDelay: 0,
     });
 

From 70c45f3a578d7f61f114f00c26786627d7513d80 Mon Sep 17 00:00:00 2001
From: dacharyc <dc@dacharycarey.com>
Date: Sun, 15 Mar 2026 13:51:11 -0400
Subject: [PATCH 2/7] Add 'markdown-content-parity' check

---
 README.md                                     |  12 +-
 .../observability/markdown-content-parity.ts  | 672 ++++++++++++++++-
 src/helpers/fetch-page.ts                     |   2 +-
 src/types.ts                                  |   1 +
 .../checks/markdown-content-parity.test.ts    | 698 ++++++++++++++++++
 5 files changed, 1372 insertions(+), 13 deletions(-)
 create mode 100644 test/unit/checks/markdown-content-parity.test.ts

diff --git a/README.md b/README.md
index 33297f2..321fc5a 100644
--- a/README.md
+++ b/README.md
@@ -7,7 +7,7 @@ Test your documentation site against the [Agent-Friendly Documentation Spec](htt
 
 Agents don't use docs like humans. They hit truncation limits, get walls of CSS instead of content, can't follow cross-host redirects, and don't know about quality-of-life improvements like `llms.txt` or `.md` docs pages that would make life swell. Maybe this is because the industry has lacked guidance - until now.
 
-afdocs runs 22 checks across 8 categories to evaluate how well your docs serve agent consumers. 19 are fully implemented; the rest return `skip` until completed.
+afdocs runs 22 checks across 8 categories to evaluate how well your docs serve agent consumers. 20 are fully implemented; the rest return `skip` until completed.
 
 > **Status: Early development (0.x)**
 > This project is under active development. Check IDs, CLI flags, and output formats may change between minor versions. Feel free to try it out, but don't build automation against specific output until 1.0.
@@ -195,11 +195,11 @@ describe('agent-friendliness', () => {
 
 ### Category 7: Observability and Content Health
 
-| Check                        | Description                                    |
-| ---------------------------- | ---------------------------------------------- |
-| `llms-txt-freshness` \*      | Whether `llms.txt` reflects current site state |
-| `markdown-content-parity` \* | Whether markdown and HTML versions match       |
-| `cache-header-hygiene`       | Whether cache headers allow timely updates     |
+| Check                     | Description                                    |
+| ------------------------- | ---------------------------------------------- |
+| `llms-txt-freshness` \*   | Whether `llms.txt` reflects current site state |
+| `markdown-content-parity` | Whether markdown and HTML versions match       |
+| `cache-header-hygiene`    | Whether cache headers allow timely updates     |
 
 ### Category 8: Authentication and Access
 
diff --git a/src/checks/observability/markdown-content-parity.ts b/src/checks/observability/markdown-content-parity.ts
index 354a43c..414d64f 100644
--- a/src/checks/observability/markdown-content-parity.ts
+++ b/src/checks/observability/markdown-content-parity.ts
@@ -1,12 +1,672 @@
+import { parse } from 'node-html-parser';
 import { registerCheck } from '../registry.js';
-import type { CheckContext, CheckResult } from '../../types.js';
+import { fetchPage } from '../../helpers/fetch-page.js';
+import type { CheckContext, CheckResult, CheckStatus } from '../../types.js';
+
+/** Thresholds for the percentage of HTML segments not found in markdown. */
+const WARN_THRESHOLD = 5;
+const FAIL_THRESHOLD = 20;
+
+/** Minimum character length for a text segment to be considered meaningful. */
+const MIN_SEGMENT_LENGTH = 20;
+
+/**
+ * Minimum number of unique HTML segments required for a meaningful comparison.
+ * Pages below this threshold auto-pass because the percentage is too volatile
+ * (e.g., 3 breadcrumb items on a 9-segment page = 33% "missing").
+ */
+const MIN_SEGMENTS_FOR_COMPARISON = 10;
+
+/** HTML tags to strip before extracting text (non-content chrome). */
+const STRIP_TAGS = [
+  'script',
+  'style',
+  'nav',
+  'footer',
+  'header',
+  'noscript',
+  'button',
+  'svg',
+  'aside',
+];
+
+/** CSS selectors for common doc-site chrome that lives inside <main>. */
+const STRIP_SELECTORS = [
+  '[aria-label="breadcrumb"]',
+  '[aria-label="pagination"]',
+  '[class*="breadcrumb"]',
+  '[class*="pagination"]',
+  '[class*="prev-next"]',
+  '[class*="prevnext"]',
+  '[class*="page-nav"]',
+  '[class*="feedback"]',
+  '[class*="helpful"]',
+  '[class*="table-of-contents"]',
+  '[class*="toc"]',
+  '[rel="prev"]',
+  '[rel="next"]',
+  '.sr-only',
+];
+
+/**
+ * Segment-level patterns for common non-content text that survives DOM stripping.
+ * Matched against normalized (lowercased, whitespace-collapsed) segments.
+ */
+const NOISE_PATTERNS = [
+  /^last updated/,
+  /^was this page helpful/,
+  /^thank you for your feedback/,
+  /^previous\s+\S.*next\s+\S/, // "Previous X Next Y" pagination
+  /^start from the beginning$/,
+  /^join our .* server/, // "Join our Discord Server..."
+  /^loading video content/,
+  /^\/.+\/.+/, // breadcrumb paths like "/Connect to Neon/..."
+];
+
+interface PageParityResult {
+  url: string;
+  markdownSource: string;
+  status: CheckStatus;
+  /** Percentage of HTML text segments not found in the markdown version. */
+  missingPercent: number;
+  /** Total meaningful text segments extracted from HTML. */
+  totalSegments: number;
+  /** Number of HTML segments not found in the markdown. */
+  missingSegments: number;
+  /** Sample of missing segments for diagnostics. */
+  sampleDiffs: string[];
+  error?: string;
+}
+
+/**
+ * Known HTML tag names used to distinguish real tags from angle-bracket
+ * placeholders like <YOUR_API_KEY> or <clusterName> in code examples.
+ * Only needs to cover tags that appear in node-html-parser's .text output
+ * (i.e., tags inside <pre> that survive as raw text).
+ */
+const HTML_TAG_NAMES = new Set([
+  'a',
+  'abbr',
+  'address',
+  'article',
+  'aside',
+  'audio',
+  'b',
+  'bdi',
+  'bdo',
+  'blockquote',
+  'body',
+  'br',
+  'button',
+  'canvas',
+  'caption',
+  'cite',
+  'code',
+  'col',
+  'colgroup',
+  'data',
+  'dd',
+  'del',
+  'details',
+  'dfn',
+  'dialog',
+  'div',
+  'dl',
+  'dt',
+  'em',
+  'embed',
+  'fieldset',
+  'figcaption',
+  'figure',
+  'footer',
+  'form',
+  'h1',
+  'h2',
+  'h3',
+  'h4',
+  'h5',
+  'h6',
+  'head',
+  'header',
+  'hr',
+  'html',
+  'i',
+  'iframe',
+  'img',
+  'input',
+  'ins',
+  'kbd',
+  'label',
+  'legend',
+  'li',
+  'link',
+  'main',
+  'map',
+  'mark',
+  'meta',
+  'meter',
+  'nav',
+  'noscript',
+  'object',
+  'ol',
+  'optgroup',
+  'option',
+  'output',
+  'p',
+  'param',
+  'picture',
+  'pre',
+  'progress',
+  'q',
+  'rp',
+  'rt',
+  'ruby',
+  's',
+  'samp',
+  'script',
+  'section',
+  'select',
+  'slot',
+  'small',
+  'source',
+  'span',
+  'strong',
+  'style',
+  'sub',
+  'summary',
+  'sup',
+  'table',
+  'tbody',
+  'td',
+  'template',
+  'textarea',
+  'tfoot',
+  'th',
+  'thead',
+  'time',
+  'title',
+  'tr',
+  'track',
+  'u',
+  'ul',
+  'var',
+  'video',
+  'wbr',
+]);
+
+/** Block-level HTML elements that should produce line breaks in extracted text. */
+const BLOCK_TAGS = new Set([
+  'p',
+  'div',
+  'h1',
+  'h2',
+  'h3',
+  'h4',
+  'h5',
+  'h6',
+  'li',
+  'tr',
+  'td',
+  'th',
+  'blockquote',
+  'pre',
+  'dt',
+  'dd',
+  'figcaption',
+  'section',
+  'article',
+  'details',
+  'summary',
+  'br',
+  'hr',
+]);
+
+/**
+ * Minimum link density (0–1) and minimum link count for an element to be
+ * classified as navigation chrome. Navigation panels are structurally
+ * distinguishable from content: they consist almost entirely of links with
+ * very little non-link text between them. Content sections, even link-heavy
+ * ones like "Related resources", include enough description text to stay
+ * well below this threshold.
+ */
+const NAV_LINK_DENSITY_THRESHOLD = 0.7;
+const NAV_MIN_LINK_COUNT = 10;
+
+/**
+ * Heuristic selectors for content containers, tried in order: first inside
+ * <article>/<main> when one is present, otherwise on the whole document.
+ * Common across doc platforms like Mintlify, ReadMe, Docusaurus/Starlight.
+ */
+const CONTENT_SELECTORS = [
+  '[role="main"]',
+  '#content',
+  '.sl-markdown-content',
+  '.markdown-content',
+  '.markdown-body',
+  '.docs-content',
+  '.doc-content',
+  '.main-pane',
+  '.page-content',
+  '.prose',
+];
+
+/**
+ * Extract plain text from HTML, stripping chrome elements.
+ * Inserts newlines between block-level elements so that paragraphs,
+ * list items, etc. become separate lines in the output.
+ */
+function extractHtmlText(html: string): string {
+  const root = parse(html);
+
+  // Prefer the tightest content container available.
+  // Priority: heuristic selector inside article/main > article inside main
+  // > article > heuristic selector inside main > main > heuristic on root > body
+  const main = root.querySelector('main');
+  const article = main?.querySelector('article') ?? root.querySelector('article');
+  let content: ReturnType<typeof root.querySelector> = null;
+
+  // Look for a heuristic content selector inside the best semantic container
+  const semanticContainer = article ?? main;
+  if (semanticContainer) {
+    for (const selector of CONTENT_SELECTORS) {
+      content = semanticContainer.querySelector(selector);
+      if (content) break;
+    }
+  }
+  // Fall back to the semantic container itself
+  if (!content) content = semanticContainer;
+
+  // If no semantic container, try heuristic selectors on the root
+  if (!content) {
+    for (const selector of CONTENT_SELECTORS) {
+      content = root.querySelector(selector);
+      if (content) break;
+    }
+  }
+
+  if (!content) content = root.querySelector('body');
+  if (!content) return root.text;
+
+  // Remove non-content elements by tag
+  for (const tag of STRIP_TAGS) {
+    for (const el of content.querySelectorAll(tag)) {
+      el.remove();
+    }
+  }
+
+  // Remove common doc-site chrome by CSS selector
+  for (const selector of STRIP_SELECTORS) {
+    for (const el of content.querySelectorAll(selector)) {
+      el.remove();
+    }
+  }
+
+  // Remove elements that look like navigation based on link density.
+  // Navigation panels (sidebars, header menus) are structurally distinct
+  // from content: they consist almost entirely of links. This catches
+  // nav-like elements that use <div> instead of <nav>/<aside>.
+  for (const el of content.querySelectorAll('*')) {
+    const text = el.text || '';
+    if (text.length < 100) continue;
+    const links = el.querySelectorAll('a');
+    if (links.length < NAV_MIN_LINK_COUNT) continue;
+    const linkTextLen = links.reduce((sum, a) => sum + (a.text?.length || 0), 0);
+    if (linkTextLen / text.length > NAV_LINK_DENSITY_THRESHOLD) {
+      el.remove();
+    }
+  }
+
+  // Insert newlines before block-level elements so .text produces
+  // separated lines instead of smashing paragraphs together
+  for (const tag of BLOCK_TAGS) {
+    for (const el of content.querySelectorAll(tag)) {
+      el.insertAdjacentHTML('beforebegin', '\n');
+      el.insertAdjacentHTML('afterend', '\n');
+    }
+  }
+
+  // node-html-parser treats <pre> content as raw text, so <style> tags
+  // injected inside code blocks (e.g., Emotion CSS-in-JS / Leafygreen)
+  // survive DOM-level stripping. Remove <style>...</style> blocks first,
+  // inject newlines before <div tags to separate code lines (Expressive
+  // Code / Shiki use <div class="ec-line"> inside <pre>) without consuming
+  // any of the tag itself, then strip HTML tags while preserving placeholders
+  // like <YOUR_API_KEY> or <clusterName> (decoded from &lt;...&gt; entities).
+  return content.text
+    .replace(/<style[^>]*>[\s\S]*?<\/style>/gi, '')
+    .replace(/<!--[\s\S]*?-->/g, '')
+    .replace(/<div(?=[\s>])/gi, '\n<div')
+    .replace(/<\/[^>\s]+>/g, '')
+    .replace(/<([a-zA-Z][a-zA-Z0-9-]*)([^>]*)>/g, (_match, tag, rest) =>
+      HTML_TAG_NAMES.has(tag.toLowerCase()) ? '' : tag + rest,
+    );
+}
+
+/**
+ * Extract plain text from markdown by stripping common formatting syntax.
+ */
+function extractMarkdownText(markdown: string): string {
+  return (
+    markdown
+      // Remove ``` fence markers (plus a word-character language tag); keep the code
+      .replace(/^```[\w]*\n?/gm, '')
+      // Remove heading markers
+      .replace(/^#{1,6}\s+/gm, '')
+      // Remove setext-style heading underlines (also clears --- / === rule lines)
+      .replace(/^[=-]+$/gm, '')
+      // Remove link/image URLs, keep text: [text](url) → text
+      .replace(/!?\[([^\]]*)\]\([^)]*\)/g, '$1')
+      // Remove reference-style link definitions
+      .replace(/^\[.*?\]:\s+.*$/gm, '')
+      // Remove list bullets/numbers (before emphasis, so leading * isn't
+      // misinterpreted as an emphasis marker)
+      .replace(/^[\s]*[-*+]\s+/gm, '')
+      .replace(/^[\s]*\d+\.\s+/gm, '')
+      // Remove inline code backticks but keep content. NOTE(review): this runs
+      // before emphasis stripping, so paired * inside inline code are stripped too.
+      .replace(/`([^`]+)`/g, '$1')
+      // Remove emphasis markers (* only — underscores are too common in
+      // code identifiers like mongoc_client_get_database and cause false
+      // mismatches when stripped as emphasis)
+      .replace(/(\*{1,3})(.*?)\1/g, '$2')
+      // Remove blockquote markers
+      .replace(/^>\s?/gm, '')
+      // Remove any horizontal-rule lines still present (e.g. ___)
+      .replace(/^[-*_]{3,}$/gm, '')
+  );
+}
+
+/**
+ * Canonicalize text so HTML-derived and markdown-derived strings can be
+ * compared by substring containment: drops zero-width characters, maps
+ * typographic punctuation to ASCII, unwraps <...> spans, lowercases, and
+ * collapses whitespace runs to single spaces.
+ */
+function normalize(text: string): string {
+  // Zero-width space / non-joiner / joiner and the BOM are invisible.
+  const visible = text
+    .replace(/\u200B/g, '')
+    .replace(/\u200C/g, '')
+    .replace(/\u200D/g, '')
+    .replace(/\uFEFF/g, '');
+  // Curly quotes, en/em dashes, and the ellipsis character → ASCII.
+  const ascii = visible
+    .replace(/[\u2018\u2019\u201A]/g, "'")
+    .replace(/[\u201C\u201D\u201E]/g, '"')
+    .replace(/[\u2013\u2014]/g, '-')
+    .replace(/\u2026/g, '...');
+  // <YOUR_API_KEY> → YOUR_API_KEY: the HTML side (entities decoded, tags
+  // stripped) and the markdown side (raw brackets) then agree.
+  const unwrapped = ascii.replace(/<([^>]+)>/g, '$1');
+  const collapsed = unwrapped.toLowerCase().replace(/\s+/g, ' ');
+  return collapsed.trim();
+}
+
+/**
+ * Report whether a normalized segment looks like non-content page noise.
+ */
+function isNoiseSegment(normalized: string): boolean {
+  return NOISE_PATTERNS.findIndex((noise) => noise.test(normalized)) !== -1;
+}
+
+/**
+ * Split text into segments: trimmed lines of at least MIN_SEGMENT_LENGTH
+ * characters whose normalized form matches none of the noise patterns.
+ */
+function toSegments(text: string): string[] {
+  return text
+    .split('\n')
+    .map((line) => line.trim())
+    .filter((line) => line.length >= MIN_SEGMENT_LENGTH)
+    .filter((line) => !isNoiseSegment(normalize(line)));
+}
+
+/**
+ * Measure how much of the HTML text is absent from the markdown version.
+ * Each unique HTML segment is looked up by normalized substring containment
+ * rather than positional diffing, so reordering and formatting differences
+ * are not reported as missing content.
+ *
+ * Returns the per-page parity fields; a page with no segments, or fewer than
+ * MIN_SEGMENTS_FOR_COMPARISON unique segments, is reported as a pass with 0% missing.
+ */
+function computeParity(
+  htmlText: string,
+  markdownText: string,
+): Omit<PageParityResult, 'url' | 'markdownSource' | 'error'> {
+  // Keep the first raw segment seen for each normalized form so repeated
+  // chrome (breadcrumbs, nav titles) or repeated content counts only once.
+  // A Map preserves insertion order, so segments stay in document order.
+  const rawByNormalized = new Map<string, string>();
+  for (const raw of toSegments(htmlText)) {
+    const normalized = normalize(raw);
+    if (!rawByNormalized.has(normalized)) {
+      rawByNormalized.set(normalized, raw);
+    }
+  }
+  const totalSegments = rawByNormalized.size;
+
+  // Result used whenever no meaningful comparison can be made.
+  const autoPass = (counted: number): ReturnType<typeof computeParity> => ({
+    status: 'pass',
+    missingPercent: 0,
+    totalSegments: counted,
+    missingSegments: 0,
+    sampleDiffs: [],
+  });
+
+  if (totalSegments === 0) {
+    return autoPass(0);
+  }
+
+  // Pages with very few segments produce volatile percentages (a couple of
+  // breadcrumb items on a 7-segment page = 30%+). Auto-pass these.
+  if (totalSegments < MIN_SEGMENTS_FOR_COMPARISON) {
+    return autoPass(totalSegments);
+  }
+
+  const markdownHaystack = normalize(extractMarkdownText(markdownText));
+  const sampleDiffs: string[] = [];
+  let missingSegments = 0;
+
+  for (const [normalized, raw] of rawByNormalized) {
+    if (markdownHaystack.includes(normalized)) {
+      continue;
+    }
+    missingSegments += 1;
+    // Keep at most five examples for diagnostics.
+    if (sampleDiffs.length < 5) {
+      sampleDiffs.push(`- ${raw}`);
+    }
+  }
+
+  // totalSegments is non-zero here (the empty case returned above).
+  const missingPercent = Math.round((missingSegments / totalSegments) * 100);
+
+  // Below WARN_THRESHOLD passes; below FAIL_THRESHOLD warns; otherwise fails.
+  const status: CheckStatus =
+    missingPercent < WARN_THRESHOLD
+      ? 'pass'
+      : missingPercent < FAIL_THRESHOLD
+        ? 'warn'
+        : 'fail';
+
+  return {
+    status,
+    missingPercent,
+    totalSegments,
+    missingSegments,
+    sampleDiffs,
+  };
+}
+
+/**
+ * Derive the HTML page URL from a cached page URL.
+ * Inverts the transforms from toMdUrls():
+ *   /docs/guide.md      → /docs/guide
+ *   /docs/guide/index.md → /docs/guide/
+ *   /docs/guide.mdx      → /docs/guide
+ * Extensions match case-insensitively; any other URL is returned unchanged.
+ */
+function toHtmlUrl(url: string): string {
+  const parsed = new URL(url);
+  if (/\/index\.mdx?$/i.test(parsed.pathname)) {
+    parsed.pathname = parsed.pathname.replace(/\/index\.mdx?$/i, '/');
+    return parsed.toString();
+  }
+  if (/\.mdx?$/i.test(parsed.pathname)) {
+    parsed.pathname = parsed.pathname.replace(/\.mdx?$/i, '');
+    return parsed.toString();
+  }
+  return url;
+}
+
+function worstStatus(statuses: CheckStatus[]): CheckStatus {
+  // Severity order: any 'fail' wins, then any 'warn', otherwise 'pass'.
+  const has = (wanted: CheckStatus): boolean => statuses.some((s) => s === wanted);
+  return has('fail') ? 'fail' : has('warn') ? 'warn' : 'pass';
+}
+
+async function check(ctx: CheckContext): Promise<CheckResult> {
+  const id = 'markdown-content-parity';
+  const category = 'observability';
+
+  // Collect pages that have cached markdown from upstream checks
+  const pagesToCompare: Array<{
+    url: string;
+    markdownContent: string;
+    markdownSource: string;
+  }> = [];
+
+  for (const [url, cached] of ctx.pageCache) {
+    if (cached.markdown?.content) {
+      pagesToCompare.push({
+        url,
+        markdownContent: cached.markdown.content,
+        markdownSource: cached.markdown.source,
+      });
+    }
+  }
+
+  if (pagesToCompare.length === 0) {
+    return {
+      id,
+      category,
+      status: 'skip',
+      message: 'No pages with markdown versions available to compare',
+    };
+  }
+
+  const results: PageParityResult[] = [];
+  // Guard the batch size: a step of 0 (or NaN) would never advance the loop.
+  const concurrency = Math.max(1, Math.floor(ctx.options.maxConcurrency) || 1);
+  for (let i = 0; i < pagesToCompare.length; i += concurrency) {
+    const batch = pagesToCompare.slice(i, i + concurrency);
+    const batchResults = await Promise.all(
+      batch.map(async ({ url, markdownContent, markdownSource }): Promise<PageParityResult> => {
+        try {
+          // Fetch the HTML version of the page
+          const htmlUrl = toHtmlUrl(url);
+          const page = await fetchPage(ctx, htmlUrl);
+
+          if (page.status >= 400) {
+            // HTML URL returned an error (e.g., 404) — `error` keeps it out of the comparison
+            return {
+              url,
+              markdownSource,
+              status: 'pass',
+              missingPercent: 0,
+              totalSegments: 0,
+              missingSegments: 0,
+              sampleDiffs: [],
+              error: `HTML page returned ${page.status}`,
+            };
+          }
+
+          if (!page.isHtml) {
+            // The "HTML" version is already markdown/plain text — no meaningful comparison
+            return {
+              url,
+              markdownSource,
+              status: 'pass',
+              missingPercent: 0,
+              totalSegments: 0,
+              missingSegments: 0,
+              sampleDiffs: [],
+            };
+          }
+
+          const htmlText = extractHtmlText(page.body);
+          const parity = computeParity(htmlText, markdownContent);
+
+          return { url, markdownSource, ...parity };
+        } catch (err) {
+          return {
+            url,
+            markdownSource,
+            status: 'fail',
+            missingPercent: 100,
+            totalSegments: 0,
+            missingSegments: 0,
+            sampleDiffs: [],
+            error: err instanceof Error ? err.message : String(err),
+          };
+        }
+      }),
+    );
+    results.push(...batchResults);
+  }
+
+  const successful = results.filter((r) => !r.error);
+  const fetchErrors = results.filter((r) => r.error).length;
+
+  if (successful.length === 0) {
+    return {
+      id,
+      category,
+      status: 'fail',
+      message: `Could not fetch HTML for any pages to compare${fetchErrors > 0 ? `; ${fetchErrors} failed to fetch` : ''}`,
+      details: {
+        pagesCompared: 0,
+        fetchErrors,
+        pageResults: results,
+      },
+    };
+  }
+
+  const overallStatus = worstStatus(successful.map((r) => r.status));
+  const passBucket = successful.filter((r) => r.status === 'pass').length;
+  const warnBucket = successful.filter((r) => r.status === 'warn').length;
+  const failBucket = successful.filter((r) => r.status === 'fail').length;
+  const avgMissingPercent =
+    successful.length > 0
+      ? Math.round(successful.reduce((sum, r) => sum + r.missingPercent, 0) / successful.length)
+      : 0;
+  const suffix = fetchErrors > 0 ? `; ${fetchErrors} failed to fetch` : '';
+
+  let message: string;
+  if (overallStatus === 'pass') {
+    message = `All ${successful.length} pages have equivalent markdown and HTML content (avg ${avgMissingPercent}% missing)${suffix}`;
+  } else if (overallStatus === 'warn') {
+    message = `${warnBucket} of ${successful.length} pages have minor content differences between markdown and HTML${suffix}`;
+  } else {
+    message = `${failBucket} of ${successful.length} pages have substantive content differences between markdown and HTML (avg ${avgMissingPercent}% missing)${suffix}`;
+  }
 
-async function check(_ctx: CheckContext): Promise<CheckResult> {
   return {
-    id: 'markdown-content-parity',
-    category: 'observability',
-    status: 'skip',
-    message: 'Not yet implemented',
+    id,
+    category,
+    status: overallStatus,
+    message,
+    details: {
+      pagesCompared: successful.length,
+      passBucket,
+      warnBucket,
+      failBucket,
+      fetchErrors,
+      avgMissingPercent,
+      pageResults: results,
+    },
   };
 }
 
diff --git a/src/helpers/fetch-page.ts b/src/helpers/fetch-page.ts
index 33cc1ad..d4eb9b0 100644
--- a/src/helpers/fetch-page.ts
+++ b/src/helpers/fetch-page.ts
@@ -17,7 +17,7 @@ export async function fetchPage(ctx: CheckContext, url: string): Promise<Fetched
     contentType.includes('text/markdown') || contentType.includes('text/plain');
   const isHtml = !isMarkdownType && (contentType.includes('text/html') || looksLikeHtml(body));
 
-  const result: FetchedPage = { url, body, contentType, isHtml };
+  const result: FetchedPage = { url, status: response.status, body, contentType, isHtml };
   ctx.htmlCache.set(url, result);
   return result;
 }
diff --git a/src/types.ts b/src/types.ts
index d494f28..4a8e03b 100644
--- a/src/types.ts
+++ b/src/types.ts
@@ -21,6 +21,7 @@ export interface CachedPage {
 
 export interface FetchedPage {
   url: string;
+  status: number;
   body: string;
   contentType: string;
   isHtml: boolean;
diff --git a/test/unit/checks/markdown-content-parity.test.ts b/test/unit/checks/markdown-content-parity.test.ts
new file mode 100644
index 0000000..3c36047
--- /dev/null
+++ b/test/unit/checks/markdown-content-parity.test.ts
@@ -0,0 +1,698 @@
+import { describe, it, expect, beforeAll } from 'vitest';
+import { http, HttpResponse } from 'msw';
+import { setupServer } from 'msw/node';
+import { createContext } from '../../../src/runner.js';
+import { getCheck } from '../../../src/checks/registry.js';
+import '../../../src/checks/index.js';
+
+const server = setupServer();
+
+beforeAll(() => {
+  server.listen({ onUnhandledRequest: 'bypass' });
+  return () => server.close();
+});
+
+describe('markdown-content-parity', () => {
+  const check = getCheck('markdown-content-parity')!;
+
+  function makeCtx(
+    pages: Array<{ url: string; markdown: string; htmlBody: string }>,
+    host: string,
+  ) {
+    const ctx = createContext(`http://${host}`, { requestDelay: 0 });
+
+    // Simulate upstream markdown-url-support having run
+    ctx.previousResults.set('markdown-url-support', {
+      id: 'markdown-url-support',
+      category: 'markdown-availability',
+      status: 'pass',
+      message: 'OK',
+    });
+
+    // Populate pageCache as upstream checks would; htmlBody is not read here (only url and markdown)
+    for (const page of pages) {
+      ctx.pageCache.set(page.url, {
+        url: page.url,
+        markdown: { content: page.markdown, source: 'md-url' },
+      });
+    }
+
+    return ctx;
+  }
+
+  it('passes when markdown and HTML have equivalent content', async () => {
+    const html = `<html><body>
+      <h1>Getting Started</h1>
+      <p>Install the SDK with npm to get started with the integration process.</p>
+      <pre><code>npm install @example/sdk</code></pre>
+      <p>Then import the client and configure your API key for authentication.</p>
+    </body></html>`;
+    const markdown = `# Getting Started
+
+Install the SDK with npm to get started with the integration process.
+
+\`\`\`
+npm install @example/sdk
+\`\`\`
+
+Then import the client and configure your API key for authentication.`;
+    const url = 'http://mcp-pass.local/docs/getting-started';
+
+    server.use(
+      http.get(
+        url,
+        () =>
+          new HttpResponse(html, {
+            status: 200,
+            headers: { 'Content-Type': 'text/html' },
+          }),
+      ),
+    );
+
+    const ctx = makeCtx([{ url, markdown, htmlBody: html }], 'mcp-pass.local');
+    const result = await check.run(ctx);
+    expect(result.status).toBe('pass');
+  });
+
+  it('warns when markdown is missing some content from HTML', async () => {
+    const html = `<html><body>
+      <h1>API Reference</h1>
+      <p>The API supports the following operations for managing resources.</p>
+      <h2>Authentication</h2>
+      <p>Use bearer tokens for authentication with the API endpoints.</p>
+      <p>Include your API key in the Authorization header of every request.</p>
+      <h2>Rate Limiting</h2>
+      <p>Requests are limited to 100 per minute per API key.</p>
+      <h2>Errors</h2>
+      <p>Error responses include a JSON body with details about the failure.</p>
+      <p>Each error includes a code field and a human-readable message field.</p>
+      <h2>Pagination</h2>
+      <p>Use cursor-based pagination with the after parameter in your requests.</p>
+      <p>Each page returns up to 50 items by default unless configured otherwise.</p>
+      <h2>Versioning</h2>
+      <p>Pass the version header to select a specific API version for your requests.</p>
+      <p>If no version header is provided the latest stable version is used automatically.</p>
+      <h2>Webhooks</h2>
+      <p>Configure webhook endpoints to receive real-time notifications about events.</p>
+      <p>All webhook payloads include a signature header for verification purposes.</p>
+    </body></html>`;
+
+    // Markdown version is missing only the Rate Limiting section (minor omission)
+    const markdown = `# API Reference
+
+The API supports the following operations for managing resources.
+
+## Authentication
+
+Use bearer tokens for authentication with the API endpoints.
+
+Include your API key in the Authorization header of every request.
+
+## Errors
+
+Error responses include a JSON body with details about the failure.
+
+Each error includes a code field and a human-readable message field.
+
+## Pagination
+
+Use cursor-based pagination with the after parameter in your requests.
+
+Each page returns up to 50 items by default unless configured otherwise.
+
+## Versioning
+
+Pass the version header to select a specific API version for your requests.
+
+If no version header is provided the latest stable version is used automatically.
+
+## Webhooks
+
+Configure webhook endpoints to receive real-time notifications about events.
+
+All webhook payloads include a signature header for verification purposes.`;
+
+    const url = 'http://mcp-warn.local/docs/api';
+
+    server.use(
+      http.get(
+        url,
+        () =>
+          new HttpResponse(html, {
+            status: 200,
+            headers: { 'Content-Type': 'text/html' },
+          }),
+      ),
+    );
+
+    const ctx = makeCtx([{ url, markdown, htmlBody: html }], 'mcp-warn.local');
+    const result = await check.run(ctx);
+    expect(result.status).toBe('warn');
+  });
+
+  it('fails when markdown is substantially different from HTML', async () => {
+    const html = `<html><body>
+      <h1>Installation Guide</h1>
+      <p>You need Node.js 18 or later installed on your system before proceeding.</p>
+      <p>Create a configuration file with your API credentials and region settings.</p>
+      <p>Import and initialize the client using the configuration you just created.</p>
+      <p>Run the health check to verify everything is working correctly in your environment.</p>
+      <p>Common issues include connection timeouts and authentication failures with expired keys.</p>
+      <p>Check your network connectivity if you experience connection timeout errors repeatedly.</p>
+      <p>Verify your API key has not expired if you see authentication failure messages.</p>
+      <p>Make sure the target host is accessible and responding to network requests properly.</p>
+      <p>Review the troubleshooting section for additional debugging information and tips.</p>
+      <p>Contact support if you continue to experience issues after following these steps.</p>
+      <p>The installation process should take approximately five minutes from start to finish.</p>
+    </body></html>`;
+
+    // Markdown version is a completely different page
+    const markdown = `# Changelog
+
+## v2.0.0
+
+Breaking changes in this release that affect all existing integrations.
+
+## v1.5.0
+
+Added new features for managing team resources and permissions.
+
+## v1.4.0
+
+Improved error handling and added retry logic for failed requests.`;
+
+    const url = 'http://mcp-fail.local/docs/install';
+
+    server.use(
+      http.get(
+        url,
+        () =>
+          new HttpResponse(html, {
+            status: 200,
+            headers: { 'Content-Type': 'text/html' },
+          }),
+      ),
+    );
+
+    const ctx = makeCtx([{ url, markdown, htmlBody: html }], 'mcp-fail.local');
+    const result = await check.run(ctx);
+    expect(result.status).toBe('fail');
+    expect(result.details?.avgMissingPercent).toBeGreaterThanOrEqual(20);
+  });
+
+  it('skips when no pages have markdown versions', async () => {
+    const ctx = createContext('http://mcp-skip.local', { requestDelay: 0 });
+    const result = await check.run(ctx);
+    expect(result.status).toBe('skip');
+  });
+
+  it('handles fetch errors gracefully', async () => {
+    const url = 'http://mcp-err.local/docs/page';
+    server.use(http.get(url, () => HttpResponse.error()));
+
+    const ctx = makeCtx(
+      [
+        {
+          url,
+          markdown: '# Page\n\nThis is some content that is long enough to be a segment.',
+          htmlBody: '',
+        },
+      ],
+      'mcp-err.local',
+    );
+    const result = await check.run(ctx);
+    expect(result.details?.fetchErrors).toBe(1);
+  });
+
+  it('passes when HTML version is already markdown/plain text', async () => {
+    const url = 'http://mcp-plain.local/docs/page';
+    const markdown = '# Page\n\nSome content that is at least twenty characters long.';
+
+    server.use(
+      http.get(
+        url,
+        () =>
+          new HttpResponse(markdown, {
+            status: 200,
+            headers: { 'Content-Type': 'text/plain' },
+          }),
+      ),
+    );
+
+    const ctx = makeCtx([{ url, markdown, htmlBody: markdown }], 'mcp-plain.local');
+    const result = await check.run(ctx);
+    expect(result.status).toBe('pass');
+  });
+
+  it('detects missing inline code elements', async () => {
+    const html = `<html><body>
+      <h1>Usage</h1>
+      <p>Call the <code>initialize()</code> method first, then use <code>client.fetch()</code> to make requests to the server.</p>
+      <p>Set the <code>DEBUG=true</code> environment variable for verbose logging output.</p>
+      <p>The client supports automatic retry with exponential backoff for failed requests.</p>
+      <p>Configure the maximum number of retries using the <code>maxRetries</code> option parameter.</p>
+      <p>All responses include standard headers with rate limiting information for your reference.</p>
+      <p>Error responses contain a machine-readable error code and a human-readable description.</p>
+      <p>Use the <code>client.close()</code> method to cleanly shut down all connections and resources.</p>
+      <p>The library automatically handles connection pooling and keep-alive management for you.</p>
+      <p>Requests timeout after thirty seconds by default but this is configurable per request.</p>
+      <p>Authentication tokens are refreshed automatically before they expire to prevent failures.</p>
+      <p>The client validates all input parameters before sending requests to the remote server.</p>
+    </body></html>`;
+
+    // Markdown version lost the inline code content
+    const markdown = `# Usage
+
+Call the method first, then use to make requests to the server.
+
+Set the environment variable for verbose logging output.
+
+The client supports automatic retry with exponential backoff for failed requests.
+
+Configure the maximum number of retries using the option parameter.
+
+All responses include standard headers with rate limiting information for your reference.
+
+Error responses contain a machine-readable error code and a human-readable description.
+
+Use the method to cleanly shut down all connections and resources.
+
+The library automatically handles connection pooling and keep-alive management for you.
+
+Requests timeout after thirty seconds by default but this is configurable per request.
+
+Authentication tokens are refreshed automatically before they expire to prevent failures.
+
+The client validates all input parameters before sending requests to the remote server.`;
+
+    const url = 'http://mcp-code.local/docs/usage';
+
+    server.use(
+      http.get(
+        url,
+        () =>
+          new HttpResponse(html, {
+            status: 200,
+            headers: { 'Content-Type': 'text/html' },
+          }),
+      ),
+    );
+
+    const ctx = makeCtx([{ url, markdown, htmlBody: html }], 'mcp-code.local');
+    const result = await check.run(ctx);
+    // Should detect the missing inline code content
+    expect(result.status).not.toBe('pass');
+    const pageResults = result.details?.pageResults as Array<{ missingSegments: number }>;
+    expect(pageResults[0].missingSegments).toBeGreaterThan(0);
+  });
+
+  it('provides sample diffs in results', async () => {
+    const html = `<html><body><h1>Title</h1>
+      <p>Original content here that is long enough to count as a segment.</p>
+      <p>The system processes incoming requests through a middleware pipeline before routing.</p>
+      <p>Each middleware component can inspect and modify the request before passing it along.</p>
+      <p>Authentication middleware verifies the bearer token against the identity provider.</p>
+      <p>Rate limiting middleware enforces per-client request quotas based on the API key.</p>
+      <p>Logging middleware records request metadata for monitoring and debugging purposes.</p>
+      <p>Error handling middleware catches unhandled exceptions and returns structured responses.</p>
+      <p>Validation middleware checks request bodies against the defined JSON schema rules.</p>
+      <p>Compression middleware applies gzip encoding to responses larger than one kilobyte.</p>
+      <p>Caching middleware stores frequently accessed responses to reduce backend load times.</p>
+      <p>The final handler processes the request and returns the appropriate response data.</p>
+    </body></html>`;
+    const markdown =
+      '# Title\n\nDifferent content here that is long enough to count as a segment.\n\nThe system processes incoming requests through a completely different pipeline.';
+    const url = 'http://mcp-diffs.local/docs/page';
+
+    server.use(
+      http.get(
+        url,
+        () =>
+          new HttpResponse(html, {
+            status: 200,
+            headers: { 'Content-Type': 'text/html' },
+          }),
+      ),
+    );
+
+    const ctx = makeCtx([{ url, markdown, htmlBody: html }], 'mcp-diffs.local');
+    const result = await check.run(ctx);
+    const pageResults = result.details?.pageResults as Array<{ sampleDiffs: string[] }>;
+    expect(pageResults[0].sampleDiffs.length).toBeGreaterThan(0);
+  });
+
+  it('compares multiple pages and reports worst status', async () => {
+    const goodUrl = 'http://mcp-multi.local/docs/good';
+    const badUrl = 'http://mcp-multi.local/docs/bad';
+
+    const goodHtml =
+      '<html><body><h1>Good Page</h1><p>Content that matches between HTML and markdown versions exactly.</p></body></html>';
+    const goodMd =
+      '# Good Page\n\nContent that matches between HTML and markdown versions exactly.';
+
+    const badHtml = `<html><body>
+      <h1>Bad Page</h1>
+      <p>This page has lots of content that is only in the HTML version.</p>
+      <p>Details about section A that should be in the markdown but are missing.</p>
+      <p>Details about section B that should be in the markdown but are missing.</p>
+      <p>Details about section C that should be in the markdown but are missing.</p>
+      <p>Details about section D that should be in the markdown but are missing.</p>
+      <p>Details about section E that should be in the markdown but are missing.</p>
+    </body></html>`;
+    const badMd = '# Bad Page\n\nCompletely different content that does not match the HTML at all.';
+
+    server.use(
+      http.get(
+        goodUrl,
+        () =>
+          new HttpResponse(goodHtml, {
+            status: 200,
+            headers: { 'Content-Type': 'text/html' },
+          }),
+      ),
+      http.get(
+        badUrl,
+        () =>
+          new HttpResponse(badHtml, {
+            status: 200,
+            headers: { 'Content-Type': 'text/html' },
+          }),
+      ),
+    );
+
+    const ctx = makeCtx(
+      [
+        { url: goodUrl, markdown: goodMd, htmlBody: goodHtml },
+        { url: badUrl, markdown: badMd, htmlBody: badHtml },
+      ],
+      'mcp-multi.local',
+    );
+    const result = await check.run(ctx);
+    expect(result.details?.pagesCompared).toBe(2);
+    expect(result.details?.passBucket).toBeGreaterThanOrEqual(1);
+  });
+
+  it('strips navigation panels with high link density', async () => {
+    // Simulate a page where nav is inside <main> using <div> instead of <nav>
+    const navLinks = Array.from(
+      { length: 15 },
+      (_, i) => `<a href="/page-${i}">Navigation item number ${i} for testing</a>`,
+    ).join('\n');
+    const html = `<html><body><main>
+      <div class="sidebar">${navLinks}</div>
+      <div class="content">
+        <h1>Actual Page Content</h1>
+        <p>This is the real content that should be compared against markdown output.</p>
+        <p>It contains multiple paragraphs with enough text to form segments for comparison.</p>
+        <p>The sidebar navigation should be stripped because it has very high link density.</p>
+        <p>Without link density stripping the nav text would inflate the missing percentage.</p>
+        <p>Each paragraph here represents genuine documentation content for the reader.</p>
+        <p>Authentication requires a valid API key passed in the Authorization header field.</p>
+        <p>Rate limiting is applied per API key with a maximum of one hundred requests per minute.</p>
+        <p>Error responses include a JSON body with a code field and a message field for details.</p>
+        <p>Pagination uses cursor-based navigation with the after parameter in query strings.</p>
+        <p>The SDK supports automatic retries with exponential backoff for transient failures.</p>
+        <p>Configure the maximum number of retries using the maxRetries constructor option.</p>
+      </div>
+    </main></body></html>`;
+
+    const markdown = `# Actual Page Content
+
+This is the real content that should be compared against markdown output.
+
+It contains multiple paragraphs with enough text to form segments for comparison.
+
+The sidebar navigation should be stripped because it has very high link density.
+
+Without link density stripping the nav text would inflate the missing percentage.
+
+Each paragraph here represents genuine documentation content for the reader.
+
+Authentication requires a valid API key passed in the Authorization header field.
+
+Rate limiting is applied per API key with a maximum of one hundred requests per minute.
+
+Error responses include a JSON body with a code field and a message field for details.
+
+Pagination uses cursor-based navigation with the after parameter in query strings.
+
+The SDK supports automatic retries with exponential backoff for transient failures.
+
+Configure the maximum number of retries using the maxRetries constructor option.`;
+
+    const url = 'http://mcp-nav.local/docs/page';
+
+    server.use(
+      http.get(
+        url,
+        () =>
+          new HttpResponse(html, {
+            status: 200,
+            headers: { 'Content-Type': 'text/html' },
+          }),
+      ),
+    );
+
+    const ctx = makeCtx([{ url, markdown, htmlBody: html }], 'mcp-nav.local');
+    const result = await check.run(ctx);
+    // Should pass because nav was stripped; without stripping it would fail
+    expect(result.status).toBe('pass');
+  });
+
+  it('separates code lines from div-based syntax highlighting', async () => {
+    // Simulate Expressive Code / Shiki / Geist code blocks with <div> per line inside <pre>
+    const html = `<html><body>
+      <h1>Installation Guide</h1>
+      <p>Install the package using npm to add it to your project dependencies.</p>
+      <p>Then import the client module and configure it with your API credentials.</p>
+      <p>Run the following commands to get started with the installation process.</p>
+      <pre><code><div class="ec-line"><span style="--0:#fff">npm install @example/sdk</span></div><div class="ec-line"><span style="--0:#fff">npm install @example/cli</span></div></code></pre>
+      <p>After installation import the client and call the initialize method first.</p>
+      <p>The client will automatically detect your configuration from environment variables.</p>
+      <p>You can override any configuration option by passing it to the constructor directly.</p>
+      <p>Make sure your API key is set before attempting to make any requests to the server.</p>
+      <p>The library validates all configuration options and throws helpful error messages.</p>
+      <p>Connection pooling is handled automatically for optimal performance and throughput.</p>
+      <p>TLS certificates are verified by default to ensure secure communication channels.</p>
+    </body></html>`;
+
+    const markdown = `# Installation Guide
+
+Install the package using npm to add it to your project dependencies.
+
+Then import the client module and configure it with your API credentials.
+
+Run the following commands to get started with the installation process.
+
+\`\`\`
+npm install @example/sdk
+npm install @example/cli
+\`\`\`
+
+After installation import the client and call the initialize method first.
+
+The client will automatically detect your configuration from environment variables.
+
+You can override any configuration option by passing it to the constructor directly.
+
+Make sure your API key is set before attempting to make any requests to the server.
+
+The library validates all configuration options and throws helpful error messages.
+
+Connection pooling is handled automatically for optimal performance and throughput.
+
+TLS certificates are verified by default to ensure secure communication channels.`;
+
+    const url = 'http://mcp-codediv.local/docs/install';
+
+    server.use(
+      http.get(
+        url,
+        () =>
+          new HttpResponse(html, {
+            status: 200,
+            headers: { 'Content-Type': 'text/html' },
+          }),
+      ),
+    );
+
+    const ctx = makeCtx([{ url, markdown, htmlBody: html }], 'mcp-codediv.local');
+    const result = await check.run(ctx);
+    expect(result.status).toBe('pass');
+  });
+
+  it('skips pages that return HTTP 404', async () => {
+    const url = 'http://mcp-404.local/docs/page';
+
+    server.use(
+      http.get(
+        url,
+        () =>
+          new HttpResponse('Not Found', {
+            status: 404,
+            headers: { 'Content-Type': 'text/html' },
+          }),
+      ),
+    );
+
+    const ctx = makeCtx(
+      [
+        {
+          url,
+          markdown: '# Page\n\nContent that is long enough for a meaningful segment.',
+          htmlBody: '',
+        },
+      ],
+      'mcp-404.local',
+    );
+    const result = await check.run(ctx);
+    // Page should be skipped (auto-pass), not counted as a failure; verified via a 404 in its error field
+    const pageResults = result.details?.pageResults as Array<{ error?: string }>;
+    expect(pageResults[0].error).toContain('404');
+  });
+
+  it('uses heuristic selectors when no semantic container exists', async () => {
+    // Page with no <main> or <article>, but has #content
+    const html = `<html><body>
+      <div class="site-header"><a href="/">Home</a><a href="/docs">Docs</a></div>
+      <div id="content">
+        <h1>Getting Started Guide</h1>
+        <p>This tutorial walks you through the initial setup process for the platform.</p>
+        <p>You will need an active account and valid API credentials before proceeding.</p>
+        <p>The setup process takes approximately five minutes from start to finish today.</p>
+        <p>Begin by installing the command line tool using your preferred package manager.</p>
+        <p>After installation run the init command to create a configuration file locally.</p>
+        <p>The configuration file stores your API key and preferred region settings securely.</p>
+        <p>Next authenticate by running the login command with your account credentials here.</p>
+        <p>Once authenticated you can begin making API calls and managing your resources now.</p>
+        <p>The dashboard provides a visual overview of all your resources and their status.</p>
+        <p>Check the troubleshooting guide if you encounter any issues during the setup.</p>
+        <p>Our support team is available around the clock to help with technical questions.</p>
+      </div>
+      <div class="site-footer">Copyright 2024</div>
+    </body></html>`;
+
+    const markdown = `# Getting Started Guide
+
+This tutorial walks you through the initial setup process for the platform.
+
+You will need an active account and valid API credentials before proceeding.
+
+The setup process takes approximately five minutes from start to finish today.
+
+Begin by installing the command line tool using your preferred package manager.
+
+After installation run the init command to create a configuration file locally.
+
+The configuration file stores your API key and preferred region settings securely.
+
+Next authenticate by running the login command with your account credentials here.
+
+Once authenticated you can begin making API calls and managing your resources now.
+
+The dashboard provides a visual overview of all your resources and their status.
+
+Check the troubleshooting guide if you encounter any issues during the setup.
+
+Our support team is available around the clock to help with technical questions.`;
+
+    const url = 'http://mcp-heuristic.local/docs/start';
+
+    server.use(
+      http.get(
+        url,
+        () =>
+          new HttpResponse(html, {
+            status: 200,
+            headers: { 'Content-Type': 'text/html' },
+          }),
+      ),
+    );
+
+    const ctx = makeCtx([{ url, markdown, htmlBody: html }], 'mcp-heuristic.local');
+    const result = await check.run(ctx);
+    expect(result.status).toBe('pass');
+  });
+
+  it('handles index.md URL by stripping to directory path', async () => {
+    const mdUrl = 'http://mcp-index.local/docs/guide/index.md';
+    const htmlUrl = 'http://mcp-index.local/docs/guide/';
+    const html =
+      '<html><body><h1>Guide</h1><p>Step one of the installation process requires downloading the package.</p><p>Step two involves configuring the environment variables for your setup.</p></body></html>';
+    const markdown =
+      '# Guide\n\nStep one of the installation process requires downloading the package.\n\nStep two involves configuring the environment variables for your setup.';
+
+    server.use(
+      http.get(
+        htmlUrl,
+        () =>
+          new HttpResponse(html, {
+            status: 200,
+            headers: { 'Content-Type': 'text/html' },
+          }),
+      ),
+    );
+
+    const ctx = createContext('http://mcp-index.local', { requestDelay: 0 });
+    ctx.previousResults.set('markdown-url-support', {
+      id: 'markdown-url-support',
+      category: 'markdown-availability',
+      status: 'pass',
+      message: 'OK',
+    });
+    ctx.pageCache.set(mdUrl, {
+      url: mdUrl,
+      markdown: { content: markdown, source: 'md-url' },
+    });
+
+    const result = await check.run(ctx);
+    expect(result.status).toBe('pass');
+    const pageResults = result.details?.pageResults as Array<{ totalSegments: number }>;
+    expect(pageResults[0].totalSegments).toBeGreaterThan(0);
+  });
+
+  it('strips .md from cached URL to fetch the HTML version', async () => {
+    const mdUrl = 'http://mcp-mdurl.local/docs/guide.md';
+    const htmlUrl = 'http://mcp-mdurl.local/docs/guide';
+    const html =
+      '<html><body><h1>Guide</h1><p>Step one of the installation process requires downloading the package.</p><p>Step two involves configuring the environment variables for your setup.</p></body></html>';
+    const markdown =
+      '# Guide\n\nStep one of the installation process requires downloading the package.\n\nStep two involves configuring the environment variables for your setup.';
+
+    server.use(
+      http.get(
+        mdUrl,
+        () =>
+          new HttpResponse(markdown, {
+            status: 200,
+            headers: { 'Content-Type': 'text/markdown' },
+          }),
+      ),
+      http.get(
+        htmlUrl,
+        () =>
+          new HttpResponse(html, {
+            status: 200,
+            headers: { 'Content-Type': 'text/html' },
+          }),
+      ),
+    );
+
+    const ctx = createContext('http://mcp-mdurl.local', { requestDelay: 0 });
+    ctx.previousResults.set('markdown-url-support', {
+      id: 'markdown-url-support',
+      category: 'markdown-availability',
+      status: 'pass',
+      message: 'OK',
+    });
+    ctx.pageCache.set(mdUrl, {
+      url: mdUrl,
+      markdown: { content: markdown, source: 'md-url' },
+    });
+
+    const result = await check.run(ctx);
+    expect(result.status).toBe('pass');
+    const pageResults = result.details?.pageResults as Array<{ totalSegments: number }>;
+    expect(pageResults[0].totalSegments).toBeGreaterThan(0);
+  });
+});

From 68df65619194b31955c5630010ecebbb8e248dd9 Mon Sep 17 00:00:00 2001
From: dacharyc <dc@dacharycarey.com>
Date: Sun, 15 Mar 2026 15:45:52 -0400
Subject: [PATCH 3/7] Fix bugs in llms.txt progressive disclosure discovery,
 content-negotiation checks

---
 src/helpers/get-page-urls.ts            | 89 +++++++++++++++++++++-
 test/unit/helpers/get-page-urls.test.ts | 98 +++++++++++++++++++++++++
 2 files changed, 183 insertions(+), 4 deletions(-)

diff --git a/src/helpers/get-page-urls.ts b/src/helpers/get-page-urls.ts
index 2f42a71..2c92c21 100644
--- a/src/helpers/get-page-urls.ts
+++ b/src/helpers/get-page-urls.ts
@@ -1,5 +1,6 @@
 import { extractMarkdownLinks } from '../checks/llms-txt/llms-txt-valid.js';
 import { MAX_SITEMAP_URLS } from '../constants.js';
+import { isNonPageUrl } from './to-md-urls.js';
 import type { CheckContext, DiscoveredFile } from '../types.js';
 
 /**
@@ -35,11 +36,12 @@ export function parseSitemapUrls(xml: string): { urls: string[]; sitemapIndexUrl
   return { urls, sitemapIndexUrls };
 }
 
-function getUrlsFromCachedLlmsTxt(ctx: CheckContext): string[] {
+async function getUrlsFromCachedLlmsTxt(ctx: CheckContext): Promise<string[]> {
   const existsResult = ctx.previousResults.get('llms-txt-exists');
   const discovered = (existsResult?.details?.discoveredFiles ?? []) as DiscoveredFile[];
 
-  return extractLinksFromLlmsTxtFiles(discovered);
+  const urls = extractLinksFromLlmsTxtFiles(discovered);
+  return walkAggregateLinks(ctx, urls);
 }
 
 function extractLinksFromLlmsTxtFiles(files: DiscoveredFile[]): string[] {
@@ -49,12 +51,90 @@ function extractLinksFromLlmsTxtFiles(files: DiscoveredFile[]): string[] {
     for (const link of links) {
       if (link.url.startsWith('http://') || link.url.startsWith('https://')) {
         urls.add(link.url);
+      } else if (link.url.startsWith('/')) {
+        // Resolve root-relative URLs against the source file's origin
+        try {
+          const base = new URL(file.url);
+          urls.add(new URL(link.url, base.origin).toString());
+        } catch {
+          // Skip malformed URLs
+        }
       }
     }
   }
   return Array.from(urls);
 }
 
+/**
+ * Split llms.txt links into page URLs and same-origin `.txt` aggregate/index
+ * files (progressive disclosure pattern), then walk each aggregate one level
+ * deep and add the same-origin page URLs it links to. This covers both
+ * sub-product llms.txt files (Cloudflare) and aggregate content files (Supabase).
+ *
+ * Unparseable input URLs are passed through. Aggregates that fail to fetch, are
+ * not `text/*`, look like HTML, or are empty are skipped. The result is
+ * de-duplicated in first-seen order, so a page linked twice is reported once.
+ */
+async function walkAggregateLinks(ctx: CheckContext, urls: string[]): Promise<string[]> {
+  const pageUrls = new Set<string>();
+  const aggregateUrls: string[] = [];
+
+  for (const url of urls) {
+    try {
+      const parsed = new URL(url);
+      if (/\.txt$/i.test(parsed.pathname)) {
+        // .txt files are either aggregate indexes to walk (same origin)
+        // or external resources to skip — never page URLs themselves
+        if (parsed.origin === ctx.origin) {
+          aggregateUrls.push(url);
+        }
+      } else {
+        pageUrls.add(url);
+      }
+    } catch {
+      pageUrls.add(url);
+    }
+  }
+
+  // Fetch aggregate files and extract their links
+  for (const aggUrl of aggregateUrls) {
+    try {
+      const response = await ctx.http.fetch(aggUrl);
+      if (!response.ok) continue;
+      const contentType = response.headers.get('content-type') ?? '';
+      if (!contentType.includes('text/')) continue;
+      const content = await response.text();
+      const trimmed = content.trimStart().toLowerCase();
+      if (trimmed.startsWith('<!') || trimmed.startsWith('<html')) continue;
+      if (content.trim().length === 0) continue;
+
+      const subFile: DiscoveredFile = {
+        url: aggUrl,
+        content,
+        status: response.status,
+        redirected: response.redirected,
+      };
+      const subUrls = extractLinksFromLlmsTxtFiles([subFile]);
+
+      for (const subUrl of subUrls) {
+        // Only keep same-origin page URLs (skip further .txt nesting)
+        try {
+          const parsed = new URL(subUrl);
+          if (parsed.origin === ctx.origin && !isNonPageUrl(subUrl)) {
+            pageUrls.add(subUrl);
+          }
+        } catch {
+          // Skip malformed URLs
+        }
+      }
+    } catch {
+      // Skip failed fetches
+    }
+  }
+
+  return Array.from(pageUrls);
+}
+
 /**
  * Directly fetch llms.txt candidate URLs and extract links.
  * Used when `llms-txt-exists` hasn't run (e.g. standalone check mode).
@@ -87,7 +167,8 @@ async function fetchLlmsTxtUrls(ctx: CheckContext): Promise<string[]> {
     }
   }
 
-  return extractLinksFromLlmsTxtFiles(discovered);
+  const urls = extractLinksFromLlmsTxtFiles(discovered);
+  return walkAggregateLinks(ctx, urls);
 }
 
 /**
@@ -213,7 +294,7 @@ export async function getPageUrls(ctx: CheckContext): Promise<PageUrlResult> {
   const warnings: string[] = [];
 
   // 1. Try llms.txt links from cached results (if llms-txt-exists ran)
-  const cachedUrls = getUrlsFromCachedLlmsTxt(ctx);
+  const cachedUrls = await getUrlsFromCachedLlmsTxt(ctx);
   if (cachedUrls.length > 0) return { urls: cachedUrls, warnings };
 
   // 2. Try fetching llms.txt directly (standalone mode, llms-txt-exists didn't run)
diff --git a/test/unit/helpers/get-page-urls.test.ts b/test/unit/helpers/get-page-urls.test.ts
index 9877afb..9e1b745 100644
--- a/test/unit/helpers/get-page-urls.test.ts
+++ b/test/unit/helpers/get-page-urls.test.ts
@@ -414,6 +414,104 @@ describe('getPageUrls', () => {
     expect(result.warnings[0]).toContain('sitemap-docs.xml.gz');
   });
 
+  // ── Progressive disclosure: walking aggregate .txt files ──
+
+  it('walks aggregate .txt files linked from llms.txt (Cloudflare pattern)', async () => {
+    // Root llms.txt links to per-product llms.txt files
+    const rootContent = `# Docs\n- [Workers](http://walk-test.local/workers/llms.txt)\n- [Cache](http://walk-test.local/cache/llms.txt)\n`;
+    const workersContent = `# Workers\n- [Guide](http://walk-test.local/workers/guide/index.md): Get started\n- [API](http://walk-test.local/workers/api/index.md): API ref\n`;
+    const cacheContent = `# Cache\n- [Overview](http://walk-test.local/cache/overview/index.md): Overview\n`;
+
+    server.use(
+      http.get(
+        'http://walk-test.local/workers/llms.txt',
+        () =>
+          new HttpResponse(workersContent, {
+            status: 200,
+            headers: { 'Content-Type': 'text/plain' },
+          }),
+      ),
+      http.get(
+        'http://walk-test.local/cache/llms.txt',
+        () =>
+          new HttpResponse(cacheContent, {
+            status: 200,
+            headers: { 'Content-Type': 'text/plain' },
+          }),
+      ),
+    );
+
+    const ctx = makeCtx('http://walk-test.local', rootContent);
+    const result = await getPageUrls(ctx);
+    expect(result.urls).toContain('http://walk-test.local/workers/guide/index.md');
+    expect(result.urls).toContain('http://walk-test.local/workers/api/index.md');
+    expect(result.urls).toContain('http://walk-test.local/cache/overview/index.md');
+    expect(result.urls).toHaveLength(3);
+  });
+
+  it('walks aggregate .txt files with relative URLs (Supabase pattern)', async () => {
+    // Root llms.txt links to aggregate content files
+    const rootContent = `# Docs\n- [Guides](http://walk-rel.local/llms/guides.txt)\n`;
+    const guidesContent = `# Guides\n\nLearn about [auth](/docs/guides/auth) and [storage](/docs/guides/storage).\n`;
+
+    server.use(
+      http.get(
+        'http://walk-rel.local/llms/guides.txt',
+        () =>
+          new HttpResponse(guidesContent, {
+            status: 200,
+            headers: { 'Content-Type': 'text/plain' },
+          }),
+      ),
+    );
+
+    const ctx = makeCtx('http://walk-rel.local', rootContent);
+    const result = await getPageUrls(ctx);
+    expect(result.urls).toContain('http://walk-rel.local/docs/guides/auth');
+    expect(result.urls).toContain('http://walk-rel.local/docs/guides/storage');
+  });
+
+  it('resolves relative URLs in root llms.txt against origin', async () => {
+    const content = `# Docs\n- [Guide](/docs/guide): A guide\n- [Ref](/docs/ref): A ref\n`;
+    const ctx = makeCtx('http://rel-root.local', content);
+    const result = await getPageUrls(ctx);
+    expect(result.urls).toEqual([
+      'http://rel-root.local/docs/guide',
+      'http://rel-root.local/docs/ref',
+    ]);
+  });
+
+  it('does not walk .txt files from a different origin', async () => {
+    const content = `# Docs\n- [External](http://other-site.com/llms.txt)\n- [Local](http://no-walk.local/docs/page): Page\n`;
+    const ctx = makeCtx('http://no-walk.local', content);
+    const result = await getPageUrls(ctx);
+    // Should only have the local page URL, not try to fetch the external .txt
+    expect(result.urls).toEqual(['http://no-walk.local/docs/page']);
+  });
+
+  it('falls through to baseUrl when all aggregate files fail', async () => {
+    const rootContent = `# Docs\n- [Bad](http://walk-err.local/bad.txt)\n- [Html](http://walk-err.local/html.txt)\n`;
+
+    server.use(
+      http.get('http://walk-err.local/bad.txt', () => new HttpResponse('', { status: 404 })),
+      http.get(
+        'http://walk-err.local/html.txt',
+        () =>
+          new HttpResponse('<!DOCTYPE html><html></html>', {
+            status: 200,
+            headers: { 'Content-Type': 'text/html' },
+          }),
+      ),
+      http.get('http://walk-err.local/robots.txt', () => new HttpResponse('', { status: 404 })),
+      http.get('http://walk-err.local/sitemap.xml', () => new HttpResponse('', { status: 404 })),
+    );
+
+    const ctx = makeCtx('http://walk-err.local', rootContent);
+    const result = await getPageUrls(ctx);
+    // All aggregate files failed → no page URLs → falls through to baseUrl
+    expect(result.urls).toEqual(['http://walk-err.local']);
+  });
+
   // ── Direct llms.txt fetch (standalone mode) ──
 
   it('fetches llms.txt directly when llms-txt-exists has not run', async () => {

From ed287706ce0bbe29a418b9299fc927f9250b803f Mon Sep 17 00:00:00 2001
From: dacharyc <dc@dacharycarey.com>
Date: Sun, 15 Mar 2026 16:11:16 -0400
Subject: [PATCH 4/7] Add the ability to specify a sampling strategy

---
 README.md                               | 22 ++++++++++++
 src/cli/commands/check.ts               | 18 ++++++++++
 src/constants.ts                        |  1 +
 src/helpers/get-page-urls.ts            | 40 ++++++++++++++++++---
 src/types.ts                            |  4 +++
 test/unit/helpers/get-page-urls.test.ts | 46 +++++++++++++++++++++++++
 6 files changed, 127 insertions(+), 4 deletions(-)

diff --git a/README.md b/README.md
index 321fc5a..7ac2410 100644
--- a/README.md
+++ b/README.md
@@ -75,12 +75,34 @@ afdocs check https://docs.example.com --pass-threshold 30000 --fail-threshold 80
 | `--format <format>`     | `text`   | Output format: `text` or `json`              |
 | `-v, --verbose`         |          | Show per-page details for checks with issues |
 | `--checks <ids>`        | all      | Comma-separated list of check IDs            |
+| `--sampling <strategy>` | `random` | URL sampling strategy (see below)            |
 | `--max-concurrency <n>` | `3`      | Maximum concurrent HTTP requests             |
 | `--request-delay <ms>`  | `200`    | Delay between requests                       |
 | `--max-links <n>`       | `50`     | Maximum links to test in link checks         |
 | `--pass-threshold <n>`  | `50000`  | Size pass threshold (characters)             |
 | `--fail-threshold <n>`  | `100000` | Size fail threshold (characters)             |
 
+### Sampling strategies
+
+By default, afdocs discovers pages from your site (via `llms.txt`, sitemap, or both) and randomly samples up to `--max-links` pages to check. The `--sampling` flag gives you control over how that sample is selected.
+
+| Strategy        | Behavior                                                                                                                                                     |
+| --------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------ |
+| `random`        | Shuffle discovered URLs and take the first N. Fast and broad, but results vary between runs.                                                                 |
+| `deterministic` | Sort discovered URLs alphabetically, then pick every Nth URL for an even spread. Produces the same sample on repeated runs as long as the URL set is stable. |
+| `none`          | Skip discovery entirely. Only check the URL you pass on the command line.                                                                                    |
+
+```bash
+# Reproducible runs for CI or iteration (same pages every time)
+afdocs check https://docs.example.com --sampling deterministic
+
+# Check a single page without any discovery
+afdocs check https://docs.example.com/api/auth --sampling none
+
+# Check a single page with specific checks
+afdocs check https://docs.example.com/api/auth --sampling none --checks page-size-html,redirect-behavior
+```
+
 ### Exit codes
 
 - `0` if all checks pass or warn
diff --git a/src/cli/commands/check.ts b/src/cli/commands/check.ts
index 5e67452..d2e0eff 100644
--- a/src/cli/commands/check.ts
+++ b/src/cli/commands/check.ts
@@ -2,10 +2,13 @@ import type { Command } from 'commander';
 import { runChecks } from '../../runner.js';
 import { formatText } from '../formatters/text.js';
 import { formatJson } from '../formatters/json.js';
+import type { SamplingStrategy } from '../../types.js';
 
 // Ensure all checks are registered
 import '../../checks/index.js';
 
+const SAMPLING_STRATEGIES = ['random', 'deterministic', 'none'] as const;
+
 export function registerCheckCommand(program: Command): void {
   program
     .command('check <url>')
@@ -15,6 +18,11 @@ export function registerCheckCommand(program: Command): void {
     .option('--max-concurrency <n>', 'Maximum concurrent requests', '3')
     .option('--request-delay <ms>', 'Delay between requests in ms', '200')
     .option('--max-links <n>', 'Maximum links to test', '50')
+    .option(
+      '--sampling <strategy>',
+      'URL sampling strategy: random, deterministic, or none',
+      'random',
+    )
     .option('--pass-threshold <n>', 'Pass threshold in characters', '50000')
     .option('--fail-threshold <n>', 'Fail threshold in characters', '100000')
     .option('-v, --verbose', 'Show per-page details for checks with issues')
@@ -22,6 +30,15 @@ export function registerCheckCommand(program: Command): void {
       const checkIds = opts.checks ? opts.checks.split(',').map((s) => s.trim()) : undefined;
       const isText = opts.format !== 'json';
 
+      const sampling = opts.sampling as SamplingStrategy;
+      if (!SAMPLING_STRATEGIES.includes(sampling)) {
+        process.stderr.write(
+          `Error: Invalid sampling strategy "${sampling}". Must be one of: ${SAMPLING_STRATEGIES.join(', ')}\n`,
+        );
+        process.exitCode = 1;
+        return;
+      }
+
       if (isText) {
         const domain = new URL(url).hostname;
         process.stderr.write(`Running checks on ${domain}...\n`);
@@ -32,6 +49,7 @@ export function registerCheckCommand(program: Command): void {
         maxConcurrency: parseInt(opts.maxConcurrency, 10),
         requestDelay: parseInt(opts.requestDelay, 10),
         maxLinksToTest: parseInt(opts.maxLinks, 10),
+        samplingStrategy: sampling,
         thresholds: {
           pass: parseInt(opts.passThreshold, 10),
           fail: parseInt(opts.failThreshold, 10),
diff --git a/src/constants.ts b/src/constants.ts
index db2e04d..b2fba24 100644
--- a/src/constants.ts
+++ b/src/constants.ts
@@ -10,6 +10,7 @@ export const DEFAULT_OPTIONS: CheckOptions = {
   requestDelay: 200,
   requestTimeout: 15_000,
   maxLinksToTest: 50,
+  samplingStrategy: 'random',
   thresholds: DEFAULT_THRESHOLDS,
 };
 
diff --git a/src/helpers/get-page-urls.ts b/src/helpers/get-page-urls.ts
index 2c92c21..ae22c23 100644
--- a/src/helpers/get-page-urls.ts
+++ b/src/helpers/get-page-urls.ts
@@ -324,21 +324,53 @@ export interface SampledPages {
  *
  * The result is cached on ctx so that all checks within a single run
  * share the same sampled page list, avoiding inconsistent results.
+ *
+ * Sampling strategies:
+ * - `random`: Fisher-Yates shuffle, then take the first maxLinksToTest. (Default.)
+ * - `deterministic`: Sort URLs lexicographically, then pick every Nth URL
+ *   so that the result is reproducible across runs (as long as the discovered
+ *   URL set is stable).
+ * - `none`: Skip discovery entirely; return only the baseUrl.
  */
 export async function discoverAndSamplePages(ctx: CheckContext): Promise<SampledPages> {
   if (ctx._sampledPages) return ctx._sampledPages;
 
+  const strategy = ctx.options.samplingStrategy;
+
+  // "none" skips discovery and uses only the URL the user provided.
+  if (strategy === 'none') {
+    ctx._sampledPages = {
+      urls: [ctx.baseUrl],
+      totalPages: 1,
+      sampled: false,
+      warnings: [],
+    };
+    return ctx._sampledPages;
+  }
+
   const discovery = await getPageUrls(ctx);
   let urls = discovery.urls;
   const totalPages = urls.length;
 
   const sampled = totalPages > ctx.options.maxLinksToTest;
   if (sampled) {
-    for (let i = urls.length - 1; i > 0; i--) {
-      const j = Math.floor(Math.random() * (i + 1));
-      [urls[i], urls[j]] = [urls[j], urls[i]];
+    if (strategy === 'deterministic') {
+      // Sort lexicographically for a stable ordering, then pick evenly-spaced URLs.
+      urls.sort();
+      const stride = urls.length / ctx.options.maxLinksToTest;
+      const picked: string[] = [];
+      for (let i = 0; i < ctx.options.maxLinksToTest; i++) {
+        picked.push(urls[Math.floor(i * stride)]);
+      }
+      urls = picked;
+    } else {
+      // "random" — Fisher-Yates shuffle
+      for (let i = urls.length - 1; i > 0; i--) {
+        const j = Math.floor(Math.random() * (i + 1));
+        [urls[i], urls[j]] = [urls[j], urls[i]];
+      }
+      urls = urls.slice(0, ctx.options.maxLinksToTest);
     }
-    urls = urls.slice(0, ctx.options.maxLinksToTest);
   }
 
   ctx._sampledPages = { urls, totalPages, sampled, warnings: discovery.warnings };
diff --git a/src/types.ts b/src/types.ts
index 4a8e03b..09fc35d 100644
--- a/src/types.ts
+++ b/src/types.ts
@@ -46,6 +46,8 @@ export interface CheckContext {
   _sampledPages?: SampledPages;
 }
 
+export type SamplingStrategy = 'random' | 'deterministic' | 'none';
+
 export interface CheckOptions {
   /** Maximum concurrent HTTP requests within a single check. */
   maxConcurrency: number;
@@ -55,6 +57,8 @@ export interface CheckOptions {
   requestTimeout: number;
   /** Maximum number of links to test in link-resolution checks. */
   maxLinksToTest: number;
+  /** URL sampling strategy: random (default), deterministic, or none. */
+  samplingStrategy: SamplingStrategy;
   /** Size thresholds. */
   thresholds: SizeThresholds;
 }
diff --git a/test/unit/helpers/get-page-urls.test.ts b/test/unit/helpers/get-page-urls.test.ts
index 9e1b745..f6b2b7e 100644
--- a/test/unit/helpers/get-page-urls.test.ts
+++ b/test/unit/helpers/get-page-urls.test.ts
@@ -696,6 +696,52 @@ describe('discoverAndSamplePages', () => {
     }
   });
 
+  it('deterministic strategy produces stable evenly-spaced results', async () => {
+    const links = Array.from(
+      { length: 10 },
+      (_, i) => `- [Page ${i}](http://det.local/page-${String(i).padStart(2, '0')}): Page ${i}`,
+    ).join('\n');
+    const content = `# Docs\n> Summary\n## Links\n${links}\n`;
+    const ctx = makeCtx('http://det.local', content, {
+      maxLinksToTest: 3,
+      samplingStrategy: 'deterministic',
+    });
+
+    const result = await discoverAndSamplePages(ctx);
+    expect(result.urls).toHaveLength(3);
+    expect(result.totalPages).toBe(10);
+    expect(result.sampled).toBe(true);
+
+    // Run again with a fresh context — should produce the same URLs
+    const ctx2 = makeCtx('http://det.local', content, {
+      maxLinksToTest: 3,
+      samplingStrategy: 'deterministic',
+    });
+    const result2 = await discoverAndSamplePages(ctx2);
+    expect(result2.urls).toEqual(result.urls);
+
+    // URLs should be evenly spaced from the sorted list
+    // Sorted: page-00 through page-09, stride = 10/3 ≈ 3.33
+    // Indices: floor(0*3.33)=0, floor(1*3.33)=3, floor(2*3.33)=6
+    expect(result.urls).toEqual([
+      'http://det.local/page-00',
+      'http://det.local/page-03',
+      'http://det.local/page-06',
+    ]);
+  });
+
+  it('none strategy returns only the baseUrl without discovery', async () => {
+    const content = `# Docs\n> Summary\n## Links\n- [A](http://none-test.local/a): A\n- [B](http://none-test.local/b): B\n`;
+    const ctx = makeCtx('http://none-test.local', content, {
+      samplingStrategy: 'none',
+    });
+
+    const result = await discoverAndSamplePages(ctx);
+    expect(result.urls).toEqual(['http://none-test.local']);
+    expect(result.totalPages).toBe(1);
+    expect(result.sampled).toBe(false);
+  });
+
   it('passes through warnings from discovery', async () => {
     server.use(
       http.get(

From 63d3b2ddc42805be50520f24093f83caf2d5404b Mon Sep 17 00:00:00 2001
From: dacharyc <dc@dacharycarey.com>
Date: Sun, 15 Mar 2026 18:41:02 -0400
Subject: [PATCH 5/7] Add 'llms-txt-freshness' check

---
 README.md                                     |   4 +-
 src/checks/llms-txt/llms-txt-exists.ts        |  26 +-
 .../observability/llms-txt-freshness.ts       | 433 +++++++++++-
 src/helpers/get-page-urls.ts                  |  48 +-
 src/helpers/to-md-urls.ts                     |  13 +
 src/types.ts                                  |   7 +
 test/unit/checks/llms-txt-freshness.test.ts   | 665 ++++++++++++++++++
 7 files changed, 1165 insertions(+), 31 deletions(-)
 create mode 100644 test/unit/checks/llms-txt-freshness.test.ts

diff --git a/README.md b/README.md
index 7ac2410..a803181 100644
--- a/README.md
+++ b/README.md
@@ -7,7 +7,7 @@ Test your documentation site against the [Agent-Friendly Documentation Spec](htt
 
 Agents don't use docs like humans. They hit truncation limits, get walls of CSS instead of content, can't follow cross-host redirects, and don't know about quality-of-life improvements like `llms.txt` or `.md` docs pages that would make life swell. Maybe this is because the industry has lacked guidance - until now.
 
-afdocs runs 22 checks across 8 categories to evaluate how well your docs serve agent consumers. 20 are fully implemented; the rest return `skip` until completed.
+afdocs runs 22 checks across 8 categories to evaluate how well your docs serve agent consumers. 21 are fully implemented; the rest return `skip` until completed.
 
 > **Status: Early development (0.x)**
 > This project is under active development. Check IDs, CLI flags, and output formats may change between minor versions. Feel free to try it out, but don't build automation against specific output until 1.0.
@@ -219,7 +219,7 @@ describe('agent-friendliness', () => {
 
 | Check                     | Description                                    |
 | ------------------------- | ---------------------------------------------- |
-| `llms-txt-freshness` \*   | Whether `llms.txt` reflects current site state |
+| `llms-txt-freshness`      | Whether `llms.txt` reflects current site state |
 | `markdown-content-parity` | Whether markdown and HTML versions match       |
 | `cache-header-hygiene`    | Whether cache headers allow timely updates     |
 
diff --git a/src/checks/llms-txt/llms-txt-exists.ts b/src/checks/llms-txt/llms-txt-exists.ts
index c813277..9631a26 100644
--- a/src/checks/llms-txt/llms-txt-exists.ts
+++ b/src/checks/llms-txt/llms-txt-exists.ts
@@ -1,4 +1,5 @@
 import { registerCheck } from '../registry.js';
+import { isCrossHostRedirect } from '../../helpers/to-md-urls.js';
 import type { CheckContext, CheckResult, DiscoveredFile } from '../../types.js';
 
 /**
@@ -14,16 +15,6 @@ function getCandidateUrls(baseUrl: string, origin: string): string[] {
   return Array.from(candidates);
 }
 
-function isCrossHostRedirect(originalUrl: string, finalUrl: string): boolean {
-  try {
-    const original = new URL(originalUrl);
-    const final_ = new URL(finalUrl);
-    return original.host !== final_.host;
-  } catch {
-    return false;
-  }
-}
-
 async function checkLlmsTxtExists(ctx: CheckContext): Promise<CheckResult> {
   const candidates = getCandidateUrls(ctx.baseUrl, ctx.origin);
   const discovered: DiscoveredFile[] = [];
@@ -155,6 +146,21 @@ async function checkLlmsTxtExists(ctx: CheckContext): Promise<CheckResult> {
     details.redirectedOrigins = redirectedOrigins;
   }
 
+  // Set effectiveOrigin for downstream checks when content lives at a different host.
+  // Derive from redirect URLs on discovered files, or from the fallback redirectedOrigins.
+  if (!ctx.effectiveOrigin) {
+    const crossHostFile = discovered.find((f) => f.crossHostRedirect && f.redirectUrl);
+    if (crossHostFile?.redirectUrl) {
+      try {
+        ctx.effectiveOrigin = new URL(crossHostFile.redirectUrl).origin;
+      } catch {
+        /* ignore malformed */
+      }
+    } else if (redirectedOrigins.length > 0) {
+      ctx.effectiveOrigin = redirectedOrigins[0];
+    }
+  }
+
   if (discovered.length === 0) {
     const redirectNote =
       redirectedOrigins.length > 0
diff --git a/src/checks/observability/llms-txt-freshness.ts b/src/checks/observability/llms-txt-freshness.ts
index 91a2ed9..62a801a 100644
--- a/src/checks/observability/llms-txt-freshness.ts
+++ b/src/checks/observability/llms-txt-freshness.ts
@@ -1,12 +1,435 @@
 import { registerCheck } from '../registry.js';
+import {
+  getUrlsFromCachedLlmsTxt,
+  getUrlsFromSitemap,
+  parseSitemapUrls,
+} from '../../helpers/get-page-urls.js';
+import { isNonPageUrl } from '../../helpers/to-md-urls.js';
 import type { CheckContext, CheckResult } from '../../types.js';
 
-async function check(_ctx: CheckContext): Promise<CheckResult> {
+/**
+ * Reduce a URL to a canonical, lowercase path so that different spellings of
+ * the same page compare equal. A trailing `/index.md|mdx|htm|html` collapses
+ * to its directory, a trailing `.md|.mdx|.htm|.html` extension is removed,
+ * and a trailing slash is dropped (the root `/` is kept). Query string and
+ * fragment are ignored because only `pathname` is used.
+ *
+ * @param url Absolute URL to normalize.
+ * @returns The normalized path, or the lowercased input if it is not a
+ *   parseable URL.
+ */
+export function normalizeUrlPath(url: string): string {
+  let pathname: string;
+  try {
+    pathname = new URL(url).pathname;
+  } catch {
+    return url.toLowerCase();
+  }
+
+  const withoutIndex = pathname.replace(/\/index\.(?:md|mdx|html?)$/i, '/');
+  const withoutExt = withoutIndex.replace(/\.(?:md|mdx|html?)$/i, '');
+  const hasTrailingSlash = withoutExt.length > 1 && withoutExt.endsWith('/');
+  const canonical = hasTrailingSlash ? withoutExt.slice(0, -1) : withoutExt;
+
+  return canonical.toLowerCase();
+}
+
+/**
+ * Non-doc path prefixes (common in sitemaps) that are unlikely to need llms.txt coverage.
+ * Each pattern is case-insensitive and matches the first path segment exactly or with sub-paths.
+ */
+const EXCLUDED_PATH_PATTERNS = [
+  /^\/blog(\/|$)/i,
+  /^\/changelog(\/|$)/i,
+  /^\/releases?(\/|$)/i,
+  /^\/pricing(\/|$)/i,
+  /^\/about(\/|$)/i,
+  /^\/careers?(\/|$)/i,
+  /^\/jobs?(\/|$)/i,
+  /^\/contact(\/|$)/i,
+  /^\/legal(\/|$)/i,
+  /^\/privacy(\/|$)/i,
+  /^\/terms(\/|$)/i,
+  /^\/security(\/|$)/i,
+  /^\/status(\/|$)/i,
+  /^\/login(\/|$)/i,
+  /^\/signup(\/|$)/i,
+  /^\/sign-up(\/|$)/i,
+  /^\/sign-in(\/|$)/i,
+  /^\/register(\/|$)/i,
+  /^\/404(\/|$)/i,
+  /^\/500(\/|$)/i,
+];
+
+/**
+ * Whether a path matches an excluded pattern, either as-is or relative to
+ * `baseUrlPath`. The base prefix is matched case-insensitively (`/Docs` vs `/docs/blog`).
+ */
+export function isExcludedPath(normalizedPath: string, baseUrlPath?: string): boolean {
+  const matches = (path: string): boolean => EXCLUDED_PATH_PATTERNS.some((re) => re.test(path));
+  if (matches(normalizedPath)) return true;
+  if (!baseUrlPath || baseUrlPath === '/') return false;
+  // normalizedPath is presumably lowercased by normalizeUrlPath(); the base may not be, so fold case.
+  const [path, base] = [normalizedPath.toLowerCase(), baseUrlPath.toLowerCase()];
+  if (!path.startsWith(base)) return false;
+  return matches(path.slice(base.length) || '/'); // e.g. /docs/changelog → /changelog
+}
+
+/**
+ * Find the path-segment index at which a URL set carries a locale prefix,
+ * or `null` when it does not appear to use one.
+ *
+ * Detection is empirical: every segment shaped like a 2-letter code, with an
+ * optional 2-letter suffix (`en`, `pt-br`), is tallied per position. A
+ * position qualifies once it has ≥2 distinct codes and such codes occur there
+ * in >50% of the input URLs (unparseable URLs still count toward the total).
+ * Positions are tried in the order in which they were first seen.
+ *
+ * Example: `/docs/en/intro` and `/docs/de/intro` yield `en` and `de` at
+ * index 1, so 1 is returned.
+ */
+export function detectLocalePosition(urls: string[]): number | null {
+  const localeLike = /^[a-z]{2}(-[a-z]{2})?$/;
+  const codesByPosition = new Map<number, Map<string, number>>();
+
+  for (const url of urls) {
+    let segments: string[];
+    try {
+      segments = new URL(url).pathname.split('/').filter(Boolean);
+    } catch {
+      continue;
+    }
+    segments.forEach((raw, index) => {
+      const code = raw.toLowerCase();
+      if (!localeLike.test(code)) return;
+      const tally = codesByPosition.get(index) ?? new Map<string, number>();
+      tally.set(code, (tally.get(code) ?? 0) + 1);
+      codesByPosition.set(index, tally);
+    });
+  }
+
+  const majority = urls.length * 0.5;
+  for (const [position, tally] of codesByPosition) {
+    if (tally.size < 2) continue;
+    let hits = 0;
+    for (const count of tally.values()) hits += count;
+    if (hits > majority) return position;
+  }
+  return null;
+}
+
+/**
+ * Find the most frequent (lowercased) path segment at `position` across
+ * `urls`. URLs that fail to parse or have too few segments are not tallied,
+ * but they still count toward the majority threshold.
+ *
+ * @returns The winning segment when it occurs in more than half of `urls`
+ *   (ties go to the value seen first), otherwise `null`.
+ */
+export function getDominantSegment(urls: string[], position: number): string | null {
+  const frequency = new Map<string, number>();
+  for (const url of urls) {
+    let segment: string | undefined;
+    try {
+      segment = new URL(url).pathname.split('/').filter(Boolean)[position];
+    } catch {
+      continue;
+    }
+    if (segment === undefined) continue;
+    const key = segment.toLowerCase();
+    frequency.set(key, (frequency.get(key) ?? 0) + 1);
+  }
+
+  let best: string | null = null;
+  let bestCount = 0;
+  for (const [key, count] of frequency) {
+    if (count > bestCount) [best, bestCount] = [key, count];
+  }
+  // Only a strict majority of all input URLs counts as dominant
+  return bestCount > urls.length * 0.5 ? best : null;
+}
+
+/** Keep the URLs whose (lowercased) path segment at `position` equals `locale`; unparseable URLs are dropped. */
+function filterByLocale(urls: string[], locale: string, position: number): string[] {
+  const kept: string[] = [];
+  for (const url of urls) {
+    try {
+      const segment = new URL(url).pathname.split('/').filter(Boolean)[position];
+      if (segment !== undefined && segment.toLowerCase() === locale) kept.push(url);
+    } catch {
+      // Unparseable URL: leave it out.
+    }
+  }
+  return kept;
+}
+
+/** Coverage ratio cut-offs (0–1). NOTE(review): presumably ≥ PASS passes and ≥ WARN warns — confirm in check(). */
+const COVERAGE_PASS = 0.95;
+const COVERAGE_WARN = 0.8;
+
+/**
+ * Maximum sitemap URLs to collect for freshness comparison.
+ * Higher than the default MAX_SITEMAP_URLS (500) used for page sampling,
+ * because freshness needs the full sitemap to produce meaningful coverage
+ * percentages. Enterprise docs sites (Stripe, MongoDB) can have thousands
+ * of pages.
+ */
+const MAX_FRESHNESS_SITEMAP_URLS = 50_000;
+
+/**
+ * Try to fetch a docs-specific sitemap at {baseUrl}/sitemap.xml.
+ * Many docs sites host their own sitemap that isn't referenced from robots.txt
+ * (e.g., Loops /docs/sitemap.xml, Supabase /docs/sitemap.xml).
+ *
+ * Returns [] when baseUrl has no path prefix or the sitemap cannot be fetched.
+ * A sitemap index is followed one level deep; failing sub-sitemaps are skipped.
+ * Collection is capped at MAX_FRESHNESS_SITEMAP_URLS, like the main sitemap lookup.
+ */
+async function fetchDocsSitemap(ctx: CheckContext): Promise<string[]> {
+  const baseUrlPath = new URL(ctx.baseUrl).pathname.replace(/\/$/, '');
+  if (!baseUrlPath || baseUrlPath === '/') return [];
+
+  try {
+    const response = await ctx.http.fetch(`${ctx.origin}${baseUrlPath}/sitemap.xml`);
+    if (!response.ok) return [];
+    const parsed = parseSitemapUrls(await response.text());
+    if (parsed.sitemapIndexUrls.length === 0) {
+      return parsed.urls.slice(0, MAX_FRESHNESS_SITEMAP_URLS);
+    }
+    // Sitemap index: follow one level, stopping once the cap has been reached
+    const urls: string[] = [];
+    for (const subUrl of parsed.sitemapIndexUrls) {
+      if (urls.length >= MAX_FRESHNESS_SITEMAP_URLS) break;
+      try {
+        const subResp = await ctx.http.fetch(subUrl);
+        if (!subResp.ok) continue;
+        // Append one by one: spreading a very large array into push() can overflow the stack
+        for (const url of parseSitemapUrls(await subResp.text()).urls) urls.push(url);
+      } catch {
+        // Skip failed fetches
+      }
+    }
+    return urls.slice(0, MAX_FRESHNESS_SITEMAP_URLS);
+  } catch {
+    return [];
+  }
+}
+
+/**
+ * Keep only the URLs that are on `origin`, sit at or below `baseUrlPath`
+ * (an empty or `/` base path disables the prefix test), and are not rejected
+ * by isNonPageUrl(). Unparseable URLs are dropped.
+ */
+function scopeUrls(urls: string[], origin: string, baseUrlPath: string): string[] {
+  const restrictToPrefix = Boolean(baseUrlPath) && baseUrlPath !== '/';
+  const kept: string[] = [];
+  for (const url of urls) {
+    try {
+      const { origin: urlOrigin, pathname } = new URL(url);
+      const underBase =
+        !restrictToPrefix || pathname === baseUrlPath || pathname.startsWith(baseUrlPath + '/');
+      if (urlOrigin === origin && underBase && !isNonPageUrl(url)) kept.push(url);
+    } catch {
+      // Unparseable URL (or a failure while classifying it): treat as out of scope.
+    }
+  }
+  return kept;
+}
+
+async function check(ctx: CheckContext): Promise<CheckResult> {
+  const id = 'llms-txt-freshness';
+  const category = 'observability';
+  // Grades how much of the in-scope sitemap is listed in llms.txt; status is coverage-only.
+  // 1. Get llms.txt page URLs (with progressive disclosure walking)
+  const llmsTxtUrls = await getUrlsFromCachedLlmsTxt(ctx);
+  if (llmsTxtUrls.length === 0) {
+    return {
+      id,
+      category,
+      status: 'skip',
+      message: 'No page URLs found in llms.txt',
+    };
+  }
+
+  // 2. Get sitemap URLs, with docs-specific sitemap fallback
+  //    Use effectiveOrigin when a cross-host redirect was detected, so that
+  //    sitemap URLs at the redirected host are accepted rather than filtered out.
+  const effectiveOrigin = ctx.effectiveOrigin ?? ctx.origin;
+  const sitemapWarnings: string[] = [];
+  let sitemapUrls = await getUrlsFromSitemap(
+    ctx,
+    sitemapWarnings,
+    MAX_FRESHNESS_SITEMAP_URLS,
+    effectiveOrigin,
+  );
+  let sitemapSource = 'robots.txt/sitemap.xml';
+  const baseUrlPath = new URL(ctx.baseUrl).pathname.replace(/\/$/, '');
+
+  // Check if main sitemap has any docs URLs
+  let scopedSitemapUrls = scopeUrls(sitemapUrls, effectiveOrigin, baseUrlPath);
+
+  // If the main sitemap has no docs URLs, try a docs-specific sitemap
+  if (scopedSitemapUrls.length === 0 && baseUrlPath && baseUrlPath !== '/') {
+    const docsSitemapUrls = await fetchDocsSitemap(ctx);
+    if (docsSitemapUrls.length > 0) {
+      sitemapUrls = docsSitemapUrls;
+      scopedSitemapUrls = scopeUrls(docsSitemapUrls, effectiveOrigin, baseUrlPath);
+      sitemapSource = `${baseUrlPath}/sitemap.xml`;
+    }
+  }
+
+  if (sitemapUrls.length === 0) {
+    return {
+      id,
+      category,
+      status: 'skip',
+      message:
+        'No sitemap found; cannot assess llms.txt freshness without a sitemap as ground truth',
+      details: { sitemapWarnings },
+    };
+  }
+
+  if (scopedSitemapUrls.length === 0) {
+    return {
+      id,
+      category,
+      status: 'skip',
+      message: `Sitemap has ${sitemapUrls.length} URLs but none are under the docs path prefix (${baseUrlPath || '/'})`,
+      details: {
+        totalSitemapUrls: sitemapUrls.length,
+        baseUrlPath: baseUrlPath || '/',
+        sitemapWarnings,
+      },
+    };
+  }
+
+  // 2b. Locale filtering: if the sitemap uses locale-prefixed paths (e.g. /docs/en/,
+  //     /docs/de/), filter to the same locale as the llms.txt URLs. This avoids
+  //     penalizing sites for not listing every localized variant in llms.txt.
+  let localeFiltered = false;
+  let detectedLocale: string | null = null;
+  const localePosition = detectLocalePosition(scopedSitemapUrls);
+
+  if (localePosition !== null) {
+    const llmsLocale = getDominantSegment(llmsTxtUrls, localePosition);
+    if (llmsLocale) {
+      detectedLocale = llmsLocale;
+      const before = scopedSitemapUrls.length;
+      scopedSitemapUrls = filterByLocale(scopedSitemapUrls, llmsLocale, localePosition);
+      localeFiltered = scopedSitemapUrls.length < before;
+    }
+  }
+
+  // 3. Normalize both sets for comparison
+  const llmsNormalized = new Set(llmsTxtUrls.map(normalizeUrlPath));
+  const sitemapNormalized = new Map<string, string>(); // normalized -> original URL
+  // Tally exclusions directly: deriving the count from the Map size would also
+  // count URLs that merely collapse onto an already-seen normalized path.
+  let excludedCount = 0;
+  for (const url of scopedSitemapUrls) {
+    const norm = normalizeUrlPath(url);
+    if (isExcludedPath(norm, baseUrlPath)) excludedCount++;
+    else sitemapNormalized.set(norm, url);
+  }
+
+  // 4. Missing coverage: in sitemap but not in llms.txt
+  const missingFromLlmsTxt: string[] = [];
+  for (const [norm, originalUrl] of sitemapNormalized) {
+    if (!llmsNormalized.has(norm)) {
+      missingFromLlmsTxt.push(originalUrl);
+    }
+  }
+
+  // 5. Unmatched llms.txt links: in llms.txt but not in sitemap
+  //    This could mean either (a) the page was removed (truly stale) or
+  //    (b) the sitemap is incomplete. We report it but don't use it to
+  //    determine the overall status since we can't distinguish the two
+  //    without fetching every URL (which llms-txt-links-resolve handles).
+  const sitemapNormalizedSet = new Set(sitemapNormalized.keys());
+  const unmatchedLlmsTxtUrls: string[] = [];
+  for (const url of llmsTxtUrls) {
+    const norm = normalizeUrlPath(url);
+    // Only check URLs under the same origin and path prefix
+    try {
+      const parsed = new URL(url);
+      if (parsed.origin !== effectiveOrigin) continue;
+      if (
+        baseUrlPath &&
+        baseUrlPath !== '/' &&
+        !parsed.pathname.startsWith(baseUrlPath + '/') &&
+        parsed.pathname !== baseUrlPath
+      ) {
+        continue;
+      }
+    } catch {
+      continue;
+    }
+    if (isExcludedPath(norm, baseUrlPath)) continue;
+    if (!sitemapNormalizedSet.has(norm)) {
+      unmatchedLlmsTxtUrls.push(url);
+    }
+  }
+
+  // 6. Compute metrics
+  const sitemapDocPages = sitemapNormalized.size;
+  const coveredCount = sitemapDocPages - missingFromLlmsTxt.length;
+  const coverageRate = sitemapDocPages > 0 ? coveredCount / sitemapDocPages : 1;
+  const unmatchedRate =
+    llmsTxtUrls.length > 0 ? unmatchedLlmsTxtUrls.length / llmsTxtUrls.length : 0;
+
+  const coveragePct = Math.round(coverageRate * 100);
+  const unmatchedPct = Math.round(unmatchedRate * 100);
+
+  // 7. Determine status based on coverage only
+  //    Unmatched links are informational (see note in step 5)
+  let overallStatus: 'pass' | 'warn' | 'fail';
+  if (coverageRate >= COVERAGE_PASS) {
+    overallStatus = 'pass';
+  } else if (coverageRate >= COVERAGE_WARN) {
+    overallStatus = 'warn';
+  } else {
+    overallStatus = 'fail';
+  }
+
+  // 8. Build message
+  const parts: string[] = [];
+  if (overallStatus === 'pass') {
+    parts.push(`llms.txt covers ${coveragePct}% of ${sitemapDocPages} sitemap doc pages`);
+  } else {
+    parts.push(
+      `llms.txt covers ${coveredCount}/${sitemapDocPages} sitemap doc pages (${coveragePct}%); ${missingFromLlmsTxt.length} missing`,
+    );
+  }
+  if (unmatchedLlmsTxtUrls.length > 0) {
+    parts.push(
+      `${unmatchedLlmsTxtUrls.length} llms.txt links not in sitemap (may indicate stale links or incomplete sitemap)`,
+    );
+  }
+
+  const message = parts.join('; ');
+
   return {
-    id: 'llms-txt-freshness',
-    category: 'observability',
-    status: 'skip',
-    message: 'Not yet implemented',
+    id,
+    category,
+    status: overallStatus,
+    message,
+    details: {
+      llmsTxtPageCount: llmsTxtUrls.length,
+      sitemapTotal: sitemapUrls.length,
+      sitemapScoped: scopedSitemapUrls.length,
+      sitemapDocPages,
+      sitemapSource,
+      excludedNonDocPages: excludedCount,
+      ...(localeFiltered ? { localeFiltered: true, detectedLocale } : {}),
+      baseUrlPath: baseUrlPath || '/',
+      coverageRate: coveragePct,
+      missingFromLlmsTxt: missingFromLlmsTxt.slice(0, 50),
+      missingCount: missingFromLlmsTxt.length,
+      unmatchedLlmsTxtUrls: unmatchedLlmsTxtUrls.slice(0, 50),
+      unmatchedCount: unmatchedLlmsTxtUrls.length,
+      unmatchedPct,
+      sitemapWarnings,
+    },
   };
 }
 
diff --git a/src/helpers/get-page-urls.ts b/src/helpers/get-page-urls.ts
index ae22c23..7dfe00c 100644
--- a/src/helpers/get-page-urls.ts
+++ b/src/helpers/get-page-urls.ts
@@ -36,7 +36,7 @@ export function parseSitemapUrls(xml: string): { urls: string[]; sitemapIndexUrl
   return { urls, sitemapIndexUrls };
 }
 
-async function getUrlsFromCachedLlmsTxt(ctx: CheckContext): Promise<string[]> {
+export async function getUrlsFromCachedLlmsTxt(ctx: CheckContext): Promise<string[]> {
   const existsResult = ctx.previousResults.get('llms-txt-exists');
   const discovered = (existsResult?.details?.discoveredFiles ?? []) as DiscoveredFile[];
 
@@ -189,8 +189,8 @@ export function parseSitemapDirectives(robotsTxt: string): string[] {
 /**
  * Discover sitemap URLs by checking robots.txt first, then falling back to /sitemap.xml.
  */
-async function discoverSitemapUrls(ctx: CheckContext): Promise<string[]> {
-  // Try robots.txt for Sitemap directives
+async function discoverSitemapUrls(ctx: CheckContext, originOverride?: string): Promise<string[]> {
+  // Try robots.txt for Sitemap directives at the original origin first
   try {
     const robotsResponse = await ctx.http.fetch(`${ctx.origin}/robots.txt`);
     if (robotsResponse.ok) {
@@ -202,8 +202,22 @@ async function discoverSitemapUrls(ctx: CheckContext): Promise<string[]> {
     // robots.txt fetch failed; fall through
   }
 
-  // Default to /sitemap.xml
-  return [`${ctx.origin}/sitemap.xml`];
+  // If there's an effective origin (cross-host redirect), try its robots.txt too
+  if (originOverride && originOverride !== ctx.origin) {
+    try {
+      const robotsResponse = await ctx.http.fetch(`${originOverride}/robots.txt`);
+      if (robotsResponse.ok) {
+        const body = await robotsResponse.text();
+        const directives = parseSitemapDirectives(body);
+        if (directives.length > 0) return directives;
+      }
+    } catch {
+      // fall through
+    }
+  }
+
+  // Default to /sitemap.xml (prefer effective origin if available)
+  return [`${originOverride ?? ctx.origin}/sitemap.xml`];
 }
 
 export interface PageUrlResult {
@@ -235,21 +249,27 @@ async function fetchSitemap(
   }
 }
 
-async function getUrlsFromSitemap(ctx: CheckContext, warnings: string[]): Promise<string[]> {
-  const sitemapUrls = await discoverSitemapUrls(ctx);
+export async function getUrlsFromSitemap(
+  ctx: CheckContext,
+  warnings: string[],
+  maxUrls: number = MAX_SITEMAP_URLS,
+  originOverride?: string,
+): Promise<string[]> {
+  const sitemapUrls = await discoverSitemapUrls(ctx, originOverride);
   const urls: string[] = [];
+  const matchOrigin = originOverride ?? ctx.origin;
 
   for (const sitemapUrl of sitemapUrls) {
-    if (urls.length >= MAX_SITEMAP_URLS) break;
+    if (urls.length >= maxUrls) break;
 
     const parsed = await fetchSitemap(ctx, sitemapUrl, warnings);
 
     // Add direct URLs (filtered to same origin)
     for (const url of parsed.urls) {
-      if (urls.length >= MAX_SITEMAP_URLS) break;
+      if (urls.length >= maxUrls) break;
       try {
         const u = new URL(url);
-        if (u.origin === ctx.origin) {
+        if (u.origin === matchOrigin) {
           urls.push(url);
         }
       } catch {
@@ -258,17 +278,17 @@ async function getUrlsFromSitemap(ctx: CheckContext, warnings: string[]): Promis
     }
 
     // Follow one level of sitemap index
-    if (parsed.sitemapIndexUrls.length > 0 && urls.length < MAX_SITEMAP_URLS) {
+    if (parsed.sitemapIndexUrls.length > 0 && urls.length < maxUrls) {
       for (const subSitemapUrl of parsed.sitemapIndexUrls) {
-        if (urls.length >= MAX_SITEMAP_URLS) break;
+        if (urls.length >= maxUrls) break;
 
         const subParsed = await fetchSitemap(ctx, subSitemapUrl, warnings);
 
         for (const url of subParsed.urls) {
-          if (urls.length >= MAX_SITEMAP_URLS) break;
+          if (urls.length >= maxUrls) break;
           try {
             const u = new URL(url);
-            if (u.origin === ctx.origin) {
+            if (u.origin === matchOrigin) {
               urls.push(url);
             }
           } catch {
diff --git a/src/helpers/to-md-urls.ts b/src/helpers/to-md-urls.ts
index 2c30224..4bd4503 100644
--- a/src/helpers/to-md-urls.ts
+++ b/src/helpers/to-md-urls.ts
@@ -1,3 +1,16 @@
+/**
+ * Reports whether a redirect ended on a different host than the one it started from.
+ */
+export function isCrossHostRedirect(originalUrl: string, finalUrl: string): boolean {
+  let hosts: string[];
+  try {
+    hosts = [originalUrl, finalUrl].map((href) => new URL(href).host);
+  } catch {
+    return false; // an unparseable URL is never reported as a cross-host redirect
+  }
+  return hosts[0] !== hosts[1];
+}
+
 /**
  * Returns true if the URL points to a non-page file type (e.g. .json, .xml, .txt)
  * where we would not expect a markdown equivalent.
diff --git a/src/types.ts b/src/types.ts
index 09fc35d..58e9e2a 100644
--- a/src/types.ts
+++ b/src/types.ts
@@ -32,6 +32,13 @@ export interface CheckContext {
   baseUrl: string;
   /** The origin (scheme + host) derived from baseUrl. */
   origin: string;
+  /**
+   * The actual origin where content lives, when the baseUrl origin redirects
+   * cross-host. Set by llms-txt-exists when it detects a cross-host redirect.
+   * Checks that need ground-truth data (e.g. sitemap for freshness) should
+   * use this over `origin`; checks that test agent experience should use `origin`.
+   */
+  effectiveOrigin?: string;
   /** Results from previously-run checks, keyed by check ID. */
   previousResults: Map<string, CheckResult>;
   /** HTTP client with rate limiting. */
diff --git a/test/unit/checks/llms-txt-freshness.test.ts b/test/unit/checks/llms-txt-freshness.test.ts
new file mode 100644
index 0000000..0528b4c
--- /dev/null
+++ b/test/unit/checks/llms-txt-freshness.test.ts
@@ -0,0 +1,665 @@
+import { describe, test, expect, beforeAll } from 'vitest';
+import { http, HttpResponse } from 'msw';
+import { setupServer } from 'msw/node';
+import { getCheck } from '../../../src/checks/registry.js';
+import { createContext } from '../../../src/runner.js';
+import type { DiscoveredFile } from '../../../src/types.js';
+
+// Ensure the check is registered
+import '../../../src/checks/observability/llms-txt-freshness.js';
+
+const server = setupServer();
+// Start the mock server once for this file; the returned callback closes it on teardown.
+beforeAll(() => {
+  server.listen({ onUnhandledRequest: 'bypass' });
+  return () => server.close();
+});
+// Looked up at module load, after the side-effect import above has registered the check.
+const check = getCheck('llms-txt-freshness');
+
+/**
+ * Render URLs as a markdown link list under a `# Docs` heading (minimal llms.txt).
+ */
+function makeLlmsTxt(urls: string[]): string {
+  return urls.reduce((doc, link) => `${doc}\n- [Page](${link})`, '# Docs\n');
+}
+
+/**
+ * Wrap each URL in a `<url><loc>` entry inside a bare-bones `<urlset>` document.
+ */
+function makeSitemap(urls: string[]): string {
+  const entries = urls.map((pageUrl) => '<url><loc>' + pageUrl + '</loc></url>');
+  return ['<?xml version="1.0"?>', '<urlset>', entries.join('\n'), '</urlset>'].join('\n');
+}
+
+/**
+ * Wrap each child sitemap URL in a `<sitemap><loc>` entry inside a `<sitemapindex>`.
+ */
+function makeSitemapIndex(sitemapUrls: string[]): string {
+  const rows = sitemapUrls.map((child) => '<sitemap><loc>' + child + '</loc></sitemap>');
+  return ['<?xml version="1.0"?>', '<sitemapindex>', rows.join('\n'), '</sitemapindex>'].join('\n');
+}
+
+/** Build a check context for `host` pre-seeded with a passing llms-txt-exists result. */
+function makeCtx(host: string, llmsTxtUrls: string[], basePath = '') {
+  const ctx = createContext(`http://${host}${basePath}`, { requestDelay: 0 });
+  const llmsTxtFile: DiscoveredFile = {
+    url: `http://${host}/llms.txt`,
+    content: makeLlmsTxt(llmsTxtUrls),
+    status: 200,
+    redirected: false,
+  };
+  // Stored under the check ID that later checks look up in previousResults.
+  ctx.previousResults.set('llms-txt-exists', {
+    id: 'llms-txt-exists',
+    category: 'llms-txt',
+    status: 'pass',
+    message: 'Found',
+    details: { discoveredFiles: [llmsTxtFile] },
+  });
+  return ctx;
+}
+
+describe('llms-txt-freshness', () => {
+  test('passes when llms.txt fully covers sitemap', async () => {
+    const host = 'fresh-pass.local';
+    const pages = [
+      `http://${host}/docs/getting-started`,
+      `http://${host}/docs/api-reference`,
+      `http://${host}/docs/guides/auth`,
+    ];
+
+    const ctx = makeCtx(host, pages, '/docs');
+
+    server.use(
+      http.get(
+        `http://${host}/robots.txt`,
+        () => new HttpResponse(`Sitemap: http://${host}/sitemap.xml`, { status: 200 }),
+      ),
+      http.get(
+        `http://${host}/sitemap.xml`,
+        () =>
+          new HttpResponse(makeSitemap(pages), {
+            status: 200,
+            headers: { 'content-type': 'application/xml' },
+          }),
+      ),
+    );
+
+    const result = await check.run(ctx);
+    expect(result.status).toBe('pass');
+    expect(result.details?.coverageRate).toBe(100);
+    expect(result.details?.unmatchedCount).toBe(0);
+  });
+
+  test('passes when llms.txt uses .md URLs matching sitemap HTML URLs', async () => {
+    const host = 'fresh-md.local';
+    const llmsUrls = [
+      `http://${host}/docs/getting-started.md`,
+      `http://${host}/docs/api-reference.md`,
+    ];
+    const sitemapUrls = [
+      `http://${host}/docs/getting-started`,
+      `http://${host}/docs/api-reference`,
+    ];
+    // llms.txt links carry a .md suffix that the sitemap URLs do not.
+    const ctx = makeCtx(host, llmsUrls, '/docs');
+
+    server.use(
+      http.get(
+        `http://${host}/robots.txt`,
+        () => new HttpResponse(`Sitemap: http://${host}/sitemap.xml`, { status: 200 }),
+      ),
+      http.get(
+        `http://${host}/sitemap.xml`,
+        () =>
+          new HttpResponse(makeSitemap(sitemapUrls), {
+            status: 200,
+            headers: { 'content-type': 'application/xml' },
+          }),
+      ),
+    );
+
+    const result = await check.run(ctx);
+    expect(result.status).toBe('pass');
+    expect(result.details?.coverageRate).toBe(100);
+  });
+
+  test('passes with trailing slash differences', async () => {
+    const host = 'fresh-slash.local';
+    const llmsUrls = [`http://${host}/docs/guide`];
+    const sitemapUrls = [`http://${host}/docs/guide/`];
+    // Same page on both sides; only the trailing slash differs.
+    const ctx = makeCtx(host, llmsUrls, '/docs');
+
+    server.use(
+      http.get(
+        `http://${host}/robots.txt`,
+        () => new HttpResponse(`Sitemap: http://${host}/sitemap.xml`, { status: 200 }),
+      ),
+      http.get(
+        `http://${host}/sitemap.xml`,
+        () =>
+          new HttpResponse(makeSitemap(sitemapUrls), {
+            status: 200,
+            headers: { 'content-type': 'application/xml' },
+          }),
+      ),
+    );
+
+    const result = await check.run(ctx);
+    expect(result.status).toBe('pass');
+    expect(result.details?.coverageRate).toBe(100);
+  });
+
+  test('warns when coverage is between 80% and 95%', async () => {
+    const host = 'fresh-warn.local';
+    // llms.txt has 9 of 10 pages (90% coverage)
+    const allPages = Array.from({ length: 10 }, (_, i) => `http://${host}/docs/page-${i}`);
+    const llmsPages = allPages.slice(0, 9);
+
+    const ctx = makeCtx(host, llmsPages, '/docs');
+
+    server.use(
+      http.get(
+        `http://${host}/robots.txt`,
+        () => new HttpResponse(`Sitemap: http://${host}/sitemap.xml`, { status: 200 }),
+      ),
+      http.get(
+        `http://${host}/sitemap.xml`,
+        () =>
+          new HttpResponse(makeSitemap(allPages), {
+            status: 200,
+            headers: { 'content-type': 'application/xml' },
+          }),
+      ),
+    );
+
+    const result = await check.run(ctx);
+    expect(result.status).toBe('warn');
+    expect(result.details?.coverageRate).toBe(90);
+    expect(result.details?.missingCount).toBe(1);
+  });
+
+  test('fails when coverage is below 80%', async () => {
+    const host = 'fresh-fail.local';
+    // llms.txt has 5 of 10 pages (50% coverage)
+    const allPages = Array.from({ length: 10 }, (_, i) => `http://${host}/docs/page-${i}`);
+    const llmsPages = allPages.slice(0, 5);
+
+    const ctx = makeCtx(host, llmsPages, '/docs');
+
+    server.use(
+      http.get(
+        `http://${host}/robots.txt`,
+        () => new HttpResponse(`Sitemap: http://${host}/sitemap.xml`, { status: 200 }),
+      ),
+      http.get(
+        `http://${host}/sitemap.xml`,
+        () =>
+          new HttpResponse(makeSitemap(allPages), {
+            status: 200,
+            headers: { 'content-type': 'application/xml' },
+          }),
+      ),
+    );
+
+    const result = await check.run(ctx);
+    expect(result.status).toBe('fail');
+    expect(result.details?.coverageRate).toBe(50);
+    expect(result.details?.missingCount).toBe(5);
+  });
+
+  test('reports unmatched llms.txt links not in sitemap', async () => {
+    const host = 'fresh-unmatched.local';
+    const sitemapPages = Array.from({ length: 10 }, (_, i) => `http://${host}/docs/page-${i}`);
+    // llms.txt has all sitemap pages plus 3 extras not in sitemap
+    const llmsPages = [
+      ...sitemapPages,
+      `http://${host}/docs/extra-page-a`,
+      `http://${host}/docs/extra-page-b`,
+      `http://${host}/docs/extra-page-c`,
+    ];
+
+    const ctx = makeCtx(host, llmsPages, '/docs');
+
+    server.use(
+      http.get(
+        `http://${host}/robots.txt`,
+        () => new HttpResponse(`Sitemap: http://${host}/sitemap.xml`, { status: 200 }),
+      ),
+      http.get(
+        `http://${host}/sitemap.xml`,
+        () =>
+          new HttpResponse(makeSitemap(sitemapPages), {
+            status: 200,
+            headers: { 'content-type': 'application/xml' },
+          }),
+      ),
+    );
+
+    const result = await check.run(ctx);
+    // Unmatched links are informational; coverage is 100% so status is pass
+    expect(result.status).toBe('pass');
+    expect(result.details?.unmatchedCount).toBe(3);
+    expect(result.details?.coverageRate).toBe(100);
+  });
+
+  test('unmatched links do not affect overall status', async () => {
+    const host = 'fresh-unmatched-pass.local';
+    // Coverage is fine (100%) but many unmatched llms.txt links
+    const sitemapPages = Array.from({ length: 5 }, (_, i) => `http://${host}/docs/page-${i}`);
+    const llmsPages = [
+      ...sitemapPages,
+      `http://${host}/docs/unmatched-a`,
+      `http://${host}/docs/unmatched-b`,
+      `http://${host}/docs/unmatched-c`,
+      `http://${host}/docs/unmatched-d`,
+    ];
+
+    const ctx = makeCtx(host, llmsPages, '/docs');
+
+    server.use(
+      http.get(
+        `http://${host}/robots.txt`,
+        () => new HttpResponse(`Sitemap: http://${host}/sitemap.xml`, { status: 200 }),
+      ),
+      http.get(
+        `http://${host}/sitemap.xml`,
+        () =>
+          new HttpResponse(makeSitemap(sitemapPages), {
+            status: 200,
+            headers: { 'content-type': 'application/xml' },
+          }),
+      ),
+    );
+
+    const result = await check.run(ctx);
+    // Status based on coverage only (pass), unmatched is informational
+    expect(result.status).toBe('pass');
+    expect(result.details?.unmatchedCount).toBe(4);
+    expect(result.message).toContain('not in sitemap');
+  });
+
+  test('skips when no sitemap is available', async () => {
+    const host = 'fresh-no-sitemap.local';
+    const ctx = makeCtx(host, [`http://${host}/docs/page`], '/docs');
+    // robots.txt, the root sitemap, and the docs-specific sitemap all return 404.
+    server.use(
+      http.get(`http://${host}/robots.txt`, () => new HttpResponse('', { status: 404 })),
+      http.get(`http://${host}/sitemap.xml`, () => new HttpResponse('', { status: 404 })),
+      http.get(`http://${host}/docs/sitemap.xml`, () => new HttpResponse('', { status: 404 })),
+    );
+
+    const result = await check.run(ctx);
+    expect(result.status).toBe('skip');
+    expect(result.message).toContain('No sitemap found');
+  });
+
+  test('skips when no page URLs in llms.txt', async () => {
+    const host = 'fresh-no-pages.local';
+    const ctx = createContext(`http://${host}/docs`, { requestDelay: 0 });
+    ctx.previousResults.set('llms-txt-exists', {
+      id: 'llms-txt-exists',
+      category: 'llms-txt',
+      status: 'pass',
+      message: 'Found',
+      details: {
+        discoveredFiles: [
+          {
+            url: `http://${host}/llms.txt`,
+            content: '# Docs\n\nNo links here.',
+            status: 200,
+            redirected: false,
+          },
+        ],
+      },
+    });
+
+    const result = await check.run(ctx);
+    expect(result.status).toBe('skip');
+    expect(result.message).toContain('No page URLs found');
+  });
+
+  test('scopes sitemap URLs to baseUrl path prefix', async () => {
+    const host = 'fresh-scope.local';
+    const docPages = [`http://${host}/docs/guide`, `http://${host}/docs/api`];
+    const allSitemapPages = [
+      ...docPages,
+      `http://${host}/marketing/page`,
+      `http://${host}/about`,
+      `http://${host}/pricing`,
+    ];
+
+    const ctx = makeCtx(host, docPages, '/docs');
+
+    server.use(
+      http.get(
+        `http://${host}/robots.txt`,
+        () => new HttpResponse(`Sitemap: http://${host}/sitemap.xml`, { status: 200 }),
+      ),
+      http.get(
+        `http://${host}/sitemap.xml`,
+        () =>
+          new HttpResponse(makeSitemap(allSitemapPages), {
+            status: 200,
+            headers: { 'content-type': 'application/xml' },
+          }),
+      ),
+    );
+
+    const result = await check.run(ctx);
+    expect(result.status).toBe('pass');
+    // Only 2 doc pages in scope, both covered
+    expect(result.details?.sitemapDocPages).toBe(2);
+    expect(result.details?.coverageRate).toBe(100);
+  });
+
+  test('excludes blog/changelog/pricing paths from sitemap comparison', async () => {
+    const host = 'fresh-exclude.local';
+    const docPages = [`http://${host}/guide`];
+    const sitemapPages = [
+      `http://${host}/guide`,
+      `http://${host}/blog/post-1`,
+      `http://${host}/changelog/v2`,
+      `http://${host}/pricing`,
+      `http://${host}/careers/engineer`,
+    ];
+
+    // baseUrl is root, so all paths are in scope
+    const ctx = makeCtx(host, docPages, '');
+
+    server.use(
+      http.get(
+        `http://${host}/robots.txt`,
+        () => new HttpResponse(`Sitemap: http://${host}/sitemap.xml`, { status: 200 }),
+      ),
+      http.get(
+        `http://${host}/sitemap.xml`,
+        () =>
+          new HttpResponse(makeSitemap(sitemapPages), {
+            status: 200,
+            headers: { 'content-type': 'application/xml' },
+          }),
+      ),
+    );
+
+    const result = await check.run(ctx);
+    // Only /guide should be in the doc pages set (others excluded)
+    expect(result.details?.sitemapDocPages).toBe(1);
+    expect(result.details?.excludedNonDocPages).toBe(4);
+    expect(result.status).toBe('pass');
+  });
+
+  test('handles index.md normalization', async () => {
+    const host = 'fresh-index.local';
+    const llmsUrls = [`http://${host}/docs/guide/index.md`];
+    const sitemapUrls = [`http://${host}/docs/guide/`];
+    // llms.txt links to the index.md file; the sitemap lists the directory URL.
+    const ctx = makeCtx(host, llmsUrls, '/docs');
+
+    server.use(
+      http.get(
+        `http://${host}/robots.txt`,
+        () => new HttpResponse(`Sitemap: http://${host}/sitemap.xml`, { status: 200 }),
+      ),
+      http.get(
+        `http://${host}/sitemap.xml`,
+        () =>
+          new HttpResponse(makeSitemap(sitemapUrls), {
+            status: 200,
+            headers: { 'content-type': 'application/xml' },
+          }),
+      ),
+    );
+
+    const result = await check.run(ctx);
+    expect(result.status).toBe('pass');
+    expect(result.details?.coverageRate).toBe(100);
+  });
+
+  test('skips when sitemap has no URLs under docs path prefix', async () => {
+    const host = 'fresh-no-scope.local';
+    const ctx = makeCtx(host, [`http://${host}/docs/page`], '/docs');
+    const sitemapPages = [`http://${host}/marketing/page1`, `http://${host}/marketing/page2`];
+    // Main sitemap is entirely out of scope and the docs-specific fallback returns 404.
+    server.use(
+      http.get(
+        `http://${host}/robots.txt`,
+        () => new HttpResponse(`Sitemap: http://${host}/sitemap.xml`, { status: 200 }),
+      ),
+      http.get(
+        `http://${host}/sitemap.xml`,
+        () =>
+          new HttpResponse(makeSitemap(sitemapPages), {
+            status: 200,
+            headers: { 'content-type': 'application/xml' },
+          }),
+      ),
+      http.get(`http://${host}/docs/sitemap.xml`, () => new HttpResponse('', { status: 404 })),
+    );
+
+    const result = await check.run(ctx);
+    expect(result.status).toBe('skip');
+    expect(result.message).toContain('none are under the docs path prefix');
+  });
+
+  test('does not count cross-origin llms.txt URLs as unmatched', async () => {
+    const host = 'fresh-cross.local';
+    const sitemapPages = [`http://${host}/docs/page`];
+    // llms.txt links to a page on a different host — should not be flagged
+    const llmsPages = [`http://${host}/docs/page`, `http://other-host.local/docs/external`];
+
+    const ctx = makeCtx(host, llmsPages, '/docs');
+
+    server.use(
+      http.get(
+        `http://${host}/robots.txt`,
+        () => new HttpResponse(`Sitemap: http://${host}/sitemap.xml`, { status: 200 }),
+      ),
+      http.get(
+        `http://${host}/sitemap.xml`,
+        () =>
+          new HttpResponse(makeSitemap(sitemapPages), {
+            status: 200,
+            headers: { 'content-type': 'application/xml' },
+          }),
+      ),
+    );
+
+    const result = await check.run(ctx);
+    expect(result.status).toBe('pass');
+    expect(result.details?.unmatchedCount).toBe(0);
+  });
+
+  test('falls back to docs-specific sitemap when main sitemap has no docs URLs', async () => {
+    const host = 'fresh-docs-sitemap.local';
+    const docPages = [`http://${host}/docs/guide`, `http://${host}/docs/api`];
+    const marketingPages = [`http://${host}/about`, `http://${host}/pricing`];
+
+    const ctx = makeCtx(host, docPages, '/docs');
+
+    server.use(
+      // Main sitemap has only marketing pages
+      http.get(
+        `http://${host}/robots.txt`,
+        () => new HttpResponse(`Sitemap: http://${host}/sitemap.xml`, { status: 200 }),
+      ),
+      http.get(
+        `http://${host}/sitemap.xml`,
+        () =>
+          new HttpResponse(makeSitemap(marketingPages), {
+            status: 200,
+            headers: { 'content-type': 'application/xml' },
+          }),
+      ),
+      // Docs-specific sitemap has the doc pages
+      http.get(
+        `http://${host}/docs/sitemap.xml`,
+        () =>
+          new HttpResponse(makeSitemap(docPages), {
+            status: 200,
+            headers: { 'content-type': 'application/xml' },
+          }),
+      ),
+    );
+
+    const result = await check.run(ctx);
+    expect(result.status).toBe('pass');
+    expect(result.details?.sitemapDocPages).toBe(2);
+    expect(result.details?.sitemapSource).toBe('/docs/sitemap.xml');
+    expect(result.details?.coverageRate).toBe(100);
+  });
+
+  test('follows docs-specific sitemap index one level deep', async () => {
+    const host = 'fresh-docs-index.local';
+    const docPages = [
+      `http://${host}/docs/guide`,
+      `http://${host}/docs/api`,
+      `http://${host}/docs/reference`,
+    ];
+
+    const ctx = makeCtx(host, docPages, '/docs');
+
+    server.use(
+      // No main sitemap
+      http.get(`http://${host}/robots.txt`, () => new HttpResponse('', { status: 404 })),
+      http.get(`http://${host}/sitemap.xml`, () => new HttpResponse('', { status: 404 })),
+      // Docs sitemap is an index
+      http.get(
+        `http://${host}/docs/sitemap.xml`,
+        () =>
+          new HttpResponse(makeSitemapIndex([`http://${host}/docs/sitemap-pages.xml`]), {
+            status: 200,
+            headers: { 'content-type': 'application/xml' },
+          }),
+      ),
+      http.get(
+        `http://${host}/docs/sitemap-pages.xml`,
+        () =>
+          new HttpResponse(makeSitemap(docPages), {
+            status: 200,
+            headers: { 'content-type': 'application/xml' },
+          }),
+      ),
+    );
+
+    const result = await check.run(ctx);
+    expect(result.status).toBe('pass');
+    expect(result.details?.sitemapDocPages).toBe(3);
+    expect(result.details?.sitemapSource).toBe('/docs/sitemap.xml');
+  });
+
+  test('filters sitemap to matching locale when locale pattern detected', async () => {
+    const host = 'locale-filter.local';
+    // llms.txt only covers English pages
+    const llmsPages = [
+      `http://${host}/docs/en/getting-started`,
+      `http://${host}/docs/en/api-reference`,
+      `http://${host}/docs/en/guides`,
+    ];
+
+    // Sitemap has 3 English + 3 German + 3 French = 9 pages
+    const sitemapPages = [
+      ...llmsPages,
+      `http://${host}/docs/de/getting-started`,
+      `http://${host}/docs/de/api-reference`,
+      `http://${host}/docs/de/guides`,
+      `http://${host}/docs/fr/getting-started`,
+      `http://${host}/docs/fr/api-reference`,
+      `http://${host}/docs/fr/guides`,
+    ];
+
+    const ctx = makeCtx(host, llmsPages, '/docs');
+
+    server.use(
+      http.get(
+        `http://${host}/robots.txt`,
+        () => new HttpResponse(`Sitemap: http://${host}/sitemap.xml`, { status: 200 }),
+      ),
+      http.get(
+        `http://${host}/sitemap.xml`,
+        () =>
+          new HttpResponse(makeSitemap(sitemapPages), {
+            headers: { 'content-type': 'application/xml' },
+          }),
+      ),
+    );
+
+    const result = await check.run(ctx);
+    expect(result.status).toBe('pass');
+    expect(result.details?.localeFiltered).toBe(true);
+    expect(result.details?.detectedLocale).toBe('en');
+    // After locale filtering, only the 3 English pages remain
+    expect(result.details?.sitemapDocPages).toBe(3);
+  });
+
+  test('uses effectiveOrigin for sitemap discovery and scoping', async () => {
+    const oldHost = 'old-host.local';
+    const newHost = 'new-host.local';
+    const pages = [
+      `http://${newHost}/docs/getting-started`,
+      `http://${newHost}/docs/api-reference`,
+    ];
+
+    const ctx = makeCtx(oldHost, pages, '/docs');
+    // Simulate llms-txt-exists having detected a cross-host redirect
+    ctx.effectiveOrigin = `http://${newHost}`;
+
+    server.use(
+      // robots.txt at old host fails
+      http.get(`http://${oldHost}/robots.txt`, () => new HttpResponse(null, { status: 404 })),
+      // robots.txt at new host works
+      http.get(
+        `http://${newHost}/robots.txt`,
+        () => new HttpResponse(`Sitemap: http://${newHost}/sitemap.xml`, { status: 200 }),
+      ),
+      http.get(
+        `http://${newHost}/sitemap.xml`,
+        () =>
+          new HttpResponse(makeSitemap(pages), {
+            headers: { 'content-type': 'application/xml' },
+          }),
+      ),
+    );
+
+    const result = await check.run(ctx);
+    expect(result.status).toBe('pass');
+    expect(result.details?.sitemapDocPages).toBe(2);
+  });
+
+  test('excludes paths relative to base URL prefix', async () => {
+    const host = 'basepath-exclude.local';
+    const pages = [`http://${host}/docs/getting-started`, `http://${host}/docs/api-reference`];
+
+    // Sitemap includes /docs/changelog pages that should be excluded
+    const sitemapPages = [
+      ...pages,
+      `http://${host}/docs/changelog/2024-01-01`,
+      `http://${host}/docs/changelog/2024-02-01`,
+      `http://${host}/docs/blog/post-1`,
+    ];
+
+    const ctx = makeCtx(host, pages, '/docs');
+
+    server.use(
+      http.get(
+        `http://${host}/robots.txt`,
+        () => new HttpResponse(`Sitemap: http://${host}/sitemap.xml`, { status: 200 }),
+      ),
+      http.get(
+        `http://${host}/sitemap.xml`,
+        () =>
+          new HttpResponse(makeSitemap(sitemapPages), {
+            headers: { 'content-type': 'application/xml' },
+          }),
+      ),
+    );
+
+    const result = await check.run(ctx);
+    expect(result.status).toBe('pass');
+    // Only 2 doc pages remain after excluding /docs/changelog and /docs/blog
+    expect(result.details?.sitemapDocPages).toBe(2);
+    expect(result.details?.excludedNonDocPages).toBe(3);
+  });
+});

From 07924ec749cedc550eee65f296031cee72b6c7bf Mon Sep 17 00:00:00 2001
From: dacharyc <dc@dacharycarey.com>
Date: Sun, 15 Mar 2026 20:54:25 -0400
Subject: [PATCH 6/7] Add 'auth-alternative-access' check

---
 README.md                                     |  12 +-
 .../authentication/auth-alternative-access.ts | 149 +++++-
 .../authentication/auth-gate-detection.ts     |  13 +-
 test/integration/check-pipeline.test.ts       | 443 ++++++++++++++++++
 .../checks/auth-alternative-access.test.ts    | 278 +++++++++++
 test/unit/checks/auth-gate-detection.test.ts  |  36 ++
 test/unit/runner.test.ts                      |   4 +-
 7 files changed, 918 insertions(+), 17 deletions(-)
 create mode 100644 test/unit/checks/auth-alternative-access.test.ts

diff --git a/README.md b/README.md
index a803181..2cde6af 100644
--- a/README.md
+++ b/README.md
@@ -7,7 +7,7 @@ Test your documentation site against the [Agent-Friendly Documentation Spec](htt
 
 Agents don't use docs like humans. They hit truncation limits, get walls of CSS instead of content, can't follow cross-host redirects, and don't know about quality-of-life improvements like `llms.txt` or `.md` docs pages that would make life swell. Maybe this is because the industry has lacked guidance - until now.
 
-afdocs runs 22 checks across 8 categories to evaluate how well your docs serve agent consumers. 21 are fully implemented; the rest return `skip` until completed.
+afdocs runs 22 checks across 8 categories to evaluate how well your docs serve agent consumers.
 
 > **Status: Early development (0.x)**
 > This project is under active development. Check IDs, CLI flags, and output formats may change between minor versions. Feel free to try it out, but don't build automation against specific output until 1.0.
@@ -166,7 +166,7 @@ describe('agent-friendliness', () => {
 
 ## Checks
 
-22 checks across 8 categories. Checks marked with \* are not yet implemented and return `skip`.
+22 checks across 8 categories.
 
 ### Category 1: llms.txt
 
@@ -225,10 +225,10 @@ describe('agent-friendliness', () => {
 
 ### Category 8: Authentication and Access
 
-| Check                        | Description                                                          |
-| ---------------------------- | -------------------------------------------------------------------- |
-| `auth-gate-detection`        | Whether documentation pages require authentication to access content |
-| `auth-alternative-access` \* | Whether auth-gated sites provide alternative access paths for agents |
+| Check                     | Description                                                          |
+| ------------------------- | -------------------------------------------------------------------- |
+| `auth-gate-detection`     | Whether documentation pages require authentication to access content |
+| `auth-alternative-access` | Whether auth-gated sites provide alternative access paths for agents |
 
 ## Check dependencies
 
diff --git a/src/checks/authentication/auth-alternative-access.ts b/src/checks/authentication/auth-alternative-access.ts
index 2eb08b6..d5353b9 100644
--- a/src/checks/authentication/auth-alternative-access.ts
+++ b/src/checks/authentication/auth-alternative-access.ts
@@ -1,12 +1,149 @@
 import { registerCheck } from '../registry.js';
 import type { CheckContext, CheckResult } from '../../types.js';
 
-async function check(_ctx: CheckContext): Promise<CheckResult> {
+interface AuthGateDetails {
+  accessible?: number;
+  authRequired?: number;
+  softAuthGate?: number;
+  authRedirect?: number;
+  testedPages?: number;
+  pageResults?: Array<{
+    url: string;
+    classification: string;
+  }>;
+}
+
+interface DetectedPath {
+  type: string;
+  description: string;
+}
+
+/**
+ * Finds alternative ways for agents to reach docs once auth-gate-detection reports gated pages.
+ * Skips unless that check returned warn/fail; other signals come from ctx.previousResults.
+ */
+async function check(ctx: CheckContext): Promise<CheckResult> {
+  const id = 'auth-alternative-access';
+  const category = 'authentication';
+
+  // Read auth-gate-detection result; skip if it didn't run or docs are all public
+  const authResult = ctx.previousResults.get('auth-gate-detection');
+  if (!authResult) {
+    return {
+      id,
+      category,
+      status: 'skip',
+      message: 'auth-gate-detection did not run',
+    };
+  }
+
+  if (authResult.status === 'pass') {
+    return {
+      id,
+      category,
+      status: 'skip',
+      message: 'All docs pages are publicly accessible; no alternative access paths needed',
+    };
+  }
+
+  if (authResult.status === 'skip' || authResult.status === 'error') {
+    return {
+      id,
+      category,
+      status: 'skip',
+      message: `auth-gate-detection ${authResult.status === 'error' ? 'errored' : 'was skipped'}; cannot assess alternative access`,
+    };
+  }
+
+  // Auth-gate-detection returned warn or fail — look for alternative access paths
+  const authDetails = (authResult.details ?? {}) as AuthGateDetails;
+  const gatedCount =
+    (authDetails.authRequired ?? 0) +
+    (authDetails.softAuthGate ?? 0) +
+    (authDetails.authRedirect ?? 0);
+  const accessibleCount = authDetails.accessible ?? 0;
+  // Clamp up to the classified total so a missing/zero testedPages cannot yield an Infinity%.
+  const testedCount = Math.max(authDetails.testedPages ?? 0, accessibleCount + gatedCount);
+
+  const detectedPaths: DetectedPath[] = [];
+
+  // 1. Check for public llms.txt
+  const llmsResult = ctx.previousResults.get('llms-txt-exists');
+  if (llmsResult?.status === 'pass' || llmsResult?.status === 'warn') {
+    detectedPaths.push({
+      type: 'public-llms-txt',
+      description:
+        'Site serves a public llms.txt file, giving agents a navigational index even though docs pages are gated',
+    });
+  }
+
+  // 2. Check for publicly accessible markdown
+  const mdUrlResult = ctx.previousResults.get('markdown-url-support');
+  const cnResult = ctx.previousResults.get('content-negotiation');
+  if (mdUrlResult?.status === 'pass' || mdUrlResult?.status === 'warn') {
+    detectedPaths.push({
+      type: 'public-markdown',
+      description:
+        'Some pages serve markdown via .md URLs, providing agent-readable content without authentication',
+    });
+  } else if (cnResult?.status === 'pass' || cnResult?.status === 'warn') {
+    detectedPaths.push({
+      type: 'public-markdown',
+      description:
+        'Some pages serve markdown via content negotiation, providing agent-readable content without authentication',
+    });
+  }
+
+  // 3. Check for partially accessible pages (from auth-gate-detection itself)
+  if (accessibleCount > 0 && gatedCount > 0) {
+    const pct = Math.round((accessibleCount / testedCount) * 100);
+    detectedPaths.push({
+      type: 'partial-public-access',
+      description: `${accessibleCount} of ${testedCount} tested pages (${pct}%) are publicly accessible without authentication`,
+    });
+  }
+
+  // Determine status
+  const manualOnlyNote =
+    'Some access paths cannot be detected automatically: bundled SDK docs, CLI doc commands, and MCP servers';
+
+  let status: 'pass' | 'warn' | 'fail';
+  let message: string;
+
+  if (detectedPaths.length === 0) {
+    status = 'fail';
+    message = `No alternative access paths detected for ${gatedCount} auth-gated pages. ${manualOnlyNote}`;
+  } else {
+    // Pass when agents can reach real content: public markdown, or at least half of tested pages.
+    // Warn when only partial paths were found (llms.txt alone is just an index, not content).
+    const hasContentPath = detectedPaths.some((p) => p.type === 'public-markdown');
+    const hasHighAccessibility = testedCount > 0 && accessibleCount / testedCount >= 0.5;
+
+    status = hasContentPath || hasHighAccessibility ? 'pass' : 'warn';
+
+    const pathSummary = detectedPaths.map((p) => p.type).join(', ');
+    message =
+      status === 'pass'
+        ? `Alternative access detected (${pathSummary}) for site with ${gatedCount} auth-gated pages`
+        : `Partial alternative access detected (${pathSummary}) for site with ${gatedCount} auth-gated pages. ${manualOnlyNote}`;
+  }
+
   return {
-    id: 'auth-alternative-access',
-    category: 'authentication',
-    status: 'skip',
-    message: 'Not yet implemented',
+    id,
+    category,
+    status,
+    message,
+    details: {
+      gatedPages: gatedCount,
+      accessiblePages: accessibleCount,
+      testedPages: testedCount,
+      detectedPaths,
+      manualVerificationNeeded: [
+        'Bundled documentation (docs shipped in package/SDK)',
+        'CLI-based doc access (e.g. `yourproduct docs search "topic"`)',
+        'MCP server providing doc access through tool calls',
+      ],
+    },
   };
 }
 
@@ -15,6 +152,6 @@ registerCheck({
   category: 'authentication',
   description:
     'Whether an auth-gated documentation site provides alternative access paths for agents',
-  dependsOn: [['auth-gate-detection']],
+  dependsOn: [],
   run: check,
 });
diff --git a/src/checks/authentication/auth-gate-detection.ts b/src/checks/authentication/auth-gate-detection.ts
index 726451d..b36cbe1 100644
--- a/src/checks/authentication/auth-gate-detection.ts
+++ b/src/checks/authentication/auth-gate-detection.ts
@@ -45,11 +45,18 @@ function detectLoginForm(body: string): string | undefined {
     return 'Contains password input field';
   }
 
-  // Check page title for login indicators
+  // Check page title for login indicators.
+  // Only match titles that suggest the page IS a login form, not pages that
+  // mention login as a topic (e.g. "unable to login" in a knowledge base article).
+  // We require the login keyword to appear at the start or after a separator.
   const titleMatch = /<title[^>]*>(.*?)<\/title>/i.exec(sample);
   if (titleMatch) {
-    const title = titleMatch[1].toLowerCase();
-    if (/sign\s*in|log\s*in|authenticate/i.test(title)) {
+    const title = titleMatch[1].toLowerCase().trim();
+    if (
+      /^(sign\s*in|log\s*in)\b/.test(title) ||
+      /[|\-–—:]\s*(sign\s*in|log\s*in)\s*$/i.test(title) ||
+      /^authenticate\b/.test(title)
+    ) {
       return `Page title suggests login: "${titleMatch[1].trim()}"`;
     }
   }
diff --git a/test/integration/check-pipeline.test.ts b/test/integration/check-pipeline.test.ts
index 506557d..efe891b 100644
--- a/test/integration/check-pipeline.test.ts
+++ b/test/integration/check-pipeline.test.ts
@@ -630,3 +630,446 @@ describe('check pipeline: independent checks share sampling', () => {
     expect(authPages).toEqual(cachePageUrls);
   });
 });
+
+describe('check pipeline: auth-gate-detection → auth-alternative-access', () => {
+  it('auth-alternative-access detects public llms.txt when docs are gated', async () => {
+    // Site where llms.txt is public but doc pages return 401
+    server.use(
+      http.get('http://pipe-auth-llms.local/llms.txt', () =>
+        HttpResponse.text(
+          '# Docs\n## Links\n- [Page](http://pipe-auth-llms.local/docs/page): A page\n',
+        ),
+      ),
+      http.get(
+        'http://pipe-auth-llms.local/docs/llms.txt',
+        () => new HttpResponse(null, { status: 404 }),
+      ),
+      http.get(
+        'http://pipe-auth-llms.local/docs/page',
+        () => new HttpResponse('Unauthorized', { status: 401 }),
+      ),
+      http.get(
+        'http://pipe-auth-llms.local/docs/page.md',
+        () => new HttpResponse(null, { status: 404 }),
+      ),
+      http.get(
+        'http://pipe-auth-llms.local/docs/page-afdocs-nonexistent-8f3a',
+        () => new HttpResponse('Not Found', { status: 404 }),
+      ),
+    );
+
+    const report = await runChecks('http://pipe-auth-llms.local', {
+      checkIds: ['llms-txt-exists', 'auth-gate-detection', 'auth-alternative-access'],
+      requestDelay: 0,
+    });
+
+    const llmsResult = report.results.find((r) => r.id === 'llms-txt-exists')!;
+    const authResult = report.results.find((r) => r.id === 'auth-gate-detection')!;
+    const altResult = report.results.find((r) => r.id === 'auth-alternative-access')!;
+
+    expect(llmsResult.status).toBe('pass');
+    expect(authResult.status).toBe('fail');
+    // auth-alternative-access should detect the public llms.txt
+    expect(altResult.status).toBe('warn'); // llms.txt alone is partial
+    const paths = altResult.details?.detectedPaths as Array<{ type: string }>;
+    expect(paths.some((p) => p.type === 'public-llms-txt')).toBe(true);
+  });
+
+  it('auth-alternative-access skips when all pages are public', async () => {
+    setupSite('pipe-auth-public.local', {
+      llmsTxt: '# Docs\n## Links\n- [Page](http://pipe-auth-public.local/docs/page): A page\n',
+      pages: [{ path: '/docs/page' }],
+    });
+
+    const report = await runChecks('http://pipe-auth-public.local', {
+      checkIds: ['llms-txt-exists', 'auth-gate-detection', 'auth-alternative-access'],
+      requestDelay: 0,
+    });
+
+    const authResult = report.results.find((r) => r.id === 'auth-gate-detection')!;
+    const altResult = report.results.find((r) => r.id === 'auth-alternative-access')!;
+
+    expect(authResult.status).toBe('pass');
+    expect(altResult.status).toBe('skip');
+    expect(altResult.message).toContain('publicly accessible');
+  });
+
+  it('auth-alternative-access passes when gated site has public markdown', async () => {
+    // Site where some pages require auth but markdown is available
+    server.use(
+      http.get('http://pipe-auth-md.local/llms.txt', () =>
+        HttpResponse.text(
+          '# Docs\n## Links\n- [Public](http://pipe-auth-md.local/docs/public): Public\n- [Private](http://pipe-auth-md.local/docs/private): Private\n',
+        ),
+      ),
+      http.get(
+        'http://pipe-auth-md.local/docs/llms.txt',
+        () => new HttpResponse(null, { status: 404 }),
+      ),
+      http.get(
+        'http://pipe-auth-md.local/docs/public',
+        () =>
+          new HttpResponse('<html><body><h1>Public Page</h1><p>Content here.</p></body></html>', {
+            status: 200,
+            headers: { 'Content-Type': 'text/html' },
+          }),
+      ),
+      http.get(
+        'http://pipe-auth-md.local/docs/public.md',
+        () =>
+          new HttpResponse('# Public Page\n\nContent here.\n', {
+            status: 200,
+            headers: { 'Content-Type': 'text/markdown' },
+          }),
+      ),
+      http.get(
+        'http://pipe-auth-md.local/docs/private',
+        () => new HttpResponse('Forbidden', { status: 403 }),
+      ),
+      http.get(
+        'http://pipe-auth-md.local/docs/private.md',
+        () => new HttpResponse(null, { status: 403 }),
+      ),
+      http.get(
+        'http://pipe-auth-md.local/docs/public-afdocs-nonexistent-8f3a',
+        () => new HttpResponse('Not Found', { status: 404 }),
+      ),
+      http.get(
+        'http://pipe-auth-md.local/docs/private-afdocs-nonexistent-8f3a',
+        () => new HttpResponse('Not Found', { status: 404 }),
+      ),
+    );
+
+    const report = await runChecks('http://pipe-auth-md.local', {
+      checkIds: [
+        'llms-txt-exists',
+        'markdown-url-support',
+        'auth-gate-detection',
+        'auth-alternative-access',
+      ],
+      requestDelay: 0,
+    });
+
+    const authResult = report.results.find((r) => r.id === 'auth-gate-detection')!;
+    const mdResult = report.results.find((r) => r.id === 'markdown-url-support')!;
+    const altResult = report.results.find((r) => r.id === 'auth-alternative-access')!;
+
+    expect(authResult.status).toBe('warn'); // mixed: some accessible, some gated
+    expect(mdResult.status).not.toBe('skip');
+    expect(altResult.status).toBe('pass');
+    const paths = altResult.details?.detectedPaths as Array<{ type: string }>;
+    expect(paths.some((p) => p.type === 'public-llms-txt')).toBe(true);
+    expect(paths.some((p) => p.type === 'public-markdown')).toBe(true);
+    expect(paths.some((p) => p.type === 'partial-public-access')).toBe(true);
+  });
+});
+
+describe('check pipeline: rendering-strategy → tabbed-content-serialization', () => {
+  it('tabbed-content-serialization uses SPA shell detection to try markdown fallback', async () => {
+    // Page is an SPA shell (minimal HTML with React root), but has tabs in markdown
+    const spaHtml =
+      '<html><head><title>Docs</title></head><body><div id="root"></div><script src="/app.js"></script></body></html>';
+    const mdContent =
+      '# Guide\n\n:::tabs\n::tab{label="Python"}\n```python\nprint("hi")\n```\n::tab{label="JS"}\n```js\nconsole.log("hi");\n```\n:::';
+
+    server.use(
+      http.get('http://pipe-spa-tab.local/llms.txt', () =>
+        HttpResponse.text(
+          '# Docs\n## Links\n- [Guide](http://pipe-spa-tab.local/docs/guide): Guide\n',
+        ),
+      ),
+      http.get(
+        'http://pipe-spa-tab.local/docs/llms.txt',
+        () => new HttpResponse(null, { status: 404 }),
+      ),
+      http.get('http://pipe-spa-tab.local/docs/guide', ({ request }) => {
+        const accept = request.headers.get('accept') ?? '';
+        if (accept.includes('text/markdown')) {
+          return new HttpResponse(mdContent, {
+            status: 200,
+            headers: { 'Content-Type': 'text/markdown' },
+          });
+        }
+        return new HttpResponse(spaHtml, {
+          status: 200,
+          headers: { 'Content-Type': 'text/html' },
+        });
+      }),
+      http.get(
+        'http://pipe-spa-tab.local/docs/guide.md',
+        () => new HttpResponse(null, { status: 404 }),
+      ),
+      http.get(
+        'http://pipe-spa-tab.local/docs/guide-afdocs-nonexistent-8f3a',
+        () => new HttpResponse('Not Found', { status: 404 }),
+      ),
+    );
+
+    const report = await runChecks('http://pipe-spa-tab.local', {
+      checkIds: ['llms-txt-exists', 'rendering-strategy', 'tabbed-content-serialization'],
+      requestDelay: 0,
+    });
+
+    const renderResult = report.results.find((r) => r.id === 'rendering-strategy')!;
+    const tabResult = report.results.find((r) => r.id === 'tabbed-content-serialization')!;
+
+    // rendering-strategy should flag the page as SPA shell
+    expect(renderResult.status).toBe('fail');
+    const renderPages = renderResult.details?.pageResults as Array<{ url: string; status: string }>;
+    expect(renderPages.some((p) => p.status === 'fail')).toBe(true);
+
+    // tabbed-content-serialization should have used the md-fallback path
+    // since the page was detected as an SPA shell by rendering-strategy
+    const tabbedPages = tabResult.details?.tabbedPages as Array<{ url: string; source: string }>;
+    expect(tabbedPages).toBeDefined();
+    // If tabs were detected via the fallback, source should be 'md-fallback'
+    // If no tabs found in md either, source stays 'html' — either way it should not error
+    expect(tabResult.status).not.toBe('error');
+  });
+});
+
+describe('check pipeline: tabbed-content-serialization → section-header-quality', () => {
+  it('section-header-quality reads tabbed page data from tabbed-content-serialization', async () => {
+    // Page with Sphinx-style tabs containing headers in each panel
+    const tabbedHtml = `<html><body>
+      <h1>Installation</h1>
+      <div class="sphinx-tabs">
+        <div class="sphinx-tabs-tab">Python</div>
+        <div class="sphinx-tabs-panel"><h2>Installation</h2><pre>pip install sdk</pre></div>
+        <div class="sphinx-tabs-tab">JavaScript</div>
+        <div class="sphinx-tabs-panel"><h2>Installation</h2><pre>npm install sdk</pre></div>
+      </div>
+    </body></html>`;
+
+    server.use(
+      http.get('http://pipe-tab-hdr.local/llms.txt', () =>
+        HttpResponse.text(
+          '# Docs\n## Links\n- [Install](http://pipe-tab-hdr.local/docs/install): Installation\n',
+        ),
+      ),
+      http.get(
+        'http://pipe-tab-hdr.local/docs/llms.txt',
+        () => new HttpResponse(null, { status: 404 }),
+      ),
+      http.get(
+        'http://pipe-tab-hdr.local/docs/install',
+        () =>
+          new HttpResponse(tabbedHtml, {
+            status: 200,
+            headers: { 'Content-Type': 'text/html' },
+          }),
+      ),
+      http.get(
+        'http://pipe-tab-hdr.local/docs/install.md',
+        () => new HttpResponse(null, { status: 404 }),
+      ),
+      http.get(
+        'http://pipe-tab-hdr.local/docs/install-afdocs-nonexistent-8f3a',
+        () => new HttpResponse('Not Found', { status: 404 }),
+      ),
+    );
+
+    const report = await runChecks('http://pipe-tab-hdr.local', {
+      checkIds: ['llms-txt-exists', 'tabbed-content-serialization', 'section-header-quality'],
+      requestDelay: 0,
+    });
+
+    const tabResult = report.results.find((r) => r.id === 'tabbed-content-serialization')!;
+    const headerResult = report.results.find((r) => r.id === 'section-header-quality')!;
+
+    // tabbed-content-serialization should find the tab group
+    expect(tabResult.details?.totalGroupsFound).toBeGreaterThanOrEqual(1);
+
+    // section-header-quality should consume the tabbed page data (not skip)
+    expect(headerResult.status).not.toBe('skip');
+    expect(headerResult.message).not.toContain('did not run');
+  });
+
+  it('section-header-quality skips when tabbed-content-serialization did not run', async () => {
+    const report = await runChecks('http://pipe-tab-norun.local', {
+      checkIds: ['section-header-quality'],
+      requestDelay: 0,
+    });
+
+    const headerResult = report.results.find((r) => r.id === 'section-header-quality')!;
+    expect(headerResult.status).toBe('skip');
+    expect(headerResult.message).toContain('did not run');
+  });
+});
+
+describe('check pipeline: llms-txt-exists → llms-txt-links-markdown data flow', () => {
+  it('llms-txt-links-markdown receives discovered file content and tests links', async () => {
+    server.use(
+      http.get('http://pipe-llms-md.local/llms.txt', () =>
+        HttpResponse.text(
+          '# Docs\n## Links\n- [Guide](http://pipe-llms-md.local/docs/guide): Guide\n',
+        ),
+      ),
+      http.get(
+        'http://pipe-llms-md.local/docs/llms.txt',
+        () => new HttpResponse(null, { status: 404 }),
+      ),
+      // The link in llms.txt returns HTML (not markdown)
+      http.get(
+        'http://pipe-llms-md.local/docs/guide',
+        () =>
+          new HttpResponse('<html><body><h1>Guide</h1></body></html>', {
+            status: 200,
+            headers: { 'Content-Type': 'text/html' },
+          }),
+      ),
+      // .md URL returns markdown
+      http.get(
+        'http://pipe-llms-md.local/docs/guide.md',
+        () =>
+          new HttpResponse('# Guide\n\nContent.\n', {
+            status: 200,
+            headers: { 'Content-Type': 'text/markdown' },
+          }),
+      ),
+    );
+
+    const report = await runChecks('http://pipe-llms-md.local', {
+      checkIds: ['llms-txt-exists', 'llms-txt-links-markdown'],
+      requestDelay: 0,
+    });
+
+    const existsResult = report.results.find((r) => r.id === 'llms-txt-exists')!;
+    const mdResult = report.results.find((r) => r.id === 'llms-txt-links-markdown')!;
+
+    expect(existsResult.status).toBe('pass');
+    // Should run (not skip) and test the link
+    expect(mdResult.status).not.toBe('skip');
+    expect(mdResult.details?.totalLinks).toBe(1);
+  });
+
+  it('llms-txt-links-markdown skips when llms-txt-exists fails', async () => {
+    server.use(
+      http.get(
+        'http://pipe-llms-md-nollms.local/llms.txt',
+        () => new HttpResponse(null, { status: 404 }),
+      ),
+      http.get(
+        'http://pipe-llms-md-nollms.local/docs/llms.txt',
+        () => new HttpResponse(null, { status: 404 }),
+      ),
+    );
+
+    const report = await runChecks('http://pipe-llms-md-nollms.local', {
+      checkIds: ['llms-txt-exists', 'llms-txt-links-markdown'],
+      requestDelay: 0,
+    });
+
+    expect(report.results.find((r) => r.id === 'llms-txt-exists')!.status).toBe('fail');
+    expect(report.results.find((r) => r.id === 'llms-txt-links-markdown')!.status).toBe('skip');
+  });
+});
+
+describe('check pipeline: markdown-content-parity reads from pageCache', () => {
+  it('markdown-content-parity compares cached markdown against HTML', async () => {
+    const mdContent = '# API Reference\n\nEndpoints and methods.\n';
+    const htmlContent =
+      '<html><body><h1>API Reference</h1><p>Endpoints and methods.</p></body></html>';
+
+    setupSite('pipe-parity.local', {
+      llmsTxt: '# Docs\n## Links\n- [API](http://pipe-parity.local/docs/api): Reference\n',
+      pages: [{ path: '/docs/api', md: mdContent, html: htmlContent }],
+    });
+
+    const report = await runChecks('http://pipe-parity.local', {
+      checkIds: ['llms-txt-exists', 'markdown-url-support', 'markdown-content-parity'],
+      requestDelay: 0,
+    });
+
+    const mdUrlResult = report.results.find((r) => r.id === 'markdown-url-support')!;
+    const parityResult = report.results.find((r) => r.id === 'markdown-content-parity')!;
+
+    // markdown-url-support should have populated the pageCache
+    expect(mdUrlResult.status).toBe('pass');
+
+    // markdown-content-parity should consume the cached markdown (not skip)
+    expect(parityResult.status).not.toBe('skip');
+    expect(parityResult.message).not.toContain('No pages with markdown');
+    expect(parityResult.details?.pagesCompared).toBe(1);
+  });
+
+  it('markdown-content-parity skips when no markdown is cached', async () => {
+    setupSite('pipe-parity-nomd.local', {
+      llmsTxt: '# Docs\n## Links\n- [Page](http://pipe-parity-nomd.local/docs/page): A page\n',
+      pages: [{ path: '/docs/page' }],
+    });
+
+    const report = await runChecks('http://pipe-parity-nomd.local', {
+      checkIds: [
+        'llms-txt-exists',
+        'markdown-url-support',
+        'content-negotiation',
+        'markdown-content-parity',
+      ],
+      requestDelay: 0,
+    });
+
+    const parityResult = report.results.find((r) => r.id === 'markdown-content-parity')!;
+    // Both upstream checks failed → no markdown in cache → dependency skip
+    expect(parityResult.status).toBe('skip');
+  });
+});
+
+describe('check pipeline: effectiveOrigin propagation', () => {
+  it('llms-txt-exists sets effectiveOrigin which llms-txt-freshness uses', async () => {
+    // llms.txt redirects cross-host; sitemap lives at the redirected host
+    const redirectedHost = 'pipe-effective-docs.local';
+    const llmsContent = `# Docs\n## Links\n- [Guide](http://${redirectedHost}/docs/guide): Guide\n`;
+
+    server.use(
+      // Original host redirects llms.txt to different host
+      http.get(
+        'http://pipe-effective.local/llms.txt',
+        () =>
+          new HttpResponse(null, {
+            status: 301,
+            headers: { Location: `http://${redirectedHost}/llms.txt` },
+          }),
+      ),
+      http.get(
+        'http://pipe-effective.local/docs/llms.txt',
+        () => new HttpResponse(null, { status: 404 }),
+      ),
+      // Redirected host serves llms.txt
+      http.get(`http://${redirectedHost}/llms.txt`, () => HttpResponse.text(llmsContent)),
+      // robots.txt on both hosts points at the sitemap served from the redirected host
+      http.get('http://pipe-effective.local/robots.txt', () =>
+        HttpResponse.text(`Sitemap: http://${redirectedHost}/sitemap.xml`),
+      ),
+      http.get(`http://${redirectedHost}/robots.txt`, () =>
+        HttpResponse.text(`Sitemap: http://${redirectedHost}/sitemap.xml`),
+      ),
+      http.get(
+        `http://${redirectedHost}/sitemap.xml`,
+        () =>
+          new HttpResponse(
+            `<?xml version="1.0" encoding="UTF-8"?>
+           <urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
+             <url><loc>http://${redirectedHost}/docs/guide</loc></url>
+           </urlset>`,
+            { status: 200, headers: { 'Content-Type': 'application/xml' } },
+          ),
+      ),
+    );
+
+    const report = await runChecks('http://pipe-effective.local', {
+      checkIds: ['llms-txt-exists', 'llms-txt-freshness'],
+      requestDelay: 0,
+    });
+
+    const existsResult = report.results.find((r) => r.id === 'llms-txt-exists')!;
+    const freshnessResult = report.results.find((r) => r.id === 'llms-txt-freshness')!;
+
+    // Cross-host redirect produces 'warn' (agents may not follow it)
+    expect(existsResult.status).toBe('warn');
+    // Freshness should not skip — it should use the effectiveOrigin to find the sitemap
+    // at the redirected host and match URLs there
+    expect(freshnessResult.status).not.toBe('skip');
+    expect(freshnessResult.message).not.toContain('No sitemap found');
+  });
+});
diff --git a/test/unit/checks/auth-alternative-access.test.ts b/test/unit/checks/auth-alternative-access.test.ts
new file mode 100644
index 0000000..e34380a
--- /dev/null
+++ b/test/unit/checks/auth-alternative-access.test.ts
@@ -0,0 +1,278 @@
+import { describe, it, expect } from 'vitest';
+import { createContext } from '../../../src/runner.js';
+import { getCheck } from '../../../src/checks/registry.js';
+import '../../../src/checks/index.js';
+import type { CheckResult } from '../../../src/types.js';
+
+describe('auth-alternative-access', () => {
+  const check = getCheck('auth-alternative-access')!;
+
+  function makeCtx(authGateResult?: CheckResult, otherResults?: Record<string, CheckResult>) {
+    const ctx = createContext('http://test.local', { requestDelay: 0 });
+
+    if (authGateResult) {
+      ctx.previousResults.set('auth-gate-detection', authGateResult);
+    }
+
+    if (otherResults) {
+      for (const [id, result] of Object.entries(otherResults)) {
+        ctx.previousResults.set(id, result);
+      }
+    }
+
+    return ctx;
+  }
+
+  function authGateResult(
+    status: 'pass' | 'warn' | 'fail',
+    details: Record<string, unknown>,
+  ): CheckResult {
+    return {
+      id: 'auth-gate-detection',
+      category: 'authentication',
+      status,
+      message: 'test',
+      details,
+    };
+  }
+
+  it('skips when auth-gate-detection did not run', async () => {
+    const result = await check.run(makeCtx());
+    expect(result.status).toBe('skip');
+    expect(result.message).toContain('did not run');
+  });
+
+  it('skips when auth-gate-detection passed (all public)', async () => {
+    const result = await check.run(
+      makeCtx(
+        authGateResult('pass', {
+          accessible: 5,
+          authRequired: 0,
+          softAuthGate: 0,
+          authRedirect: 0,
+          testedPages: 5,
+        }),
+      ),
+    );
+    expect(result.status).toBe('skip');
+    expect(result.message).toContain('publicly accessible');
+  });
+
+  it('skips when auth-gate-detection errored', async () => {
+    const result = await check.run(
+      makeCtx({
+        id: 'auth-gate-detection',
+        category: 'authentication',
+        status: 'error',
+        message: 'Check error: something went wrong',
+      }),
+    );
+    expect(result.status).toBe('skip');
+    expect(result.message).toContain('errored');
+  });
+
+  it('skips when auth-gate-detection was skipped', async () => {
+    const result = await check.run(
+      makeCtx({
+        id: 'auth-gate-detection',
+        category: 'authentication',
+        status: 'skip',
+        message: 'Skipped: dependency check did not pass',
+      }),
+    );
+    expect(result.status).toBe('skip');
+    expect(result.message).toContain('was skipped');
+  });
+
+  it('fails when all pages gated and no alternative paths found', async () => {
+    const result = await check.run(
+      makeCtx(
+        authGateResult('fail', {
+          accessible: 0,
+          authRequired: 3,
+          softAuthGate: 0,
+          authRedirect: 0,
+          testedPages: 3,
+        }),
+      ),
+    );
+    expect(result.status).toBe('fail');
+    expect(result.message).toContain('No alternative access paths');
+    expect(result.details?.gatedPages).toBe(3);
+    expect(result.details?.manualVerificationNeeded).toBeInstanceOf(Array);
+  });
+
+  it('warns when only llms.txt is available (partial access)', async () => {
+    const result = await check.run(
+      makeCtx(
+        authGateResult('fail', {
+          accessible: 0,
+          authRequired: 5,
+          softAuthGate: 0,
+          authRedirect: 0,
+          testedPages: 5,
+        }),
+        {
+          'llms-txt-exists': {
+            id: 'llms-txt-exists',
+            category: 'llms-txt',
+            status: 'pass',
+            message: 'Found',
+          },
+        },
+      ),
+    );
+    expect(result.status).toBe('warn');
+    expect(result.message).toContain('public-llms-txt');
+    expect(result.message).toContain('Partial alternative access');
+    const paths = result.details?.detectedPaths as Array<{ type: string }>;
+    expect(paths.some((p) => p.type === 'public-llms-txt')).toBe(true);
+  });
+
+  it('passes when public markdown is available', async () => {
+    const result = await check.run(
+      makeCtx(
+        authGateResult('warn', {
+          accessible: 1,
+          authRequired: 4,
+          softAuthGate: 0,
+          authRedirect: 0,
+          testedPages: 5,
+        }),
+        {
+          'markdown-url-support': {
+            id: 'markdown-url-support',
+            category: 'markdown-availability',
+            status: 'pass',
+            message: 'Markdown URLs supported',
+          },
+        },
+      ),
+    );
+    expect(result.status).toBe('pass');
+    expect(result.message).toContain('public-markdown');
+    const paths = result.details?.detectedPaths as Array<{ type: string }>;
+    expect(paths.some((p) => p.type === 'public-markdown')).toBe(true);
+  });
+
+  it('passes when content negotiation provides markdown', async () => {
+    const result = await check.run(
+      makeCtx(
+        authGateResult('fail', {
+          accessible: 0,
+          authRequired: 3,
+          softAuthGate: 0,
+          authRedirect: 0,
+          testedPages: 3,
+        }),
+        {
+          'content-negotiation': {
+            id: 'content-negotiation',
+            category: 'markdown-availability',
+            status: 'pass',
+            message: 'Content negotiation supported',
+          },
+        },
+      ),
+    );
+    expect(result.status).toBe('pass');
+    const paths = result.details?.detectedPaths as Array<{ type: string }>;
+    expect(paths.some((p) => p.type === 'public-markdown')).toBe(true);
+  });
+
+  it('passes when most pages are publicly accessible', async () => {
+    const result = await check.run(
+      makeCtx(
+        authGateResult('warn', {
+          accessible: 8,
+          authRequired: 2,
+          softAuthGate: 0,
+          authRedirect: 0,
+          testedPages: 10,
+        }),
+      ),
+    );
+    expect(result.status).toBe('pass');
+    expect(result.message).toContain('partial-public-access');
+    const paths = result.details?.detectedPaths as Array<{ type: string }>;
+    expect(paths.some((p) => p.type === 'partial-public-access')).toBe(true);
+  });
+
+  it('detects multiple alternative paths', async () => {
+    const result = await check.run(
+      makeCtx(
+        authGateResult('warn', {
+          accessible: 3,
+          authRequired: 7,
+          softAuthGate: 0,
+          authRedirect: 0,
+          testedPages: 10,
+        }),
+        {
+          'llms-txt-exists': {
+            id: 'llms-txt-exists',
+            category: 'llms-txt',
+            status: 'pass',
+            message: 'Found',
+          },
+          'markdown-url-support': {
+            id: 'markdown-url-support',
+            category: 'markdown-availability',
+            status: 'pass',
+            message: 'Markdown URLs supported',
+          },
+        },
+      ),
+    );
+    expect(result.status).toBe('pass');
+    const paths = result.details?.detectedPaths as Array<{ type: string }>;
+    expect(paths).toHaveLength(3); // llms-txt, markdown, partial-public-access
+    expect(paths.some((p) => p.type === 'public-llms-txt')).toBe(true);
+    expect(paths.some((p) => p.type === 'public-markdown')).toBe(true);
+    expect(paths.some((p) => p.type === 'partial-public-access')).toBe(true);
+  });
+
+  it('counts all auth gate types toward gated pages', async () => {
+    const result = await check.run(
+      makeCtx(
+        authGateResult('fail', {
+          accessible: 0,
+          authRequired: 2,
+          softAuthGate: 1,
+          authRedirect: 1,
+          testedPages: 4,
+        }),
+      ),
+    );
+    expect(result.status).toBe('fail');
+    expect(result.details?.gatedPages).toBe(4);
+  });
+
+  it('warns for llms-txt with low public page ratio', async () => {
+    // llms.txt exists but only 1 of 10 pages is accessible (10% < 50%)
+    const result = await check.run(
+      makeCtx(
+        authGateResult('warn', {
+          accessible: 1,
+          authRequired: 9,
+          softAuthGate: 0,
+          authRedirect: 0,
+          testedPages: 10,
+        }),
+        {
+          'llms-txt-exists': {
+            id: 'llms-txt-exists',
+            category: 'llms-txt',
+            status: 'pass',
+            message: 'Found',
+          },
+        },
+      ),
+    );
+    // llms.txt alone is just an index, and <50% accessible: warn
+    expect(result.status).toBe('warn');
+    const paths = result.details?.detectedPaths as Array<{ type: string }>;
+    expect(paths.some((p) => p.type === 'public-llms-txt')).toBe(true);
+    expect(paths.some((p) => p.type === 'partial-public-access')).toBe(true);
+  });
+});
diff --git a/test/unit/checks/auth-gate-detection.test.ts b/test/unit/checks/auth-gate-detection.test.ts
index 3473b6a..bf04ff8 100644
--- a/test/unit/checks/auth-gate-detection.test.ts
+++ b/test/unit/checks/auth-gate-detection.test.ts
@@ -174,6 +174,42 @@ describe('auth-gate-detection', () => {
     expect(result.details?.softAuthGate).toBe(1);
   });
 
+  it('detects login form via title with separator pattern', async () => {
+    server.use(
+      http.get(
+        'http://agd-titlesep.local/docs/page1',
+        () =>
+          new HttpResponse(
+            '<html><head><title>Company Portal | Log In</title></head><body><div>Welcome</div></body></html>',
+            { status: 200, headers: { 'Content-Type': 'text/html' } },
+          ),
+      ),
+    );
+
+    const content = `# Docs\n## Links\n- [Page 1](http://agd-titlesep.local/docs/page1): First\n`;
+    const result = await check.run(makeCtx(content));
+    expect(result.status).toBe('fail');
+    expect(result.details?.softAuthGate).toBe(1);
+  });
+
+  it('does not flag pages that mention login as a topic', async () => {
+    server.use(
+      http.get(
+        'http://agd-notlogin.local/docs/page1',
+        () =>
+          new HttpResponse(
+            '<html><head><title>the user is unable to login</title></head><body><h1>Troubleshooting</h1><p>Steps to fix login issues.</p></body></html>',
+            { status: 200, headers: { 'Content-Type': 'text/html' } },
+          ),
+      ),
+    );
+
+    const content = `# Docs\n## Links\n- [Page 1](http://agd-notlogin.local/docs/page1): First\n`;
+    const result = await check.run(makeCtx(content));
+    expect(result.status).toBe('pass');
+    expect(result.details?.accessible).toBe(1);
+  });
+
   it('treats non-SSO redirects as accessible', async () => {
     server.use(
       http.get(
diff --git a/test/unit/runner.test.ts b/test/unit/runner.test.ts
index f37556f..658f0d9 100644
--- a/test/unit/runner.test.ts
+++ b/test/unit/runner.test.ts
@@ -67,14 +67,14 @@ describe('runner', () => {
     expect(report.summary.total).toBe(2);
   });
 
-  it('stub checks return skip with "Not yet implemented"', async () => {
+  it('auth-alternative-access skips when auth-gate-detection did not run', async () => {
     const report = await runChecks('http://stub.local', {
       checkIds: ['auth-alternative-access'],
       requestDelay: 0,
     });
 
     expect(report.results[0].status).toBe('skip');
-    expect(report.results[0].message).toBe('Not yet implemented');
+    expect(report.results[0].message).toBe('auth-gate-detection did not run');
   });
 
   it('catches check errors and reports status "error"', async () => {

From cfc929c9ef8de755b6c8427f42ea436dbde8cad8 Mon Sep 17 00:00:00 2001
From: dacharyc <dc@dacharycarey.com>
Date: Sun, 15 Mar 2026 21:39:40 -0400
Subject: [PATCH 7/7] Update two implementation details to align with spec

---
 README.md                                     |  2 +-
 .../markdown-code-fence-validity.ts           |  2 +-
 src/cli/formatters/text.ts                    | 19 ++++-
 src/cli/index.ts                              |  6 +-
 test/integration/check-pipeline.test.ts       |  9 ++-
 .../checks/llms-txt-links-markdown.test.ts    | 25 ++++++
 .../checks/llms-txt-links-resolve.test.ts     | 48 ++++++++++++
 test/unit/checks/llms-txt-size.test.ts        | 21 +++++
 test/unit/cli/formatters.test.ts              | 78 +++++++++++++++++++
 9 files changed, 201 insertions(+), 9 deletions(-)

diff --git a/README.md b/README.md
index 2cde6af..623a3fa 100644
--- a/README.md
+++ b/README.md
@@ -12,7 +12,7 @@ afdocs runs 22 checks across 8 categories to evaluate how well your docs serve a
 > **Status: Early development (0.x)**
 > This project is under active development. Check IDs, CLI flags, and output formats may change between minor versions. Feel free to try it out, but don't build automation against specific output until 1.0.
 >
-> Implements [spec v0.1.0](https://agentdocsspec.com/spec) (2026-02-22).
+> Implements [spec v0.2.1](https://agentdocsspec.com/spec) (2026-03-15).
 
 ## Quick start
 
diff --git a/src/checks/content-structure/markdown-code-fence-validity.ts b/src/checks/content-structure/markdown-code-fence-validity.ts
index 354982b..c32394a 100644
--- a/src/checks/content-structure/markdown-code-fence-validity.ts
+++ b/src/checks/content-structure/markdown-code-fence-validity.ts
@@ -136,6 +136,6 @@ registerCheck({
   id: 'markdown-code-fence-validity',
   category: 'content-structure',
   description: 'Whether markdown contains unclosed code fences',
-  dependsOn: [['markdown-url-support', 'content-negotiation']],
+  dependsOn: [],
   run: check,
 });
diff --git a/src/cli/formatters/text.ts b/src/cli/formatters/text.ts
index ae647f9..1870b3d 100644
--- a/src/cli/formatters/text.ts
+++ b/src/cli/formatters/text.ts
@@ -128,10 +128,23 @@ function formatDetailLine(status: string, url: string, metric: string): string {
   return `      ${icon} ${url} ${chalk.dim(metric)}`;
 }
 
-function formatResult(result: CheckResult): string {
+/** Check IDs whose results may be unreliable when rendering-strategy fails. */
+const RENDERING_SENSITIVE_CHECKS = new Set(['page-size-html', 'content-start-position']);
+
+function formatResult(result: CheckResult, allResults?: CheckResult[]): string {
   const icon = STATUS_ICONS[result.status] ?? '?';
   const color = STATUS_COLORS[result.status] ?? ((s: string) => s);
-  return `  ${icon} ${color(result.id)}: ${result.message}`;
+  let line = `  ${icon} ${color(result.id)}: ${result.message}`;
+
+  // Add caveat when rendering-strategy failed and this check measures HTML content
+  if (RENDERING_SENSITIVE_CHECKS.has(result.id) && allResults) {
+    const renderResult = allResults.find((r) => r.id === 'rendering-strategy');
+    if (renderResult?.status === 'fail') {
+      line += `\n      ${chalk.dim('Note: rendering-strategy detected SPA shells; these measurements may reflect a shell, not actual content')}`;
+    }
+  }
+
+  return line;
 }
 
 function formatVerboseDetails(result: CheckResult): string[] {
@@ -178,7 +191,7 @@ export function formatText(report: ReportResult, options?: FormatTextOptions): s
   for (const [category, results] of byCategory) {
     lines.push(chalk.bold.underline(category));
     for (const result of results) {
-      lines.push(formatResult(result));
+      lines.push(formatResult(result, report.results));
       if (verbose) {
         lines.push(...formatVerboseDetails(result));
       }
diff --git a/src/cli/index.ts b/src/cli/index.ts
index 5933e96..e96dce0 100644
--- a/src/cli/index.ts
+++ b/src/cli/index.ts
@@ -1,13 +1,17 @@
+import { createRequire } from 'node:module';
 import { Command } from 'commander';
 import { registerCheckCommand } from './commands/check.js';
 
+const require = createRequire(import.meta.url);
+const { version } = require('../../package.json') as { version: string };
+
 export function run(argv: string[]): void {
   const program = new Command();
 
   program
     .name('afdocs')
     .description('Test your documentation site against the Agent-Friendly Documentation Spec')
-    .version('0.1.0');
+    .version(version);
 
   registerCheckCommand(program);
 
diff --git a/test/integration/check-pipeline.test.ts b/test/integration/check-pipeline.test.ts
index efe891b..27b6869 100644
--- a/test/integration/check-pipeline.test.ts
+++ b/test/integration/check-pipeline.test.ts
@@ -351,11 +351,14 @@ describe('check pipeline: dependency skipping vs standalone mode', () => {
     expect(mdUrlResult.status).toBe('fail');
     expect(cnResult.status).toBe('fail');
 
-    // Downstream checks should be skipped by the runner (deps ran but failed)
+    // page-size-markdown should be skipped by the runner (deps ran but failed)
     expect(sizeResult.status).toBe('skip');
     expect(sizeResult.message).toContain('dependency');
-    expect(fenceResult.status).toBe('skip');
-    expect(fenceResult.message).toContain('dependency');
+
+    // markdown-code-fence-validity has no dependency; it runs and analyzes
+    // llms.txt content even when the site doesn't serve markdown pages
+    expect(fenceResult.status).toBe('pass');
+    expect(fenceResult.message).toContain('code fences properly closed');
   });
 
   it('downstream checks run standalone when upstream is not in the check list', async () => {
diff --git a/test/unit/checks/llms-txt-links-markdown.test.ts b/test/unit/checks/llms-txt-links-markdown.test.ts
index def363b..37eac0f 100644
--- a/test/unit/checks/llms-txt-links-markdown.test.ts
+++ b/test/unit/checks/llms-txt-links-markdown.test.ts
@@ -31,6 +31,31 @@ describe('llms-txt-links-markdown', () => {
     return ctx;
   }
 
+  it('skips when no discovered files', async () => {
+    const ctx = createContext('http://test.local', { requestDelay: 0 });
+    ctx.previousResults.set('llms-txt-exists', {
+      id: 'llms-txt-exists',
+      category: 'llms-txt',
+      status: 'fail',
+      message: 'Not found',
+      details: { discoveredFiles: [] },
+    });
+    const result = await check.run(ctx);
+    expect(result.status).toBe('skip');
+    expect(result.message).toContain('No llms.txt files');
+  });
+
+  it('skips when no HTTP links present', async () => {
+    const content = `# Test
+> Summary
+## Links
+Just text, no links here.
+`;
+    const result = await check.run(makeCtx(content));
+    expect(result.status).toBe('skip');
+    expect(result.message).toContain('No HTTP(S) links');
+  });
+
   it('passes when links have .md extensions', async () => {
     const content = `# Test
 > Summary
diff --git a/test/unit/checks/llms-txt-links-resolve.test.ts b/test/unit/checks/llms-txt-links-resolve.test.ts
index e6cd0ab..78eef28 100644
--- a/test/unit/checks/llms-txt-links-resolve.test.ts
+++ b/test/unit/checks/llms-txt-links-resolve.test.ts
@@ -31,6 +31,54 @@ describe('llms-txt-links-resolve', () => {
     return ctx;
   }
 
+  it('skips when no discovered files', async () => {
+    const ctx = createContext('http://test.local', { requestDelay: 0 });
+    ctx.previousResults.set('llms-txt-exists', {
+      id: 'llms-txt-exists',
+      category: 'llms-txt',
+      status: 'fail',
+      message: 'Not found',
+      details: { discoveredFiles: [] },
+    });
+    const result = await check.run(ctx);
+    expect(result.status).toBe('skip');
+    expect(result.message).toContain('No llms.txt files');
+  });
+
+  it('falls back to GET when HEAD returns 405', async () => {
+    server.use(
+      http.head('http://head405.local/page1', () => new HttpResponse(null, { status: 405 })),
+      http.get('http://head405.local/page1', () => new HttpResponse('OK', { status: 200 })),
+    );
+
+    const content = `# Test\n> Summary\n## Links\n- [Page 1](http://head405.local/page1): First\n`;
+    const result = await check.run(makeCtx(content));
+    expect(result.status).toBe('pass');
+    expect(result.details?.resolved).toBe(1);
+  });
+
+  it('warns when resolve rate is above threshold but not 100%', async () => {
+    // 9 resolved + 1 broken would be exactly 90%, i.e. right at LINK_RESOLVE_THRESHOLD (0.9).
+    // A warn needs a rate > 0.9, so use 10 resolved + 1 broken (~91%).
+    const urls: string[] = [];
+    for (let i = 0; i < 10; i++) {
+      urls.push(`http://warn-rl.local/page${i}`);
+      server.use(
+        http.head(`http://warn-rl.local/page${i}`, () => new HttpResponse(null, { status: 200 })),
+      );
+    }
+    urls.push('http://warn-rl.local/broken');
+    server.use(
+      http.head('http://warn-rl.local/broken', () => new HttpResponse(null, { status: 404 })),
+    );
+
+    const links = urls.map((u, i) => `- [Page ${i}](${u}): Page ${i}`).join('\n');
+    const content = `# Test\n> Summary\n## Links\n${links}\n`;
+    const result = await check.run(makeCtx(content));
+    expect(result.status).toBe('warn');
+    expect(result.details?.broken).toHaveLength(1);
+  });
+
   it('passes when all links resolve', async () => {
     server.use(
       http.head('http://links.local/page1', () => new HttpResponse(null, { status: 200 })),
diff --git a/test/unit/checks/llms-txt-size.test.ts b/test/unit/checks/llms-txt-size.test.ts
index 349b212..76e7aa1 100644
--- a/test/unit/checks/llms-txt-size.test.ts
+++ b/test/unit/checks/llms-txt-size.test.ts
@@ -22,6 +22,27 @@ describe('llms-txt-size', () => {
     return ctx;
   }
 
+  it('skips when no discovered files', async () => {
+    const ctx = createContext('http://test.local', { requestDelay: 0 });
+    ctx.previousResults.set('llms-txt-exists', {
+      id: 'llms-txt-exists',
+      category: 'llms-txt',
+      status: 'fail',
+      message: 'Not found',
+      details: { discoveredFiles: [] },
+    });
+    const result = await check.run(ctx);
+    expect(result.status).toBe('skip');
+    expect(result.message).toContain('No llms.txt files');
+  });
+
+  it('skips when llms-txt-exists has no details', async () => {
+    const ctx = createContext('http://test.local', { requestDelay: 0 });
+    // No previousResults set at all, so the llms-txt-exists result (and its details) is missing
+    const result = await check.run(ctx);
+    expect(result.status).toBe('skip');
+  });
+
   it('passes for small content', async () => {
     const result = await check.run(makeCtx('# Small\n\n> Tiny file.\n'));
     expect(result.status).toBe('pass');
diff --git a/test/unit/cli/formatters.test.ts b/test/unit/cli/formatters.test.ts
index 913dbbd..a57d4df 100644
--- a/test/unit/cli/formatters.test.ts
+++ b/test/unit/cli/formatters.test.ts
@@ -77,6 +77,28 @@ describe('formatText', () => {
     expect(output).not.toContain('errors');
   });
 
+  it('shows small character counts without K suffix', () => {
+    const report = makeReport({
+      results: [
+        {
+          id: 'page-size-html',
+          category: 'page-size',
+          status: 'warn',
+          message: 'Small page',
+          details: {
+            pageResults: [
+              { url: 'https://example.com/tiny', convertedCharacters: 500, status: 'warn' },
+            ],
+          },
+        },
+      ],
+      summary: { total: 1, pass: 0, warn: 1, fail: 0, skip: 0, error: 0 },
+    });
+    const output = formatText(report, { verbose: true });
+    expect(output).toContain('500 chars');
+    expect(output).not.toContain('K chars');
+  });
+
   describe('verbose mode', () => {
     it('shows per-page details for non-passing pageResults', () => {
       const report = makeReport({
@@ -400,6 +422,62 @@ describe('formatText', () => {
       expect(output).toContain('Sitemap returned 404, used homepage links instead');
     });
 
+    it('shows SPA caveat on page-size-html when rendering-strategy fails', () => {
+      const report = makeReport({
+        results: [
+          {
+            id: 'rendering-strategy',
+            category: 'page-size',
+            status: 'fail',
+            message: 'SPA shell detected',
+          },
+          {
+            id: 'page-size-html',
+            category: 'page-size',
+            status: 'pass',
+            message: 'All pages under threshold',
+          },
+          {
+            id: 'content-start-position',
+            category: 'page-size',
+            status: 'warn',
+            message: 'Content starts late',
+          },
+        ],
+        summary: { total: 3, pass: 1, warn: 1, fail: 1, skip: 0, error: 0 },
+      });
+      const output = formatText(report);
+      // Both sensitive checks should show the SPA caveat
+      const spaNote = 'rendering-strategy detected SPA shells';
+      const lines = output.split('\n');
+      const htmlLine = lines.findIndex((l) => l.includes('page-size-html'));
+      const posLine = lines.findIndex((l) => l.includes('content-start-position'));
+      expect(lines[htmlLine + 1]).toContain(spaNote);
+      expect(lines[posLine + 1]).toContain(spaNote);
+    });
+
+    it('does not show SPA caveat when rendering-strategy passes', () => {
+      const report = makeReport({
+        results: [
+          {
+            id: 'rendering-strategy',
+            category: 'page-size',
+            status: 'pass',
+            message: 'Server-rendered',
+          },
+          {
+            id: 'page-size-html',
+            category: 'page-size',
+            status: 'pass',
+            message: 'All pages under threshold',
+          },
+        ],
+        summary: { total: 2, pass: 2, warn: 0, fail: 0, skip: 0, error: 0 },
+      });
+      const output = formatText(report);
+      expect(output).not.toContain('rendering-strategy detected SPA shells');
+    });
+
     it('does not show details without verbose flag', () => {
       const report = makeReport({
         results: [