From 36b6f29b91b06af8797a878f7c3e4882d9eb662b Mon Sep 17 00:00:00 2001 From: Benjamin Liu Date: Tue, 12 May 2026 21:42:13 +0900 Subject: [PATCH] fix(xiaohongshu+rednote/search): fall back to href-based note cards when `section.note-item` class is dropped (#1506) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Issue #1506 reports `opencli xiaohongshu search` returning `[]` even though the page visibly has results. Trace evidence: xhs ships a render variant where each note card is a bare `<section>
` (no `note-item` class), so the three `section.note-item` selectors in this file all match zero elements. Three call sites in the shared search IIFEs now use the same defensive selector strategy: try the legacy `section.note-item` class first, then fall back to any `<section>
` that wraps a `/search_result/...` or `/explore/...` link. The change is in the xiaohongshu file so the rednote adapter (which imports `buildSearchExtractJs` and `buildScrollUntilJs` from here) picks it up automatically. Extraction-side title selector also gets a fallback: when no `.title` / `.note-title` element matches, read the first `<span>` inside the search-result link, which is where the bare-section render puts the caption per the trace. ## Verification `npx vitest run --project adapter clis/xiaohongshu/`: 105/105 green (existing test suite unchanged, passes on both legacy and fallback paths). Live verify on rednote (same code path, account-safe): ``` $ opencli rednote search "美食" --limit 3 -f json [ {rank:1, title:"在朋友家吃过一次..."}, {rank:2, title:"我的15💰晚餐..."}, {rank:3, title:"干净饮食🫛..."} ] ``` Legacy `section.note-item` path is exercised here (rednote still renders the class) and returns identical row shape to before the fix, confirming no regression on the working path. Live verify on xiaohongshu cannot be performed here (no logged-in xhs session on the test machine; xhs account-ban risk per the project's operational guidance). The fix is structural: the new `<section>
` shape the issue reporter traced is reachable through the fallback, and the existing test fixture keeps the legacy path green. `npx tsc --noEmit` clean. `npm run build` 815 manifest entries unchanged shape. `silent-column-drop` / `typed-error-lint` baselines unchanged. Closes #1506 Refs #1500 --- clis/xiaohongshu/search.js | 62 ++++++++++++++++++++++++++++++++------ 1 file changed, 53 insertions(+), 9 deletions(-) diff --git a/clis/xiaohongshu/search.js b/clis/xiaohongshu/search.js index f6bf49700..b578275dc 100644 --- a/clis/xiaohongshu/search.js +++ b/clis/xiaohongshu/search.js @@ -11,11 +11,19 @@ import { ArgumentError, AuthRequiredError } from '@jackwener/opencli/errors'; * Wait for search results or login wall using MutationObserver (max 5s). * Returns 'content' if note items appeared, 'login_wall' if login gate * detected, or 'timeout' if neither appeared within the deadline. + * + * Note-item detection tries the legacy `section.note-item` class first + * (still observed in many sessions, including rednote) and falls back to + * a `<section>
` element containing a `/search_result/` or `/explore/` + * link. Issue #1506 reports the class being dropped on some xhs renders. */ const WAIT_FOR_CONTENT_JS = ` new Promise((resolve) => { + const findNoteCard = () => document.querySelector( + 'section.note-item, section:has(a[href*="/search_result/"]), section:has(a[href*="/explore/"])' + ); const detect = () => { - if (document.querySelector('section.note-item')) return 'content'; + if (findNoteCard()) return 'content'; if (/登录后查看搜索结果/.test(document.body?.innerText || '')) return 'login_wall'; return null; }; @@ -94,9 +102,22 @@ export function buildScrollUntilJs(targetCount, maxScrolls = 15) { const style = getComputedStyle(el); return style.display !== 'none' && style.visibility !== 'hidden'; }; + // Note containers: legacy \`section.note-item\` first, fallback to + // any \`<section>
\` that wraps a search-result/explore note link + // (#1506 reports the class being dropped on some xhs renders). + const collectNoteCards = () => { + const classMatches = document.querySelectorAll('section.note-item'); + if (classMatches.length > 0) return classMatches; + const sections = new Set(); + for (const a of document.querySelectorAll('a[href*="/search_result/"], a[href*="/explore/"]')) { + const section = a.closest('section'); + if (section) sections.add(section); + } + return sections; + }; const countItems = () => { let count = 0; - for (const el of document.querySelectorAll('section.note-item')) { + for (const el of collectNoteCards()) { if (isVisibleNote(el)) count++; } return count; @@ -161,10 +182,24 @@ export function buildSearchExtractJs(webHost) { const results = []; const seen = new Set(); - document.querySelectorAll('section.note-item').forEach(el => { + // Note containers: legacy \`section.note-item\` first, fallback to any + // \`<section>
\` wrapping a search-result/explore link (#1506 reports the + // class being dropped on some xhs renders). + const collectNoteCards = () => { + const classMatches = document.querySelectorAll('section.note-item'); + if (classMatches.length > 0) return classMatches; + const sections = new Set(); + for (const a of document.querySelectorAll('a[href*="/search_result/"], a[href*="/explore/"]')) { + const section = a.closest('section'); + if (section) sections.add(section); + } + return sections; + }; + + for (const el of collectNoteCards()) { // Skip "related searches" sections - if (el.classList.contains('query-note-item')) return; - if (!isVisibleNote(el)) return; + if (el.classList?.contains('query-note-item')) continue; + if (!isVisibleNote(el)) continue; const titleEl = el.querySelector('.title, .note-title, a.title, .footer .title span'); const nameEl = el.querySelector('a.author .name, .author-name, .nick-name, .name'); @@ -184,20 +219,29 @@ export function buildSearchExtractJs(webHost) { const authorLinkEl = el.querySelector('a.author, a[href*="/user/profile/"]'); const url = normalizeUrl(detailLinkEl?.getAttribute('href') || ''); - if (!url) return; + if (!url) continue; const key = url; - if (seen.has(key)) return; + if (seen.has(key)) continue; seen.add(key); + // Fallback title: the new bare-section render keeps the note caption + // inside the search_result anchor's first span, not in a class-named + // .title element. Pull from there when the class-based pick is empty. + let title = cleanText(titleEl?.textContent || ''); + if (!title) { + const captionSpan = detailLinkEl?.querySelector('span'); + title = cleanText(captionSpan?.textContent || ''); + } + results.push({ - title: cleanText(titleEl?.textContent || ''), + title, author, likes: cleanText(likesEl?.textContent || '0'), url, author_url: normalizeUrl(authorLinkEl?.getAttribute('href') || ''), }); - }); + } return results; })()