diff --git a/CHANGELOG.md b/CHANGELOG.md index cdc6f7cbb..9387b8079 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,12 @@ ## Unreleased +### Bug Fixes + +* **browser** — `page.evaluate()` / `evaluateInFrame()` now return the user JavaScript value directly. Browser Bridge `exec` previously routed through a shared `pageScopedResult` helper that spread / wrapped the lease's `session` into the result `data`, contaminating arbitrary user returns: array / primitive returns came back as `{ session, data }` envelopes, and plain-object returns had an extra `session` key injected (overwriting any user `session` field). `google search` and `xiaohongshu search` were the visible repro — Chrome rendered results correctly but adapters extracted an empty array. Fixed in extension 1.0.14 by reverting `pageScopedResult` to its pre-1461 form (`{ id, ok, data, page }`); no client-side unwrap is needed. +* **google/search** — wait for `#rso a h3` before extracting, falling back to the existing fixed wait. On Chrome 148 + Linux Wayland the DOM can settle before SERP anchors are populated, making extraction return empty even with the envelope bug fixed. +* **xiaohongshu/search** — extract initially visible cards before scrolling, then merge post-scroll rows by URL. Xiaohongshu's virtualized masonry layout can evict the initial cards from the DOM after scroll, so the previous always-scroll-then-extract flow could lose the top results. + ### Features * **browser** — add `page.evaluate(fn, ...args)` for type-safe browser-context evaluation with JSON-serialized arguments. String evaluation remains supported, but new adapter code should use function form to avoid implicit `wrapForEval` auto-IIFE magic. @@ -14,6 +20,7 @@ ### Internal +* **extension 1.0.14** — `pageScopedResult` no longer injects `session` into `data`. The field had no consumers and contaminated `exec` results with arbitrary user-JS shapes; routing-relevant identity is already exposed via `Result.page`. 
* **extension 1.0.13** — remove the internal command-session lease-key backdoor. ## [1.7.18](https://github.com/jackwener/opencli/compare/v1.7.17...v1.7.18) (2026-05-12) diff --git a/clis/google/search.js b/clis/google/search.js index 4705aee5f..4849ee72c 100644 --- a/clis/google/search.js +++ b/clis/google/search.js @@ -29,7 +29,16 @@ cli({ const lang = encodeURIComponent(args.lang); const url = `https://www.google.com/search?q=${keyword}&hl=${lang}&num=${limit}`; await page.goto(url); - await page.wait(2); + // Wait until at least one SERP title link is present. On Chrome 148 / + // Linux Wayland, DOM stability can be reached before #rso anchors are + // populated, making browser execution look visually correct while the + // adapter extracts an empty array. + try { + await page.wait({ selector: '#rso a h3', timeout: 5 }); + } + catch { + await page.wait(2); + } const results = await page.evaluate(` (function() { var results = []; @@ -63,7 +72,7 @@ cli({ var href = link.href || ''; // Skip non-http, Google internal links, and duplicates - if (!href.match(/^https?:\\/\\//)) continue; + if (!(href.startsWith('http://') || href.startsWith('https://'))) continue; if (href.indexOf('google.com/search') !== -1) continue; if (seenUrls[href]) continue; seenUrls[href] = true; diff --git a/clis/xiaohongshu/search.js b/clis/xiaohongshu/search.js index 4acfd6043..f6bf49700 100644 --- a/clis/xiaohongshu/search.js +++ b/clis/xiaohongshu/search.js @@ -227,12 +227,32 @@ export const command = cli({ if (waitResult === 'login_wall') { throw new AuthRequiredError('www.xiaohongshu.com', 'Xiaohongshu search results are blocked behind a login wall'); } - // Scroll until enough rows are rendered or the lazy-load plateaus. - // Replaces the previous fixed `autoScroll({ times: 2 })` which capped - // extraction at ~13 notes regardless of `--limit` (#1471). 
- await page.evaluate(buildScrollUntilJs(limit)); - const payload = await page.evaluate(buildSearchExtractJs('www.xiaohongshu.com')); - const data = Array.isArray(payload) ? payload : []; + // Extract before scrolling. Xiaohongshu uses a virtualized masonry + // layout, so scrolling to the bottom can evict the initially visible + // note cards from the DOM and make extraction return [] even though the + // browser rendered results correctly. + const initialPayload = await page.evaluate(buildSearchExtractJs('www.xiaohongshu.com')); + let payload = Array.isArray(initialPayload) ? initialPayload : []; + if (payload.length < limit) { + // Scroll until enough rows are rendered or the lazy-load plateaus. + // Replaces the previous fixed `autoScroll({ times: 2 })` which capped + // extraction at ~13 notes regardless of `--limit` (#1471). + await page.evaluate(buildScrollUntilJs(limit)); + const scrolledPayload = await page.evaluate(buildSearchExtractJs('www.xiaohongshu.com')); + if (Array.isArray(scrolledPayload)) { + const seen = new Set(payload.map((item) => item.url).filter(Boolean)); + for (const item of scrolledPayload) { + if (item?.url && seen.has(item.url)) + continue; + if (item?.url) + seen.add(item.url); + payload.push(item); + if (payload.length >= limit) + break; + } + } + } + const data = payload; return data .filter((item) => item.title) .slice(0, limit) diff --git a/clis/xiaohongshu/search.test.js b/clis/xiaohongshu/search.test.js index a8fee04fb..578dd674a 100644 --- a/clis/xiaohongshu/search.test.js +++ b/clis/xiaohongshu/search.test.js @@ -65,9 +65,7 @@ describe('xiaohongshu search', () => { const page = createPageMock([ // First evaluate: MutationObserver wait (content appeared) 'content', - // Second evaluate: scroll-until-enough (returns final note count) - 1, - // Third evaluate: main DOM extraction (returns array directly) + // Second evaluate: initial DOM extraction (already enough results) [ { title: '某鱼买FSD被坑了4万', @@ -99,9 +97,7 @@ 
describe('xiaohongshu search', () => { const page = createPageMock([ // First evaluate: MutationObserver wait (content appeared) 'content', - // Second evaluate: scroll-until-enough (returns final note count) - 3, - // Third evaluate: main DOM extraction (returns array directly) + // Second evaluate: initial DOM extraction (already enough valid rows) [ { title: 'Result A', @@ -137,17 +133,36 @@ describe('xiaohongshu search', () => { const page = createPageMock([ // First evaluate: MutationObserver wait (content appeared) 'content', - // Second evaluate: scroll-until-enough (no rows rendered) - 0, - // Third evaluate: extraction (returns empty array) + // Second evaluate: initial extraction (no rows rendered) [], ]); const result = (await cmd.func(page, { query: '测试等待', limit: 5 })); expect(result).toHaveLength(0); // Only one navigation, no retry expect(page.goto).toHaveBeenCalledTimes(1); - // Three evaluate calls: wait + scroll-until + extraction - expect(page.evaluate).toHaveBeenCalledTimes(3); + // Four evaluate calls: wait, initial extraction, scroll-until, post-scroll extraction. 
+ expect(page.evaluate).toHaveBeenCalledTimes(4); + }); + it('scrolls only when the initial extraction has fewer rows than requested', async () => { + const cmd = getRegistry().get('xiaohongshu/search'); + expect(cmd?.func).toBeTypeOf('function'); + const page = createPageMock([ + 'content', + [ + { title: 'Result A', author: 'UserA', likes: '10', url: 'https://www.xiaohongshu.com/search_result/aaa', author_url: '' }, + ], + 3, + [ + { title: 'Result A', author: 'UserA', likes: '10', url: 'https://www.xiaohongshu.com/search_result/aaa', author_url: '' }, + { title: 'Result B', author: 'UserB', likes: '5', url: 'https://www.xiaohongshu.com/search_result/bbb', author_url: '' }, + ], + ]); + + const result = (await cmd.func(page, { query: '测试等待', limit: 2 })); + + expect(result).toHaveLength(2); + expect(result.map((item) => item.title)).toEqual(['Result A', 'Result B']); + expect(page.evaluate).toHaveBeenCalledTimes(4); }); it('separates fallback author text from appended relative date', async () => { const cmd = getRegistry().get('xiaohongshu/search'); @@ -165,8 +180,6 @@ describe('xiaohongshu search', () => { markVisible(dom.window.document.querySelector('section.note-item')); const page = createPageMock([]); page.evaluate.mockImplementationOnce(async () => 'content'); - // scroll-until-enough returns the final visible row count - page.evaluate.mockImplementationOnce(async () => 1); page.evaluate.mockImplementationOnce(async (script) => Function('document', 'getComputedStyle', `return (${script})`)(dom.window.document, dom.window.getComputedStyle.bind(dom.window))); const result = await cmd.func(page, { query: '测试', limit: 1 }); diff --git a/extension/dist/background.js b/extension/dist/background.js index c6b41288f..fa085bb6f 100644 --- a/extension/dist/background.js +++ b/extension/dist/background.js @@ -1480,9 +1480,7 @@ async function resolveTab(tabId, leaseKey, initialUrl) { } async function pageScopedResult(id, tabId, data) { const page = await 
resolveTargetId(tabId); - const lease = [...automationSessions.values()].find((session) => session.preferredTabId === tabId); - const scopedData = data && typeof data === "object" && !Array.isArray(data) ? { session: lease?.session, ...data } : { session: lease?.session, data }; - return { id, ok: true, data: scopedData, page }; + return { id, ok: true, data, page }; } async function resolveTabId(tabId, leaseKey, initialUrl) { const resolved = await resolveTab(tabId, leaseKey, initialUrl); diff --git a/extension/manifest.json b/extension/manifest.json index af558d00c..607aac82f 100644 --- a/extension/manifest.json +++ b/extension/manifest.json @@ -1,7 +1,7 @@ { "manifest_version": 3, "name": "OpenCLI", - "version": "1.0.13", + "version": "1.0.14", "description": "Browser automation bridge for the OpenCLI CLI tool. Executes commands in Chrome tab leases via a local daemon.", "permissions": [ "debugger", diff --git a/extension/package-lock.json b/extension/package-lock.json index ce0a8409b..c133f7246 100644 --- a/extension/package-lock.json +++ b/extension/package-lock.json @@ -1,12 +1,12 @@ { "name": "opencli-extension", - "version": "1.0.13", + "version": "1.0.14", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "opencli-extension", - "version": "1.0.13", + "version": "1.0.14", "devDependencies": { "@types/chrome": "^0.0.287", "typescript": "^5.7.0", diff --git a/extension/package.json b/extension/package.json index 9dbe732f6..b95de79d5 100644 --- a/extension/package.json +++ b/extension/package.json @@ -1,6 +1,6 @@ { "name": "opencli-extension", - "version": "1.0.13", + "version": "1.0.14", "private": true, "opencli": { "compatRange": ">=1.7.0" diff --git a/extension/src/background.test.ts b/extension/src/background.test.ts index 155b69e78..551db2162 100644 --- a/extension/src/background.test.ts +++ b/extension/src/background.test.ts @@ -574,7 +574,6 @@ describe('background tab isolation', () => { title: 'bilibili', url: 
'https://www.bilibili.com/', timedOut: false, - session: 'twitter', }, }); expect(update).not.toHaveBeenCalled(); diff --git a/extension/src/background.ts b/extension/src/background.ts index 7971bc121..2925c13bc 100644 --- a/extension/src/background.ts +++ b/extension/src/background.ts @@ -1110,11 +1110,7 @@ async function resolveTab(tabId: number | undefined, leaseKey: string, initialUr /** Build a page-scoped success result with targetId resolved from tabId */ async function pageScopedResult(id: string, tabId: number, data?: unknown): Promise<Result> { const page = await identity.resolveTargetId(tabId); - const lease = [...automationSessions.values()].find((session) => session.preferredTabId === tabId); - const scopedData = data && typeof data === 'object' && !Array.isArray(data) - ? { session: lease?.session, ...(data as Record<string, unknown>) } - : { session: lease?.session, data }; - return { id, ok: true, data: scopedData, page }; + return { id, ok: true, data, page }; } /** Convenience wrapper returning just the tabId (used by most handlers) */