From e2ba54241d3db717e9694ccf89e7b1538e987db8 Mon Sep 17 00:00:00 2001 From: Shuheng Liu Date: Tue, 21 Apr 2026 02:22:03 +0900 Subject: [PATCH 1/2] fix(gemini): detect response reliably on current Gemini UI MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three issues in the Gemini adapter were causing `gemini ask` to return `[NO RESPONSE]` even though Gemini had actually answered in the automation window. Reproducible with opencli v1.7.4 + extension v1.0.1 against the current gemini.google.com/app UI. 1. Turn selector pollution in zero-state The "/app" welcome panel ("Hi [name], Where should we start?") is rendered inside .visible-primary-message with a nested element whose class includes "message-text". This matched the `[class*="message"]` turn selector and got classified as an Assistant turn. After sending a message, the first real turn is a User turn, so hasGeminiTurnPrefix({Assistant}, {User, Assistant}) failed, breaking diffTrustedStructuredTurns. Fix: require real turns to live inside a .conversation-container / chat-history, and exclude zero-state greeting containers. 2. structuredTurnsTrusted false on zero-state With turns.length === 0 and transcriptLines.length > 0 (welcome text produces transcript lines), structuredTurnsTrusted flipped to false, which short-circuits the entire structured diff in diffTrustedStructuredTurns. Since the welcome text is no longer counted as a turn after (1), this signal is no longer informative. Trust the structured diff unconditionally. 3. Response-complete detection depends on a sticky isGenerating After a response finishes, Gemini's send-button retains aria-label="Stop response" for a long time, so isGenerating stays true and `waitForGeminiResponse` never exits via the `!isGenerating && stable >= 2` gate. Add a fallback: if the candidate text has been stable for ~8s (4 polls at 2s each), treat it as done regardless of isGenerating. 
Verified: 5/5 consecutive `opencli gemini ask "just reply: R"` calls succeed in 13–14s end-to-end on Fast mode. Tests: existing 64 gemini unit/adapter tests still pass. --- clis/gemini/utils.js | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/clis/gemini/utils.js b/clis/gemini/utils.js index 3a250db30..574c32300 100644 --- a/clis/gemini/utils.js +++ b/clis/gemini/utils.js @@ -299,7 +299,11 @@ function readGeminiSnapshotScript() { transcriptLines, composerHasText: composerText.length > 0, isGenerating, - structuredTurnsTrusted: turns.length > 0 || transcriptLines.length === 0, + // After filtering out zero-state greetings, empty turns means + // "no actual conversation yet" — still trust the structured diff. + // Previously this flipped to false on the /app root because the + // welcome text produced transcriptLines > 0. + structuredTurnsTrusted: true, }; })() `; @@ -417,9 +421,22 @@ function getTurnsScript() { ]; const roots = selectors.flatMap((selector) => Array.from(document.querySelectorAll(selector))); + // Filter out zero-state welcome text ("Hi , Where should we start?") + // which Gemini renders inside .visible-primary-message containers using + // .message-text class. These match our turn selectors but are NOT actual + // conversation turns, breaking the strict prefix match in diffTrustedStructuredTurns. 
+ const isInRealConversation = (el) => { + if (el.closest('[class*="conversation-container"]')) return true; + if (el.closest('chat-history, [class*="chat-history"]')) return true; + // Exclude zero-state greeting areas + if (el.closest('[class*="visible-primary-message"]')) return false; + if (el.closest('[class*="zero-state"], [class*="empty-state"], [class*="zero_state"]')) return false; + return false; + }; const unique = roots .filter((el, index, all) => all.indexOf(el) === index) .filter(isVisible) + .filter(isInRealConversation) .sort((left, right) => { if (left === right) return 0; const relation = left.compareDocumentPosition(right); @@ -1917,7 +1934,12 @@ export async function waitForGeminiResponse(page, baseline, promptText, timeoutS lastStructured = structuredCandidate; structuredStableCount = 1; } - if (!current.isGenerating && structuredStableCount >= 2) { + // Gemini's send-button aria-label sticks at "Stop response" after + // the response has already finished streaming, making isGenerating + // unreliable as a completion signal. Fall back to: if the candidate + // text has been stable for ~8 seconds (4 polls at 2s each), treat + // it as done regardless of isGenerating. + if ((!current.isGenerating && structuredStableCount >= 2) || structuredStableCount >= 4) { return structuredCandidate; } continue; From 5658b639afedf122012b6634d07e53e4cac55d5f Mon Sep 17 00:00:00 2001 From: Shuheng Liu Date: Tue, 21 Apr 2026 02:40:04 +0900 Subject: [PATCH 2/2] fix(grok): handle contenteditable composer and English Submit aria MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit grok.com migrated from a plain