Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions .changeset/keep-tmux-session-alive-after-agent-exit.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
---
"@aoagents/ao-plugin-runtime-tmux": patch
"@aoagents/ao-web": patch
---

Tmux sessions no longer die when the agent process inside them exits. When you Ctrl-C the agent in a web terminal, the pane now drops to an interactive `$SHELL` in the workspace dir instead of nuking the tmux session and leaving the dashboard in a phantom "runtime lost" state. The lifecycle manager still detects the agent exit (via `agent.isProcessRunning`) and transitions the session to `agent_process_exited`, but the runtime stays usable so you can run shell commands or manually re-launch the agent.

Also: the mux-websocket re-attach loop now checks `tmux has-session` before retrying after a PTY exit. When the tmux session is genuinely gone (e.g. `ao stop`), it skips the three doomed `attach-session` spawns from #1640 and notifies the dashboard immediately. (#1756)
56 changes: 49 additions & 7 deletions packages/plugins/runtime-tmux/src/__tests__/index.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -98,10 +98,19 @@ describe("runtime.create()", () => {
expect(handle.runtimeName).toBe("tmux");
expect(handle.data.workspacePath).toBe("/tmp/workspace");

// First call: new-session
// First call: new-session — launch command has the keep-alive shell tail
// appended so the tmux session survives agent exit (issue #1756).
expect(mockExecFileCustom).toHaveBeenCalledWith(
"tmux",
["new-session", "-d", "-s", "test-session", "-c", "/tmp/workspace", "echo hello"],
[
"new-session",
"-d",
"-s",
"test-session",
"-c",
"/tmp/workspace",
'echo hello\nexec "${SHELL:-/bin/bash}" -i',
],
expectedTmuxOptions,
);
});
Expand Down Expand Up @@ -148,7 +157,7 @@ describe("runtime.create()", () => {
expect(args).toContain("-e");
expect(args).toContain("AO_SESSION=env-session");
expect(args).toContain("FOO=bar");
expect(args.at(-1)).toBe("bash");
expect(args.at(-1)).toBe('bash\nexec "${SHELL:-/bin/bash}" -i');
});

it("starts the launch command as the initial tmux pane command", async () => {
Expand All @@ -164,15 +173,42 @@ describe("runtime.create()", () => {
environment: {},
});

// First call: new-session passes the launch command as the pane's initial command
// First call: new-session passes the launch command as the pane's initial
// command, with the keep-alive shell tail appended.
expect(mockExecFileCustom).toHaveBeenCalledWith(
"tmux",
["new-session", "-d", "-s", "launch-test", "-c", "/tmp/ws", "claude --session abc"],
[
"new-session",
"-d",
"-s",
"launch-test",
"-c",
"/tmp/ws",
'claude --session abc\nexec "${SHELL:-/bin/bash}" -i',
],
expectedTmuxOptions,
);
});

it("uses a temp launch script for long launch commands", async () => {
it("appends an interactive shell tail so the tmux pane survives agent exit (regression for #1756)", async () => {
const runtime = create();

mockTmuxSuccess();
mockTmuxSuccess();

await runtime.create({
sessionId: "keep-alive",
workspacePath: "/tmp/ws",
launchCommand: "claude --session abc",
environment: {},
});

const finalArg = (mockExecFileCustom.mock.calls[0][1] as string[]).at(-1)!;
expect(finalArg).toContain("claude --session abc");
expect(finalArg).toMatch(/exec "\$\{SHELL:-\/bin\/bash\}" -i\s*$/);
});

it("keeps the keep-alive tail in the temp script for long launch commands", async () => {
const runtime = create();
const longCommand = "x".repeat(250);

Expand All @@ -193,6 +229,12 @@ describe("runtime.create()", () => {
{ encoding: "utf-8", mode: 0o700 },
);

// The script body includes the interactive shell tail too — without it
// long-command sessions would still nuke tmux on agent exit (#1756).
const writeCall = (fs.writeFileSync as unknown as { mock: { calls: unknown[][] } }).mock
.calls[0];
expect(writeCall[1]).toMatch(/exec "\$\{SHELL:-\/bin\/bash\}" -i/);

expect(mockExecFileCustom).toHaveBeenNthCalledWith(
1,
"tmux",
Expand Down Expand Up @@ -317,7 +359,7 @@ describe("runtime.create()", () => {
"no-env",
"-c",
"/tmp/ws",
"echo hi",
'echo hi\nexec "${SHELL:-/bin/bash}" -i',
]);
});
});
Expand Down
27 changes: 24 additions & 3 deletions packages/plugins/runtime-tmux/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -34,9 +34,27 @@ function assertValidSessionId(id: string): void {
}
}

/**
* Shell snippet appended after the agent launch command so the tmux pane
* (and therefore the tmux session) survives agent exit. Without this, the
* pane closes when the agent process exits, the only window goes away, and
* the whole tmux session dies — leaving the dashboard with a phantom
* "runtime lost" state and the user with no way to do anything in that
* workspace (issue #1756).
*
* `exec` replaces the wrapping sh/bash with the user's interactive shell,
* so the lifecycle manager still detects agent termination via
* `agent.isProcessRunning` and transitions the session correctly.
*/
const KEEP_ALIVE_SHELL = `exec "\${SHELL:-/bin/bash}" -i`;

function withKeepAliveShell(command: string): string {
return `${command.replace(/\n+$/, "")}\n${KEEP_ALIVE_SHELL}`;
}

function writeLaunchScript(command: string): string {
const scriptPath = join(tmpdir(), `ao-launch-${randomUUID()}.sh`);
const content = `#!/usr/bin/env bash\nrm -- "$0" 2>/dev/null || true\n${command}\n`;
const content = `#!/usr/bin/env bash\nrm -- "$0" 2>/dev/null || true\n${withKeepAliveShell(command)}\n`;
writeFileSync(scriptPath, content, { encoding: "utf-8", mode: 0o700 });
return `bash ${shellEscape(scriptPath)}`;
}
Expand Down Expand Up @@ -78,9 +96,12 @@ export function create(): Runtime {
// Start the launch command as the pane's initial command instead of
// typing into a live shell. A dashboard attach can trigger terminal
// device responses; if those race with tmux send-keys, they become
// literal shell input and corrupt the launch path.
// literal shell input and corrupt the launch path. The keep-alive
// tail is appended in both code paths — see KEEP_ALIVE_SHELL.
const shellCommand =
launchCommand.length > 200 ? writeLaunchScript(launchCommand) : launchCommand;
launchCommand.length > 200
? writeLaunchScript(launchCommand)
: withKeepAliveShell(launchCommand);

await tmux(
"new-session",
Expand Down
63 changes: 62 additions & 1 deletion packages/web/server/__tests__/mux-websocket.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,10 @@ import type { SessionBroadcaster as SessionBroadcasterType } from "../mux-websoc

// vi.mock factories run before module-level statements. Hoist the mock
// fns so the factories close over the same instances the tests use.
const { mockSpawn, mockPtySpawn } = vi.hoisted(() => ({
const { mockSpawn, mockPtySpawn, mockTmuxHasSession } = vi.hoisted(() => ({
mockSpawn: vi.fn(),
mockPtySpawn: vi.fn(),
mockTmuxHasSession: vi.fn(),
}));

vi.mock("node:child_process", async (importOriginal) => {
Expand All @@ -33,6 +34,7 @@ vi.mock("../tmux-utils.js", () => ({
findTmux: () => "/usr/bin/tmux",
validateSessionId: () => true,
resolveTmuxSession: () => "ao-177",
tmuxHasSession: (...args: unknown[]) => mockTmuxHasSession(...args),
}));

const { SessionBroadcaster, TerminalManager } = await import("../mux-websocket");
Expand Down Expand Up @@ -311,3 +313,62 @@ describe("TerminalManager.open — tmux target args (regression for #1714)", ()
expect(args).toEqual(["attach-session", "-t", "=ao-177"]);
});
});

describe("TerminalManager.open — re-attach skipped when tmux session is gone (regression for #1756)", () => {
// Captures the latest onExit callback registered by ptySpawn so tests can
// synthesise a PTY exit without spawning a real process. The handler is
// async (so it can await the promisified has-session probe), so tests
// await its return value before asserting.
let capturedOnExit: ((evt: { exitCode: number }) => Promise<void> | void) | undefined;

beforeEach(() => {
mockSpawn.mockReset();
mockPtySpawn.mockReset();
mockTmuxHasSession.mockReset();
capturedOnExit = undefined;

mockSpawn.mockImplementation(() => new EventEmitter());
mockPtySpawn.mockImplementation(() => ({
onData: vi.fn(),
onExit: vi.fn((cb: (evt: { exitCode: number }) => Promise<void> | void) => {
capturedOnExit = cb;
}),
write: vi.fn(),
resize: vi.fn(),
kill: vi.fn(),
}));
});

it("skips re-attach and notifies subscribers when has-session reports the tmux session is gone", async () => {
const mgr = new TerminalManager("/usr/bin/tmux");
const exitCb = vi.fn();
mgr.subscribe("ao-177", undefined, vi.fn(), exitCb);

expect(mockPtySpawn).toHaveBeenCalledTimes(1);
expect(capturedOnExit).toBeDefined();

mockTmuxHasSession.mockResolvedValueOnce(false);
await capturedOnExit!({ exitCode: 0 });

// No second attach-session was spawned — the re-attach loop was skipped.
expect(mockPtySpawn).toHaveBeenCalledTimes(1);
// Subscribers were notified with the original exit code.
expect(exitCb).toHaveBeenCalledTimes(1);
expect(exitCb).toHaveBeenCalledWith(0);
});

it("still re-attaches when has-session reports the tmux session is alive", async () => {
const mgr = new TerminalManager("/usr/bin/tmux");
const exitCb = vi.fn();
mgr.subscribe("ao-177", undefined, vi.fn(), exitCb);

expect(mockPtySpawn).toHaveBeenCalledTimes(1);

mockTmuxHasSession.mockResolvedValueOnce(true);
await capturedOnExit!({ exitCode: 1 });

// Re-attach happened: ptySpawn called a second time, exit not yet notified.
expect(mockPtySpawn).toHaveBeenCalledTimes(2);
expect(exitCb).not.toHaveBeenCalled();
});
});
50 changes: 49 additions & 1 deletion packages/web/server/__tests__/tmux-utils.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,14 @@
*/

import { describe, it, expect, vi, afterEach } from "vitest";
import { findTmux, resolveTmuxSession, resolvePipePath, validateSessionId, SESSION_ID_PATTERN } from "../tmux-utils.js";
import {
findTmux,
resolveTmuxSession,
resolvePipePath,
tmuxHasSession,
validateSessionId,
SESSION_ID_PATTERN,
} from "../tmux-utils.js";

// Default fs adapter for resolveTmuxSession tests — empty AO base directory
// so the on-disk storageKey lookup always misses and we exercise the
Expand Down Expand Up @@ -357,6 +364,47 @@ describe("findTmux", () => {
});
});

// =============================================================================
// tmuxHasSession
// =============================================================================

describe("tmuxHasSession", () => {
const TMUX = "/opt/homebrew/bin/tmux";

it("returns true when has-session resolves", async () => {
const mockExec = vi.fn().mockResolvedValue({ stdout: "", stderr: "" });

await expect(tmuxHasSession(TMUX, "ao-104", mockExec)).resolves.toBe(true);
});

it("returns false when has-session rejects (session missing)", async () => {
const mockExec = vi
.fn()
.mockRejectedValue(new Error("can't find session: ao-104"));

await expect(tmuxHasSession(TMUX, "ao-104", mockExec)).resolves.toBe(false);
});

it("uses the = exact-match prefix to avoid tmux prefix matching", async () => {
const mockExec = vi.fn().mockResolvedValue({ stdout: "", stderr: "" });

await tmuxHasSession(TMUX, "ao-1", mockExec);

expect(mockExec).toHaveBeenCalledWith(
TMUX,
["has-session", "-t", "=ao-1"],
{ timeout: 5000 },
);
});

it("returns false when tmuxPath is null without invoking exec", async () => {
const mockExec = vi.fn();

await expect(tmuxHasSession(null, "ao-104", mockExec)).resolves.toBe(false);
expect(mockExec).not.toHaveBeenCalled();
});
});

// =============================================================================
// resolveTmuxSession
// =============================================================================
Expand Down
39 changes: 37 additions & 2 deletions packages/web/server/mux-websocket.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,13 @@
import { WebSocketServer, WebSocket } from "ws";
import { spawn } from "node:child_process";
import { type Socket, connect as netConnect } from "node:net";
import { findTmux, resolveTmuxSession, resolvePipePath, validateSessionId } from "./tmux-utils.js";
import {
findTmux,
resolveTmuxSession,
resolvePipePath,
tmuxHasSession,
validateSessionId,
} from "./tmux-utils.js";
import { getEnvDefaults, isWindows } from "@aoagents/ao-core";

// These types mirror src/lib/mux-protocol.ts exactly.
Expand Down Expand Up @@ -368,10 +374,39 @@ export class TerminalManager {
});

// Handle PTY exit
pty.onExit(({ exitCode }) => {
//
// Async: the has-session probe shells out via promisified execFile and
// must be awaited. node-pty fires onExit on the main thread; a sync
// probe would freeze the entire web server (every WebSocket, HTTP
// request, in-flight terminal) for up to the subprocess timeout when
// tmux is slow to respond.
pty.onExit(async ({ exitCode }) => {
console.log(`[MuxServer] PTY exited for ${id} with code ${exitCode}`);
terminal.pty = null;

// Skip the re-attach loop entirely when the underlying tmux session is
// gone (e.g. user pressed Ctrl-C in the pane and the launch command
// exited, taking the only window with it). Without this guard we
// burn three doomed attach-session spawns and emit a noisy
// "Max re-attach attempts reached" log line for what is actually a
// clean user-initiated termination — see issue #1756. The
// MAX_REATTACH_ATTEMPTS bound from #1640 still covers tmux server
// hiccups where the session does still exist.
if (
terminal.subscribers.size > 0 &&
!(await tmuxHasSession(this.TMUX, tmuxSessionId))
) {
console.log(`[MuxServer] tmux session ${tmuxSessionId} is gone, not re-attaching`);
if (terminal.resetTimer) {
clearTimeout(terminal.resetTimer);
terminal.resetTimer = undefined;
}
for (const cb of terminal.exitCallbacks) {
cb(exitCode);
}
return;
}

// Re-attach if subscribers are still present, up to MAX_REATTACH_ATTEMPTS.
// The cap prevents an unbounded respawn loop when the PTY crashes immediately
// after every attach (e.g. resource exhaustion or a broken tmux session).
Expand Down
Loading
Loading