Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .changeset/pty-idle-grace-period.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"@aoagents/ao-web": patch
---

Defer killing idle terminal PTYs by 30s in the mux server so that quick reconnects (tab reload, sleep/wake, network blip) reuse the existing PTY instead of allocating a fresh one. macOS never recycles ptmx slot numbers within a boot, so churning PTYs across these events used to drain `kern.tty.ptmx_max` (511) over weeks of dashboard uptime. (#1718)
Original file line number Diff line number Diff line change
Expand Up @@ -392,9 +392,13 @@ describe("mux terminal I/O", () => {
it("receives terminal data after open", async () => {
const ws = await connectMux();

ws.send(
JSON.stringify({ ch: "terminal", id: "test-session", tmuxName: TEST_SESSION, type: "open" }),
);
// Use a unique terminal id so we exercise a fresh tmux attach. The shared
// "test-session" id used by earlier tests in this file would land in the
// post-#1718 PTY-grace path (PTY reused, no fresh init-data flow), and
// the prior test closes its WS too quickly for the buffer to populate —
// so a grace-path replay would deliver an empty buffer and time out.
const id = `test-data-flow-${Date.now()}`;
ws.send(JSON.stringify({ ch: "terminal", id, tmuxName: TEST_SESSION, type: "open" }));
await waitForMessage(ws, (m) => m.ch === "terminal" && m.type === "opened");

// tmux sends terminal init sequences on attach — wait for any data
Expand Down
132 changes: 132 additions & 0 deletions packages/web/server/__tests__/mux-websocket.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -311,3 +311,135 @@ describe("TerminalManager.open — tmux target args (regression for #1714)", ()
expect(args).toEqual(["attach-session", "-t", "=ao-177"]);
});
});

describe("TerminalManager — PTY idle grace period (issue #1718)", () => {
// Stable mock fns shared across spawns within a single test, reset per test.
// The unit under test relies on `pty.kill()` being called exactly once on
// grace expiry, so kill needs a stable identity to count against.
let ptyKill: ReturnType<typeof vi.fn>;
let ptyOnData: ReturnType<typeof vi.fn>;
let ptyOnExit: ReturnType<typeof vi.fn>;
let manager: InstanceType<typeof TerminalManager>;

beforeEach(() => {
vi.useFakeTimers();
mockSpawn.mockReset();
mockPtySpawn.mockReset();

ptyKill = vi.fn();
ptyOnData = vi.fn();
ptyOnExit = vi.fn();

mockSpawn.mockImplementation(() => new EventEmitter());
mockPtySpawn.mockImplementation(() => ({
onData: ptyOnData,
onExit: ptyOnExit,
write: vi.fn(),
resize: vi.fn(),
kill: ptyKill,
}));

manager = new TerminalManager("/usr/bin/tmux");
});

afterEach(() => {
vi.clearAllTimers();
vi.useRealTimers();
});

it("does not kill the PTY immediately when the last subscriber unsubscribes", () => {
const unsub = manager.subscribe("ao-177", undefined, vi.fn());
expect(mockPtySpawn).toHaveBeenCalledTimes(1);

unsub();

// PTY must remain alive during the grace window — killing here would
// burn a fresh ptmx slot on the next reconnect (issue #1718).
expect(ptyKill).not.toHaveBeenCalled();
});

it("kills the PTY after the grace period expires with no reconnect", () => {
const unsub = manager.subscribe("ao-177", undefined, vi.fn());
unsub();

// Just before the window closes — still alive.
vi.advanceTimersByTime(29_999);
expect(ptyKill).not.toHaveBeenCalled();

// Window closes — kill fires.
vi.advanceTimersByTime(1);
expect(ptyKill).toHaveBeenCalledTimes(1);
});

it("preserves the PTY when a new subscriber arrives within the grace window", () => {
const unsub1 = manager.subscribe("ao-177", undefined, vi.fn());
expect(mockPtySpawn).toHaveBeenCalledTimes(1);
unsub1();

// A reconnect lands halfway through the grace window.
vi.advanceTimersByTime(15_000);
const unsub2 = manager.subscribe("ao-177", undefined, vi.fn());

// No new PTY allocated and the original is still alive — reconnect
// reused the existing slot, which is the whole point of the fix.
expect(mockPtySpawn).toHaveBeenCalledTimes(1);
expect(ptyKill).not.toHaveBeenCalled();

// After the original timer's deadline passes, the cancelled timer must
// not retroactively kill the still-subscribed PTY.
vi.advanceTimersByTime(60_000);
expect(ptyKill).not.toHaveBeenCalled();

unsub2();
});

it("only kills once when last subscriber leaves and grace expires", () => {
const unsub1 = manager.subscribe("ao-177", undefined, vi.fn());
const unsub2 = manager.subscribe("ao-177", undefined, vi.fn());

unsub1();
// First unsubscribe is not the last subscriber — no timer scheduled.
vi.advanceTimersByTime(60_000);
expect(ptyKill).not.toHaveBeenCalled();

unsub2();
// Now we are at zero subscribers — schedule the grace timer.
vi.advanceTimersByTime(30_000);
expect(ptyKill).toHaveBeenCalledTimes(1);
});

it("buffer is preserved across an unsubscribe/resubscribe within grace", () => {
const cb = vi.fn();
const unsub = manager.subscribe("ao-177", undefined, cb);

// Simulate the PTY emitting some output by invoking the captured handler.
const onDataHandler = ptyOnData.mock.calls[0]?.[0] as (data: string) => void;
onDataHandler("hello");

unsub();
vi.advanceTimersByTime(10_000);

// The terminal entry must still exist — buffered output is still there.
expect(manager.getBuffer("ao-177")).toBe("hello");

// After full grace expiry, the entry is cleaned up.
vi.advanceTimersByTime(20_001);
expect(manager.getBuffer("ao-177")).toBe("");
});

it("double-unsub after grace eviction does not re-arm the timer", () => {
const unsub = manager.subscribe("ao-177", undefined, vi.fn());

// First unsub schedules the grace timer; expiry kills + evicts the entry.
unsub();
vi.advanceTimersByTime(30_000);
expect(ptyKill).toHaveBeenCalledTimes(1);
expect(manager.getBuffer("ao-177")).toBe(""); // entry evicted

// A defensive second unsub (e.g. React strict-mode double-invoke) must
// be a no-op — no new timer scheduled on the dead terminal closure.
unsub();
vi.advanceTimersByTime(30_000 * 2);
expect(ptyKill).toHaveBeenCalledTimes(1); // still exactly one kill
});
});
60 changes: 49 additions & 11 deletions packages/web/server/mux-websocket.ts
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,15 @@ interface ManagedTerminal {
* reachable for up to 5 s after teardown.
*/
resetTimer?: ReturnType<typeof setTimeout>;
/**
* Pending kill scheduled when the last subscriber leaves. macOS never
* recycles ptmx slot numbers within a boot, so churning PTYs across
* tab reloads / sleep-wake / network blips burns slots and eventually
* exhausts kern.tty.ptmx_max=511 (issue #1718). A grace window lets a
* quick reconnect reuse the existing PTY instead of allocating a fresh
* one.
*/
idleGraceTimer: ReturnType<typeof setTimeout> | null;
}

const RING_BUFFER_MAX = 50 * 1024; // 50KB max per terminal
Expand All @@ -217,6 +226,7 @@ const MAX_REATTACH_ATTEMPTS = 3;
* crashes hours later gets a fresh retry budget.
*/
const REATTACH_RESET_GRACE_MS = 5_000;
const PTY_IDLE_GRACE_MS = 30_000; // delay before killing an unsubscribed PTY (issue #1718)

/**
* TerminalManager manages PTY processes independently of WebSocket connections.
Expand Down Expand Up @@ -265,6 +275,7 @@ export class TerminalManager {
buffer: [],
bufferBytes: 0,
reattachAttempts: 0,
idleGraceTimer: null,
};
this.terminals.set(key, terminal);
}
Expand Down Expand Up @@ -444,21 +455,48 @@ export class TerminalManager {
terminal.subscribers.add(callback);
if (onExit) terminal.exitCallbacks.add(onExit);

// A subscriber arrived in time — cancel any pending idle-grace kill
// (e.g. tab reload landed within the grace window after a previous
// unsubscribe). The PTY is preserved; no new ptmx slot is allocated.
if (terminal.idleGraceTimer) {
clearTimeout(terminal.idleGraceTimer);
terminal.idleGraceTimer = null;
}

// Return unsubscribe function
return () => {
terminal.subscribers.delete(callback);
if (onExit) terminal.exitCallbacks.delete(onExit);
// Kill PTY and clean up when the last subscriber leaves
if (terminal.subscribers.size === 0) {
if (terminal.resetTimer) {
clearTimeout(terminal.resetTimer);
terminal.resetTimer = undefined;
}
if (terminal.pty) {
terminal.pty.kill();
terminal.pty = null;
}
this.terminals.delete(key);
// Defer PTY kill when the last subscriber leaves. macOS burns a ptmx
// slot per allocation for the lifetime of a boot (issue #1718), so
// killing immediately on every disconnect drains kern.tty.ptmx_max.
// A grace window lets a quick reconnect reuse the existing PTY.
// The reattach-counter resetTimer (#1640) is cleared at kill time so
// its closure doesn't keep the evicted terminal reachable.
// The terminals.has(key) guard makes unsub idempotent: once the grace
// timer has fired and evicted the entry, a defensive double-unsub
// (React strict-mode double-invoke, redundant teardown) must not arm
// a fresh 30 s timer on the dead `terminal` closure.
if (
terminal.subscribers.size === 0 &&
terminal.idleGraceTimer === null &&
this.terminals.has(key)
) {
terminal.idleGraceTimer = setTimeout(() => {
terminal.idleGraceTimer = null;
// Re-check: a subscriber may have arrived during the window.
if (terminal.subscribers.size > 0) return;
if (terminal.resetTimer) {
clearTimeout(terminal.resetTimer);
terminal.resetTimer = undefined;
}
if (terminal.pty) {
terminal.pty.kill();
terminal.pty = null;
}
this.terminals.delete(key);
}, PTY_IDLE_GRACE_MS);
terminal.idleGraceTimer.unref();
}
};
Comment thread
harshitsinghbhandari marked this conversation as resolved.
}
Expand Down
Loading