diff --git a/cmd/entire/cli/strategy/manual_commit_hooks.go b/cmd/entire/cli/strategy/manual_commit_hooks.go index 32da8ad64..c3b3f90c7 100644 --- a/cmd/entire/cli/strategy/manual_commit_hooks.go +++ b/cmd/entire/cli/strategy/manual_commit_hooks.go @@ -661,6 +661,8 @@ type postCommitActionHandler struct { shadowBranchesToDelete map[string]struct{} committedFileSet map[string]struct{} hasNew bool + hasTranscriptGrowth bool // true when state's live transcript advanced past CheckpointTranscriptStart + hasShadowContributions bool // true when SaveStep ran in this checkpoint window (state.StepCount > 0) filesTouchedBefore []string sessionsWithCommittedFiles int // number of processable sessions that have tracked files @@ -746,10 +748,11 @@ func (h *postCommitActionHandler) HandleCondenseIfFilesTouched(state *session.St } // shouldCondenseWithOverlapCheck returns true if the session should be condensed -// into this commit. Active sessions with recent interaction condense unless they -// have no tracked files and another session claims the committed files (read-only -// gate). Stale ACTIVE and IDLE/ENDED sessions require file overlap evidence -// between tracked files and committed files. +// into this commit. Active sessions with recent interaction condense if they +// have evidence of work in this checkpoint window — either tracked files or +// transcript growth past the last condensation point. Stale ACTIVE and +// IDLE/ENDED sessions require file overlap evidence between tracked files +// and committed files. func (h *postCommitActionHandler) shouldCondenseWithOverlapCheck(isActive bool, lastInteraction *time.Time) bool { if !h.hasNew { return false @@ -759,22 +762,42 @@ func (h *postCommitActionHandler) shouldCondenseWithOverlapCheck(isActive bool, // (added trailer). The overlap check is only meaningful when we need // heuristic evidence that a commit was related to the session. // - // Exception: when another session's tracked files overlap with the - // committed files, skip this ACTIVE session if it has no tracked files - // itself. This prevents read-only sessions (e.g., codex exec from tools - // like summarize) from being condensed when a different session's commit - // triggers PostCommit. When no other session claims the committed files, - // the ACTIVE session is assumed to own the commit. + // Exception: skip ACTIVE sessions that have no evidence of work in this + // checkpoint window — neither tracked files nor transcript growth past + // CheckpointTranscriptStart. This catches read-only sessions (e.g., + // codex exec from summarize tooling) and stale-but-still-ACTIVE sessions + // (e.g., a Codex shell where the user typed "exit" hours ago but the + // process is still alive). Without this check, those sessions would be + // condensed onto another session's commit and inherit its committed + // files via filterFilesTouched's evidence-of-work fallback. // - // We check LastInteractionTime to avoid condensing stale ACTIVE sessions - // (agent killed without Stop hook) into every subsequent commit. A stale - // session has no recent interaction and falls through to the overlap check. + // We check LastInteractionTime to avoid condensing very stale ACTIVE + // sessions (agent killed without Stop hook) into every subsequent + // commit. A very stale session has no recent interaction and falls + // through to the overlap check. if isActive && isRecentInteraction(lastInteraction) { - if h.sessionsWithCommittedFiles > 0 && len(h.filesTouchedBefore) == 0 { - logging.Debug(h.ctx, "post-commit: skipping read-only ACTIVE session (no tracked files, other sessions claim committed files)", - slog.Int("sessions_with_committed_files", h.sessionsWithCommittedFiles), - ) - return false + if len(h.filesTouchedBefore) == 0 { + // No tracked files for this session. Skip when EITHER: + // (a) another session's tracked files overlap with the + // committed set — that session "owns" the commit, and + // this one is read-only relative to it (e.g. codex exec + // summarize triggered alongside the real edit session); + // (b) this session has no evidence of work in the current + // checkpoint window — no transcript growth past + // CheckpointTranscriptStart and no SaveStep contributions + // (StepCount == 0). This catches stale-but-still-ACTIVE + // sessions like a Codex shell where the user typed "exit" + // hours ago but the process is still alive. Without (b), + // such a session would slip through and inherit the + // committed files via filterFilesTouched's fallback. + if h.sessionsWithCommittedFiles > 0 || (!h.hasTranscriptGrowth && !h.hasShadowContributions) { + logging.Debug(h.ctx, "post-commit: skipping ACTIVE session with no tracked files", + slog.Int("sessions_with_committed_files", h.sessionsWithCommittedFiles), + slog.Bool("has_transcript_growth", h.hasTranscriptGrowth), + slog.Bool("has_shadow_contributions", h.hasShadowContributions), + ) + return false + } } return true } @@ -854,6 +877,29 @@ func isRecentInteraction(lastInteraction *time.Time) bool { return lastInteraction != nil && time.Since(*lastInteraction) < activeSessionInteractionThreshold } +// liveTranscriptGrew reports whether the session's live transcript file has +// grown beyond the size captured at the last condensation +// (CheckpointTranscriptSize). When the session has never been condensed +// (CheckpointTranscriptSize == 0), any non-empty transcript counts as +// growth — the agent has produced *some* content this checkpoint window. +// +// Agent-agnostic: relies only on file size, so it works for agents without +// a TranscriptAnalyzer implementation. Returns false on stat errors or +// when no transcript path is recorded. +func liveTranscriptGrew(state *SessionState) bool { + if state == nil || state.TranscriptPath == "" { + return false + } + info, err := os.Stat(state.TranscriptPath) + if err != nil { + return false + } + if state.CheckpointTranscriptSize > 0 { + return info.Size() > state.CheckpointTranscriptSize + } + return info.Size() > 0 +} + func (h *postCommitActionHandler) HandleDiscardIfNoFiles(state *session.State) error { if len(state.FilesTouched) == 0 { logging.Debug(logging.WithComponent(h.ctx, "checkpoint"), "post-commit: skipping empty ended session (no files to condense)", @@ -1227,6 +1273,24 @@ func (s *ManualCommitStrategy) postCommitProcessSession( filesTouchedBefore = make([]string, len(state.FilesTouched)) copy(filesTouchedBefore, state.FilesTouched) } + + // Probe the live transcript for growth so the gate can distinguish a + // stale-but-still-ACTIVE session (transcript unchanged since last + // condensation) from a session genuinely doing work this turn. We use + // file-size against state.CheckpointTranscriptSize so this works for + // any agent that writes a transcript file, even those without a + // TranscriptAnalyzer implementation (e.g. vogon in the canary suite). + // Cost is a single os.Stat. + var hasTranscriptGrowth bool + if state.Phase.IsActive() { + hasTranscriptGrowth = liveTranscriptGrew(state) + } + // Track shadow-branch contributions explicitly: if SaveStep ran since + // the last condensation, StepCount is > 0 and we must condense even + // when the agent's transcript hasn't grown further (e.g., a Claude + // session whose Stop hook fired and populated the shadow branch but + // whose state.FilesTouched got cleared by a subsequent flow). + hasShadowContributions := state.StepCount > 0 checkContentSpan.End() logging.Debug(logCtx, "post-commit: carry-forward prep", @@ -1234,6 +1298,7 @@ func (s *ManualCommitStrategy) postCommitProcessSession( slog.Bool("is_active", state.Phase.IsActive()), slog.String("transcript_path", state.TranscriptPath), slog.Int("files_touched_before", len(filesTouchedBefore)), + slog.Bool("transcript_growth", hasTranscriptGrowth), slog.Any("files", filesTouchedBefore), ) @@ -1252,6 +1317,8 @@ func (s *ManualCommitStrategy) postCommitProcessSession( shadowBranchesToDelete: shadowBranchesToDelete, committedFileSet: committedFileSet, hasNew: hasNew, + hasTranscriptGrowth: hasTranscriptGrowth, + hasShadowContributions: hasShadowContributions, filesTouchedBefore: filesTouchedBefore, headTree: headTree, parentTree: parentTree, diff --git a/cmd/entire/cli/strategy/phase_postcommit_test.go b/cmd/entire/cli/strategy/phase_postcommit_test.go index 7aa771afd..a58c78797 100644 --- a/cmd/entire/cli/strategy/phase_postcommit_test.go +++ b/cmd/entire/cli/strategy/phase_postcommit_test.go @@ -3,8 +3,11 @@ package strategy import ( "bytes" "context" + "encoding/json" "os" "path/filepath" + "strconv" + "strings" "testing" "time" @@ -261,6 +264,144 @@ func TestPostCommit_ReadOnlyActiveSessionNotCondensed(t *testing.T) { "shadow branch should be preserved — uncondensed ACTIVE session still references it") } +// TestPostCommit_StaleActiveSession_DoesNotInheritOtherSessionFiles is a +// regression test for the multi-session inheritance bug observed in the wild: +// a recent ACTIVE session that did no real work (e.g. a Codex shell where +// the user typed "exit" hours ago — phase still ACTIVE, LastInteractionTime +// within 24h, no FilesTouched, transcript only contains a startup banner) +// gets condensed onto a commit authored entirely by a different ACTIVE +// session and inherits that commit's file list via filterFilesTouched's +// evidence-of-work fallback. +// +// The bug requires sessionsWithCommittedFiles to be 0 at gate-check time — +// i.e. no session's state.FilesTouched overlaps with the committed set — +// because the read-only-skip in shouldCondenseWithOverlapCheck only fires +// when sessionsWithCommittedFiles > 0. In production this happens when the +// working session's state.FilesTouched on disk is empty at the moment +// PostCommit runs (e.g. SaveStep populated the shadow branch but tool-use +// hooks haven't merged the file list back into the on-disk state, or +// state.FilesTouched was cleared by a previous condensation and the next +// SaveStep hasn't run yet). The shadow branch still has the actual content +// so the working session condenses fine, but the stale session — with no +// shadow branch contribution and no real transcript activity — slips +// through the gate and inherits the committed files via filterFilesTouched. +// +// Expected behavior: the stale session must be skipped or, if it is +// condensed, must not inherit a different session's file list. +func TestPostCommit_StaleActiveSession_DoesNotInheritOtherSessionFiles(t *testing.T) { + dir := setupGitRepo(t) + t.Chdir(dir) + + repo, err := git.PlainOpen(dir) + require.NoError(t, err) + + s := &ManualCommitStrategy{} + workingSessionID := "claude-working" + staleSessionID := "codex-stale" + + // Working session: ACTIVE with a SaveStep-populated shadow branch + // (analogous to a Claude session mid-edit) but state.FilesTouched on disk + // is empty at gate-check time. See doc comment above for why this + // matters. + setupSessionWithCheckpoint(t, s, repo, dir, workingSessionID) + workingState, err := s.loadSessionState(context.Background(), workingSessionID) + require.NoError(t, err) + now := time.Now() + workingState.Phase = session.PhaseActive + workingState.LastInteractionTime = &now + workingState.FilesTouched = nil + require.NoError(t, s.saveSessionState(context.Background(), workingState)) + + // Stale session: ACTIVE in the same worktree at the same base commit, + // LastInteractionTime within 24h (so isRecentInteraction == true). No + // shadow branch contribution. Transcript file exists with content from + // before the last condensation (CheckpointTranscriptStart matches the + // current line count — no growth this checkpoint window), mirroring the + // real-world Codex case where the user typed "exit" hours ago and the + // session has been idle since. + staleTranscript := filepath.Join(dir, "stale-transcript.jsonl") + staleTranscriptLines := []string{ + `{"type":"event_msg","payload":{"type":"session_meta","session_id":"codex","tool":"codex"}}`, + `{"type":"response_item","payload":{"type":"message","role":"user","content":[{"type":"input_text","text":"exit"}]}}`, + } + staleTranscriptBytes := []byte(strings.Join(staleTranscriptLines, "\n") + "\n") + require.NoError(t, os.WriteFile(staleTranscript, staleTranscriptBytes, 0o644)) + + staleState := &SessionState{ + SessionID: staleSessionID, + BaseCommit: workingState.BaseCommit, + WorktreePath: workingState.WorktreePath, + WorktreeID: workingState.WorktreeID, + StartedAt: now, + Phase: session.PhaseActive, + LastInteractionTime: &now, + StepCount: 0, + AgentType: agent.AgentTypeCodex, + TranscriptPath: staleTranscript, + CheckpointTranscriptStart: len(staleTranscriptLines), + CheckpointTranscriptSize: int64(len(staleTranscriptBytes)), + } + require.NoError(t, s.saveSessionState(context.Background(), staleState)) + + commitWithCheckpointTrailer(t, repo, dir, "ddccbbaa1122") + + require.NoError(t, s.PostCommit(context.Background())) + + // Working session: should be condensed onto this checkpoint. + workingState, err = s.loadSessionState(context.Background(), workingSessionID) + require.NoError(t, err) + assert.Equal(t, id.CheckpointID("ddccbbaa1122"), workingState.LastCheckpointID, + "working session should be condensed onto this checkpoint") + + // The bug: stale session also gets condensed and inherits the working + // session's committed files. Either outcome below is a fix: + // (a) the gate skips the stale session entirely (LastCheckpointID empty), or + // (b) the stale session is condensed but its metadata blob does not + // claim files it didn't touch. + staleState, err = s.loadSessionState(context.Background(), staleSessionID) + require.NoError(t, err) + if !staleState.LastCheckpointID.IsEmpty() { + // Gate let it through — verify it didn't inherit. Read the metadata + // blob from entire/checkpoints/v1 and check files_touched. + staleMetadata := readCondensedSessionMetadata(t, repo, "ddccbbaa1122", staleSessionID) + assert.Empty(t, staleMetadata.FilesTouched, + "stale session must not inherit committed files from another session "+ + "(got files_touched=%v)", staleMetadata.FilesTouched) + } +} + +// readCondensedSessionMetadata loads the per-session metadata.json blob from +// the entire/checkpoints/v1 tree for a given checkpoint and session id. +// Walks the indexed session subdirs (0/, 1/, ...) and returns the one whose +// session_id field matches. +func readCondensedSessionMetadata(t *testing.T, repo *git.Repository, checkpointIDStr, sessionID string) checkpoint.CommittedMetadata { + t.Helper() + ref, err := repo.Reference(plumbing.NewBranchReferenceName(paths.MetadataBranchName), true) + require.NoError(t, err, "metadata branch should exist") + commit, err := repo.CommitObject(ref.Hash()) + require.NoError(t, err) + tree, err := commit.Tree() + require.NoError(t, err) + + cpDir := checkpointIDStr[:2] + "/" + checkpointIDStr[2:] + for i := range 10 { + blobPath := cpDir + "/" + strconv.Itoa(i) + "/metadata.json" + file, err := tree.File(blobPath) + if err != nil { + break + } + contents, err := file.Contents() + require.NoError(t, err) + var meta checkpoint.CommittedMetadata + require.NoError(t, json.Unmarshal([]byte(contents), &meta)) + if meta.SessionID == sessionID { + return meta + } + } + t.Fatalf("no metadata.json found for session %s under checkpoint %s", sessionID, checkpointIDStr) + return checkpoint.CommittedMetadata{} +} + // TestPostCommit_CondensationFailure_PreservesShadowBranch verifies that when // condensation fails (corrupted shadow branch), BaseCommit is NOT updated. func TestPostCommit_CondensationFailure_PreservesShadowBranch(t *testing.T) {