From 37b55bee1f4bfcaf74642ea10bb78cf9c936c94d Mon Sep 17 00:00:00 2001 From: computermode <2917645+computermode@users.noreply.github.com> Date: Wed, 13 May 2026 17:59:03 -0700 Subject: [PATCH 1/3] Speed up v2 pre-push recovery on large repos MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three independent fixes that drop entiredb's pre-push from ~60s to ~5s: - Probe rotation archive refs with --filter=blob:none and top up only the matched archive's blobs after pick. Falls back to a full fetch on any first-fetch error (e.g. server without uploadpack.allowFilter). - Bound the per-archive ancestry walk to 1s. On repos whose archives are fully disjoint from local /full/current we'd otherwise burn seconds concluding nothing matches. A future /full/root anchor will replace this with a constant-time lookup. - Replace the v1↔v2 cross-check in the migration hint with a single "does v2 /main exist?" ref probe. The hint now only fires when migration was never run; partial migrations are no longer flagged (drift is acceptable post-migration). Co-Authored-By: Claude Opus 4.7 (1M context) Entire-Checkpoint: a15ddf881892 --- cmd/entire/cli/strategy/push_common.go | 45 ++------ cmd/entire/cli/strategy/push_common_test.go | 102 +---------------- cmd/entire/cli/strategy/push_v2.go | 95 ++++++++++++++-- cmd/entire/cli/strategy/push_v2_test.go | 118 ++++++++++++++++++++ 4 files changed, 219 insertions(+), 141 deletions(-) diff --git a/cmd/entire/cli/strategy/push_common.go b/cmd/entire/cli/strategy/push_common.go index 96b344831a..bf8f4d63cd 100644 --- a/cmd/entire/cli/strategy/push_common.go +++ b/cmd/entire/cli/strategy/push_common.go @@ -15,6 +15,7 @@ import ( "github.com/entireio/cli/cmd/entire/cli/checkpoint" "github.com/entireio/cli/cmd/entire/cli/checkpoint/remote" "github.com/entireio/cli/cmd/entire/cli/logging" + "github.com/entireio/cli/cmd/entire/cli/paths" "github.com/entireio/cli/cmd/entire/cli/settings" "github.com/go-git/go-git/v6" @@ -158,56 +159,30 @@ func printSettingsCommitHint(ctx context.Context, target string) { }) } -// printCheckpointsV2MigrationHint prints a hint when the committed project -// settings enable checkpoints_version: 2 AND there are v1 checkpoints that have -// not yet been mirrored into v2. Suppressed when v2 already has every v1 -// checkpoint (nothing to migrate) so the hint does not become noise once the -// migration is done. +// printCheckpointsV2MigrationHint prints a hint when checkpoints_version: 2 is +// committed but the local repo has no v2 /main ref (migration was never run). +// Partial migrations are not detected — drift between v1 and v2 is acceptable +// post-migration. func printCheckpointsV2MigrationHint(ctx context.Context) { checkpointsV2MigrationHintOnce.Do(func() { if !isCheckpointsVersion2Committed(ctx) { return } - if !hasUnmigratedV1Checkpoints(ctx) { + if v2MainRefExists(ctx) { return } - fmt.Fprintln(os.Stderr, "[entire] Note: .entire/settings.json sets checkpoints_version: 2, but there are some v1 checkpoints that have not been migrated to v2.") + fmt.Fprintln(os.Stderr, "[entire] Note: .entire/settings.json sets checkpoints_version: 2, but no v2 /main ref was found in this repo.") fmt.Fprintln(os.Stderr, "[entire] Run 'entire migrate --checkpoints v2' to migrate missing checkpoints to v2.") }) } -// hasUnmigratedV1Checkpoints reports whether any v1 checkpoint has no matching -// entry in v2. Any failure opening the repo or listing either store is treated -// as "no migration needed" so we stay silent instead of printing a speculative -// hint — the hint is advisory and should never be the reason a push gets noisy. -func hasUnmigratedV1Checkpoints(ctx context.Context) bool { +func v2MainRefExists(ctx context.Context) bool { repo, err := OpenRepository(ctx) if err != nil { return false } - v1Store := checkpoint.NewGitStore(repo) - v1List, err := v1Store.ListCommitted(ctx) - if err != nil || len(v1List) == 0 { - return false - } - v2List, err := checkpoint.NewV2GitStore(repo, "").ListCommitted(ctx) - if err != nil { - return false - } - v2Set := make(map[string]struct{}, len(v2List)) - for _, info := range v2List { - v2Set[info.CheckpointID.String()] = struct{}{} - } - for _, info := range v1List { - if _, ok := v2Set[info.CheckpointID.String()]; !ok { - summary, readErr := v1Store.ReadCommitted(ctx, info.CheckpointID) - if readErr != nil || summary == nil { - continue - } - return true - } - } - return false + _, err = repo.Reference(plumbing.ReferenceName(paths.V2MainRefName), true) + return err == nil } // isCheckpointRemoteCommitted returns true if the committed .entire/settings.json diff --git a/cmd/entire/cli/strategy/push_common_test.go b/cmd/entire/cli/strategy/push_common_test.go index 9085bf365a..e89e76a146 100644 --- a/cmd/entire/cli/strategy/push_common_test.go +++ b/cmd/entire/cli/strategy/push_common_test.go @@ -15,11 +15,9 @@ import ( "github.com/entireio/cli/cmd/entire/cli/checkpoint/id" "github.com/entireio/cli/cmd/entire/cli/paths" "github.com/entireio/cli/cmd/entire/cli/testutil" - "github.com/entireio/cli/redact" "github.com/go-git/go-git/v6" "github.com/go-git/go-git/v6/plumbing" - "github.com/go-git/go-git/v6/plumbing/filemode" "github.com/go-git/go-git/v6/plumbing/object" "github.com/stretchr/testify/assert" @@ -1269,74 +1267,22 @@ func setupCheckpointsV2CommittedRepo(t *testing.T) *git.Repository { return repo } -// writeV1Checkpoint writes a minimal checkpoint to the v1 metadata branch. -func writeV1Checkpoint(t *testing.T, repo *git.Repository, cpID id.CheckpointID, sessionID string) { - t.Helper() - err := checkpoint.NewGitStore(repo).WriteCommitted(context.Background(), checkpoint.WriteCommittedOptions{ - CheckpointID: cpID, - SessionID: sessionID, - Strategy: "manual-commit", - Transcript: redact.AlreadyRedacted([]byte(`{"from":"` + sessionID + `"}`)), - AuthorName: "Test", - AuthorEmail: "test@test.com", - }) - require.NoError(t, err) -} - -func writeMalformedV1CheckpointWithoutSummary(t *testing.T, repo *git.Repository, cpID id.CheckpointID) { - t.Helper() - ctx := context.Background() - - blobHash, err := checkpoint.CreateBlobFromContent(repo, []byte("transcript without root metadata")) - require.NoError(t, err) - - treeHash, err := checkpoint.BuildTreeFromEntries(ctx, repo, map[string]object.TreeEntry{ - cpID.Path() + "/0/" + paths.TranscriptFileName: { - Mode: filemode.Regular, - Hash: blobHash, - }, - }) - require.NoError(t, err) - - commitHash, err := checkpoint.CreateCommit(ctx, repo, treeHash, plumbing.ZeroHash, "malformed v1 checkpoint", "Test", "test@test.com") - require.NoError(t, err) - - refName := plumbing.NewBranchReferenceName(paths.MetadataBranchName) - require.NoError(t, repo.Storer.SetReference(plumbing.NewHashReference(refName, commitHash))) -} - func TestPrintCheckpointsV2MigrationHint(t *testing.T) { - t.Run("suppressed when no v1 checkpoints exist", func(t *testing.T) { - checkpointsV2MigrationHintOnce = sync.Once{} - setupCheckpointsV2CommittedRepo(t) - - restore := captureStderr(t) - printCheckpointsV2MigrationHint(context.Background()) - output := restore() - - assert.Empty(t, output, "hint should not print when there are no v1 checkpoints to migrate") - }) - - t.Run("suppressed when every v1 checkpoint is already in v2", func(t *testing.T) { + t.Run("suppressed when v2 /main exists", func(t *testing.T) { checkpointsV2MigrationHintOnce = sync.Once{} repo := setupCheckpointsV2CommittedRepo(t) - - cpID := id.MustCheckpointID("aabbccddeeff") - writeV1Checkpoint(t, repo, cpID, "session-1") - writeV2Checkpoint(t, repo, cpID, "session-1") + writeV2Checkpoint(t, repo, id.MustCheckpointID("aabbccddeeff"), "session-1") restore := captureStderr(t) printCheckpointsV2MigrationHint(context.Background()) output := restore() - assert.Empty(t, output, "hint should not print once v2 already mirrors every v1 checkpoint") + assert.Empty(t, output, "hint should not print once v2 /main has been populated") }) - t.Run("prints when v1 has checkpoints not in v2", func(t *testing.T) { + t.Run("prints when v2 /main is missing", func(t *testing.T) { checkpointsV2MigrationHintOnce = sync.Once{} - repo := setupCheckpointsV2CommittedRepo(t) - - writeV1Checkpoint(t, repo, id.MustCheckpointID("111111111111"), "session-1") + setupCheckpointsV2CommittedRepo(t) restore := captureStderr(t) printCheckpointsV2MigrationHint(context.Background()) @@ -1347,9 +1293,7 @@ func TestPrintCheckpointsV2MigrationHint(t *testing.T) { t.Run("prints only once per process", func(t *testing.T) { checkpointsV2MigrationHintOnce = sync.Once{} - repo := setupCheckpointsV2CommittedRepo(t) - - writeV1Checkpoint(t, repo, id.MustCheckpointID("222222222222"), "session-2") + setupCheckpointsV2CommittedRepo(t) restore := captureStderr(t) printCheckpointsV2MigrationHint(context.Background()) @@ -1361,40 +1305,6 @@ func TestPrintCheckpointsV2MigrationHint(t *testing.T) { }) } -func TestHasUnmigratedV1Checkpoints(t *testing.T) { - t.Run("false when no v1 checkpoints exist", func(t *testing.T) { - setupCheckpointsV2CommittedRepo(t) - assert.False(t, hasUnmigratedV1Checkpoints(context.Background())) - }) - - t.Run("false when every v1 checkpoint is in v2", func(t *testing.T) { - repo := setupCheckpointsV2CommittedRepo(t) - cpID := id.MustCheckpointID("333333333333") - writeV1Checkpoint(t, repo, cpID, "session-a") - writeV2Checkpoint(t, repo, cpID, "session-a") - - assert.False(t, hasUnmigratedV1Checkpoints(context.Background())) - }) - - t.Run("true when at least one v1 checkpoint is missing from v2", func(t *testing.T) { - repo := setupCheckpointsV2CommittedRepo(t) - mirrored := id.MustCheckpointID("444444444444") - missing := id.MustCheckpointID("555555555555") - writeV1Checkpoint(t, repo, mirrored, "session-b") - writeV2Checkpoint(t, repo, mirrored, "session-b") - writeV1Checkpoint(t, repo, missing, "session-c") - - assert.True(t, hasUnmigratedV1Checkpoints(context.Background())) - }) - - t.Run("false when only malformed v1 checkpoint entries are missing from v2", func(t *testing.T) { - repo := setupCheckpointsV2CommittedRepo(t) - writeMalformedV1CheckpointWithoutSummary(t, repo, id.MustCheckpointID("666666666666")) - - assert.False(t, hasUnmigratedV1Checkpoints(context.Background())) - }) -} - // captureStderr redirects os.Stderr to a pipe and returns a function that restores // stderr and returns the captured output. Must be called on the main goroutine // (not parallel-safe). Uses t.Cleanup as a safety net to restore stderr and close diff --git a/cmd/entire/cli/strategy/push_v2.go b/cmd/entire/cli/strategy/push_v2.go index 9e7f53c90b..f10ecf8e11 100644 --- a/cmd/entire/cli/strategy/push_v2.go +++ b/cmd/entire/cli/strategy/push_v2.go @@ -566,15 +566,24 @@ func fetchRelatedRemoteRotationArchive(ctx context.Context, fetchTarget string, archiveTmpRefs = append(archiveTmpRefs, archiveTmpRef) } - // These archive commits are read immediately through go-git for tree - // flattening, so fetch the complete refs rather than blobless packfiles. - if output, fetchErr := remote.Fetch(ctx, remote.FetchOptions{ - Remote: fetchTarget, - RefSpecs: refSpecs, - NoTags: true, - NoFilter: true, - ExtraArgs: []string{"--no-write-fetch-head"}, - }); fetchErr != nil { + // Probe with --filter=blob:none; only the matched archive's blobs are topped + // up after we pick it. Fall back to an unfiltered fetch on any first-fetch + // error (e.g. server without uploadpack.allowFilter). + fetch := func(extra ...string) ([]byte, error) { + args := append([]string{"--no-write-fetch-head"}, extra...) + return remote.Fetch(ctx, remote.FetchOptions{ + Remote: fetchTarget, + RefSpecs: refSpecs, + NoTags: true, + NoFilter: true, + ExtraArgs: args, + }) + } + output, fetchErr := fetch("--filter=blob:none") + if fetchErr != nil { + output, fetchErr = fetch() + } + if fetchErr != nil { if repo, openErr := OpenRepository(ctx); openErr == nil { cleanupFetchedArchiveTmpRefs(repo, archiveTmpRefs) } @@ -594,17 +603,83 @@ func fetchRelatedRemoteRotationArchive(ctx context.Context, fetchTarget string, if !ok { return fetchedRemoteRotationArchive{}, errors.New("failed to read local /full/current history") } + + // Wall-clock bound on the ancestry walk: on repos whose archives are fully + // disjoint from local /full/current we'd otherwise burn seconds per push + // concluding nothing matches. A future /full/root anchor will replace this + // with a constant-time lookup. + walkStart := time.Now() + walked := 0 for _, archive := range archives { + if time.Since(walkStart) > rotationAncestryWalkBudget { + break + } + walked++ fetched, err := readFetchedRemoteRotationArchive(repo, archive) if err != nil { return fetchedRemoteRotationArchive{}, err } if archiveSharesHistoryWithCurrentGeneration(ctx, repo, localCurrentAncestors, fetched.ref.Hash()) { + if err := topUpMatchedArchiveBlobs(ctx, fetchTarget, repo, fetched.tree); err != nil { + return fetchedRemoteRotationArchive{}, err + } tmpRefsToCleanup = removeRef(tmpRefsToCleanup, fetched.tmpRefName) return fetched, nil } } - return fetchedRemoteRotationArchive{}, errors.New("no remote archive shares history with local /full/current") + err = errors.New("no remote archive shares history with local /full/current") + if walked < len(archives) { + err = fmt.Errorf("%w (walk budget exhausted after %d/%d archives)", err, walked, len(archives)) + } + return fetchedRemoteRotationArchive{}, err +} + +// rotationAncestryWalkBudget caps wall-clock for the per-archive ancestry +// walk. Exposed as a var so tests can lower it. +var rotationAncestryWalkBudget = 1 * time.Second //nolint:gochecknoglobals // test override + +// topUpMatchedArchiveBlobs ensures every blob in the matched archive's tree +// is in the local pack directory. Downstream reads (updateGenerationTimestamps, +// the push that follows) go through go-git's BlobObject, which scans only the +// local pack directory and does not follow file-transport alternates — so we +// must pull missing blobs explicitly via `git fetch-pack` rather than rely on +// FetchingTree's cat-file second-opinion. +func topUpMatchedArchiveBlobs(ctx context.Context, fetchTarget string, repo *git.Repository, archiveTree *object.Tree) error { + seen := make(map[plumbing.Hash]struct{}) + var missing []string + var walk func(t *object.Tree) error + walk = func(t *object.Tree) error { + for _, entry := range t.Entries { + if entry.Mode == filemode.Dir { + sub, err := repo.TreeObject(entry.Hash) + if err != nil { + return fmt.Errorf("read subtree %s: %w", entry.Name, err) + } + if err := walk(sub); err != nil { + return err + } + continue + } + if _, ok := seen[entry.Hash]; ok { + continue + } + seen[entry.Hash] = struct{}{} + if repo.Storer.HasEncodedObject(entry.Hash) != nil { + missing = append(missing, entry.Hash.String()) + } + } + return nil + } + if err := walk(archiveTree); err != nil { + return err + } + if len(missing) == 0 { + return nil + } + if err := remote.FetchBlobs(ctx, fetchTarget, missing); err != nil { + return fmt.Errorf("fetch matched archive blobs: %w", err) + } + return nil } func archiveTmpRefName(archive string) plumbing.ReferenceName { diff --git a/cmd/entire/cli/strategy/push_v2_test.go b/cmd/entire/cli/strategy/push_v2_test.go index 21eafd9ede..17213f2c41 100644 --- a/cmd/entire/cli/strategy/push_v2_test.go +++ b/cmd/entire/cli/strategy/push_v2_test.go @@ -1169,3 +1169,121 @@ func TestFetchAndMergeRef_RemoteRotatedMultipleTimesUsesRelatedArchive(t *testin assert.True(t, refContainsV2Checkpoint(t, bareRepo, archive2Ref, remoteGen2CP), "remote generation 2 checkpoint should remain in archive 2") } + +// TestFetchAndMergeRef_RotationConflict_BloblessProbeAvoidsUnmatchedBlobs +// verifies that rotation recovery probes archive refs with --filter=blob:none +// and only tops up blobs for the archive it matches. Multiple unrelated +// archives must not have their blobs fetched. +// Not parallel: uses t.Chdir() +func TestFetchAndMergeRef_RotationConflict_BloblessProbeAvoidsUnmatchedBlobs(t *testing.T) { + ctx := context.Background() + fullCurrentRef := plumbing.ReferenceName(paths.V2FullCurrentRefName) + archive1Ref := plumbing.ReferenceName(paths.V2FullRefPrefix + "0000000000001") + archive2Ref := plumbing.ReferenceName(paths.V2FullRefPrefix + "0000000000002") + sharedCP := id.MustCheckpointID("aabbccddeeff") + remoteGen1CP := id.MustCheckpointID("112233445566") + remoteGen2CP := id.MustCheckpointID("223344556677") + localOnlyCP := id.MustCheckpointID("ffeeddccbbaa") + + bareDir := t.TempDir() + initCmd := exec.CommandContext(ctx, "git", "init", "--bare") + initCmd.Dir = bareDir + initCmd.Env = testutil.GitIsolatedEnv() + require.NoError(t, initCmd.Run()) + enableFilteredFetchServingForTest(t, bareDir) + bareURL := "file://" + bareDir + + localDir := t.TempDir() + testutil.InitRepo(t, localDir) + testutil.WriteFile(t, localDir, "f.txt", "init") + testutil.GitAdd(t, localDir, "f.txt") + testutil.GitCommit(t, localDir, "init") + localRepo, err := git.PlainOpen(localDir) + require.NoError(t, err) + writeV2Checkpoint(t, localRepo, sharedCP, "shared-session") + + pushCurrent := exec.CommandContext(ctx, "git", "push", bareDir, + string(fullCurrentRef)+":"+string(fullCurrentRef)) + pushCurrent.Dir = localDir + require.NoError(t, pushCurrent.Run()) + + remoteDir := t.TempDir() + testutil.InitRepo(t, remoteDir) + testutil.WriteFile(t, remoteDir, "f.txt", "init") + testutil.GitAdd(t, remoteDir, "f.txt") + testutil.GitCommit(t, remoteDir, "init") + fetchCurrent := exec.CommandContext(ctx, "git", "fetch", bareDir, + "+"+string(fullCurrentRef)+":"+string(fullCurrentRef)) + fetchCurrent.Dir = remoteDir + require.NoError(t, fetchCurrent.Run()) + + remoteRepo, err := git.PlainOpen(remoteDir) + require.NoError(t, err) + writeV2Checkpoint(t, remoteRepo, remoteGen1CP, "remote-gen-1") + rotateV2CurrentForTest(t, remoteRepo, archive1Ref) + writeV2Checkpoint(t, remoteRepo, remoteGen2CP, "remote-gen-2") + rotateV2CurrentForTest(t, remoteRepo, archive2Ref) + + pushRotated := exec.CommandContext(ctx, "git", "push", "--force", bareDir, + string(fullCurrentRef)+":"+string(fullCurrentRef), + string(archive1Ref)+":"+string(archive1Ref), + string(archive2Ref)+":"+string(archive2Ref)) + pushRotated.Dir = remoteDir + out, pushErr := pushRotated.CombinedOutput() + require.NoError(t, pushErr, "push rotated state failed: %s", out) + + // Capture representative blob hashes from each archive on the bare. The + // matched archive (archive1) gets its generation.json topped up; the + // unmatched archive (archive2) should never have its remote-gen-2 shard + // blobs pulled to local. + bareRepo, err := git.PlainOpen(bareDir) + require.NoError(t, err) + matchedBlobHash := blobAtTopLevel(t, bareRepo, archive1Ref, paths.GenerationFileName) + unmatchedBlobHash := firstBlobInV2CheckpointShard(t, bareRepo, archive2Ref, remoteGen2CP) + + writeV2Checkpoint(t, localRepo, localOnlyCP, "local-session") + + t.Chdir(localDir) + err = fetchAndMergeRef(ctx, bareURL, fullCurrentRef) + require.NoError(t, err) + + localRepo, err = git.PlainOpen(localDir) + require.NoError(t, err) + + assert.NoError(t, localRepo.Storer.HasEncodedObject(matchedBlobHash), + "matched archive generation.json must be locally available after top-up") + assert.Error(t, localRepo.Storer.HasEncodedObject(unmatchedBlobHash), + "unmatched archive shard blob must NOT be locally available; blobless probe over-fetched") +} + +func blobAtTopLevel(t *testing.T, repo *git.Repository, refName plumbing.ReferenceName, filename string) plumbing.Hash { + t.Helper() + ref, err := repo.Reference(refName, true) + require.NoError(t, err) + commit, err := repo.CommitObject(ref.Hash()) + require.NoError(t, err) + tree, err := commit.Tree() + require.NoError(t, err) + entry, err := tree.FindEntry(filename) + require.NoError(t, err) + return entry.Hash +} + +func firstBlobInV2CheckpointShard(t *testing.T, repo *git.Repository, refName plumbing.ReferenceName, cpID id.CheckpointID) plumbing.Hash { + t.Helper() + ref, err := repo.Reference(refName, true) + require.NoError(t, err) + commit, err := repo.CommitObject(ref.Hash()) + require.NoError(t, err) + tree, err := commit.Tree() + require.NoError(t, err) + shard, err := tree.Tree(cpID.Path()) + require.NoError(t, err) + entries := make(map[string]object.TreeEntry) + require.NoError(t, checkpoint.FlattenTree(repo, shard, "", entries)) + require.NotEmpty(t, entries, "expected at least one blob in shard %s", cpID.Path()) + for _, entry := range entries { + return entry.Hash + } + return plumbing.ZeroHash +} From a014d847161db332bf21e01bb29d904a3e97b33b Mon Sep 17 00:00:00 2001 From: computermode <2917645+computermode@users.noreply.github.com> Date: Wed, 13 May 2026 18:11:22 -0700 Subject: [PATCH 2/3] Tighten comments on v2 push perf fixes Trim verbose doc comments and a redundant inline comment in the new rotation-recovery test. No logic changes. Co-Authored-By: Claude Opus 4.7 (1M context) Entire-Checkpoint: b466825dbff2 --- cmd/entire/cli/strategy/push_common.go | 6 ++---- cmd/entire/cli/strategy/push_v2.go | 24 +++++++++--------------- cmd/entire/cli/strategy/push_v2_test.go | 8 -------- 3 files changed, 11 insertions(+), 27 deletions(-) diff --git a/cmd/entire/cli/strategy/push_common.go b/cmd/entire/cli/strategy/push_common.go index bf8f4d63cd..04c4a28a04 100644 --- a/cmd/entire/cli/strategy/push_common.go +++ b/cmd/entire/cli/strategy/push_common.go @@ -159,10 +159,8 @@ func printSettingsCommitHint(ctx context.Context, target string) { }) } -// printCheckpointsV2MigrationHint prints a hint when checkpoints_version: 2 is -// committed but the local repo has no v2 /main ref (migration was never run). -// Partial migrations are not detected — drift between v1 and v2 is acceptable -// post-migration. +// printCheckpointsV2MigrationHint nudges users who committed checkpoints_version: 2 +// but never ran the migration. Partial migrations are not flagged. func printCheckpointsV2MigrationHint(ctx context.Context) { checkpointsV2MigrationHintOnce.Do(func() { if !isCheckpointsVersion2Committed(ctx) { diff --git a/cmd/entire/cli/strategy/push_v2.go b/cmd/entire/cli/strategy/push_v2.go index f10ecf8e11..67978aa092 100644 --- a/cmd/entire/cli/strategy/push_v2.go +++ b/cmd/entire/cli/strategy/push_v2.go @@ -566,9 +566,8 @@ func fetchRelatedRemoteRotationArchive(ctx context.Context, fetchTarget string, archiveTmpRefs = append(archiveTmpRefs, archiveTmpRef) } - // Probe with --filter=blob:none; only the matched archive's blobs are topped - // up after we pick it. Fall back to an unfiltered fetch on any first-fetch - // error (e.g. server without uploadpack.allowFilter). + // Probe with --filter=blob:none; only the matched archive's blobs are + // topped up. Fall back to an unfiltered fetch if the server refuses. fetch := func(extra ...string) ([]byte, error) { args := append([]string{"--no-write-fetch-head"}, extra...) return remote.Fetch(ctx, remote.FetchOptions{ @@ -604,10 +603,8 @@ func fetchRelatedRemoteRotationArchive(ctx context.Context, fetchTarget string, return fetchedRemoteRotationArchive{}, errors.New("failed to read local /full/current history") } - // Wall-clock bound on the ancestry walk: on repos whose archives are fully - // disjoint from local /full/current we'd otherwise burn seconds per push - // concluding nothing matches. A future /full/root anchor will replace this - // with a constant-time lookup. + // Bound the ancestry walk so disjoint-history repos fail fast instead of + // scanning every archive. A future /full/root anchor replaces this. walkStart := time.Now() walked := 0 for _, archive := range archives { @@ -634,16 +631,13 @@ func fetchRelatedRemoteRotationArchive(ctx context.Context, fetchTarget string, return fetchedRemoteRotationArchive{}, err } -// rotationAncestryWalkBudget caps wall-clock for the per-archive ancestry -// walk. Exposed as a var so tests can lower it. +// rotationAncestryWalkBudget caps the per-archive ancestry walk. var so tests can lower it. var rotationAncestryWalkBudget = 1 * time.Second //nolint:gochecknoglobals // test override -// topUpMatchedArchiveBlobs ensures every blob in the matched archive's tree -// is in the local pack directory. Downstream reads (updateGenerationTimestamps, -// the push that follows) go through go-git's BlobObject, which scans only the -// local pack directory and does not follow file-transport alternates — so we -// must pull missing blobs explicitly via `git fetch-pack` rather than rely on -// FetchingTree's cat-file second-opinion. +// topUpMatchedArchiveBlobs fetches blobs referenced by the matched archive +// that aren't in the local pack directory. Downstream reads go through go-git's +// BlobObject, which won't follow file-transport alternates — so FetchingTree's +// cat-file fallback isn't sufficient here. func topUpMatchedArchiveBlobs(ctx context.Context, fetchTarget string, repo *git.Repository, archiveTree *object.Tree) error { seen := make(map[plumbing.Hash]struct{}) var missing []string diff --git a/cmd/entire/cli/strategy/push_v2_test.go b/cmd/entire/cli/strategy/push_v2_test.go index 17213f2c41..c228a8ae2b 100644 --- a/cmd/entire/cli/strategy/push_v2_test.go +++ b/cmd/entire/cli/strategy/push_v2_test.go @@ -1170,10 +1170,6 @@ func TestFetchAndMergeRef_RemoteRotatedMultipleTimesUsesRelatedArchive(t *testin "remote generation 2 checkpoint should remain in archive 2") } -// TestFetchAndMergeRef_RotationConflict_BloblessProbeAvoidsUnmatchedBlobs -// verifies that rotation recovery probes archive refs with --filter=blob:none -// and only tops up blobs for the archive it matches. Multiple unrelated -// archives must not have their blobs fetched. // Not parallel: uses t.Chdir() func TestFetchAndMergeRef_RotationConflict_BloblessProbeAvoidsUnmatchedBlobs(t *testing.T) { ctx := context.Background() @@ -1232,10 +1228,6 @@ func TestFetchAndMergeRef_RotationConflict_BloblessProbeAvoidsUnmatchedBlobs(t * out, pushErr := pushRotated.CombinedOutput() require.NoError(t, pushErr, "push rotated state failed: %s", out) - // Capture representative blob hashes from each archive on the bare. The - // matched archive (archive1) gets its generation.json topped up; the - // unmatched archive (archive2) should never have its remote-gen-2 shard - // blobs pulled to local. bareRepo, err := git.PlainOpen(bareDir) require.NoError(t, err) matchedBlobHash := blobAtTopLevel(t, bareRepo, archive1Ref, paths.GenerationFileName) From 1bc7c4ec4a04c668fa2c90c4c85d2dfd0e4a692b Mon Sep 17 00:00:00 2001 From: computermode <2917645+computermode@users.noreply.github.com> Date: Thu, 14 May 2026 17:20:19 -0700 Subject: [PATCH 3/3] Speed up v2 push recovery Fetch checkpoint recovery refs bloblessly through resolved URLs so pushes do not download transcript blobs or persist partial-clone settings on origin. Replace rotation archive probing with a single wildcard archive fetch, check newest archives first, and use git merge-base under the existing ancestry timeout. Add regression coverage for filtered fetch target resolution, blobless merge recovery, rotation recovery cleanup, and pushability after tree-only merges. Entire-Checkpoint: 0845ffccc4f3 --- cmd/entire/cli/checkpoint/remote/git.go | 16 ++ cmd/entire/cli/checkpoint/remote/git_test.go | 30 +++ cmd/entire/cli/strategy/push_v2.go | 262 +++++++------------ cmd/entire/cli/strategy/push_v2_test.go | 99 ++++--- 4 files changed, 199 insertions(+), 208 deletions(-) diff --git a/cmd/entire/cli/checkpoint/remote/git.go b/cmd/entire/cli/checkpoint/remote/git.go index e24cdc938d..ec30e0eb8d 100644 --- a/cmd/entire/cli/checkpoint/remote/git.go +++ b/cmd/entire/cli/checkpoint/remote/git.go @@ -309,6 +309,22 @@ func ResolveFetchTarget(ctx context.Context, target string) (string, error) { return url, nil } +// ResolveFilteredFetchTarget returns a fetch target suitable for an explicit +// filtered fetch. Remote names are resolved to URLs even when the repo-level +// filtered_fetches setting is disabled, because callers that pass +// --filter=blob:none must not let git persist promisor settings onto a named +// remote like origin. +func ResolveFilteredFetchTarget(ctx context.Context, target string) (string, error) { + if target == "" || IsURL(target) || isLocalPath(target) { + return target, nil + } + url, err := GetRemoteURL(ctx, target) + if err != nil { + return "", fmt.Errorf("get remote URL: %w", err) + } + return url, nil +} + // newCommand creates an exec.Cmd for a git operation that may need // checkpoint token authentication. If ENTIRE_CHECKPOINT_TOKEN is set: // - if the target in args is (or resolves to) an SSH remote, the target is diff --git a/cmd/entire/cli/checkpoint/remote/git_test.go b/cmd/entire/cli/checkpoint/remote/git_test.go index a6a9958224..7bb585afcf 100644 --- a/cmd/entire/cli/checkpoint/remote/git_test.go +++ b/cmd/entire/cli/checkpoint/remote/git_test.go @@ -256,6 +256,36 @@ func TestResolveFetchTarget(t *testing.T) { }) } +// Not parallel: uses t.Chdir() +func TestResolveFilteredFetchTarget_AlwaysResolvesRemoteName(t *testing.T) { + ctx := context.Background() + + tmpDir := t.TempDir() + testutil.InitRepo(t, tmpDir) + testutil.WriteFile(t, tmpDir, "f.txt", "init") + testutil.GitAdd(t, tmpDir, "f.txt") + testutil.GitCommit(t, tmpDir, "init") + + cmd := exec.CommandContext(ctx, "git", "remote", "add", "origin", "https://github.com/org/repo.git") + cmd.Dir = tmpDir + cmd.Env = testutil.GitIsolatedEnv() + require.NoError(t, cmd.Run()) + + t.Chdir(tmpDir) + + target, err := ResolveFilteredFetchTarget(ctx, "origin") + require.NoError(t, err) + assert.Equal(t, "https://github.com/org/repo.git", target) + + target, err = ResolveFilteredFetchTarget(ctx, "https://github.com/org/repo.git") + require.NoError(t, err) + assert.Equal(t, "https://github.com/org/repo.git", target) + + target, err = ResolveFilteredFetchTarget(ctx, "../repo.git") + require.NoError(t, err) + assert.Equal(t, "../repo.git", target) +} + func TestAppendCheckpointTokenEnv(t *testing.T) { t.Parallel() diff --git a/cmd/entire/cli/strategy/push_v2.go b/cmd/entire/cli/strategy/push_v2.go index ba5709047e..701824061a 100644 --- a/cmd/entire/cli/strategy/push_v2.go +++ b/cmd/entire/cli/strategy/push_v2.go @@ -8,6 +8,7 @@ import ( "io" "log/slog" "os" + "os/exec" "slices" "sort" "strings" @@ -26,6 +27,8 @@ import ( ) // tryPushRef attempts to push a custom ref using an explicit refspec. +var errNoRemoteRotationRefs = errors.New("no remote rotation archive refs") + func tryPushRef(ctx context.Context, target string, refName plumbing.ReferenceName) error { ctx, cancel := context.WithTimeout(ctx, 2*time.Minute) defer cancel() @@ -392,25 +395,14 @@ func fetchAndMergeRef(ctx context.Context, target string, refName plumbing.Refer ctx, cancel := context.WithTimeout(ctx, 2*time.Minute) defer cancel() - fetchTarget, err := remote.ResolveFetchTarget(ctx, target) - if err != nil { - return fmt.Errorf("resolve fetch target: %w", err) - } - // Fetch to a temp ref tmpRefSuffix := strings.ReplaceAll(string(refName), "/", "-") tmpRefName := plumbing.ReferenceName("refs/entire-fetch-tmp/" + tmpRefSuffix) refSpec := fmt.Sprintf("+%s:%s", refName, tmpRefName) - // Recovery flattens fetched trees recursively, so it needs a complete object - // graph instead of the normal blobless sync fetch. - if output, err := remote.Fetch(ctx, remote.FetchOptions{ - Remote: fetchTarget, - RefSpecs: []string{refSpec}, - NoTags: true, - NoFilter: true, - ExtraArgs: []string{"--no-write-fetch-head"}, - }); err != nil { + // Recovery only needs commits and trees to merge checkpoint refs. Blobless + // fetches avoid pulling every transcript blob across large v2 refs. + if output, err := fetchTreeMergeRefs(ctx, target, []string{refSpec}); err != nil { return fmt.Errorf("fetch failed: %s", output) } @@ -424,9 +416,10 @@ func fetchAndMergeRef(ctx context.Context, target string, refName plumbing.Refer // Check for rotation conflict on /full/current if refName == plumbing.ReferenceName(paths.V2FullCurrentRefName) { - remoteRotationArchives, detectErr := detectRemoteRotationArchives(ctx, target, repo) - if detectErr == nil && len(remoteRotationArchives) > 0 { - return handleRotationConflict(ctx, target, fetchTarget, repo, refName, tmpRefName, remoteRotationArchives) + if err := handleRotationConflict(ctx, target, repo, refName, tmpRefName); err == nil { + return nil + } else if !errors.Is(err, errNoRemoteRotationRefs) { + return err } } @@ -485,44 +478,28 @@ func fetchAndMergeRef(ctx context.Context, target string, refName plumbing.Refer return nil } -// detectRemoteRotationArchives discovers archived generation refs on the remote -// that are missing locally or whose local ref hash differs from the remote ref -// hash. Returns them sorted ascending (oldest first). -func detectRemoteRotationArchives(ctx context.Context, target string, repo *git.Repository) ([]string, error) { - ctx, cancel := context.WithTimeout(ctx, 30*time.Second) - defer cancel() - - output, err := remote.LsRemote(ctx, target, paths.V2FullRefPrefix+"*") +func fetchTreeMergeRefs(ctx context.Context, target string, refSpecs []string) ([]byte, error) { + fetchTarget, err := remote.ResolveFilteredFetchTarget(ctx, target) if err != nil { - return nil, fmt.Errorf("ls-remote failed: %w", err) + return nil, fmt.Errorf("resolve fetch target: %w", err) } - var remoteRotationArchives []string - for line := range strings.SplitSeq(strings.TrimSpace(string(output)), "\n") { - if line == "" { - continue - } - parts := strings.Fields(line) - if len(parts) < 2 { - continue - } - refName := parts[1] - suffix := strings.TrimPrefix(refName, paths.V2FullRefPrefix) - if suffix == "current" || !checkpoint.GenerationRefPattern.MatchString(suffix) { - continue - } - if len(parts[0]) != 40 { - return nil, fmt.Errorf("invalid remote archive hash %q for %s", parts[0], refName) - } - remoteHash := plumbing.NewHash(parts[0]) - localRef, err := repo.Reference(plumbing.ReferenceName(refName), true) - if err != nil || localRef.Hash() != remoteHash { - remoteRotationArchives = append(remoteRotationArchives, suffix) - } + fetch := func(fetchCtx context.Context, extra ...string) ([]byte, error) { + args := append([]string{"--no-write-fetch-head"}, extra...) + return remote.Fetch(fetchCtx, remote.FetchOptions{ + Remote: fetchTarget, + RefSpecs: refSpecs, + NoTags: true, + NoFilter: true, + ExtraArgs: args, + }) } - sort.Strings(remoteRotationArchives) - return remoteRotationArchives, nil + output, fetchErr := fetch(ctx, "--filter=blob:none") + if fetchErr != nil { + output, fetchErr = fetch(ctx) + } + return output, fetchErr } type fetchedRemoteRotationArchive struct { @@ -558,36 +535,32 @@ func readFetchedRemoteRotationArchive(repo *git.Repository, archive string) (fet }, nil } -func fetchRelatedRemoteRotationArchive(ctx context.Context, fetchTarget string, archives []string, localCurrentHash plumbing.Hash) (fetchedRemoteRotationArchive, error) { - refSpecs := make([]string, 0, len(archives)) - archiveTmpRefs := make([]plumbing.ReferenceName, 0, len(archives)) +func fetchRelatedRemoteRotationArchive(ctx context.Context, fetchTarget string, localCurrentHash plumbing.Hash) (fetchedRemoteRotationArchive, error) { + refSpec := fmt.Sprintf("+%s*:%s*", paths.V2FullRefPrefix, archiveTmpRefPrefix()) - for _, archive := range archives { - archiveRefName := plumbing.ReferenceName(paths.V2FullRefPrefix + archive) - archiveTmpRef := archiveTmpRefName(archive) - refSpecs = append(refSpecs, fmt.Sprintf("+%s:%s", archiveRefName, archiveTmpRef)) - archiveTmpRefs = append(archiveTmpRefs, archiveTmpRef) + if repo, openErr := OpenRepository(ctx); openErr == nil { + cleanupFetchedArchiveTmpRefs(repo, archiveTmpRefs(repo)) } - // Probe with --filter=blob:none; only the matched archive's blobs are - // topped up. Fall back to an unfiltered fetch if the server refuses. - fetch := func(extra ...string) ([]byte, error) { + // Probe with --filter=blob:none; rotation recovery only needs commit and + // tree objects. Fall back to an unfiltered fetch if the server refuses. + fetch := func(fetchCtx context.Context, extra ...string) ([]byte, error) { args := append([]string{"--no-write-fetch-head"}, extra...) - return remote.Fetch(ctx, remote.FetchOptions{ + return remote.Fetch(fetchCtx, remote.FetchOptions{ Remote: fetchTarget, - RefSpecs: refSpecs, + RefSpecs: []string{refSpec}, NoTags: true, NoFilter: true, ExtraArgs: args, }) } - output, fetchErr := fetch("--filter=blob:none") + output, fetchErr := fetch(ctx, "--filter=blob:none") if fetchErr != nil { - output, fetchErr = fetch() + output, fetchErr = fetch(ctx) } if fetchErr != nil { if repo, openErr := OpenRepository(ctx); openErr == nil { - cleanupFetchedArchiveTmpRefs(repo, archiveTmpRefs) + cleanupFetchedArchiveTmpRefs(repo, archiveTmpRefs(repo)) } return fetchedRemoteRotationArchive{}, fmt.Errorf("fetch archived generations failed: %s", output) } @@ -596,22 +569,23 @@ func fetchRelatedRemoteRotationArchive(ctx context.Context, fetchTarget string, if err != nil { return fetchedRemoteRotationArchive{}, fmt.Errorf("reopen repository after fetching archived generations: %w", err) } - tmpRefsToCleanup := archiveTmpRefs + tmpRefsToCleanup := archiveTmpRefs(repo) defer func() { cleanupFetchedArchiveTmpRefs(repo, tmpRefsToCleanup) }() - localCurrentAncestors, ok := currentGenerationAncestors(ctx, repo, localCurrentHash) - if !ok { - return fetchedRemoteRotationArchive{}, errors.New("failed to read local /full/current history") + archives := fetchedArchiveSuffixes(repo, tmpRefsToCleanup) + if len(archives) == 0 { + return fetchedRemoteRotationArchive{}, errNoRemoteRotationRefs } // Bound the ancestry walk so disjoint-history repos fail fast instead of // scanning every archive. A future /full/root anchor replaces this. - walkStart := time.Now() + walkCtx, cancelWalk := context.WithTimeout(ctx, rotationAncestryWalkBudget) + defer cancelWalk() walked := 0 for _, archive := range archives { - if time.Since(walkStart) > rotationAncestryWalkBudget { + if walkCtx.Err() != nil { break } walked++ @@ -619,17 +593,16 @@ func fetchRelatedRemoteRotationArchive(ctx context.Context, fetchTarget string, if err != nil { return fetchedRemoteRotationArchive{}, err } - if archiveSharesHistoryWithCurrentGeneration(ctx, repo, localCurrentAncestors, fetched.ref.Hash()) { - if err := topUpMatchedArchiveBlobs(ctx, fetchTarget, repo, fetched.tree); err != nil { - return fetchedRemoteRotationArchive{}, err - } + if commitsShareHistory(walkCtx, localCurrentHash, fetched.ref.Hash()) { tmpRefsToCleanup = removeRef(tmpRefsToCleanup, fetched.tmpRefName) return fetched, nil } } err = errors.New("no remote archive shares history with local /full/current") - if walked < len(archives) { + if errors.Is(walkCtx.Err(), context.DeadlineExceeded) { err = fmt.Errorf("%w (walk budget exhausted after %d/%d archives)", err, walked, len(archives)) + } else if walkCtx.Err() != nil { + err = fmt.Errorf("%w: %w", err, walkCtx.Err()) } return fetchedRemoteRotationArchive{}, err } @@ -637,126 +610,83 @@ func fetchRelatedRemoteRotationArchive(ctx context.Context, fetchTarget string, // rotationAncestryWalkBudget caps the per-archive ancestry walk. var so tests can lower it. var rotationAncestryWalkBudget = 1 * time.Second //nolint:gochecknoglobals // test override -// topUpMatchedArchiveBlobs fetches blobs referenced by the matched archive -// that aren't in the local pack directory. Downstream reads go through go-git's -// BlobObject, which won't follow file-transport alternates — so FetchingTree's -// cat-file fallback isn't sufficient here. -func topUpMatchedArchiveBlobs(ctx context.Context, fetchTarget string, repo *git.Repository, archiveTree *object.Tree) error { - seen := make(map[plumbing.Hash]struct{}) - var missing []string - var walk func(t *object.Tree) error - walk = func(t *object.Tree) error { - for _, entry := range t.Entries { - if entry.Mode == filemode.Dir { - sub, err := repo.TreeObject(entry.Hash) - if err != nil { - return fmt.Errorf("read subtree %s: %w", entry.Name, err) - } - if err := walk(sub); err != nil { - return err - } - continue - } - if _, ok := seen[entry.Hash]; ok { - continue - } - seen[entry.Hash] = struct{}{} - if repo.Storer.HasEncodedObject(entry.Hash) != nil { - missing = append(missing, entry.Hash.String()) - } - } - return nil - } - if err := walk(archiveTree); err != nil { - return err - } - if len(missing) == 0 { - return nil - } - if err := remote.FetchBlobs(ctx, fetchTarget, missing); err != nil { - return fmt.Errorf("fetch matched archive blobs: %w", err) - } - return nil -} - func archiveTmpRefName(archive string) plumbing.ReferenceName { - return plumbing.ReferenceName("refs/entire-fetch-tmp/archive-" + archive) + return plumbing.ReferenceName(archiveTmpRefPrefix() + archive) } -func cleanupFetchedArchiveTmpRefs(repo *git.Repository, tmpRefs []plumbing.ReferenceName) { - for _, tmpRef := range tmpRefs { - _ = repo.Storer.RemoveReference(tmpRef) //nolint:errcheck // cleanup is best-effort - } +func archiveTmpRefPrefix() string { + return "refs/entire-fetch-tmp/archive-" } -func currentGenerationAncestors(ctx context.Context, repo *git.Repository, currentHash plumbing.Hash) (map[plumbing.Hash]struct{}, bool) { - ancestors := make(map[plumbing.Hash]struct{}) - iter, err := repo.Log(&git.LogOptions{From: currentHash}) +func archiveTmpRefs(repo *git.Repository) []plumbing.ReferenceName { + iter, err := repo.References() if err != nil { - return nil, false + return nil } defer iter.Close() - count := 0 - _ = iter.ForEach(func(c *object.Commit) error { //nolint:errcheck // Best-effort search, errors are non-fatal - if err := ctx.Err(); err != nil { - return err //nolint:wrapcheck // Propagating context cancellation - } - count++ - if count > MaxCommitTraversalDepth { - return errStop + var refs []plumbing.ReferenceName + prefix := archiveTmpRefPrefix() + _ = iter.ForEach(func(ref *plumbing.Reference) error { //nolint:errcheck // Best-effort cleanup/listing + if strings.HasPrefix(ref.Name().String(), prefix) { + refs = append(refs, ref.Name()) } - ancestors[c.Hash] = struct{}{} return nil }) - return ancestors, true + return refs } -func archiveSharesHistoryWithCurrentGeneration(ctx context.Context, repo *git.Repository, currentAncestors map[plumbing.Hash]struct{}, archiveHash plumbing.Hash) bool { - if _, ok := currentAncestors[archiveHash]; ok { - return true +func fetchedArchiveSuffixes(repo *git.Repository, tmpRefs []plumbing.ReferenceName) []string { + archives := make([]string, 0, len(tmpRefs)) + prefix := archiveTmpRefPrefix() + for _, tmpRef := range tmpRefs { + suffix, ok := strings.CutPrefix(tmpRef.String(), prefix) + if !ok || suffix == "current" || !checkpoint.GenerationRefPattern.MatchString(suffix) { + continue + } + if _, err := repo.Reference(tmpRef, true); err != nil { + continue + } + archives = append(archives, suffix) } + sort.Sort(sort.Reverse(sort.StringSlice(archives))) + return archives +} - iter, err := repo.Log(&git.LogOptions{From: archiveHash}) - if err != nil { - return false +func cleanupFetchedArchiveTmpRefs(repo *git.Repository, tmpRefs []plumbing.ReferenceName) { + for _, tmpRef := range tmpRefs { + _ = repo.Storer.RemoveReference(tmpRef) //nolint:errcheck // cleanup is best-effort } - defer iter.Close() +} - found := false - count := 0 - _ = iter.ForEach(func(c *object.Commit) error { //nolint:errcheck // Best-effort search, errors are non-fatal - if err := ctx.Err(); err != nil { - return err //nolint:wrapcheck // Propagating context cancellation - } - count++ - if count > MaxCommitTraversalDepth { - return errStop - } - if _, ok := currentAncestors[c.Hash]; ok { - found = true - return errStop - } - return nil - }) - return found +func commitsShareHistory(ctx context.Context, a, b plumbing.Hash) bool { + if a == b { + return true + } + + cmd := exec.CommandContext(ctx, "git", "merge-base", a.String(), b.String()) + return cmd.Run() == nil } // handleRotationConflict handles the case where remote /full/current was rotated. // Merges local /full/current into the related remote archived generation to avoid // duplicating checkpoint data, then adopts remote's /full/current as local. -func handleRotationConflict(ctx context.Context, target, fetchTarget string, repo *git.Repository, refName, tmpRefName plumbing.ReferenceName, remoteRotationArchives []string) error { +func handleRotationConflict(ctx context.Context, target string, repo *git.Repository, refName, tmpRefName plumbing.ReferenceName) error { localRef, err := repo.Reference(refName, true) if err != nil { return fmt.Errorf("failed to get local ref: %w", err) } - archive, err := fetchRelatedRemoteRotationArchive(ctx, fetchTarget, remoteRotationArchives, localRef.Hash()) + archiveFetchTarget, err := remote.ResolveFilteredFetchTarget(ctx, target) + if err != nil { + return fmt.Errorf("resolve archive fetch target: %w", err) + } + archive, err := fetchRelatedRemoteRotationArchive(ctx, archiveFetchTarget, localRef.Hash()) if err != nil { return fmt.Errorf("failed to find related archived generation: %w", err) } - // fetchRelatedRemoteRotationArchive fetches via git CLI, so continue with - // the fresh go-git handle it used to avoid stale pack indexes. + // fetchRelatedRemoteRotationArchive fetches via git CLI, so continue with a + // go-git handle opened after that fetch. repo = archive.repo defer func() { _ = repo.Storer.RemoveReference(archive.tmpRefName) //nolint:errcheck // cleanup is best-effort diff --git a/cmd/entire/cli/strategy/push_v2_test.go b/cmd/entire/cli/strategy/push_v2_test.go index 11a71023b1..8e4ce8ab93 100644 --- a/cmd/entire/cli/strategy/push_v2_test.go +++ b/cmd/entire/cli/strategy/push_v2_test.go @@ -202,12 +202,14 @@ func refContainsV2Checkpoint(t *testing.T, repo *git.Repository, refName plumbin func TestFetchAndMergeRef_MergesTrees(t *testing.T) { ctx := context.Background() refName := plumbing.ReferenceName(paths.V2MainRefName) + srcCP := id.MustCheckpointID("aabbccddeeff") + localCP := id.MustCheckpointID("112233445566") // Create source repo with a v2 /main ref containing one checkpoint srcDir := setupRepoWithV2Ref(t) srcRepo, err := git.PlainOpen(srcDir) require.NoError(t, err) - writeV2Checkpoint(t, srcRepo, id.MustCheckpointID("aabbccddeeff"), "session-src") + writeV2Checkpoint(t, srcRepo, srcCP, "session-src") // Create a bare "remote" and push src to it bareDir := t.TempDir() @@ -215,22 +217,33 @@ func TestFetchAndMergeRef_MergesTrees(t *testing.T) { initCmd.Dir = bareDir initCmd.Env = testutil.GitIsolatedEnv() require.NoError(t, initCmd.Run()) + enableFilteredFetchServingForTest(t, bareDir) + bareURL := "file://" + bareDir pushCmd := exec.CommandContext(ctx, "git", "push", bareDir, string(refName)+":"+string(refName)) pushCmd.Dir = srcDir require.NoError(t, pushCmd.Run()) + bareRepo, err := git.PlainOpen(bareDir) + require.NoError(t, err) + remoteBlobHash := firstBlobInV2CheckpointShard(t, bareRepo, refName, srcCP) + // Create a local repo that also has the ref but with a different checkpoint localDir := setupRepoWithV2Ref(t) + addOrigin := exec.CommandContext(ctx, "git", "remote", "add", "origin", bareURL) + addOrigin.Dir = localDir + addOrigin.Env = testutil.GitIsolatedEnv() + out, err := addOrigin.CombinedOutput() + require.NoError(t, err, "add origin failed: %s", out) localRepo, err := git.PlainOpen(localDir) require.NoError(t, err) - writeV2Checkpoint(t, localRepo, id.MustCheckpointID("112233445566"), "session-local") + writeV2Checkpoint(t, localRepo, localCP, "session-local") t.Chdir(localDir) // Fetch and merge — should combine both checkpoints - err = fetchAndMergeRef(ctx, bareDir, refName) + err = fetchAndMergeRef(ctx, "origin", refName) require.NoError(t, err) // Verify merged tree contains both checkpoints on /main @@ -259,6 +272,16 @@ func TestFetchAndMergeRef_MergesTrees(t *testing.T) { } assert.True(t, hasAA, "merged tree should contain checkpoint aabbccddeeff") assert.True(t, has11, "merged tree should contain checkpoint 112233445566") + require.Error(t, mergedRepo.Storer.HasEncodedObject(remoteBlobHash), + "remote checkpoint blob must NOT be locally available; recovery should avoid blob downloads") + assert.Empty(t, gitConfigValueForStrategyTest(t, localDir, "remote.origin.promisor")) + assert.Empty(t, gitConfigValueForStrategyTest(t, localDir, "remote.origin.partialclonefilter")) + + require.NoError(t, tryPushRef(ctx, bareURL, refName)) + bareRepo, err = git.PlainOpen(bareDir) + require.NoError(t, err) + assert.True(t, refContainsV2Checkpoint(t, bareRepo, refName, srcCP)) + assert.True(t, refContainsV2Checkpoint(t, bareRepo, refName, localCP)) } // TestPushV2Refs_SkipsUnrecordedArchiveRefs verifies that pushV2Refs pushes @@ -809,40 +832,6 @@ func TestPushV2Refs_RepeatedLocalRotationsBeforePushPublishesAllArchives(t *test "current should contain current checkpoint") } -func TestDetectRemoteRotationArchives_IncludesSameNameDifferentHash(t *testing.T) { - t.Parallel() - ctx := context.Background() - archiveRef := plumbing.ReferenceName(paths.V2FullRefPrefix + "0000000000001") - - localDir := setupRepoWithV2Ref(t) - localRepo, err := git.PlainOpen(localDir) - require.NoError(t, err) - localHash := writeV2ArchiveRef(t, localRepo, archiveRef, "local archive") - - remoteDir := setupRepoWithV2Ref(t) - remoteRepo, err := git.PlainOpen(remoteDir) - require.NoError(t, err) - remoteHash := writeV2ArchiveRef(t, remoteRepo, archiveRef, "remote archive") - require.NotEqual(t, localHash, remoteHash) - - bareDir := t.TempDir() - initCmd := exec.CommandContext(ctx, "git", "init", "--bare") - initCmd.Dir = bareDir - initCmd.Env = testutil.GitIsolatedEnv() - out, err := initCmd.CombinedOutput() - require.NoError(t, err, "git init --bare failed: %s", out) - - pushArchive := exec.CommandContext(ctx, "git", "push", bareDir, - string(archiveRef)+":"+string(archiveRef)) - pushArchive.Dir = remoteDir - out, err = pushArchive.CombinedOutput() - require.NoError(t, err, "archive push failed: %s", out) - - archives, err := detectRemoteRotationArchives(ctx, bareDir, localRepo) - require.NoError(t, err) - assert.Contains(t, archives, "0000000000001") -} - func TestPrintV2PartialPushResult(t *testing.T) { t.Parallel() @@ -1206,6 +1195,8 @@ func TestFetchAndMergeRef_RemoteRotatedMultipleTimesUsesRelatedArchive(t *testin require.Error(t, err, "selected archive temp ref should be removed after rotation recovery") _, err = localRepo.Reference(archiveTmpRefName("0000000000002"), true) require.Error(t, err, "unselected archive temp ref should be removed after rotation recovery") + _, err = localRepo.Reference(archiveTmpRefName("current"), true) + require.Error(t, err, "wildcard current temp ref should be removed after rotation recovery") assert.True(t, refContainsV2Checkpoint(t, localRepo, archive1Ref, localOnlyCP), "local checkpoint from the first generation should be merged into archive 1") @@ -1221,7 +1212,7 @@ func TestFetchAndMergeRef_RemoteRotatedMultipleTimesUsesRelatedArchive(t *testin } // Not parallel: uses t.Chdir() -func TestFetchAndMergeRef_RotationConflict_BloblessProbeAvoidsUnmatchedBlobs(t *testing.T) { +func TestFetchAndMergeRef_RotationConflict_BloblessProbeAvoidsArchiveBlobDownloads(t *testing.T) { ctx := context.Background() fullCurrentRef := plumbing.ReferenceName(paths.V2FullCurrentRefName) archive1Ref := plumbing.ReferenceName(paths.V2FullRefPrefix + "0000000000001") @@ -1244,6 +1235,11 @@ func TestFetchAndMergeRef_RotationConflict_BloblessProbeAvoidsUnmatchedBlobs(t * testutil.WriteFile(t, localDir, "f.txt", "init") testutil.GitAdd(t, localDir, "f.txt") testutil.GitCommit(t, localDir, "init") + addOrigin := exec.CommandContext(ctx, "git", "remote", "add", "origin", bareURL) + addOrigin.Dir = localDir + addOrigin.Env = testutil.GitIsolatedEnv() + out, err := addOrigin.CombinedOutput() + require.NoError(t, err, "add origin failed: %s", out) localRepo, err := git.PlainOpen(localDir) require.NoError(t, err) writeV2Checkpoint(t, localRepo, sharedCP, "shared-session") @@ -1286,16 +1282,23 @@ func TestFetchAndMergeRef_RotationConflict_BloblessProbeAvoidsUnmatchedBlobs(t * writeV2Checkpoint(t, localRepo, localOnlyCP, "local-session") t.Chdir(localDir) - err = fetchAndMergeRef(ctx, bareURL, fullCurrentRef) + err = fetchAndMergeRef(ctx, "origin", fullCurrentRef) require.NoError(t, err) localRepo, err = git.PlainOpen(localDir) require.NoError(t, err) - assert.NoError(t, localRepo.Storer.HasEncodedObject(matchedBlobHash), - "matched archive generation.json must be locally available after top-up") - assert.Error(t, localRepo.Storer.HasEncodedObject(unmatchedBlobHash), + require.Error(t, localRepo.Storer.HasEncodedObject(matchedBlobHash), + "matched archive generation.json must NOT be locally available; recovery should avoid full archive top-up") + require.Error(t, localRepo.Storer.HasEncodedObject(unmatchedBlobHash), "unmatched archive shard blob must NOT be locally available; blobless probe over-fetched") + assert.Empty(t, gitConfigValueForStrategyTest(t, localDir, "remote.origin.promisor")) + assert.Empty(t, gitConfigValueForStrategyTest(t, localDir, "remote.origin.partialclonefilter")) + + bareRepo, err = git.PlainOpen(bareDir) + require.NoError(t, err) + assert.True(t, refContainsV2Checkpoint(t, bareRepo, archive1Ref, sharedCP)) + assert.True(t, refContainsV2Checkpoint(t, bareRepo, archive1Ref, localOnlyCP)) } func blobAtTopLevel(t *testing.T, repo *git.Repository, refName plumbing.ReferenceName, filename string) plumbing.Hash { @@ -1329,3 +1332,15 @@ func firstBlobInV2CheckpointShard(t *testing.T, repo *git.Repository, refName pl } return plumbing.ZeroHash } + +func gitConfigValueForStrategyTest(t *testing.T, dir, key string) string { + t.Helper() + cmd := exec.CommandContext(t.Context(), "git", "config", "--local", "--get", key) + cmd.Dir = dir + cmd.Env = testutil.GitIsolatedEnv() + output, err := cmd.Output() + if err != nil { + return "" + } + return strings.TrimSpace(string(output)) +}