Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 10 additions & 35 deletions cmd/entire/cli/strategy/push_common.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ import (
"github.com/entireio/cli/cmd/entire/cli/checkpoint"
"github.com/entireio/cli/cmd/entire/cli/checkpoint/remote"
"github.com/entireio/cli/cmd/entire/cli/logging"
"github.com/entireio/cli/cmd/entire/cli/paths"
"github.com/entireio/cli/cmd/entire/cli/settings"

"github.com/go-git/go-git/v6"
Expand Down Expand Up @@ -158,56 +159,30 @@ func printSettingsCommitHint(ctx context.Context, target string) {
})
}

// printCheckpointsV2MigrationHint prints a hint when the committed project
// settings enable checkpoints_version: 2 AND there are v1 checkpoints that have
// not yet been mirrored into v2. Suppressed when v2 already has every v1
// checkpoint (nothing to migrate) so the hint does not become noise once the
// migration is done.
// printCheckpointsV2MigrationHint prints a hint when checkpoints_version: 2 is
// committed but the local repo has no v2 /main ref (migration was never run).
// Partial migrations are not detected — drift between v1 and v2 is acceptable
// post-migration.
func printCheckpointsV2MigrationHint(ctx context.Context) {
checkpointsV2MigrationHintOnce.Do(func() {
if !isCheckpointsVersion2Committed(ctx) {
return
}
if !hasUnmigratedV1Checkpoints(ctx) {
if v2MainRefExists(ctx) {
return
}
fmt.Fprintln(os.Stderr, "[entire] Note: .entire/settings.json sets checkpoints_version: 2, but there are some v1 checkpoints that have not been migrated to v2.")
fmt.Fprintln(os.Stderr, "[entire] Note: .entire/settings.json sets checkpoints_version: 2, but no v2 /main ref was found in this repo.")
fmt.Fprintln(os.Stderr, "[entire] Run 'entire migrate --checkpoints v2' to migrate missing checkpoints to v2.")
})
}

// hasUnmigratedV1Checkpoints reports whether any v1 checkpoint has no matching
// entry in v2. Any failure opening the repo or listing either store is treated
// as "no migration needed" so we stay silent instead of printing a speculative
// hint — the hint is advisory and should never be the reason a push gets noisy.
func hasUnmigratedV1Checkpoints(ctx context.Context) bool {
func v2MainRefExists(ctx context.Context) bool {
repo, err := OpenRepository(ctx)
if err != nil {
return false
}
v1Store := checkpoint.NewGitStore(repo)
v1List, err := v1Store.ListCommitted(ctx)
if err != nil || len(v1List) == 0 {
return false
}
v2List, err := checkpoint.NewV2GitStore(repo, "").ListCommitted(ctx)
if err != nil {
return false
}
v2Set := make(map[string]struct{}, len(v2List))
for _, info := range v2List {
v2Set[info.CheckpointID.String()] = struct{}{}
}
for _, info := range v1List {
if _, ok := v2Set[info.CheckpointID.String()]; !ok {
summary, readErr := v1Store.ReadCommitted(ctx, info.CheckpointID)
if readErr != nil || summary == nil {
continue
}
return true
}
}
return false
_, err = repo.Reference(plumbing.ReferenceName(paths.V2MainRefName), true)
return err == nil
}

// isCheckpointRemoteCommitted returns true if the committed .entire/settings.json
Expand Down
102 changes: 6 additions & 96 deletions cmd/entire/cli/strategy/push_common_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,9 @@ import (
"github.com/entireio/cli/cmd/entire/cli/checkpoint/id"
"github.com/entireio/cli/cmd/entire/cli/paths"
"github.com/entireio/cli/cmd/entire/cli/testutil"
"github.com/entireio/cli/redact"

"github.com/go-git/go-git/v6"
"github.com/go-git/go-git/v6/plumbing"
"github.com/go-git/go-git/v6/plumbing/filemode"
"github.com/go-git/go-git/v6/plumbing/object"

"github.com/stretchr/testify/assert"
Expand Down Expand Up @@ -1269,74 +1267,22 @@ func setupCheckpointsV2CommittedRepo(t *testing.T) *git.Repository {
return repo
}

// writeV1Checkpoint commits a bare-bones checkpoint for sessionID to the v1
// metadata branch through the v1 git store. The transcript is a one-field JSON
// object naming the session; any write error fails the test immediately.
func writeV1Checkpoint(t *testing.T, repo *git.Repository, cpID id.CheckpointID, sessionID string) {
	t.Helper()

	transcript := []byte(`{"from":"` + sessionID + `"}`)
	opts := checkpoint.WriteCommittedOptions{
		CheckpointID: cpID,
		SessionID:    sessionID,
		Strategy:     "manual-commit",
		Transcript:   redact.AlreadyRedacted(transcript),
		AuthorName:   "Test",
		AuthorEmail:  "test@test.com",
	}

	store := checkpoint.NewGitStore(repo)
	require.NoError(t, store.WriteCommitted(context.Background(), opts))
}

// writeMalformedV1CheckpointWithoutSummary points the v1 metadata branch at a
// parentless commit whose tree holds only a session-0 transcript blob under
// cpID's path. No other file is written for the checkpoint, which is what
// makes the entry "malformed" for the purposes of the tests that use it.
func writeMalformedV1CheckpointWithoutSummary(t *testing.T, repo *git.Repository, cpID id.CheckpointID) {
	t.Helper()
	ctx := context.Background()

	// The blob content is arbitrary; only its location in the tree matters.
	transcriptBlob, err := checkpoint.CreateBlobFromContent(repo, []byte("transcript without root metadata"))
	require.NoError(t, err)

	transcriptPath := cpID.Path() + "/0/" + paths.TranscriptFileName
	entries := map[string]object.TreeEntry{
		transcriptPath: {Mode: filemode.Regular, Hash: transcriptBlob},
	}
	rootTree, err := checkpoint.BuildTreeFromEntries(ctx, repo, entries)
	require.NoError(t, err)

	// plumbing.ZeroHash as the parent argument: the commit has no ancestor.
	commit, err := checkpoint.CreateCommit(ctx, repo, rootTree, plumbing.ZeroHash, "malformed v1 checkpoint", "Test", "test@test.com")
	require.NoError(t, err)

	metadataRef := plumbing.NewHashReference(plumbing.NewBranchReferenceName(paths.MetadataBranchName), commit)
	require.NoError(t, repo.Storer.SetReference(metadataRef))
}

func TestPrintCheckpointsV2MigrationHint(t *testing.T) {
t.Run("suppressed when no v1 checkpoints exist", func(t *testing.T) {
checkpointsV2MigrationHintOnce = sync.Once{}
setupCheckpointsV2CommittedRepo(t)

restore := captureStderr(t)
printCheckpointsV2MigrationHint(context.Background())
output := restore()

assert.Empty(t, output, "hint should not print when there are no v1 checkpoints to migrate")
})

t.Run("suppressed when every v1 checkpoint is already in v2", func(t *testing.T) {
t.Run("suppressed when v2 /main exists", func(t *testing.T) {
checkpointsV2MigrationHintOnce = sync.Once{}
repo := setupCheckpointsV2CommittedRepo(t)

cpID := id.MustCheckpointID("aabbccddeeff")
writeV1Checkpoint(t, repo, cpID, "session-1")
writeV2Checkpoint(t, repo, cpID, "session-1")
writeV2Checkpoint(t, repo, id.MustCheckpointID("aabbccddeeff"), "session-1")

restore := captureStderr(t)
printCheckpointsV2MigrationHint(context.Background())
output := restore()

assert.Empty(t, output, "hint should not print once v2 already mirrors every v1 checkpoint")
assert.Empty(t, output, "hint should not print once v2 /main has been populated")
})

t.Run("prints when v1 has checkpoints not in v2", func(t *testing.T) {
t.Run("prints when v2 /main is missing", func(t *testing.T) {
checkpointsV2MigrationHintOnce = sync.Once{}
repo := setupCheckpointsV2CommittedRepo(t)

writeV1Checkpoint(t, repo, id.MustCheckpointID("111111111111"), "session-1")
setupCheckpointsV2CommittedRepo(t)

restore := captureStderr(t)
printCheckpointsV2MigrationHint(context.Background())
Expand All @@ -1347,9 +1293,7 @@ func TestPrintCheckpointsV2MigrationHint(t *testing.T) {

t.Run("prints only once per process", func(t *testing.T) {
checkpointsV2MigrationHintOnce = sync.Once{}
repo := setupCheckpointsV2CommittedRepo(t)

writeV1Checkpoint(t, repo, id.MustCheckpointID("222222222222"), "session-2")
setupCheckpointsV2CommittedRepo(t)

restore := captureStderr(t)
printCheckpointsV2MigrationHint(context.Background())
Expand All @@ -1361,40 +1305,6 @@ func TestPrintCheckpointsV2MigrationHint(t *testing.T) {
})
}

func TestHasUnmigratedV1Checkpoints(t *testing.T) {
t.Run("false when no v1 checkpoints exist", func(t *testing.T) {
setupCheckpointsV2CommittedRepo(t)
assert.False(t, hasUnmigratedV1Checkpoints(context.Background()))
})

t.Run("false when every v1 checkpoint is in v2", func(t *testing.T) {
repo := setupCheckpointsV2CommittedRepo(t)
cpID := id.MustCheckpointID("333333333333")
writeV1Checkpoint(t, repo, cpID, "session-a")
writeV2Checkpoint(t, repo, cpID, "session-a")

assert.False(t, hasUnmigratedV1Checkpoints(context.Background()))
})

t.Run("true when at least one v1 checkpoint is missing from v2", func(t *testing.T) {
repo := setupCheckpointsV2CommittedRepo(t)
mirrored := id.MustCheckpointID("444444444444")
missing := id.MustCheckpointID("555555555555")
writeV1Checkpoint(t, repo, mirrored, "session-b")
writeV2Checkpoint(t, repo, mirrored, "session-b")
writeV1Checkpoint(t, repo, missing, "session-c")

assert.True(t, hasUnmigratedV1Checkpoints(context.Background()))
})

t.Run("false when only malformed v1 checkpoint entries are missing from v2", func(t *testing.T) {
repo := setupCheckpointsV2CommittedRepo(t)
writeMalformedV1CheckpointWithoutSummary(t, repo, id.MustCheckpointID("666666666666"))

assert.False(t, hasUnmigratedV1Checkpoints(context.Background()))
})
}

// captureStderr redirects os.Stderr to a pipe and returns a function that restores
// stderr and returns the captured output. Must be called on the main goroutine
// (not parallel-safe). Uses t.Cleanup as a safety net to restore stderr and close
Expand Down
95 changes: 85 additions & 10 deletions cmd/entire/cli/strategy/push_v2.go
Original file line number Diff line number Diff line change
Expand Up @@ -566,15 +566,24 @@ func fetchRelatedRemoteRotationArchive(ctx context.Context, fetchTarget string,
archiveTmpRefs = append(archiveTmpRefs, archiveTmpRef)
}

// These archive commits are read immediately through go-git for tree
// flattening, so fetch the complete refs rather than blobless packfiles.
if output, fetchErr := remote.Fetch(ctx, remote.FetchOptions{
Remote: fetchTarget,
RefSpecs: refSpecs,
NoTags: true,
NoFilter: true,
ExtraArgs: []string{"--no-write-fetch-head"},
}); fetchErr != nil {
// Probe with --filter=blob:none; only the matched archive's blobs are topped
// up after we pick it. Fall back to an unfiltered fetch on any first-fetch
// error (e.g. server without uploadpack.allowFilter).
fetch := func(extra ...string) ([]byte, error) {
args := append([]string{"--no-write-fetch-head"}, extra...)
return remote.Fetch(ctx, remote.FetchOptions{
Remote: fetchTarget,
RefSpecs: refSpecs,
NoTags: true,
NoFilter: true,
ExtraArgs: args,
})
}
output, fetchErr := fetch("--filter=blob:none")
Comment thread
computermode marked this conversation as resolved.
Outdated
if fetchErr != nil {
output, fetchErr = fetch()
}
if fetchErr != nil {
if repo, openErr := OpenRepository(ctx); openErr == nil {
cleanupFetchedArchiveTmpRefs(repo, archiveTmpRefs)
}
Expand All @@ -594,17 +603,83 @@ func fetchRelatedRemoteRotationArchive(ctx context.Context, fetchTarget string,
if !ok {
return fetchedRemoteRotationArchive{}, errors.New("failed to read local /full/current history")
}

// Wall-clock bound on the ancestry walk: on repos whose archives are fully
// disjoint from local /full/current we'd otherwise burn seconds per push
// concluding nothing matches. A future /full/root anchor will replace this
// with a constant-time lookup.
walkStart := time.Now()
walked := 0
for _, archive := range archives {
if time.Since(walkStart) > rotationAncestryWalkBudget {
break
}
walked++
fetched, err := readFetchedRemoteRotationArchive(repo, archive)
if err != nil {
return fetchedRemoteRotationArchive{}, err
}
if archiveSharesHistoryWithCurrentGeneration(ctx, repo, localCurrentAncestors, fetched.ref.Hash()) {
Comment thread
computermode marked this conversation as resolved.
Outdated
if err := topUpMatchedArchiveBlobs(ctx, fetchTarget, repo, fetched.tree); err != nil {
return fetchedRemoteRotationArchive{}, err
}
Comment thread
computermode marked this conversation as resolved.
Outdated
tmpRefsToCleanup = removeRef(tmpRefsToCleanup, fetched.tmpRefName)
return fetched, nil
}
}
return fetchedRemoteRotationArchive{}, errors.New("no remote archive shares history with local /full/current")
err = errors.New("no remote archive shares history with local /full/current")
if walked < len(archives) {
err = fmt.Errorf("%w (walk budget exhausted after %d/%d archives)", err, walked, len(archives))
}
return fetchedRemoteRotationArchive{}, err
}

// rotationAncestryWalkBudget is the wall-clock limit applied to the
// per-archive ancestry walk. It is a package-level var rather than a const so
// that tests can shorten it.
var rotationAncestryWalkBudget = time.Second //nolint:gochecknoglobals // test override

// topUpMatchedArchiveBlobs ensures every blob in the matched archive's tree
// is in the local pack directory. Downstream reads (updateGenerationTimestamps,
// the push that follows) go through go-git's BlobObject, which scans only the
// local pack directory and does not follow file-transport alternates — so we
// must pull missing blobs explicitly via `git fetch-pack` rather than rely on
// FetchingTree's cat-file second-opinion.
func topUpMatchedArchiveBlobs(ctx context.Context, fetchTarget string, repo *git.Repository, archiveTree *object.Tree) error {
seen := make(map[plumbing.Hash]struct{})
var missing []string
var walk func(t *object.Tree) error
walk = func(t *object.Tree) error {
for _, entry := range t.Entries {
if entry.Mode == filemode.Dir {
sub, err := repo.TreeObject(entry.Hash)
if err != nil {
return fmt.Errorf("read subtree %s: %w", entry.Name, err)
}
if err := walk(sub); err != nil {
return err
}
continue
}
if _, ok := seen[entry.Hash]; ok {
continue
}
seen[entry.Hash] = struct{}{}
if repo.Storer.HasEncodedObject(entry.Hash) != nil {
missing = append(missing, entry.Hash.String())
}
}
return nil
}
if err := walk(archiveTree); err != nil {
return err
}
if len(missing) == 0 {
return nil
}
if err := remote.FetchBlobs(ctx, fetchTarget, missing); err != nil {
return fmt.Errorf("fetch matched archive blobs: %w", err)
Comment thread
computermode marked this conversation as resolved.
Outdated
}
return nil
}

func archiveTmpRefName(archive string) plumbing.ReferenceName {
Expand Down
Loading
Loading