Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions adapters/googlePhotos/googlephotos.go
Original file line number Diff line number Diff line change
Expand Up @@ -229,7 +229,8 @@ func (toc *TakeoutCmd) passOneFsWalk(ctx context.Context, w fs.FS) error {
toc.fileTracker.Store(key, tracking) // to.fileTracker[key] = tracking

if _, ok := dirCatalog.unMatchedFiles[base]; ok {
toc.processor.RecordAssetDiscardedImmediately(ctx, fshelper.FSName(w, name), finfo.Size(), fileevent.DiscardedLocalDuplicate, "duplicated in the directory")
// Asset was already discovered above, so transition from pending to discarded
toc.processor.RecordAssetDiscarded(ctx, fshelper.FSName(w, name), finfo.Size(), fileevent.DiscardedLocalDuplicate, "duplicated in the directory")
return nil
}

Expand Down Expand Up @@ -323,8 +324,11 @@ func (toc *TakeoutCmd) solvePuzzle(ctx context.Context) error {
if toc.KeepJSONLess {
a := toc.makeAsset(ctx, dir, i, nil)
cat.matchedFiles[f] = a
delete(cat.unMatchedFiles, f)
} else {
// Asset was discovered but has no JSON metadata and --include-unmatched is not set
toc.processor.RecordAssetDiscarded(ctx, fshelper.FSName(i.fsys, path.Join(dir, i.base)), int64(i.length), fileevent.DiscardedFiltered, "no matching JSON metadata (use --include-unmatched to import)")
}
delete(cat.unMatchedFiles, f)
}
}
}
Expand Down
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
5 changes: 4 additions & 1 deletion internal/e2e/client/fromGooglePhotos_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ func Test_FromGooglePhotos(t *testing.T) {
"--no-ui",
// "--api-trace",
"--log-level=debug",
"DATA/fromGooglePhotos/gophers",
"DATA/fromGooglePhotos/gophers*",
})
err = c.ExecuteContext(ctx)
if err != nil && a.Log().GetSLog() != nil {
Expand All @@ -47,5 +47,8 @@ func Test_FromGooglePhotos(t *testing.T) {
fileevent.ProcessedUploadSuccess: 5,
fileevent.ProcessedAlbumAdded: 5,
fileevent.ProcessedTagged: 5,
fileevent.DiscardedLocalDuplicate: 1,
fileevent.DiscardedFiltered: 1,
fileevent.ProcessedMissingMetadata: 1,
}, false, a.FileProcessor())
}
118 changes: 118 additions & 0 deletions internal/fileprocessor/processor_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -435,6 +435,124 @@ func TestSummary(t *testing.T) {
}
}

// TestDiscoverThenDiscardTransition verifies that assets discovered and later
// determined to be discardable (e.g., duplicates found during processing, files
// without required metadata) are properly transitioned from PENDING to DISCARDED.
//
// This test documents a bug fix where assets were being discovered but not
// properly transitioned to a final state when discarded during later processing
// stages, leaving them stuck in PENDING state.
//
// The key insight is:
// - RecordAssetDiscardedImmediately: For assets discarded AT discovery time
// (before being added to pending)
// - RecordAssetDiscarded: For assets already discovered that are discarded
// during later processing (must transition from PENDING to DISCARDED)
func TestDiscoverThenDiscardTransition(t *testing.T) {
logger := slog.New(slog.NewTextHandler(os.Stdout, &slog.HandlerOptions{Level: slog.LevelError}))
tracker := assettracker.New()
recorder := fileevent.NewRecorder(logger)
fp := New(tracker, recorder)

ctx := context.Background()

// Scenario 1: Duplicate file found during processing
// In Google Photos takeout, the same image can appear in multiple zip parts
// with different paths. Both are discovered, then one is detected as duplicate.
file1 := newTestFile("/photos/album/image.jpg")
fp.RecordAssetDiscovered(ctx, file1, 1024, fileevent.DiscoveredImage)

// Same image in a different location (e.g., year folder) - different full path
file1InYear := newTestFile("/photos/2023/image.jpg")
fp.RecordAssetDiscovered(ctx, file1InYear, 1024, fileevent.DiscoveredImage)

// The year folder copy is detected as duplicate during processing
// Must use RecordAssetDiscarded (NOT RecordAssetDiscardedImmediately)
// because the asset was already discovered and is in PENDING state
fp.RecordAssetDiscarded(ctx, file1InYear, 1024, fileevent.DiscardedLocalDuplicate, "duplicate in directory")

// Scenario 2: File discovered but later filtered (e.g., no JSON metadata)
fileNoMeta := newTestFile("/photos/year/orphan.jpg")
fp.RecordAssetDiscovered(ctx, fileNoMeta, 2048, fileevent.DiscoveredImage)

// During puzzle solving, file has no matching JSON and --include-unmatched is false
// Must use RecordAssetDiscarded to properly transition from PENDING
fp.RecordAssetDiscarded(ctx, fileNoMeta, 2048, fileevent.DiscardedFiltered, "no matching JSON metadata")

// Process the first (non-duplicate) file normally
fp.RecordAssetProcessed(ctx, file1, 1024, fileevent.ProcessedUploadSuccess)

// Verify final state - NO assets should be pending
counters := fp.GetAssetCounters()
if counters.Pending != 0 {
t.Errorf("Expected 0 pending assets, got %d - assets not properly transitioned", counters.Pending)
}
if counters.Processed != 1 {
t.Errorf("Expected 1 processed asset, got %d", counters.Processed)
}
if counters.Discarded != 2 {
t.Errorf("Expected 2 discarded assets, got %d", counters.Discarded)
}

// Finalize should succeed with no pending assets
err := fp.Finalize(ctx)
if err != nil {
t.Errorf("Finalize should succeed when all assets reach final state, got error: %v", err)
}
}

// TestDiscardedImmediatelyVsDiscarded clarifies the difference between
// RecordAssetDiscardedImmediately and RecordAssetDiscarded.
func TestDiscardedImmediatelyVsDiscarded(t *testing.T) {
logger := slog.New(slog.NewTextHandler(os.Stdout, &slog.HandlerOptions{Level: slog.LevelError}))
tracker := assettracker.New()
recorder := fileevent.NewRecorder(logger)
fp := New(tracker, recorder)

ctx := context.Background()

// RecordAssetDiscardedImmediately: Asset is rejected at discovery time
// (e.g., banned filename pattern, excluded extension)
// This creates the asset directly in DISCARDED state
bannedFile := newTestFile("/photos/.DS_Store.jpg")
fp.RecordAssetDiscardedImmediately(ctx, bannedFile, 100, fileevent.DiscardedBanned, "banned filename")

// Verify it went directly to discarded, never pending
counters := fp.GetAssetCounters()
if counters.Pending != 0 {
t.Errorf("Immediately discarded asset should not be pending, got %d pending", counters.Pending)
}
if counters.Discarded != 1 {
t.Errorf("Expected 1 discarded, got %d", counters.Discarded)
}

// RecordAssetDiscarded: Asset was discovered, then later discarded during processing
// First it must be discovered (goes to PENDING)
laterDiscarded := newTestFile("/photos/image.jpg")
fp.RecordAssetDiscovered(ctx, laterDiscarded, 512, fileevent.DiscoveredImage)

counters = fp.GetAssetCounters()
if counters.Pending != 1 {
t.Errorf("Discovered asset should be pending, got %d", counters.Pending)
}

// Then it's discarded (transitions PENDING -> DISCARDED)
fp.RecordAssetDiscarded(ctx, laterDiscarded, 512, fileevent.DiscardedLocalDuplicate, "duplicate")

counters = fp.GetAssetCounters()
if counters.Pending != 0 {
t.Errorf("After RecordAssetDiscarded, asset should not be pending, got %d", counters.Pending)
}
if counters.Discarded != 2 {
t.Errorf("Expected 2 discarded (immediate + later), got %d", counters.Discarded)
}

// Verify finalization succeeds
if err := fp.Finalize(ctx); err != nil {
t.Errorf("Finalize should succeed: %v", err)
}
}

func TestCompleteWorkflow(t *testing.T) {
logger := slog.New(slog.NewTextHandler(os.Stdout, &slog.HandlerOptions{Level: slog.LevelError}))
tracker := assettracker.New()
Expand Down