diff --git a/adapters/googlePhotos/googlephotos.go b/adapters/googlePhotos/googlephotos.go index 9dc54a04..cb5f3e21 100644 --- a/adapters/googlePhotos/googlephotos.go +++ b/adapters/googlePhotos/googlephotos.go @@ -229,7 +229,8 @@ func (toc *TakeoutCmd) passOneFsWalk(ctx context.Context, w fs.FS) error { toc.fileTracker.Store(key, tracking) // to.fileTracker[key] = tracking if _, ok := dirCatalog.unMatchedFiles[base]; ok { - toc.processor.RecordAssetDiscardedImmediately(ctx, fshelper.FSName(w, name), finfo.Size(), fileevent.DiscardedLocalDuplicate, "duplicated in the directory") + // Asset was already discovered above, so transition from pending to discarded + toc.processor.RecordAssetDiscarded(ctx, fshelper.FSName(w, name), finfo.Size(), fileevent.DiscardedLocalDuplicate, "duplicated in the directory") return nil } @@ -323,8 +324,11 @@ func (toc *TakeoutCmd) solvePuzzle(ctx context.Context) error { if toc.KeepJSONLess { a := toc.makeAsset(ctx, dir, i, nil) cat.matchedFiles[f] = a - delete(cat.unMatchedFiles, f) + } else { + // Asset was discovered but has no JSON metadata and --include-unmatched is not set + toc.processor.RecordAssetDiscarded(ctx, fshelper.FSName(i.fsys, path.Join(dir, i.base)), int64(i.length), fileevent.DiscardedFiltered, "no matching JSON metadata (use --include-unmatched to import)") } + delete(cat.unMatchedFiles, f) } } } diff --git a/internal/e2e/client/DATA/fromGooglePhotos/gophers-duplicate/Google Photos/Photos from 2025/PinClipart.com_gopher-clipart_4000224.jpg b/internal/e2e/client/DATA/fromGooglePhotos/gophers-duplicate/Google Photos/Photos from 2025/PinClipart.com_gopher-clipart_4000224.jpg new file mode 100644 index 00000000..b1833bb3 Binary files /dev/null and b/internal/e2e/client/DATA/fromGooglePhotos/gophers-duplicate/Google Photos/Photos from 2025/PinClipart.com_gopher-clipart_4000224.jpg differ diff --git a/internal/e2e/client/DATA/fromGooglePhotos/gophers/Google Photos/Photos from 2025/orphan_no_json.jpg b/internal/e2e/client/DATA/fromGooglePhotos/gophers/Google Photos/Photos from 2025/orphan_no_json.jpg new file mode 100644 index 00000000..08768c85 Binary files /dev/null and b/internal/e2e/client/DATA/fromGooglePhotos/gophers/Google Photos/Photos from 2025/orphan_no_json.jpg differ diff --git a/internal/e2e/client/fromGooglePhotos_test.go b/internal/e2e/client/fromGooglePhotos_test.go index dc6bcdfc..302abbb9 100644 --- a/internal/e2e/client/fromGooglePhotos_test.go +++ b/internal/e2e/client/fromGooglePhotos_test.go @@ -31,7 +31,7 @@ func Test_FromGooglePhotos(t *testing.T) { "--no-ui", // "--api-trace", "--log-level=debug", - "DATA/fromGooglePhotos/gophers", + "DATA/fromGooglePhotos/gophers*", }) err = c.ExecuteContext(ctx) if err != nil && a.Log().GetSLog() != nil { @@ -47,5 +47,8 @@ func Test_FromGooglePhotos(t *testing.T) { fileevent.ProcessedUploadSuccess: 5, fileevent.ProcessedAlbumAdded: 5, fileevent.ProcessedTagged: 5, + fileevent.DiscardedLocalDuplicate: 1, + fileevent.DiscardedFiltered: 1, + fileevent.ProcessedMissingMetadata: 1, }, false, a.FileProcessor()) } diff --git a/internal/fileprocessor/processor_test.go b/internal/fileprocessor/processor_test.go index 38526c2a..afee27de 100644 --- a/internal/fileprocessor/processor_test.go +++ b/internal/fileprocessor/processor_test.go @@ -435,6 +435,124 @@ func TestSummary(t *testing.T) { } } +// TestDiscoverThenDiscardTransition verifies that assets discovered and later +// determined to be discardable (e.g., duplicates found during processing, files +// without required metadata) are properly transitioned from PENDING to DISCARDED. +// +// This test documents a bug fix where assets were being discovered but not +// properly transitioned to a final state when discarded during later processing +// stages, leaving them stuck in PENDING state. +// +// The key insight is: +// - RecordAssetDiscardedImmediately: For assets discarded AT discovery time +// (before being added to pending) +// - RecordAssetDiscarded: For assets already discovered that are discarded +// during later processing (must transition from PENDING to DISCARDED) +func TestDiscoverThenDiscardTransition(t *testing.T) { + logger := slog.New(slog.NewTextHandler(os.Stdout, &slog.HandlerOptions{Level: slog.LevelError})) + tracker := assettracker.New() + recorder := fileevent.NewRecorder(logger) + fp := New(tracker, recorder) + + ctx := context.Background() + + // Scenario 1: Duplicate file found during processing + // In Google Photos takeout, the same image can appear in multiple zip parts + // with different paths. Both are discovered, then one is detected as duplicate. + file1 := newTestFile("/photos/album/image.jpg") + fp.RecordAssetDiscovered(ctx, file1, 1024, fileevent.DiscoveredImage) + + // Same image in a different location (e.g., year folder) - different full path + file1InYear := newTestFile("/photos/2023/image.jpg") + fp.RecordAssetDiscovered(ctx, file1InYear, 1024, fileevent.DiscoveredImage) + + // The year folder copy is detected as duplicate during processing + // Must use RecordAssetDiscarded (NOT RecordAssetDiscardedImmediately) + // because the asset was already discovered and is in PENDING state + fp.RecordAssetDiscarded(ctx, file1InYear, 1024, fileevent.DiscardedLocalDuplicate, "duplicate in directory") + + // Scenario 2: File discovered but later filtered (e.g., no JSON metadata) + fileNoMeta := newTestFile("/photos/year/orphan.jpg") + fp.RecordAssetDiscovered(ctx, fileNoMeta, 2048, fileevent.DiscoveredImage) + + // During puzzle solving, file has no matching JSON and --include-unmatched is false + // Must use RecordAssetDiscarded to properly transition from PENDING + fp.RecordAssetDiscarded(ctx, fileNoMeta, 2048, fileevent.DiscardedFiltered, "no matching JSON metadata") + + // Process the first (non-duplicate) file normally + fp.RecordAssetProcessed(ctx, file1, 1024, fileevent.ProcessedUploadSuccess) + + // Verify final state - NO assets should be pending + counters := fp.GetAssetCounters() + if counters.Pending != 0 { + t.Errorf("Expected 0 pending assets, got %d - assets not properly transitioned", counters.Pending) + } + if counters.Processed != 1 { + t.Errorf("Expected 1 processed asset, got %d", counters.Processed) + } + if counters.Discarded != 2 { + t.Errorf("Expected 2 discarded assets, got %d", counters.Discarded) + } + + // Finalize should succeed with no pending assets + err := fp.Finalize(ctx) + if err != nil { + t.Errorf("Finalize should succeed when all assets reach final state, got error: %v", err) + } +} + +// TestDiscardedImmediatelyVsDiscarded clarifies the difference between +// RecordAssetDiscardedImmediately and RecordAssetDiscarded. +func TestDiscardedImmediatelyVsDiscarded(t *testing.T) { + logger := slog.New(slog.NewTextHandler(os.Stdout, &slog.HandlerOptions{Level: slog.LevelError})) + tracker := assettracker.New() + recorder := fileevent.NewRecorder(logger) + fp := New(tracker, recorder) + + ctx := context.Background() + + // RecordAssetDiscardedImmediately: Asset is rejected at discovery time + // (e.g., banned filename pattern, excluded extension) + // This creates the asset directly in DISCARDED state + bannedFile := newTestFile("/photos/.DS_Store.jpg") + fp.RecordAssetDiscardedImmediately(ctx, bannedFile, 100, fileevent.DiscardedBanned, "banned filename") + + // Verify it went directly to discarded, never pending + counters := fp.GetAssetCounters() + if counters.Pending != 0 { + t.Errorf("Immediately discarded asset should not be pending, got %d pending", counters.Pending) + } + if counters.Discarded != 1 { + t.Errorf("Expected 1 discarded, got %d", counters.Discarded) + } + + // RecordAssetDiscarded: Asset was discovered, then later discarded during processing + // First it must be discovered (goes to PENDING) + laterDiscarded := newTestFile("/photos/image.jpg") + fp.RecordAssetDiscovered(ctx, laterDiscarded, 512, fileevent.DiscoveredImage) + + counters = fp.GetAssetCounters() + if counters.Pending != 1 { + t.Errorf("Discovered asset should be pending, got %d", counters.Pending) + } + + // Then it's discarded (transitions PENDING -> DISCARDED) + fp.RecordAssetDiscarded(ctx, laterDiscarded, 512, fileevent.DiscardedLocalDuplicate, "duplicate") + + counters = fp.GetAssetCounters() + if counters.Pending != 0 { + t.Errorf("After RecordAssetDiscarded, asset should not be pending, got %d", counters.Pending) + } + if counters.Discarded != 2 { + t.Errorf("Expected 2 discarded (immediate + later), got %d", counters.Discarded) + } + + // Verify finalization succeeds + if err := fp.Finalize(ctx); err != nil { + t.Errorf("Finalize should succeed: %v", err) + } +} + func TestCompleteWorkflow(t *testing.T) { logger := slog.New(slog.NewTextHandler(os.Stdout, &slog.HandlerOptions{Level: slog.LevelError})) tracker := assettracker.New()