Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 8 additions & 2 deletions syft_client/sync/connections/drive/gdrive_transport.py
Original file line number Diff line number Diff line change
Expand Up @@ -959,6 +959,7 @@ def reset_caches(self):
self.ds_outbox_folder_id_cache.clear()
self.archive_folder_id_cache.clear()
self.personal_syftbox_event_id_cache.clear()
self.dataset_collection_folder_id_cache.clear()
self._rolling_state_folder_id = None
self._rolling_state_file_id = None

Expand Down Expand Up @@ -1023,15 +1024,20 @@ def delete_file_by_id(

def find_orphaned_message_files(self) -> list[str]:
"""
Find message files (syfteventsmessagev3_*, msgv2_*) owned by user.
Find syft files by name pattern owned by user, regardless of parent folder.

Due to Google Drive's eventual consistency, files can become orphaned when
their parent folder is deleted before they're fully registered. This method
finds such files by searching for name patterns regardless of parent.

Returns list of file IDs.
"""
patterns = ["syfteventsmessagev3_", "msgv2_"]
patterns = [
"syfteventsmessagev3_", # event messages
"msgv2_", # proposed file change messages
CHECKPOINT_FILENAME_PREFIX, # checkpoint and incremental checkpoint files
ROLLING_STATE_FILENAME_PREFIX, # rolling state files
]
file_ids = []

for pattern in patterns:
Expand Down
28 changes: 24 additions & 4 deletions syft_client/sync/syftbox_manager.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from pathlib import Path
import copy
import shutil
import warnings
from syft_client.sync.connections.drive.gdrive_transport import GDriveConnection
from syft_client.utils import resolve_path
Expand Down Expand Up @@ -1041,21 +1042,21 @@ def _clear_caches(self):

def delete_syftbox(self, verbose: bool = True):
"""
Delete the SyftBox folder and all its contents, including orphaned files.
Delete all SyftBox state: Google Drive files, local caches, and local folder.

Due to Google Drive's eventual consistency, files can become orphaned when
their parent folder is deleted before they're fully registered. We use two
strategies to ensure complete cleanup:
1. Gather all files by traversing the SyftBox folder hierarchy
2. Find message files by name pattern (catches orphaned files from any location)
2. Find files by name pattern (catches orphaned files from any location)
"""
# Get files by folder hierarchy
folder_file_ids = set(self._connection_router.gather_all_file_and_folder_ids())

# Also find message files by name pattern (catches orphaned files)
# Also find syft files by name pattern (catches orphaned files)
orphaned_file_ids = set(self._connection_router.find_orphaned_message_files())

# Combine both sets
# Combine both sets and delete from Google Drive
all_file_ids = list(folder_file_ids | orphaned_file_ids)

start = time.time()
Expand All @@ -1071,8 +1072,27 @@ def delete_syftbox(self, verbose: bool = True):
print(f" (including {orphan_count} orphaned)")
else:
print()

# Clear in-memory caches and filesystem cache contents
self._clear_caches()
self._connection_router.reset_caches()

# Delete local filesystem cache directories
self._delete_local_cache_dirs()

def _delete_local_cache_dirs(self):
    """Remove the on-disk cache folders that sit next to the syftbox folder.

    Two siblings of ``self.syftbox_folder`` are targeted, each named after
    the syftbox folder plus a suffix: ``-events`` and ``-event-messages``.
    A folder that is present is removed recursively; one that is absent is
    skipped without error.
    """
    base = self.syftbox_folder

    # "-events" holds the DO event cache, "-event-messages" the DS event cache.
    for suffix in ("-events", "-event-messages"):
        target = base.parent / f"{base.name}{suffix}"
        if not target.exists():
            continue
        shutil.rmtree(target)

# =========================================================================
# CHECKPOINT METHODS
# =========================================================================
Expand Down