1010
1111import logging
1212import os
13+ from pathlib import Path
1314import platform
1415import re
1516import sys
1617
1718from abc import ABCMeta , abstractmethod
18- from typing import Iterator , Optional
19+ from typing import Iterator , Optional , Set
1920
2021from ..utils import fix_windows_path_limit , get_file_mtime , is_file_readable
2122from .exception import EmptyDirectory , EnvironmentEncodingError , NotADirectory , UnableToCreateDirectory , UnsupportedFilename
@@ -132,7 +133,8 @@ def all_files(self, reporter: Optional[ProgressReport],
132133 :param reporter: a place to report errors
133134 :param policies_manager: a policy manager object, default is DEFAULT_SCAN_MANAGER
134135 """
135- yield from self ._walk_relative_paths (self .root , '' , reporter , policies_manager )
136+ root_path = Path (self .root )
137+ yield from self ._walk_relative_paths (root_path , Path ('' ), reporter , policies_manager )
136138
137139 def make_full_path (self , file_name ):
138140 """
@@ -178,17 +180,23 @@ def ensure_non_empty(self):
178180 raise EmptyDirectory (self .root )
179181
180182 def _walk_relative_paths (
181- self , local_dir : str , relative_dir_path : str , reporter ,
182- policies_manager : ScanPoliciesManager
183+ self ,
184+ local_dir : Path ,
185+ relative_dir_path : Path ,
186+ reporter : ProgressReport ,
187+ policies_manager : ScanPoliciesManager ,
188+ visited_symlinks : Optional [Set [int ]] = None ,
183189 ):
184190 """
185191 Yield a File object for each of the files anywhere under this folder, in the
186192 order they would appear in B2, unless the path is excluded by policies manager.
187193
188- :param relative_dir_path: the path of this dir relative to the scan point, or '' if at scan point
194+ :param local_dir: the path to the local directory that we are currently inspecting
195+ :param relative_dir_path: the path of this dir relative to the scan point, or Path('') if at scan point
196+ :param reporter: a reporter object to report errors and warnings
197+ :param policies_manager: a policies manager object
198+ :param visited_symlinks: a set of inode numbers of the resolved targets of symlinks that have already been visited; inode numbers are stored instead of paths to reduce memory usage
189199 """
190- if not isinstance (local_dir , str ):
191- raise ValueError ('folder path should be unicode: %s' % repr (local_dir ))
192200
193201 # Collect the names. We do this before returning any results, because
194202 # directories need to sort as if their names end in '/'.
@@ -204,39 +212,59 @@ def _walk_relative_paths(
204212 #
205213 # This is because in Unicode '.' comes before '/', which comes before '0'.
206214 names = [] # list of (name, local_path, relative_file_path)
207- for name in os .listdir (local_dir ):
208- # We expect listdir() to return unicode if dir_path is unicode.
209- # If the file name is not valid, based on the file system
210- # encoding, then listdir() will return un-decoded str/bytes.
211- if not isinstance (name , str ):
212- name = self ._handle_non_unicode_file_name (name )
215+
216+ visited_symlinks = visited_symlinks or set ()
217+
218+ if local_dir .is_symlink ():
219+ real_path = local_dir .resolve ()
220+ inode_number = real_path .stat ().st_ino
221+
222+ visited_symlinks_count = len (visited_symlinks )
223+
224+ # Add the inode number of the symlink's resolved target to visited_symlinks to prevent infinite symlink loops
225+ visited_symlinks .add (inode_number )
226+
227+ # If the set size has not changed, the target of this symlink has already been visited
228+ if len (visited_symlinks ) == visited_symlinks_count :
229+ # Infinite symlink loop detected, report warning and skip symlink
230+ if reporter is not None :
231+ reporter .circular_symlink_skipped (str (local_dir ))
232+ return
233+
234+ visited_symlinks .add (inode_number )
235+
236+ for name in (x .name for x in local_dir .iterdir ()):
213237
214238 if '/' in name :
215239 raise UnsupportedFilename (
216240 "scan does not support file names that include '/'" ,
217241 "%s in dir %s" % (name , local_dir )
218242 )
219243
220- local_path = os . path . join ( local_dir , name )
244+ local_path = local_dir / name
221245 relative_file_path = join_b2_path (
222- relative_dir_path , name
246+ str ( relative_dir_path ) , name
223247 ) # file path relative to the scan point
224248
225249 # Skip broken symlinks or other inaccessible files
226- if not is_file_readable (local_path , reporter ):
250+ if not is_file_readable (str ( local_path ) , reporter ):
227251 continue
228252
229- if policies_manager .exclude_all_symlinks and os . path . islink ( local_path ):
253+ if policies_manager .exclude_all_symlinks and local_path . is_symlink ( ):
230254 if reporter is not None :
231- reporter .symlink_skipped (local_path )
255+ reporter .symlink_skipped (str ( local_path ) )
232256 continue
233257
234- if os . path . isdir ( local_path ):
258+ if local_path . is_dir ( ):
235259 name += '/'
236- if policies_manager .should_exclude_local_directory (relative_file_path ):
260+ if policies_manager .should_exclude_local_directory (str ( relative_file_path ) ):
237261 continue
238262
239- names .append ((name , local_path , relative_file_path ))
263+ # remove the leading './' from the relative path to ensure backward compatibility
264+ relative_file_path_str = str (relative_file_path )
265+ if relative_file_path_str .startswith ("./" ):
266+ relative_file_path_str = relative_file_path_str [2 :]
267+ names .append ((name , local_path , relative_file_path_str ))
240268
241269 # Yield all of the answers.
242270 #
@@ -245,19 +273,23 @@ def _walk_relative_paths(
245273 for (name , local_path , relative_file_path ) in sorted (names ):
246274 if name .endswith ('/' ):
247275 for subdir_file in self ._walk_relative_paths (
248- local_path , relative_file_path , reporter , policies_manager
276+ local_path ,
277+ relative_file_path ,
278+ reporter ,
279+ policies_manager ,
280+ visited_symlinks ,
249281 ):
250282 yield subdir_file
251283 else :
252284 # Check that the file still exists and is accessible, since it can take a long time
253285 # to iterate through large folders
254- if is_file_readable (local_path , reporter ):
255- file_mod_time = get_file_mtime (local_path )
256- file_size = os . path . getsize ( local_path )
286+ if is_file_readable (str ( local_path ) , reporter ):
287+ file_mod_time = get_file_mtime (str ( local_path ) )
288+ file_size = local_path . stat (). st_size
257289
258290 local_scan_path = LocalPath (
259- absolute_path = self .make_full_path (relative_file_path ),
260- relative_path = relative_file_path ,
291+ absolute_path = self .make_full_path (str ( relative_file_path ) ),
292+ relative_path = str ( relative_file_path ) ,
261293 mod_time = file_mod_time ,
262294 size = file_size ,
263295 )
0 commit comments