From b5dc3ec57defe34319d3d0f069882039f84c5fa5 Mon Sep 17 00:00:00 2001 From: Fabrizio Ferrai Date: Fri, 5 Jul 2024 14:19:34 +0200 Subject: [PATCH 1/4] Recompute the ignore glob only when needed instead of at every path --- src/Spago/Command/Build.purs | 3 ++- src/Spago/Glob.js | 3 ++- src/Spago/Glob.purs | 50 ++++++++++++++++++++++-------------- 3 files changed, 35 insertions(+), 21 deletions(-) diff --git a/src/Spago/Command/Build.purs b/src/Spago/Command/Build.purs index 4c818494a..79d8f69af 100644 --- a/src/Spago/Command/Build.purs +++ b/src/Spago/Command/Build.purs @@ -153,8 +153,9 @@ run opts = do if Array.null pedanticPkgs || opts.depsOnly then pure true else do - logInfo $ "Looking for unused and undeclared transitive dependencies..." + logInfo "Looking for unused and undeclared transitive dependencies..." eitherGraph <- Graph.runGraph globs opts.pursArgs + logDebug "Decoded the output of `purs graph` successfully. Analyzing dependencies..." eitherGraph # either (prepareToDie >>> (_ $> false)) \graph -> do env <- ask checkResults <- map Array.fold $ for pedanticPkgs \(Tuple selected options) -> do diff --git a/src/Spago/Glob.js b/src/Spago/Glob.js index b053202ba..8e3a4643e 100644 --- a/src/Spago/Glob.js +++ b/src/Spago/Glob.js @@ -1,7 +1,7 @@ import mm from 'micromatch'; import * as fsWalk from '@nodelib/fs.walk'; -export const testGlob = glob => mm.matcher(glob.include, {ignore: glob.ignore}); +export const testGlob = glob => mm.matcher(glob.include, { ignore: glob.ignore }); export const fsWalkImpl = Left => Right => respond => options => path => () => { const entryFilter = entry => options.entryFilter(entry)(); @@ -14,3 +14,4 @@ export const fsWalkImpl = Left => Right => respond => options => path => () => { export const isFile = dirent => dirent.isFile(); +export const direntToString = dirent => JSON.stringify(dirent); diff --git a/src/Spago/Glob.purs b/src/Spago/Glob.purs index 385642713..e8d5a2db2 100644 --- a/src/Spago/Glob.purs +++ b/src/Spago/Glob.purs @@ -11,11 +11,10 @@ import Data.Foldable (any, fold) import Data.String as String import Data.Traversable (traverse_) import Effect.Aff as Aff +import Effect.Class.Console as Console import Effect.Ref as Ref import Node.FS.Sync as SyncFS import Node.Path as Path -import Record as Record -import Type.Proxy (Proxy(..)) type Glob = { ignore :: Array String @@ -30,8 +29,15 @@ splitGlob { ignore, include } = (\a -> { ignore, include: [ a ] }) <$> include type Entry = { name :: String, path :: String, dirent :: DirEnt } type FsWalkOptions = { entryFilter :: Entry -> Effect Boolean, deepFilter :: Entry -> Effect Boolean } +-- https://nodejs.org/api/fs.html#class-fsdirent foreign import data DirEnt :: Type foreign import isFile :: DirEnt -> Boolean + +foreign import direntToString :: DirEnt -> String + +instance Show DirEnt where + show = direntToString + foreign import fsWalkImpl :: (forall a b. a -> Either a b) -> (forall a b. b -> Either a b) @@ -40,8 +46,8 @@ foreign import fsWalkImpl -> String -> Effect Unit -gitignoreGlob :: String -> String -> Glob -gitignoreGlob base = +gitignoreFileToGlob :: FilePath -> String -> Glob +gitignoreFileToGlob base = String.split (String.Pattern "\n") >>> map String.trim >>> Array.filter (not <<< or [ String.null, isComment ]) @@ -54,8 +60,7 @@ gitignoreGlob base = Just negated -> Left $ pat negated Nothing -> Right $ pat line ) - >>> Record.rename (Proxy @"left") (Proxy @"ignore") - >>> Record.rename (Proxy @"right") (Proxy @"include") + >>> (\{ left, right } -> { ignore: left, include: right }) where isComment = isJust <<< String.stripPrefix (String.Pattern "#") @@ -74,41 +79,48 @@ fsWalk cwd ignorePatterns includePatterns = Aff.makeAff \cb -> do -- Pattern for directories which can be outright ignored. -- This will be updated whenver a .gitignore is found. - ignoreMatcherRef :: Ref Glob <- Ref.new { ignore: [], include: ignorePatterns } + let firstIgnoreGlob = { ignore: [], include: ignorePatterns } + ignoreGlobRef :: Ref Glob <- Ref.new firstIgnoreGlob + -- We recompute the ignoreMatcher every time we update the ignoreMatcherRef + ignoreMatcherRef :: Ref (String -> Boolean) <- Ref.new (testGlob firstIgnoreGlob) -- If this Ref contains `true` because this Aff has been canceled, then deepFilter will always return false. canceled <- Ref.new false let - entryGitignore :: Entry -> Effect Unit - entryGitignore entry = + -- Update the ignoreMatcherRef with the patterns from a .gitignore file + updateGitignore :: Entry -> Effect Unit + updateGitignore entry = try (SyncFS.readTextFile UTF8 entry.path) - >>= traverse_ \gitignore -> + >>= traverse_ \gitignore -> do let + -- directory of this .gitignore relative to the directory being globbed base = Path.relative cwd $ Path.dirname entry.path - glob = gitignoreGlob base gitignore + glob = gitignoreFileToGlob base gitignore pats = splitGlob glob patOk g = not $ any (testGlob g) includePatterns newPats = filter patOk pats - in - void $ Ref.modify (_ <> fold newPats) $ ignoreMatcherRef + currentGlob <- Ref.read ignoreGlobRef + let newGlob = currentGlob <> fold newPats + void $ Ref.write newGlob ignoreGlobRef + void $ Ref.write (testGlob newGlob) ignoreMatcherRef -- Should `fsWalk` recurse into this directory? deepFilter :: Entry -> Effect Boolean deepFilter entry = fromMaybe false <$> runMaybeT do + lift $ Console.log $ "deepFilter: " <> show entry isCanceled <- lift $ Ref.read canceled guard $ not isCanceled - shouldIgnore <- lift $ testGlob <$> Ref.read ignoreMatcherRef + shouldIgnore <- lift $ testGlob <$> Ref.read ignoreGlobRef pure $ not $ shouldIgnore $ Path.relative cwd entry.path -- Should `fsWalk` retain this entry for the result array? entryFilter :: Entry -> Effect Boolean entryFilter entry = do - when (isFile entry.dirent && entry.name == ".gitignore") (entryGitignore entry) - ignorePat <- Ref.read ignoreMatcherRef - let - ignoreMatcher = testGlob ignorePat - path = withForwardSlashes $ Path.relative cwd entry.path + Console.log $ "entryFilter: " <> show entry + when (isFile entry.dirent && entry.name == ".gitignore") (updateGitignore entry) + ignoreMatcher <- Ref.read ignoreMatcherRef + let path = withForwardSlashes $ Path.relative cwd entry.path pure $ includeMatcher path && not (ignoreMatcher path) options = { entryFilter, deepFilter } From 25b2edea5078feeb475039298cd689dfd3d21586 Mon Sep 17 00:00:00 2001 From: Fabrizio Ferrai Date: Fri, 5 Jul 2024 15:12:39 +0200 Subject: [PATCH 2/4] Same buf for the deepFilter --- src/Spago/Glob.js | 2 -- src/Spago/Glob.purs | 41 ++++++++++++++++++----------------------- 2 files changed, 18 insertions(+), 25 deletions(-) diff --git a/src/Spago/Glob.js b/src/Spago/Glob.js index 8e3a4643e..d9da637a0 100644 --- a/src/Spago/Glob.js +++ b/src/Spago/Glob.js @@ -13,5 +13,3 @@ export const fsWalkImpl = Left => Right => respond => options => path => () => { }; export const isFile = dirent => dirent.isFile(); - -export const direntToString = dirent => JSON.stringify(dirent); diff --git a/src/Spago/Glob.purs b/src/Spago/Glob.purs index e8d5a2db2..8ec5335eb 100644 --- a/src/Spago/Glob.purs +++ b/src/Spago/Glob.purs @@ -9,9 +9,9 @@ import Data.Array as Array import Data.Filterable (filter) import Data.Foldable (any, fold) import Data.String as String +import Data.String as String.CodePoint import Data.Traversable (traverse_) import Effect.Aff as Aff -import Effect.Class.Console as Console import Effect.Ref as Ref import Node.FS.Sync as SyncFS import Node.Path as Path @@ -33,11 +33,6 @@ type FsWalkOptions = { entryFilter :: Entry -> Effect Boolean, deepFilter :: Ent foreign import data DirEnt :: Type foreign import isFile :: DirEnt -> Boolean -foreign import direntToString :: DirEnt -> String - -instance Show DirEnt where - show = direntToString - foreign import fsWalkImpl :: (forall a b. a -> Either a b) -> (forall a b. b -> Either a b) @@ -53,24 +48,25 @@ gitignoreFileToGlob base = >>> Array.filter (not <<< or [ String.null, isComment ]) >>> partitionMap ( \line -> do - let - resolve a = Path.concat [ base, a ] - pat a = withForwardSlashes $ resolve $ unpackPattern a + let pattern lin = withForwardSlashes $ Path.concat [ base, gitignorePatternToGlobPattern lin ] case String.stripPrefix (String.Pattern "!") line of - Just negated -> Left $ pat negated - Nothing -> Right $ pat line + Just negated -> Left $ pattern negated + Nothing -> Right $ pattern line ) >>> (\{ left, right } -> { ignore: left, include: right }) where isComment = isJust <<< String.stripPrefix (String.Pattern "#") - leadingSlash = String.stripPrefix (String.Pattern "/") - trailingSlash = String.stripSuffix (String.Pattern "/") + dropSuffixSlash str = fromMaybe str $ String.stripSuffix (String.Pattern "/") str + dropPrefixSlash str = fromMaybe str $ String.stripPrefix (String.Pattern "/") str + + leadingSlash str = String.codePointAt 0 str == Just (String.CodePoint.codePointFromChar '/') + trailingSlash str = String.codePointAt (String.length str - 1) str == Just (String.CodePoint.codePointFromChar '/') - unpackPattern :: String -> String - unpackPattern pattern - | Just a <- trailingSlash pattern = unpackPattern a - | Just a <- leadingSlash pattern = a <> "/**" + gitignorePatternToGlobPattern :: String -> String + gitignorePatternToGlobPattern pattern + | trailingSlash pattern = gitignorePatternToGlobPattern $ dropSuffixSlash pattern + | leadingSlash pattern = dropPrefixSlash pattern <> "/**" | otherwise = "**/" <> pattern <> "/**" fsWalk :: String -> Array String -> Array String -> Aff (Array Entry) @@ -89,8 +85,8 @@ fsWalk cwd ignorePatterns includePatterns = Aff.makeAff \cb -> do let -- Update the ignoreMatcherRef with the patterns from a .gitignore file - updateGitignore :: Entry -> Effect Unit - updateGitignore entry = + updateIgnoreMatcherWithGitignore :: Entry -> Effect Unit + updateIgnoreMatcherWithGitignore entry = do try (SyncFS.readTextFile UTF8 entry.path) >>= traverse_ \gitignore -> do let @@ -108,17 +104,16 @@ fsWalk cwd ignorePatterns includePatterns = Aff.makeAff \cb -> do -- Should `fsWalk` recurse into this directory? deepFilter :: Entry -> Effect Boolean deepFilter entry = fromMaybe false <$> runMaybeT do - lift $ Console.log $ "deepFilter: " <> show entry isCanceled <- lift $ Ref.read canceled guard $ not isCanceled - shouldIgnore <- lift $ testGlob <$> Ref.read ignoreGlobRef + shouldIgnore <- lift $ Ref.read ignoreMatcherRef pure $ not $ shouldIgnore $ Path.relative cwd entry.path -- Should `fsWalk` retain this entry for the result array? entryFilter :: Entry -> Effect Boolean entryFilter entry = do - Console.log $ "entryFilter: " <> show entry - when (isFile entry.dirent && entry.name == ".gitignore") (updateGitignore entry) + when (isFile entry.dirent && entry.name == ".gitignore") do + updateIgnoreMatcherWithGitignore entry ignoreMatcher <- Ref.read ignoreMatcherRef let path = withForwardSlashes $ Path.relative cwd entry.path pure $ includeMatcher path && not (ignoreMatcher path) From 778d2f4e89c49969a90bdffd91a6b8a539dcdd8b Mon Sep 17 00:00:00 2001 From: Fabrizio Ferrai Date: Fri, 5 Jul 2024 15:34:46 +0200 Subject: [PATCH 3/4] Back to composing functions --- src/Spago/Glob.purs | 53 +++++++++++++++++++++++++++++---------------- 1 file changed, 34 insertions(+), 19 deletions(-) diff --git a/src/Spago/Glob.purs b/src/Spago/Glob.purs index 8ec5335eb..639a42361 100644 --- a/src/Spago/Glob.purs +++ b/src/Spago/Glob.purs @@ -7,10 +7,9 @@ import Control.Monad.Maybe.Trans (runMaybeT) import Control.Monad.Trans.Class (lift) import Data.Array as Array import Data.Filterable (filter) -import Data.Foldable (any, fold) +import Data.Foldable (any, traverse_) import Data.String as String import Data.String as String.CodePoint -import Data.Traversable (traverse_) import Effect.Aff as Aff import Effect.Ref as Ref import Node.FS.Sync as SyncFS @@ -75,10 +74,7 @@ fsWalk cwd ignorePatterns includePatterns = Aff.makeAff \cb -> do -- Pattern for directories which can be outright ignored. -- This will be updated whenver a .gitignore is found. - let firstIgnoreGlob = { ignore: [], include: ignorePatterns } - ignoreGlobRef :: Ref Glob <- Ref.new firstIgnoreGlob - -- We recompute the ignoreMatcher every time we update the ignoreMatcherRef - ignoreMatcherRef :: Ref (String -> Boolean) <- Ref.new (testGlob firstIgnoreGlob) + ignoreMatcherRef :: Ref (String -> Boolean) <- Ref.new (testGlob { ignore: [], include: ignorePatterns }) -- If this Ref contains `true` because this Aff has been canceled, then deepFilter will always return false. canceled <- Ref.new false @@ -87,19 +83,38 @@ fsWalk cwd ignorePatterns includePatterns = Aff.makeAff \cb -> do -- Update the ignoreMatcherRef with the patterns from a .gitignore file updateIgnoreMatcherWithGitignore :: Entry -> Effect Unit updateIgnoreMatcherWithGitignore entry = do - try (SyncFS.readTextFile UTF8 entry.path) - >>= traverse_ \gitignore -> do - let - -- directory of this .gitignore relative to the directory being globbed - base = Path.relative cwd $ Path.dirname entry.path - glob = gitignoreFileToGlob base gitignore - pats = splitGlob glob - patOk g = not $ any (testGlob g) includePatterns - newPats = filter patOk pats - currentGlob <- Ref.read ignoreGlobRef - let newGlob = currentGlob <> fold newPats - void $ Ref.write newGlob ignoreGlobRef - void $ Ref.write (testGlob newGlob) ignoreMatcherRef + let + gitignorePath = entry.path + -- directory of this .gitignore relative to the directory being globbed + base = Path.relative cwd (Path.dirname gitignorePath) + + try (SyncFS.readTextFile UTF8 entry.path) >>= traverse_ \gitignore -> do + let + gitignored = testGlob <$> (splitGlob $ gitignoreFileToGlob base gitignore) + + -- Do not add `.gitignore` patterns that explicitly ignore the files + -- we're searching for; + -- + -- ex. if `includePatterns` is [".spago/p/aff-1.0.0/**/*.purs"], + -- and `gitignored` is ["node_modules", ".spago"], + -- then add "node_modules" to `ignoreMatcher` but not ".spago" + wouldConflictWithSearch matcher = any matcher includePatterns + + newMatchers = or $ filter (not <<< wouldConflictWithSearch) gitignored + + -- Another possible approach could be to keep a growing array of patterns and + -- regenerate the matcher on every gitignore. We have tried that (see #1234), + -- and turned out to be 2x slower. (see #1242, and #1244) + -- Composing functions is faster, but there's the risk of blowing the stack + -- (see #1231) - when this was introduced in #1210, every match from the + -- gitignore file would be `or`ed to the previous matcher, which would create + -- a very long recursive call - in this latest iteration we are `or`ing the + -- new matchers together, then the whole thing with the previous matcher. + -- This is still prone to stack issues, but we now have a tree so it should + -- not be as dramatic. + addMatcher currentMatcher = or [ currentMatcher, newMatchers ] + + Ref.modify_ addMatcher ignoreMatcherRef -- Should `fsWalk` recurse into this directory? deepFilter :: Entry -> Effect Boolean From a6c54a18e7a15814348cd191038600efef3de07f Mon Sep 17 00:00:00 2001 From: Fabrizio Ferrai Date: Fri, 5 Jul 2024 15:55:41 +0200 Subject: [PATCH 4/4] Fix stack-safety test --- src/Spago/Glob.purs | 2 +- test/Spago/Glob.purs | 8 ++++++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/src/Spago/Glob.purs b/src/Spago/Glob.purs index 639a42361..9345a14f1 100644 --- a/src/Spago/Glob.purs +++ b/src/Spago/Glob.purs @@ -108,7 +108,7 @@ fsWalk cwd ignorePatterns includePatterns = Aff.makeAff \cb -> do -- Composing functions is faster, but there's the risk of blowing the stack -- (see #1231) - when this was introduced in #1210, every match from the -- gitignore file would be `or`ed to the previous matcher, which would create - -- a very long recursive call - in this latest iteration we are `or`ing the + -- a very long (linear) call chain - in this latest iteration we are `or`ing the -- new matchers together, then the whole thing with the previous matcher. -- This is still prone to stack issues, but we now have a tree so it should -- not be as dramatic. diff --git a/test/Spago/Glob.purs b/test/Spago/Glob.purs index 9a8223230..45129b970 100644 --- a/test/Spago/Glob.purs +++ b/test/Spago/Glob.purs @@ -92,10 +92,14 @@ spec = Spec.around globTmpDir do Spec.it "is stacksafe" \p -> do let - chars = [ "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k" ] - -- 10,000-line gitignore + chars = [ "a", "b", "c", "d", "e", "f", "g", "h" ] + -- 4000-line gitignore words = [ \a b c d -> a <> b <> c <> d ] <*> chars <*> chars <*> chars <*> chars hugeGitignore = intercalate "\n" words + -- Write it in a few places FS.writeTextFile (Path.concat [ p, ".gitignore" ]) hugeGitignore + FS.writeTextFile (Path.concat [ p, "fruits", ".gitignore" ]) hugeGitignore + FS.writeTextFile (Path.concat [ p, "fruits", "left", ".gitignore" ]) hugeGitignore + FS.writeTextFile (Path.concat [ p, "fruits", "right", ".gitignore" ]) hugeGitignore a <- Glob.gitignoringGlob p [ "fruits/**/apple" ] Array.sort a `Assert.shouldEqual` [ "fruits/left/apple", "fruits/right/apple" ]