Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: gitignoringGlob stack safety + perf #1234

Merged
merged 7 commits into from
Jun 24, 2024
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/Spago/Glob.js
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import mm from 'micromatch';
import * as fsWalk from '@nodelib/fs.walk';

export const micromatch = options => patterns => mm.matcher(patterns, options);
export const micromatch = options => mm.matcher(options.include, options);

export const fsWalkImpl = Left => Right => respond => options => path => () => {
const entryFilter = entry => options.entryFilter(entry)();
Expand Down
91 changes: 43 additions & 48 deletions src/Spago/Glob.purs
Original file line number Diff line number Diff line change
Expand Up @@ -2,21 +2,28 @@ module Spago.Glob (gitignoringGlob) where

import Spago.Prelude

import Control.Alternative (guard)
import Control.Monad.Maybe.Trans (MaybeT(..), runMaybeT)
import Control.Monad.Trans.Class (lift)
import Data.Array as Array
import Data.Filterable (filter)
import Data.Foldable (any)
import Data.Foldable (any, fold)
import Data.String as String
import Data.String.CodePoints as String.CodePoint
import Data.Traversable (traverse_)
import Effect.Aff as Aff
import Effect.Ref as Ref
import Node.FS.Sync as SyncFS
import Node.Path as Path
import Record as Record
import Type.Proxy (Proxy(..))

type MicroMatchOptions = { ignore :: Array String }
type MicroMatchOptions = { ignore :: Array String, include :: Array String }
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this change allows us to accumulate with appending arrays instead of ||ing functions.

Fun fact for those who may not be aware, if all of a record's fields are Semigroup, the record is Semigroup so you can <> these :)


foreign import micromatch :: MicroMatchOptions -> Array String -> String -> Boolean
foreign import micromatch :: MicroMatchOptions -> String -> Boolean

splitMicromatch :: MicroMatchOptions -> Array MicroMatchOptions
splitMicromatch { ignore, include } = (\a -> { ignore, include: [ a ] }) <$> include

type Entry = { name :: String, path :: String, dirent :: DirEnt }
type FsWalkOptions = { entryFilter :: Entry -> Effect Boolean, deepFilter :: Entry -> Effect Boolean }
Expand All @@ -31,86 +38,74 @@ foreign import fsWalkImpl
-> String
-> Effect Unit

gitignoreToMicromatchPatterns :: String -> String -> { ignore :: Array String, patterns :: Array String }
gitignoreToMicromatchPatterns :: String -> String -> { ignore :: Array String, include :: Array String }
gitignoreToMicromatchPatterns base =
String.split (String.Pattern "\n")
>>> map String.trim
>>> Array.filter (not <<< or [ String.null, isComment ])
>>> partitionMap
( \line -> do
let negated = isJust $ String.stripPrefix (String.Pattern "!") line
let pattern = Path.concat [ base, gitignorePatternToMicromatch line ]
if negated then Left pattern else Right pattern
let pattern a = Path.concat [ base, gitignorePatternToMicromatch a ]
case String.stripPrefix (String.Pattern "!") line of
Just negated -> Left $ pattern negated
Nothing -> Right $ pattern line
)
>>> Record.rename (Proxy :: Proxy "left") (Proxy :: Proxy "ignore")
>>> Record.rename (Proxy :: Proxy "right") (Proxy :: Proxy "patterns")
>>> Record.rename (Proxy :: Proxy "right") (Proxy :: Proxy "include")

where
isComment = isJust <<< String.stripPrefix (String.Pattern "#")
dropSuffixSlash str = fromMaybe str $ String.stripSuffix (String.Pattern "/") str
dropPrefixSlash str = fromMaybe str $ String.stripPrefix (String.Pattern "/") str

leadingSlash str = String.codePointAt 0 str == Just (String.CodePoint.codePointFromChar '/')
trailingSlash str = String.codePointAt (String.length str - 1) str == Just (String.CodePoint.codePointFromChar '/')

gitignorePatternToMicromatch :: String -> String
gitignorePatternToMicromatch pattern
| trailingSlash pattern = gitignorePatternToMicromatch $ dropSuffixSlash pattern
| leadingSlash pattern = dropPrefixSlash pattern <> "/**"
| otherwise = "**/" <> pattern <> "/**"
| otherwise = "**/" <> dropSuffixSlash pattern <> "/**"

fsWalk :: String -> Array String -> Array String -> Aff (Array Entry)
fsWalk cwd ignorePatterns includePatterns = Aff.makeAff \cb -> do
let includeMatcher = micromatch { ignore: [] } includePatterns -- The Stuff we are globbing for.
let includeMatcher = micromatch { ignore: [], include: includePatterns }

-- Pattern for directories which can be outright ignored.
-- This will be updated whenver a .gitignore is found.
ignoreMatcherRef <- Ref.new $ micromatch { ignore: [] } ignorePatterns
ignoreMatcherRef <- Ref.new { ignore: [], include: ignorePatterns }

-- If this Ref contains `true` because this Aff has been canceled, then deepFilter will always return false.
canceled <- Ref.new false
let
entryGitignore :: Entry -> Effect Unit
entryGitignore entry =
try (SyncFS.readTextFile UTF8 entry.path)
>>= traverse_ \gitignore ->
let
base = Path.relative cwd $ Path.dirname entry.path
gitignored = splitMicromatch $ gitignoreToMicromatchPatterns base gitignore
canIgnore = not <<< flip any includePatterns
newIgnores = filter (canIgnore <<< micromatch) gitignored
in
void
$ Ref.modify (_ <> fold newIgnores)
$ ignoreMatcherRef

-- Should `fsWalk` recurse into this directory?
deepFilter :: Entry -> Effect Boolean
deepFilter entry = Ref.read canceled >>=
if _ then
-- The Aff has been canceled, don't recurse into any further directories!
pure false
else do
matcher <- Ref.read ignoreMatcherRef
pure $ not $ matcher (Path.relative cwd entry.path)
deepFilter entry = fromMaybe false <$> runMaybeT do
isCanceled <- lift $ Ref.read canceled
guard $ not isCanceled
shouldIgnore <- lift $ micromatch <$> Ref.read ignoreMatcherRef
pure $ not $ shouldIgnore $ Path.relative cwd entry.path

-- Should `fsWalk` retain this entry for the result array?
entryFilter :: Entry -> Effect Boolean
entryFilter entry = do
when (isFile entry.dirent && entry.name == ".gitignore") do -- A .gitignore was encountered
let gitignorePath = entry.path

-- directory of this .gitignore relative to the directory being globbed
let base = Path.relative cwd (Path.dirname gitignorePath)

try (SyncFS.readTextFile UTF8 gitignorePath) >>= case _ of
Left _ -> pure unit
Right gitignore -> do
let { ignore, patterns } = gitignoreToMicromatchPatterns base gitignore
let gitignored = (micromatch { ignore } <<< pure) <$> patterns
let wouldConflictWithSearch m = any m includePatterns

-- Do not add `.gitignore` patterns that explicitly ignore the files
-- we're searching for;
--
-- ex. if `includePatterns` is [".spago/p/aff-1.0.0/**/*.purs"],
-- and `gitignored` is ["node_modules", ".spago"],
-- then add "node_modules" to `ignoreMatcher` but not ".spago"
for_ (filter (not <<< wouldConflictWithSearch) gitignored) \pat -> do
-- Instead of composing the matcher functions, we could also keep a growing array of
-- patterns and regenerate the matcher on every append. I don't know which option is
-- more performant, but composing functions is more convenient.
let addMatcher currentMatcher = or [ currentMatcher, pat ]
void $ Ref.modify addMatcher ignoreMatcherRef

ignoreMatcher <- Ref.read ignoreMatcherRef
let path = withForwardSlashes $ Path.relative cwd entry.path
when (isFile entry.dirent && entry.name == ".gitignore") (entryGitignore entry)
ignorePat <- Ref.read ignoreMatcherRef
let
ignoreMatcher = micromatch ignorePat
path = withForwardSlashes $ Path.relative cwd entry.path
pure $ includeMatcher path && not (ignoreMatcher path)

options = { entryFilter, deepFilter }
Expand Down
35 changes: 24 additions & 11 deletions test/Spago/Glob.purs
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,13 @@ module Test.Spago.Glob where

import Test.Prelude

import Data.Foldable (intercalate)
import Data.String.Gen (genAlphaLowercaseString)
import Effect.Aff as Aff
import Node.Path as Path
import Spago.FS as FS
import Spago.Glob as Glob
import Test.QuickCheck.Gen (randomSample', resize)
import Test.Spec (Spec)
import Test.Spec as Spec
import Test.Spec.Assertions as Assert
Expand All @@ -26,11 +29,13 @@ globTmpDir m = Aff.bracket make cleanup m
base
dir
"fruits"
[touch "apple", touch "orange", touch "banana"]
base
dir
"sports"
[touch "baseball", touch "scrabble"]
[ touch "apple"
, touch "orange"
, touch "banana"
, dir "special"
[ touch "apple"
]
]
base
dir
"src"
Expand All @@ -45,17 +50,19 @@ spec = Spec.around globTmpDir do
Spec.describe "glob" do
Spec.describe "gitignoringGlob" do
Spec.it "when no .gitignore, yields all matches" \p -> do
a <- Glob.gitignoringGlob p ["**/fruits/apple/**"]
a `Assert.shouldEqual` ["fruits/apple", "src/fruits/apple"]

b <- Glob.gitignoringGlob p ["sports/**"]
b `Assert.shouldEqual` ["sports", "sports/baseball", "sports/scrabble"]
a <- Glob.gitignoringGlob p ["**/apple"]
a `Assert.shouldEqual` ["fruits/apple", "fruits/special/apple", "src/fruits/apple"]

Spec.it "respects a .gitignore pattern that doesn't conflict with search" \p -> do
FS.writeTextFile (Path.concat [p, ".gitignore"]) "/fruits"
a <- Glob.gitignoringGlob p ["src/fruits/apple/**"]
a <- Glob.gitignoringGlob p ["**/apple"]
a `Assert.shouldEqual` ["src/fruits/apple"]

Spec.it "respects a negated .gitignore pattern" \p -> do
FS.writeTextFile (Path.concat [p, ".gitignore"]) "!/fruits/special/apple\n/fruits/apple"
a <- Glob.gitignoringGlob p ["**/apple"]
a `Assert.shouldEqual` ["fruits/special/apple", "src/fruits/apple"]

Spec.it "does not respect a .gitignore pattern that conflicts with search" \p -> do
for_ ["/fruits", "fruits", "fruits/", "**/fruits", "fruits/**", "**/fruits/**"] \gitignore -> do
FS.writeTextFile (Path.concat [p, ".gitignore"]) gitignore
Expand All @@ -66,3 +73,9 @@ spec = Spec.around globTmpDir do
FS.writeTextFile (Path.concat [p, ".gitignore"]) """/fruits\n/src"""
a <- Glob.gitignoringGlob p ["fruits/apple/**"]
a `Assert.shouldEqual` ["fruits/apple"]

Spec.it "is stacksafe" \p -> do
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

fails on f0f7901 (tip of master at the moment)

hugeGitignore <- liftEffect $ intercalate "\n" <$> randomSample' 10000 (resize 4 $ genAlphaLowercaseString)
FS.writeTextFile (Path.concat [p, ".gitignore"]) hugeGitignore
a <- Glob.gitignoringGlob p ["fruits/apple/**"]
a `Assert.shouldEqual` ["fruits/apple"]
Loading