Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: gitignoringGlob stack safety + perf #1234

Merged
merged 7 commits into from
Jun 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/Spago/Glob.js
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import mm from 'micromatch';
import * as fsWalk from '@nodelib/fs.walk';

export const micromatch = options => patterns => mm.matcher(patterns, options);
export const testGlob = glob => mm.matcher(glob.include, {ignore: glob.ignore});

export const fsWalkImpl = Left => Right => respond => options => path => () => {
const entryFilter = entry => options.entryFilter(entry)();
Expand Down
110 changes: 53 additions & 57 deletions src/Spago/Glob.purs
Original file line number Diff line number Diff line change
Expand Up @@ -2,21 +2,30 @@ module Spago.Glob (gitignoringGlob) where

import Spago.Prelude

import Control.Alternative (guard)
import Control.Monad.Maybe.Trans (runMaybeT)
import Control.Monad.Trans.Class (lift)
import Data.Array as Array
import Data.Filterable (filter)
import Data.Foldable (any)
import Data.Foldable (any, fold)
import Data.String as String
import Data.String.CodePoints as String.CodePoint
import Data.Traversable (traverse_)
import Effect.Aff as Aff
import Effect.Ref as Ref
import Node.FS.Sync as SyncFS
import Node.Path as Path
import Record as Record
import Type.Proxy (Proxy(..))

type MicroMatchOptions = { ignore :: Array String }
type Glob =
{ ignore :: Array String
, include :: Array String
}

foreign import micromatch :: MicroMatchOptions -> Array String -> String -> Boolean
foreign import testGlob :: Glob -> String -> Boolean

splitGlob :: Glob -> Array Glob
splitGlob { ignore, include } = (\a -> { ignore, include: [ a ] }) <$> include

type Entry = { name :: String, path :: String, dirent :: DirEnt }
type FsWalkOptions = { entryFilter :: Entry -> Effect Boolean, deepFilter :: Entry -> Effect Boolean }
Expand All @@ -31,86 +40,73 @@ foreign import fsWalkImpl
-> String
-> Effect Unit

gitignoreToMicromatchPatterns :: String -> String -> { ignore :: Array String, patterns :: Array String }
gitignoreToMicromatchPatterns base =
gitignoreGlob :: String -> String -> Glob
gitignoreGlob base =
String.split (String.Pattern "\n")
>>> map String.trim
>>> Array.filter (not <<< or [ String.null, isComment ])
>>> partitionMap
( \line -> do
let negated = isJust $ String.stripPrefix (String.Pattern "!") line
let pattern = Path.concat [ base, gitignorePatternToMicromatch line ]
if negated then Left pattern else Right pattern
let
resolve a = Path.concat [ base, a ]
pat a = resolve $ unpackPattern a
case String.stripPrefix (String.Pattern "!") line of
Just negated -> Left $ pat negated
Nothing -> Right $ pat line
)
>>> Record.rename (Proxy :: Proxy "left") (Proxy :: Proxy "ignore")
>>> Record.rename (Proxy :: Proxy "right") (Proxy :: Proxy "patterns")
>>> Record.rename (Proxy @"left") (Proxy @"ignore")
>>> Record.rename (Proxy @"right") (Proxy @"include")

where
isComment = isJust <<< String.stripPrefix (String.Pattern "#")
dropSuffixSlash str = fromMaybe str $ String.stripSuffix (String.Pattern "/") str
dropPrefixSlash str = fromMaybe str $ String.stripPrefix (String.Pattern "/") str

leadingSlash str = String.codePointAt 0 str == Just (String.CodePoint.codePointFromChar '/')
trailingSlash str = String.codePointAt (String.length str - 1) str == Just (String.CodePoint.codePointFromChar '/')
leadingSlash = String.stripPrefix (String.Pattern "/")
trailingSlash = String.stripSuffix (String.Pattern "/")

gitignorePatternToMicromatch :: String -> String
gitignorePatternToMicromatch pattern
| trailingSlash pattern = gitignorePatternToMicromatch $ dropSuffixSlash pattern
| leadingSlash pattern = dropPrefixSlash pattern <> "/**"
unpackPattern :: String -> String
unpackPattern pattern
| Just a <- trailingSlash pattern = unpackPattern a
| Just a <- leadingSlash pattern = a <> "/**"
| otherwise = "**/" <> pattern <> "/**"

fsWalk :: String -> Array String -> Array String -> Aff (Array Entry)
fsWalk cwd ignorePatterns includePatterns = Aff.makeAff \cb -> do
let includeMatcher = micromatch { ignore: [] } includePatterns -- The Stuff we are globbing for.
let includeMatcher = testGlob { ignore: [], include: includePatterns }

-- Pattern for directories which can be outright ignored.
-- This will be updated whenver a .gitignore is found.
ignoreMatcherRef <- Ref.new $ micromatch { ignore: [] } ignorePatterns
ignoreMatcherRef :: Ref Glob <- Ref.new { ignore: [], include: ignorePatterns }

-- If this Ref contains `true` because this Aff has been canceled, then deepFilter will always return false.
canceled <- Ref.new false
let
entryGitignore :: Entry -> Effect Unit
entryGitignore entry =
try (SyncFS.readTextFile UTF8 entry.path)
>>= traverse_ \gitignore ->
let
base = Path.relative cwd $ Path.dirname entry.path
pats = splitGlob $ gitignoreGlob base gitignore
patIsOk g = not $ any (testGlob g) includePatterns
newPats = filter (patIsOk) pats
in
void $ Ref.modify (_ <> fold newPats) $ ignoreMatcherRef

-- Should `fsWalk` recurse into this directory?
deepFilter :: Entry -> Effect Boolean
deepFilter entry = Ref.read canceled >>=
if _ then
-- The Aff has been canceled, don't recurse into any further directories!
pure false
else do
matcher <- Ref.read ignoreMatcherRef
pure $ not $ matcher (Path.relative cwd entry.path)
deepFilter entry = fromMaybe false <$> runMaybeT do
isCanceled <- lift $ Ref.read canceled
guard $ not isCanceled
shouldIgnore <- lift $ testGlob <$> Ref.read ignoreMatcherRef
pure $ not $ shouldIgnore $ Path.relative cwd entry.path

-- Should `fsWalk` retain this entry for the result array?
entryFilter :: Entry -> Effect Boolean
entryFilter entry = do
when (isFile entry.dirent && entry.name == ".gitignore") do -- A .gitignore was encountered
let gitignorePath = entry.path

-- directory of this .gitignore relative to the directory being globbed
let base = Path.relative cwd (Path.dirname gitignorePath)

try (SyncFS.readTextFile UTF8 gitignorePath) >>= case _ of
Left _ -> pure unit
Right gitignore -> do
let { ignore, patterns } = gitignoreToMicromatchPatterns base gitignore
let gitignored = (micromatch { ignore } <<< pure) <$> patterns
let wouldConflictWithSearch m = any m includePatterns

-- Do not add `.gitignore` patterns that explicitly ignore the files
-- we're searching for;
--
-- ex. if `includePatterns` is [".spago/p/aff-1.0.0/**/*.purs"],
-- and `gitignored` is ["node_modules", ".spago"],
-- then add "node_modules" to `ignoreMatcher` but not ".spago"
for_ (filter (not <<< wouldConflictWithSearch) gitignored) \pat -> do
-- Instead of composing the matcher functions, we could also keep a growing array of
-- patterns and regenerate the matcher on every append. I don't know which option is
-- more performant, but composing functions is more convenient.
let addMatcher currentMatcher = or [ currentMatcher, pat ]
void $ Ref.modify addMatcher ignoreMatcherRef

ignoreMatcher <- Ref.read ignoreMatcherRef
let path = withForwardSlashes $ Path.relative cwd entry.path
when (isFile entry.dirent && entry.name == ".gitignore") (entryGitignore entry)
ignorePat <- Ref.read ignoreMatcherRef
let
ignoreMatcher = testGlob ignorePat
path = withForwardSlashes $ Path.relative cwd entry.path
pure $ includeMatcher path && not (ignoreMatcher path)

options = { entryFilter, deepFilter }
Expand Down
47 changes: 31 additions & 16 deletions test/Spago/Glob.purs
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,13 @@ module Test.Spago.Glob where

import Test.Prelude

import Data.Foldable (intercalate)
import Data.String.Gen (genAlphaLowercaseString)
import Effect.Aff as Aff
import Node.Path as Path
import Spago.FS as FS
import Spago.Glob as Glob
import Test.QuickCheck.Gen (randomSample', resize)
import Test.Spec (Spec)
import Test.Spec as Spec
import Test.Spec.Assertions as Assert
Expand All @@ -26,11 +29,13 @@ globTmpDir m = Aff.bracket make cleanup m
base
dir
"fruits"
[touch "apple", touch "orange", touch "banana"]
base
dir
"sports"
[touch "baseball", touch "scrabble"]
[ touch "apple"
, touch "orange"
, touch "banana"
, dir "special"
[ touch "apple"
]
]
base
dir
"src"
Expand All @@ -45,24 +50,34 @@ spec = Spec.around globTmpDir do
Spec.describe "glob" do
Spec.describe "gitignoringGlob" do
Spec.it "when no .gitignore, yields all matches" \p -> do
a <- Glob.gitignoringGlob p ["**/fruits/apple/**"]
a `Assert.shouldEqual` ["fruits/apple", "src/fruits/apple"]

b <- Glob.gitignoringGlob p ["sports/**"]
b `Assert.shouldEqual` ["sports", "sports/baseball", "sports/scrabble"]
a <- Glob.gitignoringGlob p ["**/apple"]
a `Assert.shouldEqual` ["fruits/apple", "fruits/special/apple", "src/fruits/apple"]

Spec.it "respects a .gitignore pattern that doesn't conflict with search" \p -> do
FS.writeTextFile (Path.concat [p, ".gitignore"]) "/fruits"
a <- Glob.gitignoringGlob p ["src/fruits/apple/**"]
a <- Glob.gitignoringGlob p ["**/apple"]
a `Assert.shouldEqual` ["src/fruits/apple"]

Spec.it "does not respect a .gitignore pattern that conflicts with search" \p -> do
for_ ["/fruits", "fruits", "fruits/", "**/fruits", "fruits/**", "**/fruits/**"] \gitignore -> do
FS.writeTextFile (Path.concat [p, ".gitignore"]) gitignore
a <- Glob.gitignoringGlob p ["fruits/apple/**"]
a `Assert.shouldEqual` ["fruits/apple"]
Spec.it "respects a negated .gitignore pattern" \p -> do
FS.writeTextFile (Path.concat [p, ".gitignore"]) "!/fruits/special/apple\n/fruits/apple"
a <- Glob.gitignoringGlob p ["**/apple"]
a `Assert.shouldEqual` ["fruits/special/apple", "src/fruits/apple"]

for_ ["/fruits", "fruits", "fruits/", "**/fruits", "fruits/**", "**/fruits/**"] \gitignore -> do
Spec.focus $ Spec.it
("does not respect a .gitignore pattern that conflicts with search: " <> gitignore)
\p -> do
FS.writeTextFile (Path.concat [p, ".gitignore"]) gitignore
a <- Glob.gitignoringGlob p ["fruits/apple/**"]
a `Assert.shouldEqual` ["fruits/apple"]

Spec.it "respects some .gitignore patterns" \p -> do
FS.writeTextFile (Path.concat [p, ".gitignore"]) """/fruits\n/src"""
a <- Glob.gitignoringGlob p ["fruits/apple/**"]
a `Assert.shouldEqual` ["fruits/apple"]

Spec.it "is stacksafe" \p -> do
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

fails on f0f7901 (tip of master at the moment)

hugeGitignore <- liftEffect $ intercalate "\n" <$> randomSample' 10000 (resize 4 $ genAlphaLowercaseString)
FS.writeTextFile (Path.concat [p, ".gitignore"]) hugeGitignore
a <- Glob.gitignoringGlob p ["fruits/apple/**"]
a `Assert.shouldEqual` ["fruits/apple"]
Loading