Commit 950f6d1a authored by quasicomputational's avatar quasicomputational

Be more efficient about globbing

Before, we were walking the directory tree recursively
unconditionally. Now we check if the pattern is recursive before doing
that, and also only check the directory's full listing if a glob
pattern is involved at all, so literal patterns are more efficient.
parent 5c9cadef
...@@ -16,11 +16,12 @@ ...@@ -16,11 +16,12 @@
module Distribution.Simple.Glob ( module Distribution.Simple.Glob (
matchFileGlob, matchFileGlob,
matchDirFileGlob, matchDirFileGlob,
matchDirFileGlob',
fileGlobMatches, fileGlobMatches,
parseFileGlob, parseFileGlob,
explainGlobSyntaxError, explainGlobSyntaxError,
GlobSyntaxError(..), GlobSyntaxError(..),
GlobPat, Glob,
) where ) where
import Prelude () import Prelude ()
...@@ -30,6 +31,7 @@ import Distribution.Simple.Utils ...@@ -30,6 +31,7 @@ import Distribution.Simple.Utils
import Distribution.Verbosity import Distribution.Verbosity
import Distribution.Version import Distribution.Version
import System.Directory (getDirectoryContents, doesFileExist)
import System.FilePath (joinPath, splitExtensions, splitDirectories, takeExtensions, (</>)) import System.FilePath (joinPath, splitExtensions, splitDirectories, takeExtensions, (</>))
-- Note throughout that we use splitDirectories, not splitPath. On -- Note throughout that we use splitDirectories, not splitPath. On
...@@ -84,30 +86,35 @@ explainGlobSyntaxError filepath VersionDoesNotSupportGlob = ...@@ -84,30 +86,35 @@ explainGlobSyntaxError filepath VersionDoesNotSupportGlob =
data IsRecursive = Recursive | NonRecursive data IsRecursive = Recursive | NonRecursive
data GlobPat = PatStem String GlobPat data Glob
-- ^ A single subdirectory component + remainder. = GlobStem String Glob
| PatMatch IsRecursive String -- ^ A single subdirectory component + remainder.
-- ^ First argument: Is this a @**/*.ext@ pattern? | GlobFinal GlobFinal
-- Second argument: the extensions to accept.
| PatLit FilePath
-- ^ Literal file name.
fileGlobMatches :: GlobPat -> FilePath -> Bool data GlobFinal
= FinalMatch IsRecursive String
-- ^ First argument: Is this a @**/*.ext@ pattern?
-- Second argument: the extensions to accept.
| FinalLit FilePath
-- ^ Literal file name.
fileGlobMatches :: Glob -> FilePath -> Bool
fileGlobMatches pat = fileGlobMatchesSegments pat . splitDirectories fileGlobMatches pat = fileGlobMatchesSegments pat . splitDirectories
fileGlobMatchesSegments :: GlobPat -> [FilePath] -> Bool fileGlobMatchesSegments :: Glob -> [FilePath] -> Bool
fileGlobMatchesSegments _ [] = False fileGlobMatchesSegments _ [] = False
fileGlobMatchesSegments pat (seg : segs) = case pat of fileGlobMatchesSegments pat (seg : segs) = case pat of
PatStem dir pat' -> GlobStem dir pat' ->
dir == seg && fileGlobMatchesSegments pat' segs dir == seg && fileGlobMatchesSegments pat' segs
PatMatch Recursive ext -> GlobFinal final -> case final of
ext == takeExtensions (last $ seg:segs) FinalMatch Recursive ext ->
PatMatch NonRecursive ext -> ext == takeExtensions (last $ seg:segs)
null segs && ext == takeExtensions seg FinalMatch NonRecursive ext ->
PatLit filename -> null segs && ext == takeExtensions seg
null segs && filename == seg FinalLit filename ->
null segs && filename == seg
parseFileGlob :: Version -> FilePath -> Either GlobSyntaxError GlobPat
parseFileGlob :: Version -> FilePath -> Either GlobSyntaxError Glob
parseFileGlob version filepath = case reverse (splitDirectories filepath) of parseFileGlob version filepath = case reverse (splitDirectories filepath) of
[] -> [] ->
Left EmptyGlob Left EmptyGlob
...@@ -118,31 +125,43 @@ parseFileGlob version filepath = case reverse (splitDirectories filepath) of ...@@ -118,31 +125,43 @@ parseFileGlob version filepath = case reverse (splitDirectories filepath) of
| null ext -> Left NoExtensionOnStar | null ext -> Left NoExtensionOnStar
| otherwise -> Right ext | otherwise -> Right ext
_ -> Left LiteralFileNameGlobStar _ -> Left LiteralFileNameGlobStar
foldM addStem (PatMatch Recursive ext) segments foldM addStem (GlobFinal $ FinalMatch Recursive ext) segments
| otherwise -> Left VersionDoesNotSupportGlobStar | otherwise -> Left VersionDoesNotSupportGlobStar
(filename : segments) -> do (filename : segments) -> do
pat <- case splitExtensions filename of pat <- case splitExtensions filename of
("*", ext) | not allowGlob -> Left VersionDoesNotSupportGlob ("*", ext) | not allowGlob -> Left VersionDoesNotSupportGlob
| '*' `elem` ext -> Left StarInExtension | '*' `elem` ext -> Left StarInExtension
| null ext -> Left NoExtensionOnStar | null ext -> Left NoExtensionOnStar
| otherwise -> Right (PatMatch NonRecursive ext) | otherwise -> Right (FinalMatch NonRecursive ext)
(_, ext) | '*' `elem` ext -> Left StarInExtension (_, ext) | '*' `elem` ext -> Left StarInExtension
| '*' `elem` filename -> Left StarInFileName | '*' `elem` filename -> Left StarInFileName
| otherwise -> Right (PatLit filename) | otherwise -> Right (FinalLit filename)
foldM addStem pat segments foldM addStem (GlobFinal pat) segments
where where
allowGlob = version >= mkVersion [1,6] allowGlob = version >= mkVersion [1,6]
allowGlobStar = version >= mkVersion [3,0] allowGlobStar = version >= mkVersion [3,0]
addStem pat seg addStem pat seg
| '*' `elem` seg = Left StarInDirectory | '*' `elem` seg = Left StarInDirectory
| otherwise = Right (PatStem seg pat) | otherwise = Right (GlobStem seg pat)
matchFileGlob :: Verbosity -> Version -> FilePath -> IO [FilePath] matchFileGlob :: Verbosity -> Version -> FilePath -> IO [FilePath]
matchFileGlob verbosity version = matchDirFileGlob verbosity version "." matchFileGlob verbosity version = matchDirFileGlob verbosity version "."
-- The returned values do not include the supplied @dir@ prefix. -- | Like 'matchDirFileGlob'', but will 'die'' when the glob matches
-- no files.
matchDirFileGlob :: Verbosity -> Version -> FilePath -> FilePath -> IO [FilePath] matchDirFileGlob :: Verbosity -> Version -> FilePath -> FilePath -> IO [FilePath]
matchDirFileGlob verbosity version rawDir filepath = case parseFileGlob version filepath of matchDirFileGlob verbosity version dir filepath = do
matches <- matchDirFileGlob' verbosity version dir filepath
when (null matches) $ die' verbosity $
"filepath wildcard '" ++ filepath
++ "' does not match any files."
return matches
-- | Match files against a glob, starting in a directory.
--
-- The returned values do not include the supplied @dir@ prefix.
matchDirFileGlob' :: Verbosity -> Version -> FilePath -> FilePath -> IO [FilePath]
matchDirFileGlob' verbosity version rawDir filepath = case parseFileGlob version filepath of
Left err -> die' verbosity $ explainGlobSyntaxError filepath err Left err -> die' verbosity $ explainGlobSyntaxError filepath err
Right pat -> do Right pat -> do
-- The default data-dir is null. Our callers -should- be -- The default data-dir is null. Our callers -should- be
...@@ -159,17 +178,22 @@ matchDirFileGlob verbosity version rawDir filepath = case parseFileGlob version ...@@ -159,17 +178,22 @@ matchDirFileGlob verbosity version rawDir filepath = case parseFileGlob version
-- ".". Walking the tree starting there involves going into .git/ -- ".". Walking the tree starting there involves going into .git/
-- and dist-newstyle/, which is a lot of work for no reward, so -- and dist-newstyle/, which is a lot of work for no reward, so
-- extract the constant prefix from the pattern and start walking -- extract the constant prefix from the pattern and start walking
-- there. If the pattern is **/*.blah, then of course we'll have -- there, and only walk as much as we need to: recursively if **,
-- to walk the whole thing anyway, but that's what the user asked -- the whole directory if *, and just the specific file if it's a
-- for! -- literal.
let (prefixSegments, pat') = splitConstantPrefix pat let (prefixSegments, final) = splitConstantPrefix pat
joinedPrefix = joinPath prefixSegments joinedPrefix = joinPath prefixSegments
files <- getDirectoryContentsRecursive (dir </> joinedPrefix) files <- case final of
case filter (fileGlobMatches pat') files of FinalMatch recursive exts -> do
[] -> die' verbosity $ let prefix = dir </> joinedPrefix
"filepath wildcard '" ++ filepath candidates <- case recursive of
++ "' does not match any files." Recursive -> getDirectoryContentsRecursive prefix
matches -> return $ fmap (joinedPrefix </>) matches NonRecursive -> filterM (doesFileExist . (prefix </>)) =<< getDirectoryContents prefix
return $ filter ((==) exts . takeExtensions) candidates
FinalLit fn -> do
exists <- doesFileExist (dir </> joinedPrefix </> fn)
return [ fn | exists ]
return $ fmap (joinedPrefix </>) files
unfoldr' :: (a -> Either r (b, a)) -> a -> ([b], r) unfoldr' :: (a -> Either r (b, a)) -> a -> ([b], r)
unfoldr' f a = case f a of unfoldr' f a = case f a of
...@@ -178,10 +202,10 @@ unfoldr' f a = case f a of ...@@ -178,10 +202,10 @@ unfoldr' f a = case f a of
(bs, r) -> (b : bs, r) (bs, r) -> (b : bs, r)
-- | Extract the (possibly null) constant prefix from the pattern. -- | Extract the (possibly null) constant prefix from the pattern.
-- This has the property that, if @(pref, pat') = splitConstantPrefix pat@, -- This has the property that, if @(pref, final) = splitConstantPrefix pat@,
-- then @pat === foldr PatStem pat' pref@. -- then @pat === foldr GlobStem (GlobFinal final) pref@.
splitConstantPrefix :: GlobPat -> ([FilePath], GlobPat) splitConstantPrefix :: Glob -> ([FilePath], GlobFinal)
splitConstantPrefix = unfoldr' step splitConstantPrefix = unfoldr' step
where where
step (PatStem seg pat) = Right (seg, pat) step (GlobStem seg pat) = Right (seg, pat)
step pat = Left pat step (GlobFinal pat) = Left pat
...@@ -1009,11 +1009,12 @@ describe the package as a whole: ...@@ -1009,11 +1009,12 @@ describe the package as a whole:
of the same file type without making it too easy to accidentally of the same file type without making it too easy to accidentally
include unwanted files. include unwanted files.
On efficiency: the directory tree will be walked starting with the On efficiency: if you use ``**`` patterns, the directory tree will
parent directory of the first wildcard. If that's the root of the be walked starting with the parent directory of the ``**``. If
project, this might include ``.git/``, ``dist-newstyle/``, or that's the root of the project, this might include ``.git/``,
other large directories! To avoid this behaviour, put the files ``dist-newstyle/``, or other large directories! To avoid this
that wildcards will match against in their own folder. behaviour, put the files that wildcards will match against in
their own folder.
``**`` wildcards are available starting in Cabal 3.0. ``**`` wildcards are available starting in Cabal 3.0.
......
...@@ -51,7 +51,7 @@ compatibilityTests version = ...@@ -51,7 +51,7 @@ compatibilityTests version =
[ testCase "literal match" $ [ testCase "literal match" $
testMatches "foo/a" ["foo/a"] testMatches "foo/a" ["foo/a"]
, testCase "literal no match on prefix" $ , testCase "literal no match on prefix" $
testNoMatches "foo/c.html" testMatches "foo/c.html" []
, testCase "literal no match on suffix" $ , testCase "literal no match on suffix" $
testMatches "foo/a.html" ["foo/a.html"] testMatches "foo/a.html" ["foo/a.html"]
, testCase "literal no prefix" $ , testCase "literal no prefix" $
...@@ -81,7 +81,6 @@ compatibilityTests version = ...@@ -81,7 +81,6 @@ compatibilityTests version =
] ]
where where
testMatches = testMatchesVersion version testMatches = testMatchesVersion version
testNoMatches = testNoMatchesVersion version
testFailParse = testFailParseVersion version testFailParse = testFailParseVersion version
-- For efficiency reasons, matchDirFileGlob isn't a simple call to -- For efficiency reasons, matchDirFileGlob isn't a simple call to
...@@ -103,23 +102,12 @@ testMatchesVersion version pat expected = do ...@@ -103,23 +102,12 @@ testMatchesVersion version pat expected = do
-- ...and the impure glob matcher. -- ...and the impure glob matcher.
withSystemTempDirectory "globstar-sample" $ \tmpdir -> do withSystemTempDirectory "globstar-sample" $ \tmpdir -> do
makeSampleFiles tmpdir makeSampleFiles tmpdir
actual <- matchDirFileGlob Verbosity.normal version tmpdir pat actual <- matchDirFileGlob' Verbosity.normal version tmpdir pat
unless (isEqual actual expected) $ unless (isEqual actual expected) $
assertFailure $ "Unexpected result (impure matcher): " ++ show actual assertFailure $ "Unexpected result (impure matcher): " ++ show actual
where where
isEqual = (==) `on` (sort . fmap normalise) isEqual = (==) `on` (sort . fmap normalise)
-- TODO: Unify this and testMatchesVersion. Can't do this yet because
-- matchDirFileGlob calls die' when it doesn't match anything.
testNoMatchesVersion :: Version -> FilePath -> Assertion
testNoMatchesVersion version pat =
case parseFileGlob version pat of
Left _ -> assertFailure "Couldn't compile the pattern."
Right globPat ->
let actual = filter (fileGlobMatches globPat) sampleFileNames
in unless (null actual) $
assertFailure $ "Unexpected result (pure matcher): " ++ show actual
testFailParseVersion :: Version -> FilePath -> GlobSyntaxError -> Assertion testFailParseVersion :: Version -> FilePath -> GlobSyntaxError -> Assertion
testFailParseVersion version pat expected = testFailParseVersion version pat expected =
case parseFileGlob version pat of case parseFileGlob version pat of
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment