Glob.hs 8.59 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE RankNTypes #-}

-----------------------------------------------------------------------------
-- |
-- Module      :  Distribution.Simple.Glob
-- Copyright   :  Isaac Jones, Simon Marlow 2003-2004
-- License     :  BSD3
--                portions Copyright (c) 2007, Galois Inc.
--
-- Maintainer  :  cabal-devel@haskell.org
-- Portability :  portable
--
-- Simple file globbing.

module Distribution.Simple.Glob (
        matchFileGlob,
        matchDirFileGlob,
19
        matchDirFileGlob',
20 21 22 23
        fileGlobMatches,
        parseFileGlob,
        explainGlobSyntaxError,
        GlobSyntaxError(..),
24
        Glob,
25 26 27 28 29 30 31 32 33
  ) where

import Prelude ()
import Distribution.Compat.Prelude

import Distribution.Simple.Utils
import Distribution.Verbosity
import Distribution.Version

34
import System.Directory (getDirectoryContents, doesFileExist)
35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88
import System.FilePath (joinPath, splitExtensions, splitDirectories, takeExtensions, (</>))

-- Note throughout that we use splitDirectories, not splitPath. On
-- Posix, this makes no difference, but, because Windows accepts both
-- slash and backslash as its path separators, if we left in the
-- separators from the glob we might not end up properly normalised.

data GlobSyntaxError
  = StarInDirectory
  | StarInFileName
  | StarInExtension
  | NoExtensionOnStar
  | EmptyGlob
  | LiteralFileNameGlobStar
  | VersionDoesNotSupportGlobStar
  | VersionDoesNotSupportGlob
  deriving (Eq, Show)

explainGlobSyntaxError :: FilePath -> GlobSyntaxError -> String
explainGlobSyntaxError filepath StarInDirectory =
     "invalid file glob '" ++ filepath
  ++ "'. A wildcard '**' is only allowed as the final parent"
  ++ " directory. Stars must not otherwise appear in the parent"
  ++ " directories."
explainGlobSyntaxError filepath StarInExtension =
     "invalid file glob '" ++ filepath
  ++ "'. Wildcards '*' are only allowed as the"
  ++ " file's base name, not in the file extension."
explainGlobSyntaxError filepath StarInFileName =
     "invalid file glob '" ++ filepath
  ++ "'. Wildcards '*' may only totally replace the"
  ++ " file's base name, not only parts of it."
explainGlobSyntaxError filepath NoExtensionOnStar =
     "invalid file glob '" ++ filepath
  ++ "'. If a wildcard '*' is used it must be with an file extension."
explainGlobSyntaxError filepath LiteralFileNameGlobStar =
     "invalid file glob '" ++ filepath
  ++ "'. If a wildcard '**' is used as a parent directory, the"
  ++ " file's base name must be a wildcard '*'."
explainGlobSyntaxError _ EmptyGlob =
     "invalid file glob. A glob cannot be the empty string."
explainGlobSyntaxError filepath VersionDoesNotSupportGlobStar =
     "invalid file glob '" ++ filepath
  ++ "'. Using the double-star syntax requires 'cabal-version: 3.0'"
  ++ " or greater. Alternatively, for compatibility with earlier Cabal"
  ++ " versions, list the included directories explicitly."
explainGlobSyntaxError filepath VersionDoesNotSupportGlob =
     "invalid file glob '" ++ filepath
  ++ "'. Using star wildcards requires 'cabal-version: >= 1.6'. "
  ++ "Alternatively if you require compatibility with earlier Cabal "
  ++ "versions then list all the files explicitly."

data IsRecursive = Recursive | NonRecursive

89 90 91 92
data Glob
  = GlobStem String Glob
    -- ^ A single subdirectory component + remainder.
  | GlobFinal GlobFinal
93

94 95 96 97 98 99 100 101
data GlobFinal
  = FinalMatch IsRecursive String
    -- ^ First argument: Is this a @**/*.ext@ pattern?
    --   Second argument: the extensions to accept.
  | FinalLit FilePath
    -- ^ Literal file name.

fileGlobMatches :: Glob -> FilePath -> Bool
102 103
fileGlobMatches pat = fileGlobMatchesSegments pat . splitDirectories

104
fileGlobMatchesSegments :: Glob -> [FilePath] -> Bool
105 106
fileGlobMatchesSegments _ [] = False
fileGlobMatchesSegments pat (seg : segs) = case pat of
107
  GlobStem dir pat' ->
108
    dir == seg && fileGlobMatchesSegments pat' segs
109 110 111 112 113 114 115 116 117
  GlobFinal final -> case final of
    FinalMatch Recursive ext ->
      ext == takeExtensions (last $ seg:segs)
    FinalMatch NonRecursive ext ->
      null segs && ext == takeExtensions seg
    FinalLit filename ->
      null segs && filename == seg

parseFileGlob :: Version -> FilePath -> Either GlobSyntaxError Glob
118 119 120 121 122 123 124 125 126 127
parseFileGlob version filepath = case reverse (splitDirectories filepath) of
  [] ->
        Left EmptyGlob
  (filename : "**" : segments)
    | allowGlobStar -> do
        ext <- case splitExtensions filename of
          ("*", ext) | '*' `elem` ext -> Left StarInExtension
                     | null ext       -> Left NoExtensionOnStar
                     | otherwise      -> Right ext
          _                           -> Left LiteralFileNameGlobStar
128
        foldM addStem (GlobFinal $ FinalMatch Recursive ext) segments
129 130 131 132 133 134
    | otherwise -> Left VersionDoesNotSupportGlobStar
  (filename : segments) -> do
        pat <- case splitExtensions filename of
          ("*", ext) | not allowGlob       -> Left VersionDoesNotSupportGlob
                     | '*' `elem` ext      -> Left StarInExtension
                     | null ext            -> Left NoExtensionOnStar
135
                     | otherwise           -> Right (FinalMatch NonRecursive ext)
136 137
          (_, ext)   | '*' `elem` ext      -> Left StarInExtension
                     | '*' `elem` filename -> Left StarInFileName
138 139
                     | otherwise           -> Right (FinalLit filename)
        foldM addStem (GlobFinal pat) segments
140 141 142 143 144
  where
    allowGlob = version >= mkVersion [1,6]
    allowGlobStar = version >= mkVersion [3,0]
    addStem pat seg
      | '*' `elem` seg = Left StarInDirectory
145
      | otherwise      = Right (GlobStem seg pat)
146 147 148 149

matchFileGlob :: Verbosity -> Version -> FilePath -> IO [FilePath]
matchFileGlob verbosity version = matchDirFileGlob verbosity version "."

150 151
-- | Like 'matchDirFileGlob'', but will 'die'' when the glob matches
-- no files.
152
matchDirFileGlob :: Verbosity -> Version -> FilePath -> FilePath -> IO [FilePath]
153 154 155 156 157 158 159 160 161 162 163 164
matchDirFileGlob verbosity version dir filepath = do
  matches <- matchDirFileGlob' verbosity version dir filepath
  when (null matches) $ die' verbosity $
       "filepath wildcard '" ++ filepath
    ++ "' does not match any files."
  return matches

-- | Match files against a glob, starting in a directory.
--
-- The returned values do not include the supplied @dir@ prefix.
matchDirFileGlob' :: Verbosity -> Version -> FilePath -> FilePath -> IO [FilePath]
matchDirFileGlob' verbosity version rawDir filepath = case parseFileGlob version filepath of
165 166
  Left err -> die' verbosity $ explainGlobSyntaxError filepath err
  Right pat -> do
167 168 169 170 171 172 173 174 175 176
    -- The default data-dir is null. Our callers -should- be
    -- converting that to '.' themselves, but it's a certainty that
    -- some future call-site will forget and trigger a really
    -- hard-to-debug failure if we don't check for that here.
    when (null rawDir) $
      warn verbosity $
           "Null dir passed to matchDirFileGlob; interpreting it "
        ++ "as '.'. This is probably an internal error."
    let dir = if null rawDir then "." else rawDir
    debug verbosity $ "Expanding glob '" ++ filepath ++ "' in directory '" ++ dir ++ "'."
177 178 179 180
    -- This function might be called from the project root with dir as
    -- ".". Walking the tree starting there involves going into .git/
    -- and dist-newstyle/, which is a lot of work for no reward, so
    -- extract the constant prefix from the pattern and start walking
181 182 183 184
    -- there, and only walk as much as we need to: recursively if **,
    -- the whole directory if *, and just the specific file if it's a
    -- literal.
    let (prefixSegments, final) = splitConstantPrefix pat
185
        joinedPrefix = joinPath prefixSegments
186 187 188 189 190 191 192 193 194 195 196
    files <- case final of
      FinalMatch recursive exts -> do
        let prefix = dir </> joinedPrefix
        candidates <- case recursive of
          Recursive -> getDirectoryContentsRecursive prefix
          NonRecursive -> filterM (doesFileExist . (prefix </>)) =<< getDirectoryContents prefix
        return $ filter ((==) exts . takeExtensions) candidates
      FinalLit fn -> do
        exists <- doesFileExist (dir </> joinedPrefix </> fn)
        return [ fn | exists ]
    return $ fmap (joinedPrefix </>) files
197 198 199 200 201 202 203 204

unfoldr' :: (a -> Either r (b, a)) -> a -> ([b], r)
unfoldr' f a = case f a of
  Left r -> ([], r)
  Right (b, a') -> case unfoldr' f a' of
    (bs, r) -> (b : bs, r)

-- | Extract the (possibly null) constant prefix from the pattern.
205 206 207
-- This has the property that, if @(pref, final) = splitConstantPrefix pat@,
-- then @pat === foldr GlobStem (GlobFinal final) pref@.
splitConstantPrefix :: Glob -> ([FilePath], GlobFinal)
208 209
splitConstantPrefix = unfoldr' step
  where
210 211
    step (GlobStem seg pat) = Right (seg, pat)
    step (GlobFinal pat) = Left pat