Heuristics.hs 14.5 KB
Newer Older
1 2 3 4 5 6 7 8 9 10
-----------------------------------------------------------------------------
-- |
-- Module      :  Distribution.Client.Init.Heuristics
-- Copyright   :  (c) Benedikt Huber 2009
-- License     :  BSD-like
--
-- Maintainer  :  cabal-devel@haskell.org
-- Stability   :  provisional
-- Portability :  portable
--
11 12
-- Heuristics for creating initial cabal files.
--
13 14 15 16 17
-----------------------------------------------------------------------------
module Distribution.Client.Init.Heuristics (
    guessPackageName,
    scanForModules,     SourceFileEntry(..),
    neededBuildPrograms,
Mikhail Glushenkov's avatar
Mikhail Glushenkov committed
18
    guessMainFileCandidates,
19 20 21
    guessAuthorNameMail,
    knownCategories,
) where
22

23
import Prelude ()
24
import qualified Data.ByteString as BS
25
import Distribution.Client.Compat.Prelude
26
import Distribution.Utils.Generic (safeHead, safeTail, safeLast)
27

Mikhail Glushenkov's avatar
Mikhail Glushenkov committed
28
import Distribution.Simple.Setup (Flag(..), flagToMaybe)
29
import Distribution.Simple.Utils (fromUTF8BS)
30
import Distribution.ModuleName
31
    ( ModuleName, toFilePath )
Ian D. Bollinger's avatar
Ian D. Bollinger committed
32
import qualified Distribution.Package as P
33 34
import qualified Distribution.PackageDescription as PD
    ( category, packageDescription )
35 36
import Distribution.Client.Utils
         ( tryCanonicalizePath )
37
import Language.Haskell.Extension ( Extension )
38

39 40 41
import Distribution.Solver.Types.PackageIndex
    ( allPackagesByName )
import Distribution.Solver.Types.SourcePackage
Oleg Grenrus's avatar
Oleg Grenrus committed
42
    ( srcpkgDescription )
43 44

import Distribution.Client.Types ( SourcePackageDb(..) )
45
import Data.Char   ( isLower )
46
import Data.List   ( isInfixOf )
47
import qualified Data.Set as Set ( fromList, toList )
Mikhail Glushenkov's avatar
Mikhail Glushenkov committed
48
import System.Directory ( getCurrentDirectory, getDirectoryContents,
49
                          doesDirectoryExist, doesFileExist, getHomeDirectory, )
50
import Distribution.Compat.Environment ( getEnvironment )
51
import System.FilePath ( takeExtension, takeBaseName, dropExtension,
52
                         (</>), (<.>), splitDirectories, makeRelative )
53

Mikhail Glushenkov's avatar
Mikhail Glushenkov committed
54
import Distribution.Client.Init.Types     ( InitFlags(..) )
55
import Distribution.Client.Compat.Process ( readProcessWithExitCode )
56

57 58
import qualified Distribution.Utils.ShortText as ShortText

Mikhail Glushenkov's avatar
Mikhail Glushenkov committed
59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86
-- | Return a list of candidate main files for this executable: top-level
-- modules including the word 'Main' in the file name. The list is sorted in
-- order of preference, shorter file names are preferred. 'Right's are existing
-- candidates and 'Left's are those that do not yet exist.
guessMainFileCandidates :: InitFlags -> IO [Either FilePath FilePath]
guessMainFileCandidates flags = do
  dir <-
    maybe getCurrentDirectory return (flagToMaybe $ packageDir flags)
  files <- getDirectoryContents dir
  let existingCandidates = filter isMain files
      -- We always want to give the user at least one default choice.  If either
      -- Main.hs or Main.lhs has already been created, then we don't want to
      -- suggest the other; however, if neither has been created, then we
      -- suggest both.
      newCandidates =
        if any (`elem` existingCandidates) ["Main.hs", "Main.lhs"]
        then []
        else ["Main.hs", "Main.lhs"]
      candidates =
        sortBy (\x y -> comparing (length . either id id) x y
                        `mappend` compare x y)
               (map Left newCandidates ++ map Right existingCandidates)
  return candidates

  where
    isMain f =    (isInfixOf "Main" f || isInfixOf  "main" f)
               && (isSuffixOf ".hs" f || isSuffixOf ".lhs" f)

Ian D. Bollinger's avatar
Ian D. Bollinger committed
87 88
-- | Guess the package name based on the given root directory.
guessPackageName :: FilePath -> IO P.PackageName
89
guessPackageName = liftM (P.mkPackageName . repair . fromMaybe "" . safeLast . splitDirectories)
Ian D. Bollinger's avatar
Ian D. Bollinger committed
90 91 92 93 94 95 96 97 98 99 100 101 102 103
                 . tryCanonicalizePath
  where
    -- Treat each span of non-alphanumeric characters as a hyphen. Each
    -- hyphenated component of a package name must contain at least one
    -- alphabetic character. An arbitrary character ('x') will be prepended if
    -- this is not the case for the first component, and subsequent components
    -- will simply be run together. For example, "1+2_foo-3" will become
    -- "x12-foo3".
    repair = repair' ('x' :) id
    repair' invalid valid x = case dropWhile (not . isAlphaNum) x of
        "" -> repairComponent ""
        x' -> let (c, r) = first repairComponent $ break (not . isAlphaNum) x'
              in c ++ repairRest r
      where
104 105
        repairComponent c | all isDigit c = invalid c
                          | otherwise     = valid c
Ian D. Bollinger's avatar
Ian D. Bollinger committed
106
    repairRest = repair' id ('-' :)
107 108 109 110

-- |Data type of source files found in the working directory
data SourceFileEntry = SourceFileEntry
    { relativeSourcePath :: FilePath
111 112 113
    , moduleName         :: ModuleName
    , fileExtension      :: String
    , imports            :: [ModuleName]
114
    , extensions         :: [Extension]
115 116
    } deriving Show

117
sfToFileName :: FilePath -> SourceFileEntry -> FilePath
118
sfToFileName projectRoot (SourceFileEntry relPath m ext _ _)
119 120
  = projectRoot </> relPath </> toFilePath m <.> ext

121
-- |Search for source files in the given directory
Ian D. Bollinger's avatar
Ian D. Bollinger committed
122
-- and return pairs of guessed Haskell source path and
123 124 125 126 127 128 129 130 131
-- module names.
scanForModules :: FilePath -> IO [SourceFileEntry]
scanForModules rootDir = scanForModulesIn rootDir rootDir

scanForModulesIn :: FilePath -> FilePath -> IO [SourceFileEntry]
scanForModulesIn projectRoot srcRoot = scan srcRoot []
  where
    scan dir hierarchy = do
        entries <- getDirectoryContents (projectRoot </> dir)
132
        (files, dirs) <- liftM partitionEithers (traverse (tagIsDir dir) entries)
133 134
        let modules = catMaybes [ guessModuleName hierarchy file
                                | file <- files
135
                                , maybe False isUpper (safeHead file) ]
136 137
        modules' <- traverse (findImportsAndExts projectRoot) modules
        recMods <- traverse (scanRecursive dir hierarchy) dirs
138
        return $ concat (modules' : recMods)
139 140 141 142 143
    tagIsDir parent entry = do
        isDir <- doesDirectoryExist (parent </> entry)
        return $ (if isDir then Right else Left) entry
    guessModuleName hierarchy entry
        | takeBaseName entry == "Setup" = Nothing
144
        | ext `elem` sourceExtensions   =
145
            SourceFileEntry <$> pure relRoot <*> modName <*> pure ext <*> pure [] <*> pure []
146 147
        | otherwise = Nothing
      where
148
        relRoot       = makeRelative projectRoot srcRoot
149
        unqualModName = dropExtension entry
150
        modName       = simpleParsec
151 152
                      $ intercalate "." . reverse $ (unqualModName : hierarchy)
        ext           = case takeExtension entry of '.':e -> e; e -> e
153
    scanRecursive parent hierarchy entry
154 155
      | maybe False isUpper (safeHead entry) = scan (parent </> entry) (entry : hierarchy)
      | maybe False isLower (safeHead entry) && not (ignoreDir entry) =
156
          scanForModulesIn projectRoot $ foldl (</>) srcRoot (reverse (entry : hierarchy))
157
      | otherwise = return []
158 159
    ignoreDir ('.':_)  = True
    ignoreDir dir      = dir `elem` ["dist", "_darcs"]
160

161 162
-- | Read the contents of the handle and parse for Language pragmas
-- and other module names that might be part of this project.
163 164
findImportsAndExts :: FilePath -> SourceFileEntry -> IO SourceFileEntry
findImportsAndExts projectRoot sf = do
165
  s <- fromUTF8BS <$> BS.readFile (sfToFileName projectRoot sf)
166

EyalLotem's avatar
EyalLotem committed
167 168 169 170 171 172 173
  let modules = mapMaybe
                ( getModName
                . drop 1
                . filter (not . null)
                . dropWhile (/= "import")
                . words
                )
174 175 176 177
              . filter (not . ("--" `isPrefixOf`)) -- poor man's comment filtering
              . lines
              $ s

178
      -- TODO: We should probably make a better attempt at parsing
179 180 181 182
      -- comments above.  Unfortunately we can't use a full-fledged
      -- Haskell parser since cabal's dependencies must be kept at a
      -- minimum.

183
      -- A poor man's LANGUAGE pragma parser.
184
      exts = mapMaybe simpleParsec
185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201
           . concatMap getPragmas
           . filter isLANGUAGEPragma
           . map fst
           . drop 1
           . takeWhile (not . null . snd)
           . iterate (takeBraces . snd)
           $ ("",s)

      takeBraces = break (== '}') . dropWhile (/= '{')

      isLANGUAGEPragma = ("{-# LANGUAGE " `isPrefixOf`)

      getPragmas = map trim . splitCommas . takeWhile (/= '#') . drop 13

      splitCommas "" = []
      splitCommas xs = x : splitCommas (drop 1 y)
        where (x,y) = break (==',') xs
202 203 204 205

  return sf { imports    = modules
            , extensions = exts
            }
206 207 208 209

 where getModName :: [String] -> Maybe ModuleName
       getModName []               = Nothing
       getModName ("qualified":ws) = getModName ws
210
       getModName (ms:_)           = simpleParsec ms
211 212 213



214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232
-- Unfortunately we cannot use the version exported by Distribution.Simple.Program
knownSuffixHandlers :: [(String,String)]
knownSuffixHandlers =
  [ ("gc",     "greencard")
  , ("chs",    "chs")
  , ("hsc",    "hsc2hs")
  , ("x",      "alex")
  , ("y",      "happy")
  , ("ly",     "happy")
  , ("cpphs",  "cpp")
  ]

sourceExtensions :: [String]
sourceExtensions = "hs" : "lhs" : map fst knownSuffixHandlers

neededBuildPrograms :: [SourceFileEntry] -> [String]
neededBuildPrograms entries =
    [ handler
    | ext <- nubSet (map fileExtension entries)
EyalLotem's avatar
EyalLotem committed
233
    , handler <- maybeToList (lookup ext knownSuffixHandlers)
234 235
    ]

236 237 238
-- | Guess author and email using darcs and git configuration options. Use
-- the following in decreasing order of preference:
--
239 240 241 242
-- 1. vcs env vars ($DARCS_EMAIL, $GIT_AUTHOR_*)
-- 2. Local repo configs
-- 3. Global vcs configs
-- 4. The generic $EMAIL
243
--
244 245 246 247
-- Name and email are processed separately, so the guess might end up being
-- a name from DARCS_EMAIL and an email from git config.
--
-- Darcs has preference, for tradition's sake.
248
guessAuthorNameMail :: IO (Flag String, Flag String)
249 250 251 252 253
guessAuthorNameMail = fmap authorGuessPure authorGuessIO

-- Ordered in increasing preference, since Flag-as-monoid is identical to
-- Last.
authorGuessPure :: AuthorGuessIO -> AuthorGuess
254 255 256 257 258
authorGuessPure (AuthorGuessIO { authorGuessEnv = env
                               , authorGuessLocalDarcs = darcsLocalF
                               , authorGuessGlobalDarcs = darcsGlobalF
                               , authorGuessLocalGit = gitLocal
                               , authorGuessGlobalGit = gitGlobal })
259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275
    = mconcat
        [ emailEnv env
        , gitGlobal
        , darcsCfg darcsGlobalF
        , gitLocal
        , darcsCfg darcsLocalF
        , gitEnv env
        , darcsEnv env
        ]

authorGuessIO :: IO AuthorGuessIO
authorGuessIO = AuthorGuessIO
    <$> getEnvironment
    <*> (maybeReadFile $ "_darcs" </> "prefs" </> "author")
    <*> (maybeReadFile =<< liftM (</> (".darcs" </> "author")) getHomeDirectory)
    <*> gitCfg Local
    <*> gitCfg Global
276 277

-- Types and functions used for guessing the author are now defined:
278

279
type AuthorGuess   = (Flag String, Flag String)
280
type Enviro        = [(String, String)]
281
data GitLoc        = Local | Global
282 283 284 285 286 287 288
data AuthorGuessIO = AuthorGuessIO {
    authorGuessEnv         :: Enviro,         -- ^ Environment lookup table
    authorGuessLocalDarcs  :: (Maybe String), -- ^ Contents of local darcs author info
    authorGuessGlobalDarcs :: (Maybe String), -- ^ Contents of global darcs author info
    authorGuessLocalGit    :: AuthorGuess,   -- ^ Git config --local
    authorGuessGlobalGit   :: AuthorGuess    -- ^ Git config --global
  }
289 290 291 292 293

darcsEnv :: Enviro -> AuthorGuess
darcsEnv = maybe mempty nameAndMail . lookup "DARCS_EMAIL"

gitEnv :: Enviro -> AuthorGuess
Mikhail Glushenkov's avatar
Mikhail Glushenkov committed
294
gitEnv env = (name, mail)
295
  where
Mikhail Glushenkov's avatar
Mikhail Glushenkov committed
296 297
    name = maybeFlag "GIT_AUTHOR_NAME" env
    mail = maybeFlag "GIT_AUTHOR_EMAIL" env
298

299 300
darcsCfg :: Maybe String -> AuthorGuess
darcsCfg = maybe mempty nameAndMail
301

302
emailEnv :: Enviro -> AuthorGuess
Mikhail Glushenkov's avatar
Mikhail Glushenkov committed
303
emailEnv env = (mempty, mail)
304
  where
Mikhail Glushenkov's avatar
Mikhail Glushenkov committed
305
    mail = maybeFlag "EMAIL" env
306

307
gitCfg :: GitLoc -> IO AuthorGuess
308 309 310
gitCfg which = do
  name <- gitVar which "user.name"
  mail <- gitVar which "user.email"
311 312
  return (name, mail)

313 314
gitVar :: GitLoc -> String -> IO (Flag String)
gitVar which = fmap happyOutput . gitConfigQuery which
315 316 317 318

happyOutput :: (ExitCode, a, t) -> Flag a
happyOutput v = case v of
  (ExitSuccess, s, _) -> Flag s
319
  _                   -> mempty
320

321 322 323 324 325 326 327 328
gitConfigQuery :: GitLoc -> String -> IO (ExitCode, String, String)
gitConfigQuery which key =
    fmap trim' $ readProcessWithExitCode "git" ["config", w, key] ""
  where
    w = case which of
        Local  -> "--local"
        Global -> "--global"
    trim' (a, b, c) = (a, trim b, c)
329

330 331 332
maybeFlag :: String -> Enviro -> Flag String
maybeFlag k = maybe mempty Flag . lookup k

333
-- | Read the first non-comment, non-trivial line of a file, if it exists
334 335 336 337
maybeReadFile :: String -> IO (Maybe String)
maybeReadFile f = do
    exists <- doesFileExist f
    if exists
338
        then fmap getFirstLine $ readFile f
339
        else return Nothing
340 341 342 343 344 345
  where
    getFirstLine content =
      let nontrivialLines = dropWhile (\l -> (null l) || ("#" `isPrefixOf` l)) . lines $ content
      in case nontrivialLines of
           [] -> Nothing
           (l:_) -> Just l
346

Ian D. Bollinger's avatar
Ian D. Bollinger committed
347
-- |Get list of categories used in Hackage. NOTE: Very slow, needs to be cached
348
knownCategories :: SourcePackageDb -> [String]
EyalLotem's avatar
EyalLotem committed
349
knownCategories (SourcePackageDb sourcePkgIndex _) = nubSet
350
    [ cat | pkg <- maybeToList . safeHead =<< (allPackagesByName sourcePkgIndex)
Oleg Grenrus's avatar
Oleg Grenrus committed
351
          , let catList = (PD.category . PD.packageDescription . srcpkgDescription) pkg
352
          , cat <- splitString ',' $ ShortText.fromShortText catList
353 354 355 356 357 358 359
    ]

-- Parse name and email, from darcs pref files or environment variable
nameAndMail :: String -> (Flag String, Flag String)
nameAndMail str
  | all isSpace nameOrEmail = mempty
  | null erest = (mempty, Flag $ trim nameOrEmail)
Mikhail Glushenkov's avatar
Mikhail Glushenkov committed
360
  | otherwise  = (Flag $ trim nameOrEmail, Flag mail)
361 362
  where
    (nameOrEmail,erest) = break (== '<') str
363
    (mail,_)            = break (== '>') (safeTail erest)
364 365 366 367

trim :: String -> String
trim = removeLeadingSpace . reverse . removeLeadingSpace . reverse
  where
368 369
    removeLeadingSpace  = dropWhile isSpace

Ian D. Bollinger's avatar
Ian D. Bollinger committed
370
-- split string at given character, and remove whitespace
371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395
splitString :: Char -> String -> [String]
splitString sep str = go str where
    go s = if null s' then [] else tok : go rest where
      s' = dropWhile (\c -> c == sep || isSpace c) s
      (tok,rest) = break (==sep) s'

nubSet :: (Ord a) => [a] -> [a]
nubSet = Set.toList . Set.fromList

{-
test db testProjectRoot = do
  putStrLn "Guessed package name"
  (guessPackageName >=> print) testProjectRoot
  putStrLn "Guessed name and email"
  guessAuthorNameMail >>= print

  mods <- scanForModules testProjectRoot

  putStrLn "Guessed modules"
  mapM_ print mods
  putStrLn "Needed build programs"
  print (neededBuildPrograms mods)

  putStrLn "List of known categories"
  print $ knownCategories db
Duncan Coutts's avatar
Duncan Coutts committed
396
-}