Commit eee1b61f authored by Alp Mestanogullari's avatar Alp Mestanogullari
Browse files

hadrian: optimise Rules.Compile

Previously, as reported in #15938, resuming a build "in the middle",
e.g when building _build/stage1/libraries/base/, hadrian would take up
to a whole minute to get started doing actual work, building code.

This was mostly due to a big enumeration that we do in Rules.hs, to
generate all the possible patterns for object files for 1) all ways, 2)
all packages and 3) all stages. Since rule enumeration is always
performed, whatever the target, we were always paying this cost, which
seemed to grow bigger the farther in the build we stopped and were
resuming from.

Instead, this patch borrows the approach that we took for Rules.Library
in https://github.com/snowleopard/hadrian/pull/571, which exposes all the
relevant object files under as few catch-all rules as possible (8 here),
and parses all the information we need out of the object's path.

The concrete effect of this patch that I have observed is to reduce the
45-60 seconds pause to <5 seconds. Along with the Shake performance
improvements that Neil mentions in #15938, most of the pause should
effectively disappear.

Reviewers: snowleopard, bgamari, goldfire

Reviewed By: snowleopard

Subscribers: rwbarton, carter

GHC Trac Issues: #15938

Differential Revision: https://phabricator.haskell.org/D5412
parent fb669f51
......@@ -32,6 +32,7 @@ executable hadrian
, Hadrian.Builder.Ar
, Hadrian.Builder.Sphinx
, Hadrian.Builder.Tar
, Hadrian.BuildPath
, Hadrian.Expression
, Hadrian.Haskell.Cabal
, Hadrian.Haskell.Cabal.Type
......
module Hadrian.BuildPath where
import Base
import Data.Functor
import qualified Text.Parsec as Parsec
-- | A path of the form
--
-- > <build root>/stage<N>/<path/to/pkg/from/ghc/root>/build/<something>
--
-- where @something@ describes a library or object file or ... to be built
-- for the given package.
--
-- @a@, which represents that @something@, is instantiated with library-related
-- data types in @Rules.Library@ and with object/interface files related types
-- in @Rules.Compile@.
data BuildPath a = BuildPath FilePath -- ^ > <build root>/
Stage -- ^ > stage<N>/
FilePath -- ^ > <path/to/pkg/from/ghc/root>/build/
a -- ^ > whatever comes after 'build/'
deriving (Eq, Show)
-- | Parse a build path under the given build root.
parseBuildPath
:: FilePath -- ^ build root
-> Parsec.Parsec String () a -- ^ what to parse after @build/@
-> Parsec.Parsec String () (BuildPath a)
parseBuildPath root afterBuild = do
_ <- Parsec.string root *> Parsec.optional (Parsec.char '/')
stage <- parseStage
_ <- Parsec.char '/'
pkgpath <- Parsec.manyTill Parsec.anyChar
(Parsec.try $ Parsec.string "/build/")
a <- afterBuild
return (BuildPath root stage pkgpath a)
-- To be kept in sync with Stage.hs's stageString function
-- | Parse @"stageX"@ into a 'Stage'.
parseStage :: Parsec.Parsec String () Stage
parseStage = (Parsec.string "stage" *> Parsec.choice
[ Parsec.string (show n) $> toEnum n
| n <- map fromEnum [minBound .. maxBound :: Stage]
]) Parsec.<?> "stage string"
-- To be kept in sync with the show instances in 'Way.Type', until we perhaps
-- use some bidirectional parsing/pretty printing approach or library.
-- | Parse a way suffix, returning the argument when no suffix is found (the
-- argument will be vanilla in most cases, but dynamic when we parse the way
-- suffix out of a shared library file name).
parseWaySuffix :: Way -> Parsec.Parsec String () Way
parseWaySuffix w = Parsec.choice
[ Parsec.char '_' *>
(wayFromUnits <$> Parsec.sepBy1 parseWayUnit (Parsec.char '_'))
, pure w
] Parsec.<?> "way suffix (e.g _thr_p, or none for vanilla)"
-- | Same as 'parseWaySuffix', but for parsing e.g @thr_p_@
-- instead of @_thr_p@, like 'parseWaySuffix' does.
--
-- This is used to parse paths to object files,
-- in Rules.Compile.
parseWayPrefix :: Way -> Parsec.Parsec String () Way
parseWayPrefix w = Parsec.choice
[ wayFromUnits <$> Parsec.endBy1 parseWayUnit (Parsec.char '_')
, pure w
] Parsec.<?> "way prefix (e.g thr_p_, or none for vanilla)"
parseWayUnit :: Parsec.Parsec String () WayUnit
parseWayUnit = Parsec.choice
[ Parsec.string "thr" *> pure Threaded
, Parsec.char 'd' *>
(Parsec.choice [ Parsec.string "ebug" *> pure Debug
, Parsec.string "yn" *> pure Dynamic ])
, Parsec.char 'p' *> pure Profiling
, Parsec.char 'l' *> pure Logging
] Parsec.<?> "way unit (thr, debug, dyn, p, l)"
-- | Parse a @"pkgname-pkgversion"@ string into the package name and the
-- integers that make up the package version.
parsePkgId :: Parsec.Parsec String () (String, [Integer])
parsePkgId = parsePkgId' "" Parsec.<?> "package identifier (<name>-<version>)"
where
parsePkgId' currName = do
s <- Parsec.many1 Parsec.alphaNum
_ <- Parsec.char '-'
let newName = if null currName then s else currName ++ "-" ++ s
Parsec.choice [ (newName,) <$> parsePkgVersion
, parsePkgId' newName ]
-- | Parse "."-separated integers that describe a package's version.
parsePkgVersion :: Parsec.Parsec String () [Integer]
parsePkgVersion = fmap reverse (parsePkgVersion' [])
Parsec.<?> "package version"
where
parsePkgVersion' xs = do
n <- parseNatural
Parsec.choice
[ Parsec.try
(Parsec.lookAhead (Parsec.char '.' *>
(Parsec.letter <|> Parsec.char '_')
)
)
$> (n:xs)
, Parsec.char '.' *> parsePkgVersion' (n:xs)
, pure $ (n:xs) ]
-- | Parse a natural number.
parseNatural :: Parsec.Parsec String () Integer
parseNatural = (read <$> Parsec.many1 Parsec.digit) Parsec.<?> "natural number"
-- | Runs the given parser against the given path, erroring out when the parser
-- fails (because it shouldn't if the code from this module is correct).
parsePath
:: Parsec.Parsec String () a -- ^ parser to run
-> String -- ^ string describing the input source
-> FilePath -- ^ path to parse
-> Action a
parsePath p inp path = case Parsec.parse p inp path of
Left err -> fail $ "Hadrian.BuildPath.parsePath: path="
++ path ++ ", error:\n" ++ show err
Right a -> pure a
......@@ -94,18 +94,7 @@ packageRules = do
let readPackageDb = [(packageDb, 1)]
writePackageDb = [(packageDb, maxConcurrentReaders)]
let contexts = liftM3 Context allStages knownPackages allWays
vanillaContexts = liftM2 vanillaContext allStages knownPackages
-- TODO: we might want to look into converting more and more
-- rules to the style introduced in Rules.Library in
-- https://github.com/snowleopard/hadrian/pull/571,
-- where "catch-all" rules are used to "catch" the need
-- for library files, and we then use parsec parsers to
-- extract all sorts of information needed to build them, like
-- the package, the stage, the way, etc.
forM_ contexts (Rules.Compile.compilePackage readPackageDb)
Rules.Compile.compilePackage readPackageDb
Rules.Program.buildProgram readPackageDb
......@@ -116,6 +105,12 @@ packageRules = do
-- being forced.
Rules.Register.registerPackage writePackageDb (Context stage dummyPackage vanilla)
-- TODO: Can we get rid of this enumeration of contexts? Since we iterate
-- over it to generate all 4 types of rules below, all the time, we
-- might want to see whether the parse-and-extract approach of
-- Rules.Compile and Rules.Library could save us some time there.
let vanillaContexts = liftM2 vanillaContext allStages knownPackages
forM_ vanillaContexts $ mconcat
[ Rules.Register.configurePackage
, Rules.Dependencies.buildPackageDependencies readPackageDb
......
module Rules.Compile (compilePackage) where
import Hadrian.BuildPath
import Hadrian.Oracles.TextFile
import Base
import Context
import Expression
import Rules.Generate
import Settings
import Settings.Default
import Target
import Utilities
compilePackage :: [(Resource, Int)] -> Context -> Rules ()
compilePackage rs context@Context {..} = do
import qualified Text.Parsec as Parsec
-- * Rules for building objects and Haskell interface files
compilePackage :: [(Resource, Int)] -> Rules ()
compilePackage rs = do
root <- buildRootRules
let dir = root -/- buildDir context
nonHs extension = dir -/- extension <//> "*" <.> osuf way
compile compiler obj2src obj = do
src <- obj2src context obj
need [src]
needDependencies context src $ obj <.> "d"
buildWithResources rs $ target context (compiler stage) [src] [obj]
compileHs = \[obj, _hi] -> do
path <- contextPath context
(src, deps) <- lookupDependencies (path -/- ".dependencies") obj
need $ src : deps
needLibrary =<< contextDependencies context
buildWithResources rs $ target context (Ghc CompileHs stage) [src] [obj]
priority 2.0 $ do
nonHs "c" %> compile (Ghc CompileCWithGhc) (obj2src "c" $ const False )
nonHs "cmm" %> compile (Ghc CompileHs) (obj2src "cmm" isGeneratedCmmFile)
nonHs "s" %> compile (Ghc CompileHs) (obj2src "S" $ const False )
-- TODO: Add dependencies for #include of .h and .hs-incl files (gcc -MM?).
[ dir <//> "*" <.> suf way | suf <- [ osuf, hisuf] ] &%> compileHs
[ dir <//> "*" <.> suf way | suf <- [obootsuf, hibootsuf] ] &%> compileHs
-- We match all file paths that look like:
-- <root>/...stuffs.../build/...stuffs.../<something>.<suffix>
--
-- where:
-- - the '...stuffs...' bits can be one or more path components,
-- - the '<suffix>' part is a way prefix (e.g thr_p_, or nothing if
-- vanilla) followed by an object file extension, without the dot
-- (o, o-boot, hi, hi-boot),
--
-- and parse the information we need (stage, package path, ...) from
-- the path and figure out the suitable way to produce that object file.
objectFilesUnder root |%> \path -> do
obj <- parsePath (parseBuildObject root) "<object file path parser>" path
compileObject rs path obj
where
objectFilesUnder r = [ r -/- ("**/build/**/*" ++ pat)
| pat <- extensionPats
]
exts = [ "o", "hi", "o-boot", "hi-boot" ]
patternsFor e = [ "." ++ e, ".*_" ++ e ]
extensionPats = concatMap patternsFor exts
-- * Object file paths types and parsers
{- We are using a non uniform representation that separates
object files produced from Haskell code and from other
languages, because the two "groups" have to be parsed
differently enough that this would complicated the parser
significantly.
Indeed, non-Haskell files can only produce .o (or .thr_o, ...)
files while Haskell modules can produce those as well as
interface files, both in -boot or non-boot variants.
Moreover, non-Haskell object files live under:
<root>/stage<N>/<path/to/pkg>/build/{c,cmm,s}/
while Haskell object/interface files live under:
<root>/stage<N>/<path/to/pkg>/build/
So the kind of object is partially determined by
whether we're in c/, cmm/ or s/ but also by the
object file's extension, in the case of a Haskell file.
This could have been addressed with some knot-tying but
Parsec's monad doesn't give us a MonadFix instance.
We therefore stick to treating those two type of object
files non uniformly.
-}
-- | Non Haskell source languages that we compile to get object files.
data SourceLang = Asm | C | Cmm
deriving (Eq, Show)
parseSourceLang :: Parsec.Parsec String () SourceLang
parseSourceLang = Parsec.choice
[ Parsec.char 'c' *> Parsec.choice
[ Parsec.string "mm" *> pure Cmm
, pure C
]
, Parsec.char 's' *> pure Asm
]
type Basename = String
parseBasename :: Parsec.Parsec String () Basename
parseBasename = Parsec.manyTill Parsec.anyChar (Parsec.try $ Parsec.char '.')
-- | > <c|cmm|s>/<file>.<way prefix>_o
data NonHsObject = NonHsObject SourceLang Basename Way
deriving (Eq, Show)
parseNonHsObject :: Parsec.Parsec String () NonHsObject
parseNonHsObject = do
lang <- parseSourceLang
_ <- Parsec.char '/'
file <- parseBasename
way <- parseWayPrefix vanilla
_ <- Parsec.char 'o'
return (NonHsObject lang file way)
-- | > <o|hi|o-boot|hi-boot>
data SuffixType = O | Hi | OBoot | HiBoot
deriving (Eq, Show)
parseSuffixType :: Parsec.Parsec String () SuffixType
parseSuffixType = Parsec.choice
[ Parsec.char 'o' *> Parsec.choice
[ Parsec.string "-boot" *> pure OBoot
, pure O
]
, Parsec.string "hi" *> Parsec.choice
[ Parsec.string "-boot" *> pure HiBoot
, pure Hi
]
]
-- | > <way prefix>_<o|hi|o-boot|hi-boot>
data Extension = Extension Way SuffixType
deriving (Eq, Show)
parseExtension :: Parsec.Parsec String () Extension
parseExtension =
Extension <$> parseWayPrefix vanilla <*> parseSuffixType
-- | > <file>.<way prefix>_<o|hi|o-boot|hi-boot>
data HsObject = HsObject Basename Extension
deriving (Eq, Show)
parseHsObject :: Parsec.Parsec String () HsObject
parseHsObject = do
file <- parseBasename
ext <- parseExtension
return (HsObject file ext)
data Object = Hs HsObject | NonHs NonHsObject
deriving (Eq, Show)
parseObject :: Parsec.Parsec String () Object
parseObject = Parsec.choice
[ NonHs <$> parseNonHsObject
, Hs <$> parseHsObject
]
-- * Toplevel parsers
parseBuildObject :: FilePath -> Parsec.Parsec String () (BuildPath Object)
parseBuildObject root = parseBuildPath root parseObject
-- * Getting contexts from objects
objectContext :: BuildPath Object -> Action Context
objectContext (BuildPath _ stage pkgpath obj) = do
pkg <- getPackageFromPath pkgpath
return (Context stage pkg way)
where way = case obj of
NonHs (NonHsObject _lang _file w) -> w
Hs (HsObject _file (Extension w _suf)) -> w
getPackageFromPath path = do
pkgs <- getPackages
case filter (\p -> pkgPath p == path) pkgs of
(p:_) -> return p
_ -> error $ "couldn't find a package with path: " ++ path
getPackages = do
pkgs <- stagePackages stage
testPkgs <- testsuitePackages
return $ pkgs ++ if stage == Stage1 then testPkgs else []
-- * Building an object
compileHsObject
:: [(Resource, Int)] -> FilePath -> BuildPath Object -> HsObject -> Action ()
compileHsObject rs objpath b@(BuildPath _root stage _path _o) hsobj =
case hsobj of
HsObject _basename (Extension _way Hi) ->
need [ change "hi" "o" objpath ]
HsObject _basename (Extension _way HiBoot) ->
need [ change "hi-boot" "o-boot" objpath ]
HsObject _basename (Extension _way _suf) -> do
ctx <- objectContext b
ctxPath <- contextPath ctx
(src, deps) <- lookupDependencies (ctxPath -/- ".dependencies") objpath
need (src:deps)
needLibrary =<< contextDependencies ctx
buildWithResources rs $ target ctx (Ghc CompileHs stage) [src] [objpath]
where change oldSuffix newSuffix str
| not (oldSuffix `isSuffixOf` str) = error $
"compileHsObject.change: " ++ oldSuffix ++
" not a suffix of " ++ str
| otherwise = take (length str - length oldSuffix) str
++ newSuffix
compileNonHsObject
:: [(Resource, Int)] -> FilePath -> BuildPath Object -> NonHsObject
-> Action ()
compileNonHsObject rs objpath b@(BuildPath _root stage _path _o) nonhsobj =
case nonhsobj of
NonHsObject lang _basename _way ->
go (builderFor lang) (toSrcFor lang)
where builderFor C = Ghc CompileCWithGhc
builderFor _ = Ghc CompileHs
toSrcFor Asm = obj2src "S" (const False)
toSrcFor C = obj2src "c" (const False)
toSrcFor Cmm = obj2src "cmm" isGeneratedCmmFile
go builder tosrc = do
ctx <- objectContext b
src <- tosrc ctx objpath
need [src]
needDependencies ctx src (objpath <.> "d")
buildWithResources rs $ target ctx (builder stage) [src] [objpath]
compileObject
:: [(Resource, Int)] -> FilePath -> BuildPath Object -> Action ()
compileObject rs objpath b@(BuildPath _root _stage _path (Hs o)) =
compileHsObject rs objpath b o
compileObject rs objpath b@(BuildPath _root _stage _path (NonHs o)) =
compileNonHsObject rs objpath b o
-- * Helpers
-- | Discover dependencies of a given source file by iteratively calling @gcc@
-- in the @-MM -MG@ mode and building generated dependencies if they are missing
......
module Rules.Library (libraryRules) where
import Data.Functor
import Hadrian.BuildPath
import Hadrian.Haskell.Cabal
import Hadrian.Haskell.Cabal.Type
import qualified System.Directory as IO
......@@ -140,21 +140,6 @@ data LibDyn = LibDyn String [Integer] Way DynLibExt deriving (Eq, Show)
-- | > HS<pkg name>-<pkg version>[_<way suffix>].o
data LibGhci = LibGhci String [Integer] Way deriving (Eq, Show)
-- | A path of the form
--
-- > <build root>/stage<N>/<path/to/pkg/from/ghc/root>/build/<something>
--
-- where @something@ describes a library to be build for the given package.
--
-- @a@, which represents that @something@, is instantiated as 'LibA', 'LibDyn'
-- and 'LibGhci' successively in this module, depending on the type of library
-- we're giving the build rules for.
data BuildPath a = BuildPath FilePath -- ^ > <build root>/
Stage -- ^ > stage<N>/
FilePath -- ^ > <path/to/pkg/from/ghc/root>/build/
a -- ^ > whatever comes after 'build/'
deriving (Eq, Show)
-- | Get the 'Context' corresponding to the build path for a given static library.
libAContext :: BuildPath LibA -> Context
libAContext (BuildPath _ stage pkgpath (LibA pkgname _ way)) =
......@@ -176,20 +161,6 @@ libDynContext (BuildPath _ stage pkgpath (LibDyn pkgname _ way _)) =
where
pkg = library pkgname pkgpath
-- | Parse a build path for a library to be built under the given build root,
-- where the filename will be parsed with the given parser argument.
parseBuildPath
:: FilePath -- ^ build root
-> Parsec.Parsec String () a -- ^ what to parse after @build/@
-> Parsec.Parsec String () (BuildPath a)
parseBuildPath root afterBuild = do
_ <- Parsec.string root *> Parsec.optional (Parsec.char '/')
stage <- parseStage
_ <- Parsec.char '/'
pkgpath <- Parsec.manyTill Parsec.anyChar (Parsec.try $ Parsec.string "/build/")
a <- afterBuild
return (BuildPath root stage pkgpath a)
-- | Parse a path to a static library to be built, making sure the path starts
-- with the given build root.
parseBuildLibA :: FilePath -> Parsec.Parsec String () (BuildPath LibA)
......@@ -235,71 +206,3 @@ parseLibDynFilename ext = do
_ <- optional $ Parsec.string "-ghc" *> parsePkgVersion
_ <- Parsec.string ("." ++ ext)
return (LibDyn pkgname pkgver way $ if ext == "so" then So else Dylib)
-- To be kept in sync with Stage.hs's stageString function
-- | Parse @"stageX"@ into a 'Stage'.
parseStage :: Parsec.Parsec String () Stage
parseStage = (Parsec.string "stage" *> Parsec.choice
[ Parsec.string (show n) $> toEnum n
| n <- map fromEnum [minBound .. maxBound :: Stage]
]) Parsec.<?> "stage string"
-- To be kept in sync with the show instances in 'Way.Type', until we perhaps
-- use some bidirectional parsing/pretty printing approach or library.
-- | Parse a way suffix, returning the argument when no suffix is found (the
-- argument will be vanilla in most cases, but dynamic when we parse the way
-- suffix out of a shared library file name).
parseWaySuffix :: Way -> Parsec.Parsec String () Way
parseWaySuffix w = Parsec.choice
[ Parsec.string "_" *> (wayFromUnits <$> Parsec.sepBy1 parseWayUnit (Parsec.string "_"))
, pure w
] Parsec.<?> "way suffix (e.g _thr_p, or none for vanilla)"
where
parseWayUnit = Parsec.choice
[ Parsec.string "thr" *> pure Threaded
, Parsec.char 'd' *>
(Parsec.choice [ Parsec.string "ebug" *> pure Debug
, Parsec.string "yn" *> pure Dynamic ])
, Parsec.char 'p' *> pure Profiling
, Parsec.char 'l' *> pure Logging
] Parsec.<?> "way unit (thr, debug, dyn, p, l)"
-- | Parse a @"pkgname-pkgversion"@ string into the package name and the
-- integers that make up the package version.
parsePkgId :: Parsec.Parsec String () (String, [Integer])
parsePkgId = parsePkgId' "" Parsec.<?> "package identifier (<name>-<version>)"
where
parsePkgId' currName = do
s <- Parsec.many1 Parsec.alphaNum
_ <- Parsec.char '-'
let newName = if null currName then s else currName ++ "-" ++ s
Parsec.choice [ (newName,) <$> parsePkgVersion
, parsePkgId' newName ]
-- | Parse "."-separated integers that describe a package's version.
parsePkgVersion :: Parsec.Parsec String () [Integer]
parsePkgVersion = fmap reverse (parsePkgVersion' []) Parsec.<?> "package version"
where
parsePkgVersion' xs = do
n <- parseNatural
Parsec.choice
[ Parsec.try (Parsec.lookAhead (Parsec.char '.' *> (Parsec.letter <|> Parsec.char '_')))
$> (n:xs)
, Parsec.char '.' *> parsePkgVersion' (n:xs)
, pure $ (n:xs) ]
-- | Parse a natural number.
parseNatural :: Parsec.Parsec String () Integer
parseNatural = (read <$> Parsec.many1 Parsec.digit) Parsec.<?> "natural number"
-- | Runs the given parser against the given path, erroring out when the parser
-- fails (because it shouldn't if the code from this module is correct).
parsePath
:: Parsec.Parsec String () a -- ^ parser to run
-> String -- ^ string describing the input source
-> FilePath -- ^ path to parse
-> Action a
parsePath p inp path = case Parsec.parse p inp path of
Left err -> fail $ "Rules.Library.parsePath: path="
++ path ++ ", error:\n" ++ show err
Right a -> pure a
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment