Commit 6c771aaf authored by Sylvain Henry's avatar Sylvain Henry

Implement Unique supply with Addr# atomic primop

Before this patch the compiler depended on the RTS way (threaded or not)
to use atomic incrementation or not. This is wrong because the RTS is
supposed to be switchable at link time, without recompilation.

Now we always use atomic incrementation of the unique counter.
parent 26a928b8
Pipeline #29604 failed with stages
in 770 minutes and 44 seconds
......@@ -59,7 +59,6 @@ import GHC.Core.Unfold
import GHC.Types.Basic ( CompilerPhase(..) )
import GHC.Types.Annotations
import GHC.Types.Var
import GHC.Types.Unique (uniqFromMask)
import GHC.Types.Unique.Supply
import GHC.Types.Name.Env
import GHC.Types.SrcLoc
......
......@@ -77,6 +77,7 @@ data OptKind m -- Suppose the flag is -f
| OptPrefix (String -> EwM m ()) -- -f or -farg (i.e. the arg is optional)
| OptIntSuffix (Maybe Int -> EwM m ()) -- -f or -f=n; pass n to fn
| IntSuffix (Int -> EwM m ()) -- -f or -f=n; pass n to fn
| WordSuffix (Word -> EwM m ()) -- -f or -f=n; pass n to fn
| FloatSuffix (Float -> EwM m ()) -- -f or -f=n; pass n to fn
| PassFlag (String -> EwM m ()) -- -f; pass "-f" fn
| AnySuffix (String -> EwM m ()) -- -f or -farg; pass entire "-farg" to fn
......@@ -253,6 +254,9 @@ processOneArg opt_kind rest arg args
IntSuffix f | Just n <- parseInt rest_no_eq -> Right (f n, args)
| otherwise -> Left ("malformed integer argument in " ++ dash_arg)
WordSuffix f | Just n <- parseWord rest_no_eq -> Right (f n, args)
| otherwise -> Left ("malformed natural argument in " ++ dash_arg)
FloatSuffix f | Just n <- parseFloat rest_no_eq -> Right (f n, args)
| otherwise -> Left ("malformed float argument in " ++ dash_arg)
......@@ -279,6 +283,7 @@ arg_ok (Prefix _) _ _ = True -- Missing argument checked for in p
-- to improve error message (#12625)
arg_ok (OptIntSuffix _) _ _ = True
arg_ok (IntSuffix _) _ _ = True
arg_ok (WordSuffix _) _ _ = True
arg_ok (FloatSuffix _) _ _ = True
arg_ok (OptPrefix _) _ _ = True
arg_ok (PassFlag _) rest _ = null rest
......@@ -294,6 +299,11 @@ parseInt s = case reads s of
((n,""):_) -> Just n
_ -> Nothing
parseWord :: String -> Maybe Word
parseWord s = case reads s of
((n,""):_) -> Just n
_ -> Nothing
parseFloat :: String -> Maybe Float
parseFloat s = case reads s of
((n,""):_) -> Just n
......
......@@ -710,8 +710,9 @@ data DynFlags = DynFlags {
maxErrors :: Maybe Int,
-- | Unique supply configuration for testing build determinism
initialUnique :: Int,
initialUnique :: Word,
uniqueIncrement :: Int,
-- 'Int' because it can be used to test uniques in decreasing order.
-- | Temporary: CFG Edge weights for fast iterations
cfgWeights :: Weights
......@@ -2092,6 +2093,8 @@ add_dep_message (OptIntSuffix f) message =
OptIntSuffix $ \oi -> f oi >> deprecate message
add_dep_message (IntSuffix f) message =
IntSuffix $ \i -> f i >> deprecate message
add_dep_message (WordSuffix f) message =
WordSuffix $ \i -> f i >> deprecate message
add_dep_message (FloatSuffix f) message =
FloatSuffix $ \fl -> f fl >> deprecate message
add_dep_message (PassFlag f) message =
......@@ -2856,7 +2859,7 @@ dynamic_flags_deps = [
, make_ord_flag defGhcFlag "fmax-inline-memset-insns"
(intSuffix (\n d -> d { maxInlineMemsetInsns = n }))
, make_ord_flag defGhcFlag "dinitial-unique"
(intSuffix (\n d -> d { initialUnique = n }))
(wordSuffix (\n d -> d { initialUnique = n }))
, make_ord_flag defGhcFlag "dunique-increment"
(intSuffix (\n d -> d { uniqueIncrement = n }))
......@@ -4247,6 +4250,9 @@ intSuffix fn = IntSuffix (\n -> upd (fn n))
intSuffixM :: (Int -> DynFlags -> DynP DynFlags) -> OptKind (CmdLineP DynFlags)
intSuffixM fn = IntSuffix (\n -> updM (fn n))
wordSuffix :: (Word -> DynFlags -> DynFlags) -> OptKind (CmdLineP DynFlags)
wordSuffix fn = WordSuffix (\n -> upd (fn n))
floatSuffix :: (Float -> DynFlags -> DynFlags) -> OptKind (CmdLineP DynFlags)
floatSuffix fn = FloatSuffix (\n -> upd (fn n))
......
......@@ -27,7 +27,6 @@ import GHC.Unit.Module ( Module )
import GHC.Driver.Session
import GHC.Utils.Error
import GHC.Types.Unique (uniqFromMask)
import GHC.Types.Unique.Supply
import GHC.Utils.Outputable
import GHC.Utils.Panic
......
......@@ -200,7 +200,6 @@ import GHC.Types.SrcLoc
import GHC.Types.Name.Env
import GHC.Types.Name.Set
import GHC.Types.Name.Ppr
import GHC.Types.Unique (uniqFromMask)
import GHC.Types.Unique.Supply
import GHC.Types.Annotations
import GHC.Types.Basic( TopLevelFlag, TypeOrKind(..) )
......
......@@ -31,7 +31,7 @@ module GHC.Types.Unique (
mkUniqueGrimily,
getKey,
mkUnique, unpkUnique, uniqFromMask,
mkUnique, unpkUnique,
eqUnique, ltUnique,
incrUnique, stepUnique,
......@@ -147,13 +147,6 @@ unpkUnique (MkUnique u)
in
(tag, i)
foreign import ccall unsafe "genSym" genSym :: IO Int
uniqFromMask :: Char -> IO Unique
uniqFromMask mask
= do { uqNum <- genSym
; return $! mkUnique mask uqNum }
-- | The interface file symbol-table encoding assumes that known-key uniques fit
-- in 30-bits; verify this.
--
......
......@@ -3,11 +3,8 @@
(c) The GRASP/AQUA Project, Glasgow University, 1992-1998
-}
{-# OPTIONS_GHC -fno-state-hack #-}
-- This -fno-state-hack is important
-- See Note [Optimising the unique supply]
{-# LANGUAGE CPP #-}
{-# LANGUAGE MagicHash #-}
{-# LANGUAGE DeriveFunctor #-}
{-# LANGUAGE PatternSynonyms #-}
{-# LANGUAGE BangPatterns #-}
......@@ -22,7 +19,7 @@ module GHC.Types.Unique.Supply (
-- ** Operations on supplies
uniqFromSupply, uniqsFromSupply, -- basic ops
takeUniqFromSupply,
takeUniqFromSupply, uniqFromMask,
mkSplitUniqSupply,
splitUniqSupply, listSplitUniqSupply,
......@@ -40,7 +37,7 @@ module GHC.Types.Unique.Supply (
import GHC.Prelude
import GHC.Types.Unique
import GHC.Utils.Panic.Plain (panic)
import GHC.Utils.Panic.Plain
import GHC.IO
......@@ -48,9 +45,17 @@ import GHC.Utils.Monad
import Control.Monad
import Data.Bits
import Data.Char
import GHC.Exts( inline )
import GHC.Exts( Ptr(..), noDuplicate# )
#if MIN_VERSION_GLASGOW_HASKELL(9,1,0,0)
import GHC.Exts( Int(..), word2Int#, fetchAddWordAddr#, plusWord#, readWordOffAddr# )
#if defined(DEBUG)
import GHC.Utils.Misc
#endif
#endif
import Foreign.Storable
#include "Unique.h"
#include "HsVersions.h"
{-
************************************************************************
......@@ -83,8 +88,23 @@ lazily-evaluated infinite tree.
Note [Optimising the unique supply]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
The inner loop of mkSplitUniqSupply is a function closure
mk_supply s0 =
case noDuplicate# s0 of { s1 ->
case unIO genSym s1 of { (# s2, u #) ->
case unIO (unsafeDupableInterleaveIO (IO mk_supply)) s2 of { (# s3, x #) ->
case unIO (unsafeDupableInterleaveIO (IO mk_supply)) s3 of { (# s4, y #) ->
(# s4, MkSplitUniqSupply (mask .|. u) x y #)
}}}}
It's a classic example of an IO action that is captured and then called
repeatedly (see #18238 for some discussion). It mustn't allocate! The test
perf/should_run/UniqLoop keeps track of this loop. Watch it carefully.
We used to write it as:
mk_supply :: IO UniqSupply
mk_supply = unsafeInterleaveIO $
genSym >>= \ u ->
......@@ -92,100 +112,11 @@ The inner loop of mkSplitUniqSupply is a function closure
mk_supply >>= \ s2 ->
return (MkSplitUniqSupply (mask .|. u) s1 s2)
It's a classic example of an IO action that is captured
and the called repeatedly (see #18238 for some discussion).
It turns out that we can get something like
$wmkSplitUniqSupply c# s
= letrec
mk_supply
= \s -> unsafeDupableInterleaveIO1
(\s2 -> case noDuplicate# s2 of s3 ->
...
case mk_supply s4 of (# s5, t1 #) ->
...
(# s6, MkSplitUniqSupply ... #)
in mk_supply s
This is bad becuase we allocate that inner (\s2...) every time.
Why doesn't full laziness float out the (\s2...)? Because of
the state hack (#18238).
So for this module we switch the state hack off -- it's an example
of when it makes things worse rather than better. And we use
multiShotIO (see Note [multiShotIO]) thus:
mk_supply = multiShotIO $
unsafeInterleaveIO $
genSym >>= \ u ->
...
Now full laziness can float that lambda out, and we get
$wmkSplitUniqSupply c# s
= letrec
lvl = \s2 -> case noDuplicate# s2 of s3 ->
...
case unsafeDupableInterleaveIO
lvl s4 of (# s5, t1 #) ->
...
(# s6, MkSplitUniqSupply ... #)
in unsafeDupableInterleaveIO1 lvl s
This is all terribly delicate. It just so happened that before I
fixed #18078, and even with the state-hack still enabled, we were
getting this:
$wmkSplitUniqSupply c# s
= letrec
mk_supply = \s2 -> case noDuplicate# s2 of s3 ->
...
case mks_help s3 of (# s5,t1 #) ->
...
(# s6, MkSplitUniqSupply ... #)
mks_help = unsafeDupableInterleaveIO mk_supply
-- mks_help marked as loop breaker
in mks_help s
The fact that we didn't need full laziness was somewhat fortuitious.
We got the right number of allocations. But the partial application of
the arity-2 unsafeDupableInterleaveIO in mks_help makes it quite a
bit slower. (Test perf/should_run/UniqLoop had a 20% perf change.)
Sigh. The test perf/should_run/UniqLoop keeps track of this loop.
Watch it carefully.
Note [multiShotIO]
~~~~~~~~~~~~~~~~~~
The function multiShotIO :: IO a -> IO a
says that the argument IO action may be invoked repeatedly (is
multi-shot), and so there should be a multi-shot lambda around it.
It's quite easy to define, in any module with `-fno-state-hack`:
multiShotIO :: IO a -> IO a
{-# INLINE multiShotIO #-}
multiShotIO (IO m) = IO (\s -> inline m s)
Because of -fno-state-hack, that '\s' will be multi-shot. Now,
ignoring the casts from IO:
multiShotIO (\ss{one-shot}. blah)
==> let m = \ss{one-shot}. blah
in \s. inline m s
==> \s. (\ss{one-shot}.blah) s
==> \s. blah[s/ss]
The magic `inline` function does two things
* It prevents eta reduction. If we wrote just
multiShotIO (IO m) = IO (\s -> m s)
the lamda would eta-reduce to 'm' and all would be lost.
* It helps ensure that 'm' really does inline.
Note that 'inline' evaporates in phase 0. See Note [inlineId magic]
in GHC.Core.Opt.ConstantFold.match_inline.
The INLINE pragma on multiShotIO is very important, else the
'inline' call will evaporate when compiling the module that
defines 'multiShotIO', before it is ever exported.
and to rely on -fno-state-hack, full laziness and inlining to get the same
result. It was very brittle and required enabling -fno-state-hack globally. So
it has been rewritten using lower level constructs to explicitly state what we
want.
-}
......@@ -208,28 +139,58 @@ mkSplitUniqSupply :: Char -> IO UniqSupply
-- See Note [How the unique supply works]
-- See Note [Optimising the unique supply]
mkSplitUniqSupply c
= mk_supply
= unsafeDupableInterleaveIO (IO mk_supply)
where
!mask = ord c `shiftL` uNIQUE_BITS
!mask = ord c `unsafeShiftL` uNIQUE_BITS
-- Here comes THE MAGIC: see Note [How the unique supply works]
-- This is one of the most hammered bits in the whole compiler
-- See Note [Optimising the unique supply]
-- NB: Use unsafeInterleaveIO for thread-safety.
mk_supply = multiShotIO $
unsafeInterleaveIO $
genSym >>= \ u ->
mk_supply >>= \ s1 ->
mk_supply >>= \ s2 ->
return (MkSplitUniqSupply (mask .|. u) s1 s2)
-- NB: Use noDuplicate# for thread-safety.
mk_supply s0 =
case noDuplicate# s0 of { s1 ->
case unIO genSym s1 of { (# s2, u #) ->
-- deferred IO computations
case unIO (unsafeDupableInterleaveIO (IO mk_supply)) s2 of { (# s3, x #) ->
case unIO (unsafeDupableInterleaveIO (IO mk_supply)) s3 of { (# s4, y #) ->
(# s4, MkSplitUniqSupply (mask .|. u) x y #)
}}}}
#if !MIN_VERSION_GLASGOW_HASKELL(9,1,0,0)
foreign import ccall unsafe "genSym" genSym :: IO Int
#else
genSym :: IO Int
genSym = do
let !mask = (1 `unsafeShiftL` uNIQUE_BITS) - 1
let !(Ptr counter) = ghc_unique_counter
let !(Ptr inc_ptr) = ghc_unique_inc
u <- IO $ \s0 -> case readWordOffAddr# inc_ptr 0# s0 of
(# s1, inc #) -> case fetchAddWordAddr# counter inc s1 of
(# s2, val #) ->
let !u = I# (word2Int# (val `plusWord#` inc)) .&. mask
in (# s2, u #)
#if defined(DEBUG)
-- Uh oh! We will overflow next time a unique is requested.
-- (Note that if the increment isn't 1 we may miss this check)
MASSERT(u /= mask)
#endif
return u
#endif
multiShotIO :: IO a -> IO a
{-# INLINE multiShotIO #-}
-- See Note [multiShotIO]
multiShotIO (IO m) = IO (\s -> inline m s)
foreign import ccall unsafe "&ghc_unique_counter" ghc_unique_counter :: Ptr Word
foreign import ccall unsafe "&ghc_unique_inc" ghc_unique_inc :: Ptr Int
initUniqSupply :: Word -> Int -> IO ()
initUniqSupply counter inc = do
poke ghc_unique_counter counter
poke ghc_unique_inc inc
uniqFromMask :: Char -> IO Unique
uniqFromMask mask
= do { uqNum <- genSym
; return $! mkUnique mask uqNum }
foreign import ccall unsafe "genSym" genSym :: IO Int
foreign import ccall unsafe "initGenSym" initUniqSupply :: Int -> Int -> IO ()
splitUniqSupply :: UniqSupply -> (UniqSupply, UniqSupply)
-- ^ Build two 'UniqSupply' from a single one, each of which
......
......@@ -2,39 +2,17 @@
#include <assert.h>
#include "Unique.h"
static HsInt GenSymCounter = 0;
static HsInt GenSymInc = 1;
HsInt ghc_unique_counter = 0;
HsInt ghc_unique_inc = 1;
#define UNIQUE_BITS (sizeof (HsInt) * 8 - UNIQUE_TAG_BITS)
#define UNIQUE_MASK ((1ULL << UNIQUE_BITS) - 1)
STATIC_INLINE void checkUniqueRange(HsInt u STG_UNUSED) {
HsInt genSym(void) {
HsInt u = atomic_inc((StgWord *)&ghc_unique_counter, ghc_unique_inc) & UNIQUE_MASK;
#if DEBUG
// Uh oh! We will overflow next time a unique is requested.
assert(u != UNIQUE_MASK);
#endif
}
HsInt genSym(void) {
#if defined(THREADED_RTS)
if (n_capabilities == 1) {
GenSymCounter = (GenSymCounter + GenSymInc) & UNIQUE_MASK;
checkUniqueRange(GenSymCounter);
return GenSymCounter;
} else {
HsInt n = atomic_inc((StgWord *)&GenSymCounter, GenSymInc)
& UNIQUE_MASK;
checkUniqueRange(n);
return n;
}
#else
GenSymCounter = (GenSymCounter + GenSymInc) & UNIQUE_MASK;
checkUniqueRange(GenSymCounter);
return GenSymCounter;
#endif
}
void initGenSym(HsInt NewGenSymCounter, HsInt NewGenSymInc) {
GenSymCounter = NewGenSymCounter;
GenSymInc = NewGenSymInc;
return u;
}
......@@ -190,18 +190,6 @@ compiler_stage1_CONFIGURE_OPTS += --flags=stage1
compiler_stage2_CONFIGURE_OPTS += --flags=stage2
compiler_stage3_CONFIGURE_OPTS += --flags=stage3
ifeq "$(GhcThreaded)" "YES"
# We pass THREADED_RTS to the stage2 C files so that cbits/genSym.c will bring
# the threaded version of atomic_inc() into scope.
compiler_stage2_CONFIGURE_OPTS += --ghc-option=-optc-DTHREADED_RTS
endif
# If the bootstrapping GHC supplies the threaded RTS, then we can have a
# threaded stage 1 too.
ifeq "$(GhcThreadedRts)" "YES"
compiler_stage1_CONFIGURE_OPTS += --ghc-option=-optc-DTHREADED_RTS
endif
ifeq "$(GhcWithInterpreter)" "YES"
compiler_stage2_CONFIGURE_OPTS += --flags=internal-interpreter
......
......@@ -11,7 +11,6 @@ import Settings
packageArgs :: Args
packageArgs = do
stage <- getStage
rtsWays <- getRtsWays
path <- getBuildPath
compilerPath <- expr $ buildPath (vanillaContext stage compiler)
let -- Do not bind the result to a Boolean: this forces the configure rule
......@@ -59,13 +58,6 @@ packageArgs = do
, flag GhcUnregisterised ? arg "--ghc-option=-DNO_REGS"
, notM targetSupportsSMP ? arg "--ghc-option=-DNOSMP"
, notM targetSupportsSMP ? arg "--ghc-option=-optc-DNOSMP"
-- When building stage 1 or later, use thread-safe RTS functions if
-- the configuration calls for a threaded GHC.
, (any (wayUnit Threaded) rtsWays) ?
notStage0 ? arg "--ghc-option=-optc-DTHREADED_RTS"
-- When building stage 1, use thread-safe RTS functions if the
-- bootstrapping (stage 0) compiler provides a threaded RTS way.
, stage0 ? threadedBootstrapper ? arg "--ghc-option=-optc-DTHREADED_RTS"
, ghcWithInterpreter ?
ghciWithDebugger <$> flavour ?
notStage0 ? arg "--ghc-option=-DDEBUGGER"
......@@ -86,14 +78,6 @@ packageArgs = do
, builder (Cabal Flags) ? mconcat
[ ghcWithInterpreter ? notStage0 ? arg "internal-interpreter"
, cross ? arg "-terminfo"
-- Note [Linking ghc-bin against threaded stage0 RTS]
-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-- We must maintain the invariant that GHCs linked with '-threaded'
-- are built with '-optc=-DTHREADED_RTS', otherwise we'll end up
-- with a GHC that can use the threaded runtime, but contains some
-- non-thread-safe functions. See
-- https://gitlab.haskell.org/ghc/ghc/issues/18024 for an example of
-- the sort of issues this can cause.
, ifM stage0
-- We build a threaded stage 1 if the bootstrapping compiler
-- supports it.
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment