Commit 3452473b authored by thomie, committed by Ben Gamari

Delete FastBool

This reverses some of the work done in Trac #1405, and assumes GHC is
smart enough to do its own unboxing of booleans now.

I would like to do some more performance measurements, but the code
changes can be reviewed already.

Test Plan:
With a perf build:
./inplace/bin/ghc-stage2 nofib/spectral/simple/Main.hs -fforce-recomp
+RTS -t --machine-readable

before:
```
  [("bytes allocated", "1300744864")
  ,("num_GCs", "302")
  ,("average_bytes_used", "8811118")
  ,("max_bytes_used", "24477464")
  ,("num_byte_usage_samples", "9")
  ,("peak_megabytes_allocated", "64")
  ,("init_cpu_seconds", "0.001")
  ,("init_wall_seconds", "0.001")
  ,("mutator_cpu_seconds", "2.833")
  ,("mutator_wall_seconds", "4.283")
  ,("GC_cpu_seconds", "0.960")
  ,("GC_wall_seconds", "0.961")
  ]
```

after:
```
  [("bytes allocated", "1301088064")
  ,("num_GCs", "310")
  ,("average_bytes_used", "8820253")
  ,("max_bytes_used", "24539904")
  ,("num_byte_usage_samples", "9")
  ,("peak_megabytes_allocated", "64")
  ,("init_cpu_seconds", "0.001")
  ,("init_wall_seconds", "0.001")
  ,("mutator_cpu_seconds", "2.876")
  ,("mutator_wall_seconds", "4.474")
  ,("GC_cpu_seconds", "0.965")
  ,("GC_wall_seconds", "0.979")
  ]
```

CPU time seems to be up a bit, but I'm not sure. Unfortunately CPU time
measurements are rather noisy.

Reviewers: austin, bgamari, rwbarton

Subscribers: nomeata

Differential Revision: https://phabricator.haskell.org/D1143

GHC Trac Issues: #1405
parent 9e8562ae
......@@ -4,7 +4,6 @@ module CodeGen.Platform
where
import CmmExpr
import FastBool
import Platform
import Reg
......@@ -97,7 +96,7 @@ globalRegMaybe platform
| otherwise -> NoRegs.globalRegMaybe
freeReg :: Platform -> RegNo -> FastBool
freeReg :: Platform -> RegNo -> Bool
freeReg platform
| platformUnregisterised platform = NoRegs.freeReg
| otherwise
......
......@@ -441,7 +441,6 @@ Library
BufWrite
Digraph
Encoding
FastBool
FastFunctions
FastMutInt
FastString
......
......@@ -613,7 +613,6 @@ compiler_stage2_dll0_MODULES += \
CodeGen.Platform.SPARC \
CodeGen.Platform.X86 \
CodeGen.Platform.X86_64 \
FastBool \
Hoopl \
Hoopl.Dataflow \
InteractiveEvalTypes \
......
......@@ -56,7 +56,6 @@ import Maybes
import UniqSupply
import ErrUtils (Severity(..))
import Outputable
import FastBool hiding ( fastOr )
import SrcLoc
import FastString
import qualified ErrUtils as Err
......@@ -1319,7 +1318,7 @@ hasCafRefs dflags this_pkg this_mod p@(_,cvt_integer) arity expr
| is_caf || mentions_cafs = MayHaveCafRefs
| otherwise = NoCafRefs
where
mentions_cafs = isFastTrue (cafRefsE p expr)
mentions_cafs = cafRefsE p expr
is_dynamic_name = isDllName dflags this_pkg this_mod
is_caf = not (arity > 0 || rhsIsStatic (targetPlatform dflags) is_dynamic_name cvt_integer expr)
......@@ -1329,38 +1328,34 @@ hasCafRefs dflags this_pkg this_mod p@(_,cvt_integer) arity expr
-- CorePrep later on, and we don't want to duplicate that
-- knowledge in rhsIsStatic below.
cafRefsE :: CafRefEnv -> Expr a -> FastBool
cafRefsE :: CafRefEnv -> Expr a -> Bool
cafRefsE p (Var id) = cafRefsV p id
cafRefsE p (Lit lit) = cafRefsL p lit
cafRefsE p (App f a) = fastOr (cafRefsE p f) (cafRefsE p) a
cafRefsE p (App f a) = cafRefsE p f || cafRefsE p a
cafRefsE p (Lam _ e) = cafRefsE p e
cafRefsE p (Let b e) = fastOr (cafRefsEs p (rhssOfBind b)) (cafRefsE p) e
cafRefsE p (Case e _bndr _ alts) = fastOr (cafRefsE p e) (cafRefsEs p) (rhssOfAlts alts)
cafRefsE p (Let b e) = cafRefsEs p (rhssOfBind b) || cafRefsE p e
cafRefsE p (Case e _bndr _ alts) = cafRefsE p e || cafRefsEs p (rhssOfAlts alts)
cafRefsE p (Tick _n e) = cafRefsE p e
cafRefsE p (Cast e _co) = cafRefsE p e
cafRefsE _ (Type _) = fastBool False
cafRefsE _ (Coercion _) = fastBool False
cafRefsE _ (Type _) = False
cafRefsE _ (Coercion _) = False
cafRefsEs :: CafRefEnv -> [Expr a] -> FastBool
cafRefsEs _ [] = fastBool False
cafRefsEs p (e:es) = fastOr (cafRefsE p e) (cafRefsEs p) es
cafRefsEs :: CafRefEnv -> [Expr a] -> Bool
cafRefsEs _ [] = False
cafRefsEs p (e:es) = cafRefsE p e || cafRefsEs p es
cafRefsL :: CafRefEnv -> Literal -> FastBool
cafRefsL :: CafRefEnv -> Literal -> Bool
-- Don't forget that mk_integer id might have Caf refs!
-- We first need to convert the Integer into its final form, to
-- see whether mkInteger is used.
cafRefsL p@(_, cvt_integer) (LitInteger i _) = cafRefsE p (cvt_integer i)
cafRefsL _ _ = fastBool False
cafRefsL _ _ = False
cafRefsV :: CafRefEnv -> Id -> FastBool
cafRefsV :: CafRefEnv -> Id -> Bool
cafRefsV (subst, _) id
| not (isLocalId id) = fastBool (mayHaveCafRefs (idCafInfo id))
| Just id' <- lookupVarEnv subst id = fastBool (mayHaveCafRefs (idCafInfo id'))
| otherwise = fastBool False
fastOr :: FastBool -> (a -> FastBool) -> a -> FastBool
-- hack for lazy-or over FastBool.
fastOr a f x = fastBool (isFastTrue a || isFastTrue (f x))
| not (isLocalId id) = mayHaveCafRefs (idCafInfo id)
| Just id' <- lookupVarEnv subst id = mayHaveCafRefs (idCafInfo id')
| otherwise = False
{-
------------------------------------------------------------------------------
......
......@@ -39,7 +39,6 @@ import FastString
import CLabel
import Outputable
import Platform
import FastBool
import UniqFM (listToUFM, lookupUFM)
import UniqSupply
......@@ -362,9 +361,7 @@ ppc_regUsageOfInstr platform instr
interesting :: Platform -> Reg -> Bool
interesting _ (RegVirtual _) = True
interesting platform (RegReal (RealRegSingle i))
= isFastTrue (freeReg platform i)
interesting platform (RegReal (RealRegSingle i)) = freeReg platform i
interesting _ (RegReal (RealRegPair{}))
= panic "PPC.Instr.interesting: no reg pairs on this arch"
......
......@@ -60,7 +60,6 @@ import Unique
import CodeGen.Platform
import DynFlags
import Outputable
import FastBool
import FastTypes
import Platform
......@@ -325,5 +324,5 @@ f21 = regSingle $ fReg 21
-- register allocator to attempt to map VRegs to.
allocatableRegs :: Platform -> [RealReg]
allocatableRegs platform
= let isFree i = isFastTrue (freeReg platform i)
= let isFree i = freeReg platform i
in map RealRegSingle $ filter isFree allMachRegNos
......@@ -255,7 +255,7 @@ worst n classN classC
-- register allocator to attempt to map VRegs to.
allocatableRegs :: [RegNo]
allocatableRegs
= let isFree i = isFastTrue (freeReg i)
= let isFree i = freeReg i
in filter isFree allMachRegNos
......
......@@ -10,7 +10,6 @@ import Reg
import CodeGen.Platform
import Outputable
import Platform
import FastBool
import Data.Word
import Data.Bits
......@@ -75,7 +74,7 @@ allocateReg platform
(FreeRegs g f d)
-- can't allocate free regs
| not $ isFastTrue (freeReg platform r)
| not $ freeReg platform r
= pprPanic "SPARC.FreeRegs.allocateReg: not allocating pinned reg" (ppr reg)
-- a general purpose reg
......@@ -131,7 +130,7 @@ releaseReg platform
regs@(FreeRegs g f d)
-- don't release pinned reg
| not $ isFastTrue (freeReg platform r)
| not $ freeReg platform r
= regs
-- a general purpose reg
......
......@@ -43,7 +43,6 @@ import BlockId
import DynFlags
import Cmm
import FastString
import FastBool
import Outputable
import Platform
......@@ -279,8 +278,8 @@ interesting :: Platform -> Reg -> Bool
interesting platform reg
= case reg of
RegVirtual _ -> True
RegReal (RealRegSingle r1) -> isFastTrue (freeReg platform r1)
RegReal (RealRegPair r1 _) -> isFastTrue (freeReg platform r1)
RegReal (RealRegSingle r1) -> freeReg platform r1
RegReal (RealRegPair r1 _) -> freeReg platform r1
......
......@@ -40,7 +40,6 @@ import Format
import Unique
import Outputable
import FastTypes
import FastBool
{-
The SPARC has 64 registers of interest; 32 integer registers and 32
......@@ -199,13 +198,8 @@ allocatableRegs :: [RealReg]
allocatableRegs
= let isFree rr
= case rr of
RealRegSingle r
-> isFastTrue (freeReg r)
RealRegPair r1 r2
-> isFastTrue (freeReg r1)
&& isFastTrue (freeReg r2)
RealRegSingle r -> freeReg r
RealRegPair r1 r2 -> freeReg r1 && freeReg r2
in filter isFree allRealRegs
......
......@@ -58,7 +58,6 @@ import OrdList
import Outputable
import Unique
import FastString
import FastBool ( isFastTrue )
import DynFlags
import Util
......@@ -1176,7 +1175,7 @@ amodeCouldBeClobbered :: Platform -> AddrMode -> Bool
amodeCouldBeClobbered platform amode = any (regClobbered platform) (addrModeRegs amode)
regClobbered :: Platform -> Reg -> Bool
regClobbered platform (RegReal (RealRegSingle rr)) = isFastTrue (freeReg platform rr)
regClobbered platform (RegReal (RealRegSingle rr)) = freeReg platform rr
regClobbered _ _ = False
-- getOperand: the operand is not required to remain valid across the
......
......@@ -29,7 +29,6 @@ import BlockId
import CodeGen.Platform
import Cmm
import FastString
import FastBool
import Outputable
import Platform
......@@ -531,7 +530,7 @@ x86_regUsageOfInstr platform instr
-- | Is this register interesting for the register allocator?
interesting :: Platform -> Reg -> Bool
interesting _ (RegVirtual _) = True
interesting platform (RegReal (RealRegSingle i)) = isFastTrue (freeReg platform i)
interesting platform (RegReal (RealRegSingle i)) = freeReg platform i
interesting _ (RegReal (RealRegPair{})) = panic "X86.interesting: no reg pairs on this arch"
......
......@@ -58,7 +58,6 @@ import DynFlags
import Outputable
import Platform
import FastTypes
import FastBool
-- | regSqueeze_class reg
......@@ -447,6 +446,6 @@ instrClobberedRegs platform
-- register allocator to attempt to map VRegs to.
allocatableRegs :: Platform -> [RealReg]
allocatableRegs platform
= let isFree i = isFastTrue (freeReg platform i)
= let isFree i = freeReg platform i
in map RealRegSingle $ filter isFree (allMachRegNos platform)
{-
(c) The University of Glasgow, 2000-2006
\section{Fast booleans}
-}
{-# LANGUAGE CPP, MagicHash #-}
module FastBool (
--fastBool could be called bBox; isFastTrue, bUnbox; but they're not
FastBool, fastBool, isFastTrue, fastOr, fastAnd
) where
-- Import the beggars
import GHC.Exts
#ifdef DEBUG
import Panic
#endif
type FastBool = Int#
fastBool True = 1#
fastBool False = 0#
#ifdef DEBUG
--then waste time deciding whether to panic. FastBool should normally
--be at least as fast as Bool, one would hope...
isFastTrue 1# = True
isFastTrue 0# = False
isFastTrue _ = panic "FastTypes: isFastTrue"
-- note that fastOr and fastAnd are strict in both arguments
-- since they are unboxed
fastOr 1# _ = 1#
fastOr 0# x = x
fastOr _ _ = panicFastInt "FastTypes: fastOr"
fastAnd 0# _ = 0#
fastAnd 1# x = x
fastAnd _ _ = panicFastInt "FastTypes: fastAnd"
--these "panicFastInt"s (formerly known as "panic#") rely on
--FastInt = FastBool ( = Int# presumably),
--haha, true enough when __GLASGOW_HASKELL__. Why can't we have functions
--that return _|_ be kind-polymorphic ( ?? to be precise ) ?
#else /* ! DEBUG */
--Isn't comparison to zero sometimes faster on CPUs than comparison to 1?
-- (since using Int# as _synonym_ fails to guarantee that it will
-- only take on values of 0 and 1)
isFastTrue 0# = False
isFastTrue _ = True
-- note that fastOr and fastAnd are strict in both arguments
-- since they are unboxed
-- Also, to avoid incomplete-pattern warning
-- (and avoid wasting time with redundant runtime checks),
-- we don't pattern-match on both 0# and 1# .
fastOr 0# x = x
fastOr _ _ = 1#
fastAnd 0# _ = 0#
fastAnd _ x = x
#endif /* ! DEBUG */
fastBool :: Bool -> FastBool
isFastTrue :: FastBool -> Bool
fastOr :: FastBool -> FastBool -> FastBool
fastAnd :: FastBool -> FastBool -> FastBool
import CmmExpr
import FastBool
#if !(MACHREGS_i386 || MACHREGS_x86_64 || MACHREGS_sparc || MACHREGS_powerpc)
import Panic
#endif
......@@ -823,17 +822,17 @@ globalRegMaybe _ = Nothing
globalRegMaybe = panic "globalRegMaybe not defined for this platform"
#endif
freeReg :: RegNo -> FastBool
freeReg :: RegNo -> Bool
#if MACHREGS_i386 || MACHREGS_x86_64
# if MACHREGS_i386
freeReg esp = fastBool False -- %esp is the C stack pointer
freeReg esi = fastBool False -- Note [esi/edi not allocatable]
freeReg edi = fastBool False
freeReg esp = False -- %esp is the C stack pointer
freeReg esi = False -- Note [esi/edi not allocatable]
freeReg edi = False
# endif
# if MACHREGS_x86_64
freeReg rsp = fastBool False -- %rsp is the C stack pointer
freeReg rsp = False -- %rsp is the C stack pointer
# endif
{-
......@@ -853,158 +852,158 @@ Hence, on x86 esi and edi are treated as not allocatable.
-- split patterns in two functions to prevent overlaps
freeReg r = freeRegBase r
freeRegBase :: RegNo -> FastBool
freeRegBase :: RegNo -> Bool
# ifdef REG_Base
freeRegBase REG_Base = fastBool False
freeRegBase REG_Base = False
# endif
# ifdef REG_Sp
freeRegBase REG_Sp = fastBool False
freeRegBase REG_Sp = False
# endif
# ifdef REG_SpLim
freeRegBase REG_SpLim = fastBool False
freeRegBase REG_SpLim = False
# endif
# ifdef REG_Hp
freeRegBase REG_Hp = fastBool False
freeRegBase REG_Hp = False
# endif
# ifdef REG_HpLim
freeRegBase REG_HpLim = fastBool False
freeRegBase REG_HpLim = False
# endif
-- All other regs are considered to be "free", because we can track
-- their liveness accurately.
freeRegBase _ = fastBool True
freeRegBase _ = True
#elif MACHREGS_powerpc
freeReg 0 = fastBool False -- Hack: r0 can't be used in all insns,
-- but it's actually free
freeReg 1 = fastBool False -- The Stack Pointer
freeReg 0 = False -- Hack: r0 can't be used in all insns,
-- but it's actually free
freeReg 1 = False -- The Stack Pointer
# if !MACHREGS_darwin
-- most non-darwin powerpc OSes use r2 as a TOC pointer or something like that
freeReg 2 = fastBool False
freeReg 2 = False
-- TODO: make this conditional for ppc64 ELF
freeReg 13 = fastBool False -- reserved for system thread ID
freeReg 13 = False -- reserved for system thread ID
-- TODO: do not reserve r30 in ppc64 ELF
-- at least linux in -fPIC relies on r30 in PLT stubs
freeReg 30 = fastBool False
freeReg 30 = False
# endif
# ifdef REG_Base
freeReg REG_Base = fastBool False
freeReg REG_Base = False
# endif
# ifdef REG_R1
freeReg REG_R1 = fastBool False
freeReg REG_R1 = False
# endif
# ifdef REG_R2
freeReg REG_R2 = fastBool False
freeReg REG_R2 = False
# endif
# ifdef REG_R3
freeReg REG_R3 = fastBool False
freeReg REG_R3 = False
# endif
# ifdef REG_R4
freeReg REG_R4 = fastBool False
freeReg REG_R4 = False
# endif
# ifdef REG_R5
freeReg REG_R5 = fastBool False
freeReg REG_R5 = False
# endif
# ifdef REG_R6
freeReg REG_R6 = fastBool False
freeReg REG_R6 = False
# endif
# ifdef REG_R7
freeReg REG_R7 = fastBool False
freeReg REG_R7 = False
# endif
# ifdef REG_R8
freeReg REG_R8 = fastBool False
freeReg REG_R8 = False
# endif
# ifdef REG_R9
freeReg REG_R9 = fastBool False
freeReg REG_R9 = False
# endif
# ifdef REG_R10
freeReg REG_R10 = fastBool False
freeReg REG_R10 = False
# endif
# ifdef REG_F1
freeReg REG_F1 = fastBool False
freeReg REG_F1 = False
# endif
# ifdef REG_F2
freeReg REG_F2 = fastBool False
freeReg REG_F2 = False
# endif
# ifdef REG_F3
freeReg REG_F3 = fastBool False
freeReg REG_F3 = False
# endif
# ifdef REG_F4
freeReg REG_F4 = fastBool False
freeReg REG_F4 = False
# endif
# ifdef REG_F5
freeReg REG_F5 = fastBool False
freeReg REG_F5 = False
# endif
# ifdef REG_F6
freeReg REG_F6 = fastBool False
freeReg REG_F6 = False
# endif
# ifdef REG_D1
freeReg REG_D1 = fastBool False
freeReg REG_D1 = False
# endif
# ifdef REG_D2
freeReg REG_D2 = fastBool False
freeReg REG_D2 = False
# endif
# ifdef REG_D3
freeReg REG_D3 = fastBool False
freeReg REG_D3 = False
# endif
# ifdef REG_D4
freeReg REG_D4 = fastBool False
freeReg REG_D4 = False
# endif
# ifdef REG_D5
freeReg REG_D5 = fastBool False
freeReg REG_D5 = False
# endif
# ifdef REG_D6
freeReg REG_D6 = fastBool False
freeReg REG_D6 = False
# endif
# ifdef REG_Sp
freeReg REG_Sp = fastBool False
freeReg REG_Sp = False
# endif
# ifdef REG_Su
freeReg REG_Su = fastBool False
freeReg REG_Su = False
# endif
# ifdef REG_SpLim
freeReg REG_SpLim = fastBool False
freeReg REG_SpLim = False
# endif
# ifdef REG_Hp
freeReg REG_Hp = fastBool False
freeReg REG_Hp = False
# endif
# ifdef REG_HpLim
freeReg REG_HpLim = fastBool False
freeReg REG_HpLim = False
# endif
freeReg _ = fastBool True
freeReg _ = True
#elif MACHREGS_sparc
-- SPARC regs used by the OS / ABI
-- %g0(r0) is always zero
freeReg g0 = fastBool False
freeReg g0 = False
-- %g5(r5) - %g7(r7)
-- are reserved for the OS
freeReg g5 = fastBool False
freeReg g6 = fastBool False
freeReg g7 = fastBool False
freeReg g5 = False
freeReg g6 = False
freeReg g7 = False
-- %o6(r14)
-- is the C stack pointer
freeReg o6 = fastBool False
freeReg o6 = False
-- %o7(r15)
-- holds the C return address
freeReg o7 = fastBool False
freeReg o7 = False
-- %i6(r30)
-- is the C frame pointer
freeReg i6 = fastBool False
freeReg i6 = False
-- %i7(r31)
-- is used for C return addresses
freeReg i7 = fastBool False
freeReg i7 = False
-- %f0(r32) - %f1(r33)
-- are C floating point return regs
freeReg f0 = fastBool False
freeReg f1 = fastBool False
freeReg f0 = False
freeReg f1 = False
{-
freeReg regNo
......@@ -1012,112 +1011,112 @@ freeReg regNo
| regNo >= f0
, regNo < NCG_FirstFloatReg
, regNo `mod` 2 /= 0
= fastBool False
= False
-}
# ifdef REG_Base
freeReg REG_Base = fastBool False
freeReg REG_Base = False
# endif
# ifdef REG_R1
freeReg REG_R1 = fastBool False
freeReg REG_R1 = False
# endif
# ifdef REG_R2
freeReg REG_R2 = fastBool False
freeReg REG_R2 = False
# endif
# ifdef REG_R3
freeReg REG_R3 = fastBool False
freeReg REG_R3 = False
# endif
# ifdef REG_R4
freeReg REG_R4 = fastBool False
freeReg REG_R4 = False
# endif
# ifdef REG_R5
freeReg REG_R5 = fastBool False
freeReg REG_R5 = False
# endif
# ifdef REG_R6
freeReg REG_R6 = fastBool False
freeReg REG_R6 = False
# endif
# ifdef REG_R7
freeReg REG_R7 = fastBool False
freeReg REG_R7 = False
# endif
# ifdef REG_R8
freeReg REG_R8 = fastBool False
freeReg REG_R8 = False
# endif
# ifdef REG_R9
freeReg REG_R9 = fastBool False
freeReg REG_R9 = False
# endif
# ifdef REG_R10
freeReg REG_R10 = fastBool False
freeReg REG_R10 = False
# endif
# ifdef REG_F1
freeReg REG_F1 = fastBool False
freeReg REG_F1 = False
# endif
# ifdef REG_F2
freeReg REG_F2 = fastBool False
freeReg REG_F2 = False
# endif
# ifdef REG_F3
freeReg REG_F3 = fastBool False
freeReg REG_F3 = False
# endif
# ifdef REG_F4
freeReg REG_F4 = fastBool False
freeReg REG_F4 = False
# endif
# ifdef REG_F5
freeReg REG_F5 = fastBool False
freeReg REG_F5 = False
# endif
# ifdef REG_F6
freeReg REG_F6 = fastBool False
freeReg REG_F6 = False
# endif
# ifdef REG_D1
freeReg REG_D1 = fastBool False
freeReg REG_D1 = False
# endif
# ifdef REG_D1_2
freeReg REG_D1_2 = fastBool False
freeReg REG_D1_2 = False
# endif
# ifdef REG_D2
freeReg REG_D2 = fastBool False
freeReg REG_D2 = False
# endif
# ifdef REG_D2_2
freeReg REG_D2_2 = fastBool False
freeReg REG_D2_2 = False
# endif
# ifdef REG_D3
freeReg REG_D3 = fastBool False
freeReg REG_D3 = False
# endif
# ifdef REG_D3_2
freeReg REG_D3_2 = fastBool False
freeReg REG_D3_2 = False
# endif
# ifdef REG_D4
freeReg REG_D4 = fastBool False
freeReg REG_D4 = False
# endif
# ifdef REG_D4_2
freeReg REG_D4_2 = fastBool False
freeReg REG_D4_2 = False
# endif
# ifdef REG_D5
freeReg REG_D5 = fastBool False
freeReg REG_D5 = False
# endif
# ifdef REG_D5_2
freeReg REG_D5_2 = fastBool False
freeReg REG_D5_2 = False
# endif
# ifdef REG_D6
freeReg REG_D6 = fastBool False
freeReg REG_D6 = False
# endif
# ifdef REG_D6_2
freeReg REG_D6_2 = fastBool False
freeReg REG_D6_2 = False
# endif
# ifdef REG_Sp
freeReg REG_Sp = fastBool False
freeReg REG_Sp = False
# endif
# ifdef REG_Su