Commit 210ccabc authored by tibbe's avatar tibbe

codeGen: allocate small byte arrays of statically known size inline

This results in a 57% runtime decrease when allocating an array of 128
bytes on a 64-bit machine.

Fixes #8876.
parent cbdd8328
......@@ -56,6 +56,7 @@ module CLabel (
mkMAP_FROZEN_infoLabel,
mkMAP_DIRTY_infoLabel,
mkEMPTY_MVAR_infoLabel,
mkArrWords_infoLabel,
mkTopTickyCtrLabel,
mkCAFBlackHoleInfoTableLabel,
......@@ -402,7 +403,8 @@ mkDirty_MUT_VAR_Label, mkSplitMarkerLabel, mkUpdInfoLabel,
mkBHUpdInfoLabel, mkIndStaticInfoLabel, mkMainCapabilityLabel,
mkMAP_FROZEN_infoLabel, mkMAP_DIRTY_infoLabel,
mkEMPTY_MVAR_infoLabel, mkTopTickyCtrLabel,
mkCAFBlackHoleInfoTableLabel, mkCAFBlackHoleEntryLabel :: CLabel
mkCAFBlackHoleInfoTableLabel, mkCAFBlackHoleEntryLabel,
mkArrWords_infoLabel :: CLabel
mkDirty_MUT_VAR_Label = mkForeignLabel (fsLit "dirty_MUT_VAR") Nothing ForeignLabelInExternalPackage IsFunction
mkSplitMarkerLabel = CmmLabel rtsPackageId (fsLit "__stg_split_marker") CmmCode
mkUpdInfoLabel = CmmLabel rtsPackageId (fsLit "stg_upd_frame") CmmInfo
......@@ -415,6 +417,7 @@ mkEMPTY_MVAR_infoLabel = CmmLabel rtsPackageId (fsLit "stg_EMPTY_MVAR")
mkTopTickyCtrLabel = CmmLabel rtsPackageId (fsLit "top_ct") CmmData
mkCAFBlackHoleInfoTableLabel = CmmLabel rtsPackageId (fsLit "stg_CAF_BLACKHOLE") CmmInfo
mkCAFBlackHoleEntryLabel = CmmLabel rtsPackageId (fsLit "stg_CAF_BLACKHOLE") CmmEntry
mkArrWords_infoLabel = CmmLabel rtsPackageId (fsLit "stg_ARR_WORDS") CmmInfo
-----
mkCmmInfoLabel, mkCmmEntryLabel, mkCmmRetInfoLabel, mkCmmRetLabel,
......
......@@ -26,6 +26,7 @@ module SMRep (
-- ** Construction
mkHeapRep, blackHoleRep, indStaticRep, mkStackRep, mkRTSRep, arrPtrsRep,
arrWordsRep,
-- ** Predicates
isStaticRep, isConRep, isThunkRep, isFunRep, isStaticNoCafCon,
......@@ -33,8 +34,8 @@ module SMRep (
-- ** Size-related things
heapClosureSizeW,
fixedHdrSize, arrWordsHdrSize, arrPtrsHdrSize, arrPtrsHdrSizeW,
profHdrSize, thunkHdrSize, nonHdrSizeW,
fixedHdrSize, arrWordsHdrSize, arrWordsHdrSizeW, arrPtrsHdrSize,
arrPtrsHdrSizeW, profHdrSize, thunkHdrSize, nonHdrSize, nonHdrSizeW,
-- ** RTS closure types
rtsClosureType, rET_SMALL, rET_BIG,
......@@ -157,6 +158,9 @@ data SMRep
!WordOff -- # ptr words
!WordOff -- # card table words
| ArrayWordsRep
!WordOff -- # bytes expressed in words, rounded up
| StackRep -- Stack frame (RET_SMALL or RET_BIG)
Liveness
......@@ -241,6 +245,9 @@ indStaticRep = HeapRep True 1 0 IndStatic
arrPtrsRep :: DynFlags -> WordOff -> SMRep
arrPtrsRep dflags elems = ArrayPtrsRep elems (cardTableSizeW dflags elems)
arrWordsRep :: DynFlags -> ByteOff -> SMRep
arrWordsRep dflags bytes = ArrayWordsRep (bytesToWordsRoundUp dflags bytes)
-----------------------------------------------------------------------------
-- Predicates
......@@ -299,6 +306,11 @@ arrWordsHdrSize :: DynFlags -> ByteOff
arrWordsHdrSize dflags
= fixedHdrSize dflags * wORD_SIZE dflags + sIZEOF_StgArrWords_NoHdr dflags
arrWordsHdrSizeW :: DynFlags -> WordOff
arrWordsHdrSizeW dflags =
fixedHdrSize dflags +
(sIZEOF_StgArrWords_NoHdr dflags `quot` wORD_SIZE dflags)
arrPtrsHdrSize :: DynFlags -> ByteOff
arrPtrsHdrSize dflags
= fixedHdrSize dflags * wORD_SIZE dflags + sIZEOF_StgMutArrPtrs_NoHdr dflags
......@@ -314,18 +326,24 @@ thunkHdrSize :: DynFlags -> WordOff
thunkHdrSize dflags = fixedHdrSize dflags + smp_hdr
where smp_hdr = sIZEOF_StgSMPThunkHeader dflags `quot` wORD_SIZE dflags
nonHdrSize :: DynFlags -> SMRep -> ByteOff
nonHdrSize dflags rep = wordsToBytes dflags (nonHdrSizeW rep)
nonHdrSizeW :: SMRep -> WordOff
nonHdrSizeW (HeapRep _ p np _) = p + np
nonHdrSizeW (ArrayPtrsRep elems ct) = elems + ct
nonHdrSizeW (ArrayWordsRep words) = words
nonHdrSizeW (StackRep bs) = length bs
nonHdrSizeW (RTSRep _ rep) = nonHdrSizeW rep
-- | The total size of the closure, in words.
heapClosureSizeW :: DynFlags -> SMRep -> WordOff
heapClosureSizeW dflags (HeapRep _ p np ty)
= closureTypeHdrSize dflags ty + p + np
heapClosureSizeW dflags (ArrayPtrsRep elems ct)
= arrPtrsHdrSizeW dflags + elems + ct
heapClosureSizeW dflags (ArrayWordsRep words)
= arrWordsHdrSizeW dflags + words
heapClosureSizeW _ _ = panic "SMRep.heapClosureSize"
closureTypeHdrSize :: DynFlags -> ClosureTypeInfo -> WordOff
......@@ -454,6 +472,8 @@ instance Outputable SMRep where
ppr (ArrayPtrsRep size _) = ptext (sLit "ArrayPtrsRep") <+> ppr size
ppr (ArrayWordsRep words) = ptext (sLit "ArrayWordsRep") <+> ppr words
ppr (StackRep bs) = ptext (sLit "StackRep") <+> ppr bs
ppr (RTSRep ty rep) = ptext (sLit "tag:") <> ppr ty <+> ppr rep
......
......@@ -132,9 +132,12 @@ shouldInlinePrimOp :: DynFlags
-> PrimOp -- ^ The primop
-> [CmmExpr] -- ^ The primop arguments
-> Maybe ([LocalReg] -> FCode ())
shouldInlinePrimOp _ NewByteArrayOp_Char [(CmmLit (CmmInt n _))]
| fromInteger n <= maxInlineAllocThreshold =
Just $ \ [res] -> doNewByteArrayOp res (fromInteger n)
shouldInlinePrimOp dflags NewArrayOp [(CmmLit (CmmInt n _)), init]
| n <= maxInlineAllocThreshold dflags =
Just $ \ [res] -> doNewArrayOp res n init
| wordsToBytes dflags (fromInteger n) <= maxInlineAllocThreshold =
Just $ \ [res] -> doNewArrayOp res (fromInteger n) init
shouldInlinePrimOp dflags primop args
| primOpOutOfLine primop = Nothing
| otherwise = Just $ \ regs -> emitPrimOp dflags regs primop args
......@@ -1436,6 +1439,32 @@ mkBasicPrefetch locality off res base idx
[reg] -> emitAssign (CmmLocal reg) base
_ -> panic "StgCmmPrim: mkBasicPrefetch"
-- ----------------------------------------------------------------------------
-- Allocating byte arrays
-- | Takes a register to return the newly allocated array in and the
-- size of the new array in bytes. Allocates a new
-- 'MutableByteArray#'.
doNewByteArrayOp :: CmmFormal -> ByteOff -> FCode ()
doNewByteArrayOp res_r n = do
dflags <- getDynFlags
let info_ptr = mkLblExpr mkArrWords_infoLabel
rep = arrWordsRep dflags n
tickyAllocPrim (mkIntExpr dflags (arrWordsHdrSize dflags))
(mkIntExpr dflags (nonHdrSize dflags rep))
(zeroExpr dflags)
let hdr_size = wordsToBytes dflags (fixedHdrSize dflags)
base <- allocHeapClosure rep info_ptr curCCS
[ (mkIntExpr dflags n,
hdr_size + oFFSET_StgArrWords_bytes dflags)
]
emit $ mkAssign (CmmLocal res_r) base
-- ----------------------------------------------------------------------------
-- Copying byte arrays
......@@ -1530,21 +1559,21 @@ doSetByteArrayOp ba off len c
-- | Takes a register to return the newly allocated array in, the size
-- of the new array, and an initial value for the elements. Allocates
-- a new 'MutableArray#'.
doNewArrayOp :: CmmFormal -> Integer -> CmmExpr -> FCode ()
doNewArrayOp :: CmmFormal -> WordOff -> CmmExpr -> FCode ()
doNewArrayOp res_r n init = do
dflags <- getDynFlags
let info_ptr = mkLblExpr mkMAP_DIRTY_infoLabel
rep = arrPtrsRep dflags (fromIntegral n)
rep = arrPtrsRep dflags n
tickyAllocPrim (mkIntExpr dflags (arrPtrsHdrSize dflags))
(mkIntExpr dflags (wordsToBytes dflags (heapClosureSizeW dflags rep)))
(mkIntExpr dflags (nonHdrSize dflags rep))
(zeroExpr dflags)
let hdr_size = wordsToBytes dflags (fixedHdrSize dflags)
base <- allocHeapClosure rep info_ptr curCCS
[ (mkIntExpr dflags (fromInteger n),
[ (mkIntExpr dflags n,
hdr_size + oFFSET_StgMutArrPtrs_ptrs dflags)
, (mkIntExpr dflags (nonHdrSizeW rep),
hdr_size + oFFSET_StgMutArrPtrs_size dflags)
......@@ -1564,14 +1593,14 @@ doNewArrayOp res_r n init = do
emit =<< mkCmmIfThen
(cmmULtWord dflags (CmmReg (CmmLocal p))
(cmmOffsetW dflags (CmmReg arr)
(arrPtrsHdrSizeW dflags + fromInteger n)))
(arrPtrsHdrSizeW dflags + n)))
(catAGraphs loopBody)
emit $ mkAssign (CmmLocal res_r) (CmmReg arr)
-- | The inline allocation limit is 128 bytes, expressed in words.
maxInlineAllocThreshold :: DynFlags -> Integer
maxInlineAllocThreshold dflags = toInteger (128 `quot` wORD_SIZE dflags)
-- | The inline allocation limit is 128 bytes.
maxInlineAllocThreshold :: ByteOff
maxInlineAllocThreshold = 128
-- ----------------------------------------------------------------------------
-- Copying pointer arrays
......
{-# LANGUAGE MagicHash, UnboxedTuples #-}
-- Test allocation of statically sized byte arrays. There's an
-- optimization that targets these and we want to make sure that the
-- code generated in the optimized case is correct.
--
-- The tests proceeds by allocating a bunch of byte arrays of
-- different sizes, to try to provoke GC crashes, which would be a
-- symptom of the optimization not generating correct code.
module Main where
import GHC.Exts
import GHC.IO
main :: IO ()
main = do
loop 1000
putStrLn "success"
where
loop :: Int -> IO ()
loop 0 = return ()
loop i = do
-- Sizes have been picked to match the triggering of the
-- optimization and to match boundary conditions. Sizes are
-- given explicitly as to not rely on other optimizations to
-- make the static size known to the compiler.
newByteArray 0
newByteArray 1
newByteArray 2
newByteArray 3
newByteArray 4
newByteArray 5
newByteArray 6
newByteArray 7
newByteArray 8
newByteArray 9
newByteArray 10
newByteArray 11
newByteArray 12
newByteArray 13
newByteArray 14
newByteArray 15
newByteArray 16
newByteArray 64
newByteArray 128
newByteArray 129
loop (i-1)
newByteArray :: Int -> IO ()
newByteArray (I# sz#) = IO $ \s -> case newByteArray# sz# s of
(# s', _ #) -> (# s', () #)
{-# INLINE newByteArray #-} -- to make sure optimization triggers
......@@ -117,3 +117,4 @@ test('T7953', reqlib('random'), compile_and_run, [''])
test('T8256', reqlib('vector'), compile_and_run, [''])
test('T6084',normal, compile_and_run, ['-O2'])
test('StaticArraySize', normal, compile_and_run, ['-O2'])
test('StaticByteArraySize', normal, compile_and_run, ['-O2'])
{-# LANGUAGE MagicHash, UnboxedTuples #-}
module Main where
import GHC.Exts
import GHC.IO
main :: IO ()
main = loop 10000000
where
loop :: Int -> IO ()
loop 0 = return ()
loop i = newByteArray >> loop (i-1)
newByteArray :: IO ()
newByteArray = IO $ \s -> case newByteArray# 128# s of
(# s', _ #) -> (# s', () #)
......@@ -337,3 +337,10 @@ test('InlineArrayAlloc',
only_ways(['normal'])],
compile_and_run,
['-O2'])
test('InlineByteArrayAlloc',
[stats_num_field('bytes allocated',
[ (wordsize(64), 1440040960, 5)]),
only_ways(['normal'])],
compile_and_run,
['-O2'])
......@@ -392,7 +392,7 @@ wanteds = concat
,closureField Both "StgMutArrPtrs" "size"
,closureSize Both "StgArrWords"
,closureField C "StgArrWords" "bytes"
,closureField Both "StgArrWords" "bytes"
,closurePayload C "StgArrWords" "payload"
,closureField C "StgTSO" "_link"
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment