Commit b684f27e authored by Simon Marlow's avatar Simon Marlow Committed by tibbe

Refactor inline array allocation

- Move array representation knowledge into SMRep

- Separate out low-level heap-object allocation so that we can reuse
  it from doNewArrayOp

- remove card-table initialisation, we can safely ignore the card
  table for newly allocated arrays.
parent a70e7b47
...@@ -25,21 +25,24 @@ module SMRep ( ...@@ -25,21 +25,24 @@ module SMRep (
ConstrDescription, ConstrDescription,
-- ** Construction -- ** Construction
mkHeapRep, blackHoleRep, indStaticRep, mkStackRep, mkRTSRep, mkHeapRep, blackHoleRep, indStaticRep, mkStackRep, mkRTSRep, arrPtrsRep,
-- ** Predicates -- ** Predicates
isStaticRep, isConRep, isThunkRep, isFunRep, isStaticNoCafCon, isStaticRep, isConRep, isThunkRep, isFunRep, isStaticNoCafCon,
isStackRep, isStackRep,
-- ** Size-related things -- ** Size-related things
heapClosureSize, heapClosureSizeW,
fixedHdrSize, arrWordsHdrSize, arrPtrsHdrSize, fixedHdrSize, arrWordsHdrSize, arrPtrsHdrSize, arrPtrsHdrSizeW,
profHdrSize, thunkHdrSize, nonHdrSize, profHdrSize, thunkHdrSize, nonHdrSizeW,
-- ** RTS closure types -- ** RTS closure types
rtsClosureType, rET_SMALL, rET_BIG, rtsClosureType, rET_SMALL, rET_BIG,
aRG_GEN, aRG_GEN_BIG, aRG_GEN, aRG_GEN_BIG,
-- ** Arrays
card, cardRoundUp, cardTableSizeB, cardTableSizeW,
-- * Operations over [Word8] strings that don't belong here -- * Operations over [Word8] strings that don't belong here
pprWord8String, stringToWord8s pprWord8String, stringToWord8s
) where ) where
...@@ -150,6 +153,10 @@ data SMRep ...@@ -150,6 +153,10 @@ data SMRep
!WordOff -- # non-ptr words INCLUDING SLOP (see mkHeapRep below) !WordOff -- # non-ptr words INCLUDING SLOP (see mkHeapRep below)
ClosureTypeInfo -- type-specific info ClosureTypeInfo -- type-specific info
| ArrayPtrsRep
!WordOff -- # ptr words
!WordOff -- # card table words
| StackRep -- Stack frame (RET_SMALL or RET_BIG) | StackRep -- Stack frame (RET_SMALL or RET_BIG)
Liveness Liveness
...@@ -231,13 +238,16 @@ blackHoleRep = HeapRep False 0 0 BlackHole ...@@ -231,13 +238,16 @@ blackHoleRep = HeapRep False 0 0 BlackHole
indStaticRep :: SMRep indStaticRep :: SMRep
indStaticRep = HeapRep True 1 0 IndStatic indStaticRep = HeapRep True 1 0 IndStatic
arrPtrsRep :: DynFlags -> WordOff -> SMRep
arrPtrsRep dflags elems = ArrayPtrsRep elems (cardTableSizeW dflags elems)
----------------------------------------------------------------------------- -----------------------------------------------------------------------------
-- Predicates -- Predicates
isStaticRep :: SMRep -> IsStatic isStaticRep :: SMRep -> IsStatic
isStaticRep (HeapRep is_static _ _ _) = is_static isStaticRep (HeapRep is_static _ _ _) = is_static
isStaticRep (StackRep {}) = False
isStaticRep (RTSRep _ rep) = isStaticRep rep isStaticRep (RTSRep _ rep) = isStaticRep rep
isStaticRep _ = False
isStackRep :: SMRep -> Bool isStackRep :: SMRep -> Bool
isStackRep StackRep{} = True isStackRep StackRep{} = True
...@@ -293,6 +303,11 @@ arrPtrsHdrSize :: DynFlags -> ByteOff ...@@ -293,6 +303,11 @@ arrPtrsHdrSize :: DynFlags -> ByteOff
arrPtrsHdrSize dflags arrPtrsHdrSize dflags
= fixedHdrSize dflags * wORD_SIZE dflags + sIZEOF_StgMutArrPtrs_NoHdr dflags = fixedHdrSize dflags * wORD_SIZE dflags + sIZEOF_StgMutArrPtrs_NoHdr dflags
arrPtrsHdrSizeW :: DynFlags -> WordOff
arrPtrsHdrSizeW dflags =
fixedHdrSize dflags +
(sIZEOF_StgMutArrPtrs_NoHdr dflags `quot` wORD_SIZE dflags)
-- Thunks have an extra header word on SMP, so the update doesn't -- Thunks have an extra header word on SMP, so the update doesn't
-- splat the payload. -- splat the payload.
thunkHdrSize :: DynFlags -> WordOff thunkHdrSize :: DynFlags -> WordOff
...@@ -300,15 +315,18 @@ thunkHdrSize dflags = fixedHdrSize dflags + smp_hdr ...@@ -300,15 +315,18 @@ thunkHdrSize dflags = fixedHdrSize dflags + smp_hdr
where smp_hdr = sIZEOF_StgSMPThunkHeader dflags `quot` wORD_SIZE dflags where smp_hdr = sIZEOF_StgSMPThunkHeader dflags `quot` wORD_SIZE dflags
nonHdrSize :: SMRep -> WordOff nonHdrSizeW :: SMRep -> WordOff
nonHdrSize (HeapRep _ p np _) = p + np nonHdrSizeW (HeapRep _ p np _) = p + np
nonHdrSize (StackRep bs) = length bs nonHdrSizeW (ArrayPtrsRep elems ct) = elems + ct
nonHdrSize (RTSRep _ rep) = nonHdrSize rep nonHdrSizeW (StackRep bs) = length bs
nonHdrSizeW (RTSRep _ rep) = nonHdrSizeW rep
heapClosureSize :: DynFlags -> SMRep -> WordOff heapClosureSizeW :: DynFlags -> SMRep -> WordOff
heapClosureSize dflags (HeapRep _ p np ty) heapClosureSizeW dflags (HeapRep _ p np ty)
= closureTypeHdrSize dflags ty + p + np = closureTypeHdrSize dflags ty + p + np
heapClosureSize _ _ = panic "SMRep.heapClosureSize" heapClosureSizeW dflags (ArrayPtrsRep elems ct)
= arrPtrsHdrSizeW dflags + elems + ct
heapClosureSizeW _ _ = panic "SMRep.heapClosureSize"
closureTypeHdrSize :: DynFlags -> ClosureTypeInfo -> WordOff closureTypeHdrSize :: DynFlags -> ClosureTypeInfo -> WordOff
closureTypeHdrSize dflags ty = case ty of closureTypeHdrSize dflags ty = case ty of
...@@ -323,6 +341,27 @@ closureTypeHdrSize dflags ty = case ty of ...@@ -323,6 +341,27 @@ closureTypeHdrSize dflags ty = case ty of
-- difference. If we ever have significant numbers of non- -- difference. If we ever have significant numbers of non-
-- updatable thunks, it might be worth fixing this. -- updatable thunks, it might be worth fixing this.
-- ---------------------------------------------------------------------------
-- Arrays
-- | The byte offset into the card table of the card for a given element
card :: DynFlags -> Int -> Int
card dflags i = i `shiftR` mUT_ARR_PTRS_CARD_BITS dflags
-- | Convert a number of elements to a number of cards, rounding up
cardRoundUp :: DynFlags -> Int -> Int
cardRoundUp dflags i =
card dflags (i + ((1 `shiftL` mUT_ARR_PTRS_CARD_BITS dflags) - 1))
-- | The size of a card table, in bytes
cardTableSizeB :: DynFlags -> Int -> ByteOff
cardTableSizeB dflags elems = cardRoundUp dflags elems
-- | The size of a card table, in words
cardTableSizeW :: DynFlags -> Int -> WordOff
cardTableSizeW dflags elems =
bytesToWordsRoundUp dflags (cardTableSizeB dflags elems)
----------------------------------------------------------------------------- -----------------------------------------------------------------------------
-- deriving the RTS closure type from an SMRep -- deriving the RTS closure type from an SMRep
...@@ -413,6 +452,8 @@ instance Outputable SMRep where ...@@ -413,6 +452,8 @@ instance Outputable SMRep where
pp_n _ 0 = empty pp_n _ 0 = empty
pp_n s n = int n <+> text s pp_n s n = int n <+> text s
ppr (ArrayPtrsRep size _) = ptext (sLit "ArrayPtrsRep") <+> ppr size
ppr (StackRep bs) = ptext (sLit "StackRep") <+> ppr bs ppr (StackRep bs) = ptext (sLit "StackRep") <+> ppr bs
ppr (RTSRep ty rep) = ptext (sLit "tag:") <> ppr ty <+> ppr rep ppr (RTSRep ty rep) = ptext (sLit "tag:") <> ppr ty <+> ppr rep
......
...@@ -16,7 +16,7 @@ module StgCmmHeap ( ...@@ -16,7 +16,7 @@ module StgCmmHeap (
mkStaticClosureFields, mkStaticClosure, mkStaticClosureFields, mkStaticClosure,
allocDynClosure, allocDynClosureCmm, allocDynClosure, allocDynClosureCmm, allocHeapClosure,
emitSetDynHdr emitSetDynHdr
) where ) where
...@@ -88,61 +88,69 @@ allocDynClosureCmm ...@@ -88,61 +88,69 @@ allocDynClosureCmm
-- significant - see test T4801. -- significant - see test T4801.
allocDynClosure mb_id info_tbl lf_info use_cc _blame_cc args_w_offsets allocDynClosure mb_id info_tbl lf_info use_cc _blame_cc args_w_offsets = do
= do { let (args, offsets) = unzip args_w_offsets let (args, offsets) = unzip args_w_offsets
; cmm_args <- mapM getArgAmode args -- No void args cmm_args <- mapM getArgAmode args -- No void args
; allocDynClosureCmm mb_id info_tbl lf_info allocDynClosureCmm mb_id info_tbl lf_info
use_cc _blame_cc (zip cmm_args offsets) use_cc _blame_cc (zip cmm_args offsets)
}
allocDynClosureCmm mb_id info_tbl lf_info use_cc _blame_cc amodes_w_offsets
= do { virt_hp <- getVirtHp
-- SAY WHAT WE ARE ABOUT TO DO allocDynClosureCmm mb_id info_tbl lf_info use_cc _blame_cc amodes_w_offsets = do
; let rep = cit_rep info_tbl -- SAY WHAT WE ARE ABOUT TO DO
; tickyDynAlloc mb_id rep lf_info let rep = cit_rep info_tbl
; profDynAlloc rep use_cc tickyDynAlloc mb_id rep lf_info
profDynAlloc rep use_cc
let info_ptr = CmmLit (CmmLabel (cit_lbl info_tbl))
allocHeapClosure rep info_ptr use_cc amodes_w_offsets
-- FIND THE OFFSET OF THE INFO-PTR WORD
; let info_offset = virt_hp + 1
-- info_offset is the VirtualHpOffset of the first
-- word of the new object
-- Remember, virtHp points to last allocated word,
-- ie 1 *before* the info-ptr word of new object.
info_ptr = CmmLit (CmmLabel (cit_lbl info_tbl)) -- | Low-level heap object allocation.
allocHeapClosure
:: SMRep -- ^ representation of the object
-> CmmExpr -- ^ info pointer
-> CmmExpr -- ^ cost centre
-> [(CmmExpr,ByteOff)] -- ^ payload
-> FCode CmmExpr -- ^ returns the address of the object
allocHeapClosure rep info_ptr use_cc payload = do
virt_hp <- getVirtHp
-- ALLOCATE THE OBJECT -- Find the offset of the info-ptr word
; base <- getHpRelOffset info_offset let info_offset = virt_hp + 1
; emitComment $ mkFastString "allocDynClosure" -- info_offset is the VirtualHpOffset of the first
; emitSetDynHdr base info_ptr use_cc -- word of the new object
; let (cmm_args, offsets) = unzip amodes_w_offsets -- Remember, virtHp points to last allocated word,
; hpStore base cmm_args offsets -- ie 1 *before* the info-ptr word of new object.
-- BUMP THE VIRTUAL HEAP POINTER base <- getHpRelOffset info_offset
; dflags <- getDynFlags emitComment $ mkFastString "allocDynClosure"
; setVirtHp (virt_hp + heapClosureSize dflags rep) emitSetDynHdr base info_ptr use_cc
-- Fill in the fields
hpStore base payload
-- Bump the virtual heap pointer
dflags <- getDynFlags
setVirtHp (virt_hp + heapClosureSizeW dflags rep)
return base
; getHpRelOffset info_offset
}
emitSetDynHdr :: CmmExpr -> CmmExpr -> CmmExpr -> FCode () emitSetDynHdr :: CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
emitSetDynHdr base info_ptr ccs emitSetDynHdr base info_ptr ccs
= do dflags <- getDynFlags = do dflags <- getDynFlags
hpStore base (header dflags) [0, wORD_SIZE dflags ..] hpStore base (zip (header dflags) [0, wORD_SIZE dflags ..])
where where
header :: DynFlags -> [CmmExpr] header :: DynFlags -> [CmmExpr]
header dflags = [info_ptr] ++ dynProfHdr dflags ccs header dflags = [info_ptr] ++ dynProfHdr dflags ccs
-- ToDof: Parallel stuff -- ToDof: Parallel stuff
-- No ticky header -- No ticky header
hpStore :: CmmExpr -> [CmmExpr] -> [ByteOff] -> FCode ()
-- Store the item (expr,off) in base[off] -- Store the item (expr,off) in base[off]
hpStore base vals offs hpStore :: CmmExpr -> [(CmmExpr, ByteOff)] -> FCode ()
= do dflags <- getDynFlags hpStore base vals = do
let mk_store val off = mkStore (cmmOffsetB dflags base off) val dflags <- getDynFlags
emit (catAGraphs (zipWith mk_store vals offs)) sequence_ $
[ emitStore (cmmOffsetB dflags base off) val | (val,off) <- vals ]
----------------------------------------------------------- -----------------------------------------------------------
-- Layout of static closures -- Layout of static closures
......
...@@ -90,10 +90,11 @@ cgOpApp (StgPrimOp primop) args res_ty = do ...@@ -90,10 +90,11 @@ cgOpApp (StgPrimOp primop) args res_ty = do
dflags <- getDynFlags dflags <- getDynFlags
cmm_args <- getNonVoidArgAmodes args cmm_args <- getNonVoidArgAmodes args
case shouldInlinePrimOp dflags primop cmm_args of case shouldInlinePrimOp dflags primop cmm_args of
Nothing -> do let fun = CmmLit (CmmLabel (mkRtsPrimOpLabel primop)) Nothing -> do -- out-of-line
emitCall (NativeNodeCall, NativeReturn) fun cmm_args let fun = CmmLit (CmmLabel (mkRtsPrimOpLabel primop))
emitCall (NativeNodeCall, NativeReturn) fun cmm_args
Just f Just f -- inline
| ReturnsPrim VoidRep <- result_info | ReturnsPrim VoidRep <- result_info
-> do f [] -> do f []
emitReturn [] emitReturn []
...@@ -1533,36 +1534,24 @@ doNewArrayOp :: CmmFormal -> Integer -> CmmExpr -> FCode () ...@@ -1533,36 +1534,24 @@ doNewArrayOp :: CmmFormal -> Integer -> CmmExpr -> FCode ()
doNewArrayOp res_r n init = do doNewArrayOp res_r n init = do
dflags <- getDynFlags dflags <- getDynFlags
let card_bytes = cardRoundUp dflags (fromInteger n) let info_ptr = mkLblExpr mkMAP_DIRTY_infoLabel
size = fromInteger n + bytesToWordsRoundUp dflags card_bytes
words = arrPtrsHdrSizeWords dflags + size -- ToDo: this probably isn't right (card size?)
-- If the allocation is of small, statically-known size, we reuse
-- the existing heap check to allocate inline.
virt_hp <- getVirtHp
-- FIND THE OFFSET OF THE INFO-PTR WORD
let info_offset = virt_hp + 1
-- info_offset is the VirtualHpOffset of the first
-- word of the new object
-- Remember, virtHp points to last allocated word,
-- ie 1 *before* the info-ptr word of new object.
base <- getHpRelOffset info_offset
setVirtHp (virt_hp + fromIntegral words) -- check n < big
arr <- CmmLocal `fmap` newTemp (bWord dflags)
emit $ mkAssign arr base
tickyAllocPrim (mkIntExpr dflags (arrPtrsHdrSize dflags)) tickyAllocPrim (mkIntExpr dflags (arrPtrsHdrSize dflags))
(cmmMulWord dflags (mkIntExpr dflags (fromInteger n)) (wordSize dflags)) (mkIntExpr dflags (fromInteger n * wORD_SIZE dflags))
(zeroExpr dflags) (zeroExpr dflags)
emitSetDynHdr base (mkLblExpr mkMAP_DIRTY_infoLabel) curCCS let rep = arrPtrsRep dflags (fromIntegral n)
emit $ mkStore (cmmOffsetB dflags base hdr_size = fixedHdrSize dflags * wORD_SIZE dflags
(fixedHdrSize dflags * wORD_SIZE dflags + base <- allocHeapClosure rep info_ptr curCCS
oFFSET_StgMutArrPtrs_ptrs dflags)) [ (mkIntExpr dflags (fromInteger n),
(mkIntExpr dflags (fromInteger n)) hdr_size + oFFSET_StgMutArrPtrs_ptrs dflags)
emit $ mkStore (cmmOffsetB dflags base , (mkIntExpr dflags (nonHdrSizeW rep),
(fixedHdrSize dflags * wORD_SIZE dflags + hdr_size + oFFSET_StgMutArrPtrs_size dflags)
oFFSET_StgMutArrPtrs_size dflags)) (mkIntExpr dflags size) ]
arr <- CmmLocal `fmap` newTemp (bWord dflags)
emit $ mkAssign arr base
-- Initialise all elements of the the array -- Initialise all elements of the the array
p <- assignTemp $ cmmOffsetB dflags (CmmReg arr) (arrPtrsHdrSize dflags) p <- assignTemp $ cmmOffsetB dflags (CmmReg arr) (arrPtrsHdrSize dflags)
...@@ -1577,26 +1566,12 @@ doNewArrayOp res_r n init = do ...@@ -1577,26 +1566,12 @@ doNewArrayOp res_r n init = do
(cmmOffsetW dflags (CmmReg arr) (fromInteger n))) (cmmOffsetW dflags (CmmReg arr) (fromInteger n)))
(catAGraphs loopBody) (catAGraphs loopBody)
-- Initialise the mark bits with 0. This will be unrolled in the
-- backend to e.g. a single assignment since the arguments are
-- statically known.
emitMemsetCall
(cmmOffsetExprW dflags (CmmReg (CmmLocal p))
(mkIntExpr dflags (fromInteger n)))
(mkIntExpr dflags 0)
(mkIntExpr dflags card_bytes)
(mkIntExpr dflags (wORD_SIZE dflags))
emit $ mkAssign (CmmLocal res_r) (CmmReg arr) emit $ mkAssign (CmmLocal res_r) (CmmReg arr)
-- | The inline allocation limit is 128 bytes, expressed in words. -- | The inline allocation limit is 128 bytes, expressed in words.
maxInlineAllocThreshold :: DynFlags -> Integer maxInlineAllocThreshold :: DynFlags -> Integer
maxInlineAllocThreshold dflags = toInteger (128 `quot` wORD_SIZE dflags) maxInlineAllocThreshold dflags = toInteger (128 `quot` wORD_SIZE dflags)
arrPtrsHdrSizeWords :: DynFlags -> WordOff
arrPtrsHdrSizeWords dflags =
fixedHdrSize dflags +
(sIZEOF_StgMutArrPtrs_NoHdr dflags `div` wORD_SIZE dflags)
-- ---------------------------------------------------------------------------- -- ----------------------------------------------------------------------------
-- Copying pointer arrays -- Copying pointer arrays
...@@ -1724,18 +1699,6 @@ emitCloneArray info_p res_r src0 src_off0 n0 = do ...@@ -1724,18 +1699,6 @@ emitCloneArray info_p res_r src0 src_off0 n0 = do
(mkIntExpr dflags (wORD_SIZE dflags)) (mkIntExpr dflags (wORD_SIZE dflags))
emit $ mkAssign (CmmLocal res_r) arr emit $ mkAssign (CmmLocal res_r) arr
card :: DynFlags -> Int -> Int
card dflags i = i `shiftR` mUT_ARR_PTRS_CARD_BITS dflags
-- Convert a number of elements to a number of cards, rounding up
cardRoundUp :: DynFlags -> Int -> Int
cardRoundUp dflags i =
card dflags (i + ((1 `shiftL` mUT_ARR_PTRS_CARD_BITS dflags) - 1))
bytesToWordsRoundUp :: DynFlags -> Int -> Int
bytesToWordsRoundUp dflags e =
(e + wORD_SIZE dflags - 1) `quot` (wORD_SIZE dflags)
-- | Takes and offset in the destination array, the base address of -- | Takes and offset in the destination array, the base address of
-- the card table, and the number of elements affected (*not* the -- the card table, and the number of elements affected (*not* the
-- number of cards). The number of elements may not be zero. -- number of cards). The number of elements may not be zero.
......
...@@ -149,7 +149,7 @@ profDynAlloc :: SMRep -> CmmExpr -> FCode () ...@@ -149,7 +149,7 @@ profDynAlloc :: SMRep -> CmmExpr -> FCode ()
profDynAlloc rep ccs profDynAlloc rep ccs
= ifProfiling $ = ifProfiling $
do dflags <- getDynFlags do dflags <- getDynFlags
profAlloc (mkIntExpr dflags (heapClosureSize dflags rep)) ccs profAlloc (mkIntExpr dflags (heapClosureSizeW dflags rep)) ccs
-- | Record the allocation of a closure (size is given by a CmmExpr) -- | Record the allocation of a closure (size is given by a CmmExpr)
-- The size must be in words, because the allocation counter in a CCS counts -- The size must be in words, because the allocation counter in a CCS counts
......
...@@ -415,7 +415,7 @@ tickyDynAlloc :: Maybe Id -> SMRep -> LambdaFormInfo -> FCode () ...@@ -415,7 +415,7 @@ tickyDynAlloc :: Maybe Id -> SMRep -> LambdaFormInfo -> FCode ()
-- --
-- TODO what else to count while we're here? -- TODO what else to count while we're here?
tickyDynAlloc mb_id rep lf = ifTicky $ getDynFlags >>= \dflags -> tickyDynAlloc mb_id rep lf = ifTicky $ getDynFlags >>= \dflags ->
let bytes = wORD_SIZE dflags * heapClosureSize dflags rep let bytes = wORD_SIZE dflags * heapClosureSizeW dflags rep
countGlobal tot ctr = do countGlobal tot ctr = do
bumpTickyCounterBy tot bytes bumpTickyCounterBy tot bytes
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment