Commit b684f27e authored by Simon Marlow's avatar Simon Marlow Committed by tibbe

Refactor inline array allocation

- Move array representation knowledge into SMRep

- Separate out low-level heap-object allocation so that we can reuse
  it from doNewArrayOp

- remove card-table initialisation, we can safely ignore the card
  table for newly allocated arrays.
parent a70e7b47
......@@ -25,21 +25,24 @@ module SMRep (
ConstrDescription,
-- ** Construction
mkHeapRep, blackHoleRep, indStaticRep, mkStackRep, mkRTSRep,
mkHeapRep, blackHoleRep, indStaticRep, mkStackRep, mkRTSRep, arrPtrsRep,
-- ** Predicates
isStaticRep, isConRep, isThunkRep, isFunRep, isStaticNoCafCon,
isStackRep,
-- ** Size-related things
heapClosureSize,
fixedHdrSize, arrWordsHdrSize, arrPtrsHdrSize,
profHdrSize, thunkHdrSize, nonHdrSize,
heapClosureSizeW,
fixedHdrSize, arrWordsHdrSize, arrPtrsHdrSize, arrPtrsHdrSizeW,
profHdrSize, thunkHdrSize, nonHdrSizeW,
-- ** RTS closure types
rtsClosureType, rET_SMALL, rET_BIG,
aRG_GEN, aRG_GEN_BIG,
-- ** Arrays
card, cardRoundUp, cardTableSizeB, cardTableSizeW,
-- * Operations over [Word8] strings that don't belong here
pprWord8String, stringToWord8s
) where
......@@ -150,6 +153,10 @@ data SMRep
!WordOff -- # non-ptr words INCLUDING SLOP (see mkHeapRep below)
ClosureTypeInfo -- type-specific info
| ArrayPtrsRep
!WordOff -- # ptr words
!WordOff -- # card table words
| StackRep -- Stack frame (RET_SMALL or RET_BIG)
Liveness
......@@ -231,13 +238,16 @@ blackHoleRep = HeapRep False 0 0 BlackHole
indStaticRep :: SMRep
indStaticRep = HeapRep True 1 0 IndStatic
arrPtrsRep :: DynFlags -> WordOff -> SMRep
arrPtrsRep dflags elems = ArrayPtrsRep elems (cardTableSizeW dflags elems)
-----------------------------------------------------------------------------
-- Predicates
isStaticRep :: SMRep -> IsStatic
isStaticRep (HeapRep is_static _ _ _) = is_static
isStaticRep (StackRep {}) = False
isStaticRep (RTSRep _ rep) = isStaticRep rep
isStaticRep _ = False
isStackRep :: SMRep -> Bool
isStackRep StackRep{} = True
......@@ -293,6 +303,11 @@ arrPtrsHdrSize :: DynFlags -> ByteOff
arrPtrsHdrSize dflags
= fixedHdrSize dflags * wORD_SIZE dflags + sIZEOF_StgMutArrPtrs_NoHdr dflags
arrPtrsHdrSizeW :: DynFlags -> WordOff
arrPtrsHdrSizeW dflags =
fixedHdrSize dflags +
(sIZEOF_StgMutArrPtrs_NoHdr dflags `quot` wORD_SIZE dflags)
-- Thunks have an extra header word on SMP, so the update doesn't
-- splat the payload.
thunkHdrSize :: DynFlags -> WordOff
......@@ -300,15 +315,18 @@ thunkHdrSize dflags = fixedHdrSize dflags + smp_hdr
where smp_hdr = sIZEOF_StgSMPThunkHeader dflags `quot` wORD_SIZE dflags
nonHdrSize :: SMRep -> WordOff
nonHdrSize (HeapRep _ p np _) = p + np
nonHdrSize (StackRep bs) = length bs
nonHdrSize (RTSRep _ rep) = nonHdrSize rep
nonHdrSizeW :: SMRep -> WordOff
nonHdrSizeW (HeapRep _ p np _) = p + np
nonHdrSizeW (ArrayPtrsRep elems ct) = elems + ct
nonHdrSizeW (StackRep bs) = length bs
nonHdrSizeW (RTSRep _ rep) = nonHdrSizeW rep
heapClosureSize :: DynFlags -> SMRep -> WordOff
heapClosureSize dflags (HeapRep _ p np ty)
heapClosureSizeW :: DynFlags -> SMRep -> WordOff
heapClosureSizeW dflags (HeapRep _ p np ty)
= closureTypeHdrSize dflags ty + p + np
heapClosureSize _ _ = panic "SMRep.heapClosureSize"
heapClosureSizeW dflags (ArrayPtrsRep elems ct)
= arrPtrsHdrSizeW dflags + elems + ct
heapClosureSizeW _ _ = panic "SMRep.heapClosureSize"
closureTypeHdrSize :: DynFlags -> ClosureTypeInfo -> WordOff
closureTypeHdrSize dflags ty = case ty of
......@@ -323,6 +341,27 @@ closureTypeHdrSize dflags ty = case ty of
-- difference. If we ever have significant numbers of non-
-- updatable thunks, it might be worth fixing this.
-- ---------------------------------------------------------------------------
-- Arrays
-- | The byte offset into the card table of the card for a given element
card :: DynFlags -> Int -> Int
card dflags i = i `shiftR` mUT_ARR_PTRS_CARD_BITS dflags
-- | Convert a number of elements to a number of cards, rounding up
cardRoundUp :: DynFlags -> Int -> Int
cardRoundUp dflags i =
card dflags (i + ((1 `shiftL` mUT_ARR_PTRS_CARD_BITS dflags) - 1))
-- | The size of a card table, in bytes
cardTableSizeB :: DynFlags -> Int -> ByteOff
cardTableSizeB dflags elems = cardRoundUp dflags elems
-- | The size of a card table, in words
cardTableSizeW :: DynFlags -> Int -> WordOff
cardTableSizeW dflags elems =
bytesToWordsRoundUp dflags (cardTableSizeB dflags elems)
-----------------------------------------------------------------------------
-- deriving the RTS closure type from an SMRep
......@@ -413,6 +452,8 @@ instance Outputable SMRep where
pp_n _ 0 = empty
pp_n s n = int n <+> text s
ppr (ArrayPtrsRep size _) = ptext (sLit "ArrayPtrsRep") <+> ppr size
ppr (StackRep bs) = ptext (sLit "StackRep") <+> ppr bs
ppr (RTSRep ty rep) = ptext (sLit "tag:") <> ppr ty <+> ppr rep
......
......@@ -16,7 +16,7 @@ module StgCmmHeap (
mkStaticClosureFields, mkStaticClosure,
allocDynClosure, allocDynClosureCmm,
allocDynClosure, allocDynClosureCmm, allocHeapClosure,
emitSetDynHdr
) where
......@@ -88,61 +88,69 @@ allocDynClosureCmm
-- significant - see test T4801.
allocDynClosure mb_id info_tbl lf_info use_cc _blame_cc args_w_offsets
= do { let (args, offsets) = unzip args_w_offsets
; cmm_args <- mapM getArgAmode args -- No void args
; allocDynClosureCmm mb_id info_tbl lf_info
use_cc _blame_cc (zip cmm_args offsets)
}
allocDynClosure mb_id info_tbl lf_info use_cc _blame_cc args_w_offsets = do
let (args, offsets) = unzip args_w_offsets
cmm_args <- mapM getArgAmode args -- No void args
allocDynClosureCmm mb_id info_tbl lf_info
use_cc _blame_cc (zip cmm_args offsets)
allocDynClosureCmm mb_id info_tbl lf_info use_cc _blame_cc amodes_w_offsets
= do { virt_hp <- getVirtHp
-- SAY WHAT WE ARE ABOUT TO DO
; let rep = cit_rep info_tbl
; tickyDynAlloc mb_id rep lf_info
; profDynAlloc rep use_cc
allocDynClosureCmm mb_id info_tbl lf_info use_cc _blame_cc amodes_w_offsets = do
-- SAY WHAT WE ARE ABOUT TO DO
let rep = cit_rep info_tbl
tickyDynAlloc mb_id rep lf_info
profDynAlloc rep use_cc
let info_ptr = CmmLit (CmmLabel (cit_lbl info_tbl))
allocHeapClosure rep info_ptr use_cc amodes_w_offsets
-- FIND THE OFFSET OF THE INFO-PTR WORD
; let info_offset = virt_hp + 1
-- info_offset is the VirtualHpOffset of the first
-- word of the new object
-- Remember, virtHp points to last allocated word,
-- ie 1 *before* the info-ptr word of new object.
info_ptr = CmmLit (CmmLabel (cit_lbl info_tbl))
-- | Low-level heap object allocation.
allocHeapClosure
:: SMRep -- ^ representation of the object
-> CmmExpr -- ^ info pointer
-> CmmExpr -- ^ cost centre
-> [(CmmExpr,ByteOff)] -- ^ payload
-> FCode CmmExpr -- ^ returns the address of the object
allocHeapClosure rep info_ptr use_cc payload = do
virt_hp <- getVirtHp
-- ALLOCATE THE OBJECT
; base <- getHpRelOffset info_offset
; emitComment $ mkFastString "allocDynClosure"
; emitSetDynHdr base info_ptr use_cc
; let (cmm_args, offsets) = unzip amodes_w_offsets
; hpStore base cmm_args offsets
-- Find the offset of the info-ptr word
let info_offset = virt_hp + 1
-- info_offset is the VirtualHpOffset of the first
-- word of the new object
-- Remember, virtHp points to last allocated word,
-- ie 1 *before* the info-ptr word of new object.
-- BUMP THE VIRTUAL HEAP POINTER
; dflags <- getDynFlags
; setVirtHp (virt_hp + heapClosureSize dflags rep)
base <- getHpRelOffset info_offset
emitComment $ mkFastString "allocDynClosure"
emitSetDynHdr base info_ptr use_cc
-- Fill in the fields
hpStore base payload
-- Bump the virtual heap pointer
dflags <- getDynFlags
setVirtHp (virt_hp + heapClosureSizeW dflags rep)
return base
; getHpRelOffset info_offset
}
emitSetDynHdr :: CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
emitSetDynHdr base info_ptr ccs
= do dflags <- getDynFlags
hpStore base (header dflags) [0, wORD_SIZE dflags ..]
hpStore base (zip (header dflags) [0, wORD_SIZE dflags ..])
where
header :: DynFlags -> [CmmExpr]
header dflags = [info_ptr] ++ dynProfHdr dflags ccs
-- ToDof: Parallel stuff
-- No ticky header
hpStore :: CmmExpr -> [CmmExpr] -> [ByteOff] -> FCode ()
-- Store the item (expr,off) in base[off]
hpStore base vals offs
= do dflags <- getDynFlags
let mk_store val off = mkStore (cmmOffsetB dflags base off) val
emit (catAGraphs (zipWith mk_store vals offs))
hpStore :: CmmExpr -> [(CmmExpr, ByteOff)] -> FCode ()
hpStore base vals = do
dflags <- getDynFlags
sequence_ $
[ emitStore (cmmOffsetB dflags base off) val | (val,off) <- vals ]
-----------------------------------------------------------
-- Layout of static closures
......
......@@ -90,10 +90,11 @@ cgOpApp (StgPrimOp primop) args res_ty = do
dflags <- getDynFlags
cmm_args <- getNonVoidArgAmodes args
case shouldInlinePrimOp dflags primop cmm_args of
Nothing -> do let fun = CmmLit (CmmLabel (mkRtsPrimOpLabel primop))
emitCall (NativeNodeCall, NativeReturn) fun cmm_args
Nothing -> do -- out-of-line
let fun = CmmLit (CmmLabel (mkRtsPrimOpLabel primop))
emitCall (NativeNodeCall, NativeReturn) fun cmm_args
Just f
Just f -- inline
| ReturnsPrim VoidRep <- result_info
-> do f []
emitReturn []
......@@ -1533,36 +1534,24 @@ doNewArrayOp :: CmmFormal -> Integer -> CmmExpr -> FCode ()
doNewArrayOp res_r n init = do
dflags <- getDynFlags
let card_bytes = cardRoundUp dflags (fromInteger n)
size = fromInteger n + bytesToWordsRoundUp dflags card_bytes
words = arrPtrsHdrSizeWords dflags + size
-- If the allocation is of small, statically-known size, we reuse
-- the existing heap check to allocate inline.
virt_hp <- getVirtHp
-- FIND THE OFFSET OF THE INFO-PTR WORD
let info_offset = virt_hp + 1
-- info_offset is the VirtualHpOffset of the first
-- word of the new object
-- Remember, virtHp points to last allocated word,
-- ie 1 *before* the info-ptr word of new object.
base <- getHpRelOffset info_offset
setVirtHp (virt_hp + fromIntegral words) -- check n < big
arr <- CmmLocal `fmap` newTemp (bWord dflags)
emit $ mkAssign arr base
let info_ptr = mkLblExpr mkMAP_DIRTY_infoLabel
-- ToDo: this probably isn't right (card size?)
tickyAllocPrim (mkIntExpr dflags (arrPtrsHdrSize dflags))
(cmmMulWord dflags (mkIntExpr dflags (fromInteger n)) (wordSize dflags))
(mkIntExpr dflags (fromInteger n * wORD_SIZE dflags))
(zeroExpr dflags)
emitSetDynHdr base (mkLblExpr mkMAP_DIRTY_infoLabel) curCCS
emit $ mkStore (cmmOffsetB dflags base
(fixedHdrSize dflags * wORD_SIZE dflags +
oFFSET_StgMutArrPtrs_ptrs dflags))
(mkIntExpr dflags (fromInteger n))
emit $ mkStore (cmmOffsetB dflags base
(fixedHdrSize dflags * wORD_SIZE dflags +
oFFSET_StgMutArrPtrs_size dflags)) (mkIntExpr dflags size)
let rep = arrPtrsRep dflags (fromIntegral n)
hdr_size = fixedHdrSize dflags * wORD_SIZE dflags
base <- allocHeapClosure rep info_ptr curCCS
[ (mkIntExpr dflags (fromInteger n),
hdr_size + oFFSET_StgMutArrPtrs_ptrs dflags)
, (mkIntExpr dflags (nonHdrSizeW rep),
hdr_size + oFFSET_StgMutArrPtrs_size dflags)
]
arr <- CmmLocal `fmap` newTemp (bWord dflags)
emit $ mkAssign arr base
-- Initialise all elements of the the array
p <- assignTemp $ cmmOffsetB dflags (CmmReg arr) (arrPtrsHdrSize dflags)
......@@ -1577,26 +1566,12 @@ doNewArrayOp res_r n init = do
(cmmOffsetW dflags (CmmReg arr) (fromInteger n)))
(catAGraphs loopBody)
-- Initialise the mark bits with 0. This will be unrolled in the
-- backend to e.g. a single assignment since the arguments are
-- statically known.
emitMemsetCall
(cmmOffsetExprW dflags (CmmReg (CmmLocal p))
(mkIntExpr dflags (fromInteger n)))
(mkIntExpr dflags 0)
(mkIntExpr dflags card_bytes)
(mkIntExpr dflags (wORD_SIZE dflags))
emit $ mkAssign (CmmLocal res_r) (CmmReg arr)
-- | The inline allocation limit is 128 bytes, expressed in words.
maxInlineAllocThreshold :: DynFlags -> Integer
maxInlineAllocThreshold dflags = toInteger (128 `quot` wORD_SIZE dflags)
arrPtrsHdrSizeWords :: DynFlags -> WordOff
arrPtrsHdrSizeWords dflags =
fixedHdrSize dflags +
(sIZEOF_StgMutArrPtrs_NoHdr dflags `div` wORD_SIZE dflags)
-- ----------------------------------------------------------------------------
-- Copying pointer arrays
......@@ -1724,18 +1699,6 @@ emitCloneArray info_p res_r src0 src_off0 n0 = do
(mkIntExpr dflags (wORD_SIZE dflags))
emit $ mkAssign (CmmLocal res_r) arr
card :: DynFlags -> Int -> Int
card dflags i = i `shiftR` mUT_ARR_PTRS_CARD_BITS dflags
-- Convert a number of elements to a number of cards, rounding up
cardRoundUp :: DynFlags -> Int -> Int
cardRoundUp dflags i =
card dflags (i + ((1 `shiftL` mUT_ARR_PTRS_CARD_BITS dflags) - 1))
bytesToWordsRoundUp :: DynFlags -> Int -> Int
bytesToWordsRoundUp dflags e =
(e + wORD_SIZE dflags - 1) `quot` (wORD_SIZE dflags)
-- | Takes and offset in the destination array, the base address of
-- the card table, and the number of elements affected (*not* the
-- number of cards). The number of elements may not be zero.
......
......@@ -149,7 +149,7 @@ profDynAlloc :: SMRep -> CmmExpr -> FCode ()
profDynAlloc rep ccs
= ifProfiling $
do dflags <- getDynFlags
profAlloc (mkIntExpr dflags (heapClosureSize dflags rep)) ccs
profAlloc (mkIntExpr dflags (heapClosureSizeW dflags rep)) ccs
-- | Record the allocation of a closure (size is given by a CmmExpr)
-- The size must be in words, because the allocation counter in a CCS counts
......
......@@ -415,7 +415,7 @@ tickyDynAlloc :: Maybe Id -> SMRep -> LambdaFormInfo -> FCode ()
--
-- TODO what else to count while we're here?
tickyDynAlloc mb_id rep lf = ifTicky $ getDynFlags >>= \dflags ->
let bytes = wORD_SIZE dflags * heapClosureSize dflags rep
let bytes = wORD_SIZE dflags * heapClosureSizeW dflags rep
countGlobal tot ctr = do
bumpTickyCounterBy tot bytes
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment