Commit 1eece456 authored by tibbe's avatar tibbe

codeGen: inline allocation optimization for clone array primops

The inline allocation version is 69% faster than the out-of-line
version, when cloning an array of 16 unit elements on a 64-bit
machine.

Comparing the new and the old primop implementations isn't
straightforward. The old version had a missing heap check that I
discovered during the development of the new version. Comparing the
old and the new version would requiring fixing the old version, which
in turn means reimplementing the equivalent of MAYBE_CG in StgCmmPrim.

The inline allocation threshold is configurable via
-fmax-inline-alloc-size which gives the maximum array size, in bytes,
to allocate inline. The size does not include the closure header size.

Allowing the same primop to be either inline or out-of-line has some
implication for how we lay out heap checks. We always place a heap
check around out-of-line primops, as they may allocate outside of our
knowledge. However, for the inline primops we only allow allocation
via the standard means (i.e. virtHp). Since the clone primops might be
either inline or out-of-line the heap check layout code now consults
shouldInlinePrimOp to know whether a primop will be inlined.
parent 99ef2791
......@@ -54,6 +54,7 @@ module CLabel (
mkIndStaticInfoLabel,
mkMainCapabilityLabel,
mkMAP_FROZEN_infoLabel,
mkMAP_FROZEN0_infoLabel,
mkMAP_DIRTY_infoLabel,
mkEMPTY_MVAR_infoLabel,
mkArrWords_infoLabel,
......@@ -401,7 +402,7 @@ mkStaticConEntryLabel name c = IdLabel name c StaticConEntry
-- Constructing Cmm Labels
mkDirty_MUT_VAR_Label, mkSplitMarkerLabel, mkUpdInfoLabel,
mkBHUpdInfoLabel, mkIndStaticInfoLabel, mkMainCapabilityLabel,
mkMAP_FROZEN_infoLabel, mkMAP_DIRTY_infoLabel,
mkMAP_FROZEN_infoLabel, mkMAP_FROZEN0_infoLabel, mkMAP_DIRTY_infoLabel,
mkEMPTY_MVAR_infoLabel, mkTopTickyCtrLabel,
mkCAFBlackHoleInfoTableLabel, mkCAFBlackHoleEntryLabel,
mkArrWords_infoLabel :: CLabel
......@@ -411,7 +412,8 @@ mkUpdInfoLabel = CmmLabel rtsPackageId (fsLit "stg_upd_frame")
mkBHUpdInfoLabel = CmmLabel rtsPackageId (fsLit "stg_bh_upd_frame" ) CmmInfo
mkIndStaticInfoLabel = CmmLabel rtsPackageId (fsLit "stg_IND_STATIC") CmmInfo
mkMainCapabilityLabel = CmmLabel rtsPackageId (fsLit "MainCapability") CmmData
mkMAP_FROZEN_infoLabel = CmmLabel rtsPackageId (fsLit "stg_MUT_ARR_PTRS_FROZEN0") CmmInfo
mkMAP_FROZEN_infoLabel = CmmLabel rtsPackageId (fsLit "stg_MUT_ARR_PTRS_FROZEN") CmmInfo
mkMAP_FROZEN0_infoLabel = CmmLabel rtsPackageId (fsLit "stg_MUT_ARR_PTRS_FROZEN0") CmmInfo
mkMAP_DIRTY_infoLabel = CmmLabel rtsPackageId (fsLit "stg_MUT_ARR_PTRS_DIRTY") CmmInfo
mkEMPTY_MVAR_infoLabel = CmmLabel rtsPackageId (fsLit "stg_EMPTY_MVAR") CmmInfo
mkTopTickyCtrLabel = CmmLabel rtsPackageId (fsLit "top_ct") CmmData
......
......@@ -422,8 +422,8 @@ cgCase scrut bndr alt_type alts
; up_hp_usg <- getVirtHp -- Upstream heap usage
; let ret_bndrs = chooseReturnBndrs bndr alt_type alts
alt_regs = map (idToReg dflags) ret_bndrs
simple_scrut = isSimpleScrut scrut alt_type
do_gc | not simple_scrut = True
; simple_scrut <- isSimpleScrut scrut alt_type
; let do_gc | not simple_scrut = True
| isSingleton alts = False
| up_hp_usg > 0 = False
| otherwise = True
......@@ -450,6 +450,13 @@ recover any unused heap before passing control to the sequel. If we
don't do this, then any unused heap will become slop because the heap
check will reset the heap usage. Slop in the heap breaks LDV profiling
(+RTS -hb) which needs to do a linear sweep through the nursery.
Note [Inlining out-of-line primops and heap checks]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
If shouldInlinePrimOp returns True when called from StgCmmExpr for the
purpose of heap check placement, we *must* inline the primop later in
StgCmmPrim. If we don't things will go wrong.
-}
-----------------
......@@ -460,21 +467,25 @@ maybeSaveCostCentre simple_scrut
-----------------
isSimpleScrut :: StgExpr -> AltType -> Bool
isSimpleScrut :: StgExpr -> AltType -> FCode Bool
-- Simple scrutinee, does not block or allocate; hence safe to amalgamate
-- heap usage from alternatives into the stuff before the case
-- NB: if you get this wrong, and claim that the expression doesn't allocate
-- when it does, you'll deeply mess up allocation
isSimpleScrut (StgOpApp op _ _) _ = isSimpleOp op
isSimpleScrut (StgLit _) _ = True -- case 1# of { 0# -> ..; ... }
isSimpleScrut (StgApp _ []) (PrimAlt _) = True -- case x# of { 0# -> ..; ... }
isSimpleScrut _ _ = False
isSimpleScrut (StgOpApp op args _) _ = isSimpleOp op args
isSimpleScrut (StgLit _) _ = return True -- case 1# of { 0# -> ..; ... }
isSimpleScrut (StgApp _ []) (PrimAlt _) = return True -- case x# of { 0# -> ..; ... }
isSimpleScrut _ _ = return False
isSimpleOp :: StgOp -> Bool
isSimpleOp :: StgOp -> [StgArg] -> FCode Bool
-- True iff the op cannot block or allocate
isSimpleOp (StgFCallOp (CCall (CCallSpec _ _ safe)) _) = not (playSafe safe)
isSimpleOp (StgPrimOp op) = not (primOpOutOfLine op)
isSimpleOp (StgPrimCallOp _) = False
isSimpleOp (StgFCallOp (CCall (CCallSpec _ _ safe)) _) _ = return $! not (playSafe safe)
isSimpleOp (StgPrimOp op) stg_args = do
arg_exprs <- getNonVoidArgAmodes stg_args
dflags <- getDynFlags
-- See Note [Inlining out-of-line primops and heap checks]
return $! isJust $ shouldInlinePrimOp dflags op arg_exprs
isSimpleOp (StgPrimCallOp _) _ = return False
-----------------
chooseReturnBndrs :: Id -> AltType -> [StgAlt] -> [NonVoid Id]
......
......@@ -8,8 +8,9 @@
module StgCmmPrim (
cgOpApp,
cgPrimOp -- internal(ish), used by cgCase to get code for a
-- comparison without also turning it into a Bool.
cgPrimOp, -- internal(ish), used by cgCase to get code for a
-- comparison without also turning it into a Bool.
shouldInlinePrimOp
) where
#include "HsVersions.h"
......@@ -41,7 +42,6 @@ import Outputable
import Util
import Control.Monad (liftM, when)
import Data.Bits
------------------------------------------------------------------------
-- Primitive operations and foreign calls
......@@ -132,12 +132,31 @@ shouldInlinePrimOp :: DynFlags
-> PrimOp -- ^ The primop
-> [CmmExpr] -- ^ The primop arguments
-> Maybe ([LocalReg] -> FCode ())
shouldInlinePrimOp _ NewByteArrayOp_Char [(CmmLit (CmmInt n _))]
| fromInteger n <= maxInlineAllocThreshold =
shouldInlinePrimOp dflags NewByteArrayOp_Char [(CmmLit (CmmInt n _))]
| fromInteger n <= maxInlineAllocSize dflags =
Just $ \ [res] -> doNewByteArrayOp res (fromInteger n)
shouldInlinePrimOp dflags NewArrayOp [(CmmLit (CmmInt n _)), init]
| wordsToBytes dflags (fromInteger n) <= maxInlineAllocThreshold =
| wordsToBytes dflags (fromInteger n) <= maxInlineAllocSize dflags =
Just $ \ [res] -> doNewArrayOp res (fromInteger n) init
shouldInlinePrimOp dflags CloneArrayOp [src, src_off, (CmmLit (CmmInt n _))]
| wordsToBytes dflags (fromInteger n) <= maxInlineAllocSize dflags =
Just $ \ [res] -> emitCloneArray mkMAP_FROZEN_infoLabel res src src_off (fromInteger n)
shouldInlinePrimOp dflags CloneMutableArrayOp [src, src_off, (CmmLit (CmmInt n _))]
| wordsToBytes dflags (fromInteger n) <= maxInlineAllocSize dflags =
Just $ \ [res] -> emitCloneArray mkMAP_DIRTY_infoLabel res src src_off (fromInteger n)
shouldInlinePrimOp dflags FreezeArrayOp [src, src_off, (CmmLit (CmmInt n _))]
| wordsToBytes dflags (fromInteger n) <= maxInlineAllocSize dflags =
Just $ \ [res] -> emitCloneArray mkMAP_FROZEN_infoLabel res src src_off (fromInteger n)
shouldInlinePrimOp dflags ThawArrayOp [src, src_off, (CmmLit (CmmInt n _))]
| wordsToBytes dflags (fromInteger n) <= maxInlineAllocSize dflags =
Just $ \ [res] -> emitCloneArray mkMAP_DIRTY_infoLabel res src src_off (fromInteger n)
shouldInlinePrimOp dflags primop args
| primOpOutOfLine primop = Nothing
| otherwise = Just $ \ regs -> emitPrimOp dflags regs primop args
......@@ -328,11 +347,11 @@ emitPrimOp dflags [res] DataToTagOp [arg]
-- }
emitPrimOp _ [res] UnsafeFreezeArrayOp [arg]
= emit $ catAGraphs
[ setInfo arg (CmmLit (CmmLabel mkMAP_FROZEN_infoLabel)),
[ setInfo arg (CmmLit (CmmLabel mkMAP_FROZEN0_infoLabel)),
mkAssign (CmmLocal res) arg ]
emitPrimOp _ [res] UnsafeFreezeArrayArrayOp [arg]
= emit $ catAGraphs
[ setInfo arg (CmmLit (CmmLabel mkMAP_FROZEN_infoLabel)),
[ setInfo arg (CmmLit (CmmLabel mkMAP_FROZEN0_infoLabel)),
mkAssign (CmmLocal res) arg ]
-- #define unsafeFreezzeByteArrayzh(r,a) r=(a)
......@@ -345,15 +364,6 @@ emitPrimOp _ [] CopyArrayOp [src,src_off,dst,dst_off,n] =
doCopyArrayOp src src_off dst dst_off n
emitPrimOp _ [] CopyMutableArrayOp [src,src_off,dst,dst_off,n] =
doCopyMutableArrayOp src src_off dst dst_off n
emitPrimOp _ [res] CloneArrayOp [src,src_off,n] =
emitCloneArray mkMAP_FROZEN_infoLabel res src src_off n
emitPrimOp _ [res] CloneMutableArrayOp [src,src_off,n] =
emitCloneArray mkMAP_DIRTY_infoLabel res src src_off n
emitPrimOp _ [res] FreezeArrayOp [src,src_off,n] =
emitCloneArray mkMAP_FROZEN_infoLabel res src src_off n
emitPrimOp _ [res] ThawArrayOp [src,src_off,n] =
emitCloneArray mkMAP_DIRTY_infoLabel res src src_off n
emitPrimOp _ [] CopyArrayArrayOp [src,src_off,dst,dst_off,n] =
doCopyArrayOp src src_off dst dst_off n
emitPrimOp _ [] CopyMutableArrayArrayOp [src,src_off,dst,dst_off,n] =
......@@ -1598,10 +1608,6 @@ doNewArrayOp res_r n init = do
emit $ mkAssign (CmmLocal res_r) (CmmReg arr)
-- | The inline allocation limit is 128 bytes.
maxInlineAllocThreshold :: ByteOff
maxInlineAllocThreshold = 128
-- ----------------------------------------------------------------------------
-- Copying pointer arrays
......@@ -1689,45 +1695,40 @@ emitCopyArray copy src0 src_off0 dst0 dst_off0 n0 = do
-- allocated array in, a source array, an offset in the source array,
-- and the number of elements to copy. Allocates a new array and
-- initializes it from the source array.
emitCloneArray :: CLabel -> CmmFormal -> CmmExpr -> CmmExpr -> CmmExpr
emitCloneArray :: CLabel -> CmmFormal -> CmmExpr -> CmmExpr -> WordOff
-> FCode ()
emitCloneArray info_p res_r src0 src_off0 n0 = do
emitCloneArray info_p res_r src src_off n = do
dflags <- getDynFlags
let arrPtrsHdrSizeW dflags = mkIntExpr dflags (fixedHdrSize dflags +
(sIZEOF_StgMutArrPtrs_NoHdr dflags `div` wORD_SIZE dflags))
myCapability = cmmSubWord dflags (CmmReg baseReg) (mkIntExpr dflags (oFFSET_Capability_r dflags))
-- Passed as arguments (be careful)
src <- assignTempE src0
src_off <- assignTempE src_off0
n <- assignTempE n0
card_bytes <- assignTempE $ cardRoundUpCmm dflags n
size <- assignTempE $ cmmAddWord dflags n (bytesToWordsRoundUpCmm dflags card_bytes)
words <- assignTempE $ cmmAddWord dflags (arrPtrsHdrSizeW dflags) size
let info_ptr = mkLblExpr info_p
rep = arrPtrsRep dflags n
arr_r <- newTemp (bWord dflags)
emitAllocateCall arr_r myCapability words
tickyAllocPrim (mkIntExpr dflags (arrPtrsHdrSize dflags)) (cmmMulWord dflags n (wordSize dflags))
(zeroExpr dflags)
tickyAllocPrim (mkIntExpr dflags (arrPtrsHdrSize dflags))
(mkIntExpr dflags (nonHdrSize dflags rep))
(zeroExpr dflags)
let arr = CmmReg (CmmLocal arr_r)
emitSetDynHdr arr (CmmLit (CmmLabel info_p)) curCCS
emit $ mkStore (cmmOffsetB dflags arr (fixedHdrSize dflags * wORD_SIZE dflags +
oFFSET_StgMutArrPtrs_ptrs dflags)) n
emit $ mkStore (cmmOffsetB dflags arr (fixedHdrSize dflags * wORD_SIZE dflags +
oFFSET_StgMutArrPtrs_size dflags)) size
let hdr_size = wordsToBytes dflags (fixedHdrSize dflags)
dst_p <- assignTempE $ cmmOffsetB dflags arr (arrPtrsHdrSize dflags)
src_p <- assignTempE $ cmmOffsetExprW dflags (cmmOffsetB dflags src (arrPtrsHdrSize dflags))
src_off
base <- allocHeapClosure rep info_ptr curCCS
[ (mkIntExpr dflags n,
hdr_size + oFFSET_StgMutArrPtrs_ptrs dflags)
, (mkIntExpr dflags (nonHdrSizeW rep),
hdr_size + oFFSET_StgMutArrPtrs_size dflags)
]
emitMemcpyCall dst_p src_p (cmmMulWord dflags n (wordSize dflags)) (mkIntExpr dflags (wORD_SIZE dflags))
arr <- CmmLocal `fmap` newTemp (bWord dflags)
emit $ mkAssign arr base
emitMemsetCall (cmmOffsetExprW dflags dst_p n)
(mkIntExpr dflags 1)
card_bytes
dst_p <- assignTempE $ cmmOffsetB dflags (CmmReg arr)
(arrPtrsHdrSize dflags)
src_p <- assignTempE $ cmmOffsetExprW dflags src
(cmmAddWord dflags
(mkIntExpr dflags (arrPtrsHdrSizeW dflags)) src_off)
emitMemcpyCall dst_p src_p (mkIntExpr dflags (wordsToBytes dflags n))
(mkIntExpr dflags (wORD_SIZE dflags))
emit $ mkAssign (CmmLocal res_r) arr
emit $ mkAssign (CmmLocal res_r) (CmmReg arr)
-- | Takes and offset in the destination array, the base address of
-- the card table, and the number of elements affected (*not* the
......@@ -1748,22 +1749,6 @@ cardCmm :: DynFlags -> CmmExpr -> CmmExpr
cardCmm dflags i =
cmmUShrWord dflags i (mkIntExpr dflags (mUT_ARR_PTRS_CARD_BITS dflags))
-- Convert a number of elements to a number of cards, rounding up
cardRoundUpCmm :: DynFlags -> CmmExpr -> CmmExpr
cardRoundUpCmm dflags i =
cardCmm dflags (cmmAddWord dflags i
(mkIntExpr dflags
((1 `shiftL` mUT_ARR_PTRS_CARD_BITS dflags) - 1)))
bytesToWordsRoundUpCmm :: DynFlags -> CmmExpr -> CmmExpr
bytesToWordsRoundUpCmm dflags e =
cmmQuotWord dflags (cmmAddWord dflags e
(mkIntExpr dflags
(wORD_SIZE dflags - 1))) (wordSize dflags)
wordSize :: DynFlags -> CmmExpr
wordSize dflags = mkIntExpr dflags (wORD_SIZE dflags)
-- | Emit a call to @memcpy@.
emitMemcpyCall :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
emitMemcpyCall dst src n align = do
......@@ -1789,19 +1774,6 @@ emitMemsetCall dst c n align = do
MO_Memset
[ dst, c, n, align ]
-- | Emit a call to @allocate@.
emitAllocateCall :: LocalReg -> CmmExpr -> CmmExpr -> FCode ()
emitAllocateCall res cap n = do
emitCCall
[ (res, AddrHint) ]
allocate
[ (cap, AddrHint)
, (n, NoHint)
]
where
allocate = CmmLit (CmmLabel (mkForeignLabel (fsLit "allocate") Nothing
ForeignLabelInExternalPackage IsFunction))
emitBSwapCall :: LocalReg -> CmmExpr -> Width -> FCode ()
emitBSwapCall res x width = do
emitPrimCall
......
......@@ -799,7 +799,12 @@ data DynFlags = DynFlags {
rtldInfo :: IORef (Maybe LinkerInfo),
-- | Run-time compiler information
rtccInfo :: IORef (Maybe CompilerInfo)
rtccInfo :: IORef (Maybe CompilerInfo),
-- Constants used to control the amount of optimization done.
-- ^ Max size, in bytes, of inline array allocations.
maxInlineAllocSize :: Int
}
class HasDynFlags m where
......@@ -1448,7 +1453,9 @@ defaultDynFlags mySettings =
avx512f = False,
avx512pf = False,
rtldInfo = panic "defaultDynFlags: no rtldInfo",
rtccInfo = panic "defaultDynFlags: no rtccInfo"
rtccInfo = panic "defaultDynFlags: no rtccInfo",
maxInlineAllocSize = 128
}
defaultWays :: Settings -> [Way]
......@@ -2428,6 +2435,7 @@ dynamic_flags = [
, Flag "fmax-worker-args" (intSuffix (\n d -> d {maxWorkerArgs = n}))
, Flag "fghci-hist-size" (intSuffix (\n d -> d {ghciHistSize = n}))
, Flag "fmax-inline-alloc-size" (intSuffix (\n d -> d{ maxInlineAllocSize = n }))
------ Profiling ----------------------------------------------------
......
......@@ -794,6 +794,7 @@ primop CloneArrayOp "cloneArray#" GenPrimOp
source array. The provided array must fully contain the specified
range, but this is not checked.}
with
out_of_line = True
has_side_effects = True
code_size = { primOpCodeSizeForeignCall + 4 }
......@@ -804,6 +805,7 @@ primop CloneMutableArrayOp "cloneMutableArray#" GenPrimOp
source array. The provided array must fully contain the specified
range, but this is not checked.}
with
out_of_line = True
has_side_effects = True
code_size = { primOpCodeSizeForeignCall + 4 }
......@@ -814,6 +816,7 @@ primop FreezeArrayOp "freezeArray#" GenPrimOp
source array. The provided array must fully contain the specified
range, but this is not checked.}
with
out_of_line = True
has_side_effects = True
code_size = { primOpCodeSizeForeignCall + 4 }
......@@ -824,6 +827,7 @@ primop ThawArrayOp "thawArray#" GenPrimOp
source array. The provided array must fully contain the specified
range, but this is not checked.}
with
out_of_line = True
has_side_effects = True
code_size = { primOpCodeSizeForeignCall + 4 }
......
......@@ -1887,6 +1887,19 @@
<entry><option>-fno-unfolding-use-threshold</option></entry>
</row>
<row>
<entry><option>-fmax-inline-alloc-size</option>=<replaceable>n</replaceable></entry>
<entry>Set the maximum size of inline array allocations to
<replaceable>n</replaceable> bytes (default: 128). GHC
will allocate non-pinned arrays of statically known size
in the current nursery block if they're no bigger than
<replaceable>n</replaceable> bytes, ignoring GC overheap.
This value should be quite a bit smaller than the block
size (typically: 4096).</entry>
<entry>dynamic</entry>
<entry>-</entry>
</row>
</tbody>
</tgroup>
</informaltable>
......
......@@ -806,4 +806,35 @@
__gen = TO_W_(bdescr_gen_no(__bd)); \
if (__gen > 0) { recordMutableCap(__p, __gen); }
/* Complete function body for the clone family of (mutable) array ops.
Defined as a macro to avoid function call overhead or code
duplication. */
#define cloneArray(info, src, offset, n) \
W_ words, size; \
gcptr dst, dst_p, src_p; \
\
again: MAYBE_GC(again); \
\
size = n + mutArrPtrsCardWords(n); \
words = BYTES_TO_WDS(SIZEOF_StgMutArrPtrs) + size; \
("ptr" dst) = ccall allocate(MyCapability() "ptr", words); \
TICK_ALLOC_PRIM(SIZEOF_StgMutArrPtrs, WDS(size), 0); \
\
SET_HDR(dst, info, CCCS); \
StgMutArrPtrs_ptrs(dst) = n; \
StgMutArrPtrs_size(dst) = size; \
\
dst_p = dst + SIZEOF_StgMutArrPtrs; \
src_p = src + SIZEOF_StgMutArrPtrs + WDS(offset); \
while: \
if (n != 0) { \
n = n - 1; \
W_[dst_p] = W_[src_p]; \
dst_p = dst_p + WDS(1); \
src_p = src_p + WDS(1); \
goto while; \
} \
\
return (dst);
#endif /* CMM_H */
......@@ -347,6 +347,10 @@ RTS_FUN_DECL(stg_casIntArrayzh);
RTS_FUN_DECL(stg_fetchAddIntArrayzh);
RTS_FUN_DECL(stg_newArrayzh);
RTS_FUN_DECL(stg_newArrayArrayzh);
RTS_FUN_DECL(stg_cloneArrayzh);
RTS_FUN_DECL(stg_cloneMutableArrayzh);
RTS_FUN_DECL(stg_freezzeArrayzh);
RTS_FUN_DECL(stg_thawArrayzh);
RTS_FUN_DECL(stg_newMutVarzh);
RTS_FUN_DECL(stg_atomicModifyMutVarzh);
......
......@@ -1162,6 +1162,10 @@ typedef struct _RtsSymbolVal {
SymI_HasProto(stg_myThreadIdzh) \
SymI_HasProto(stg_labelThreadzh) \
SymI_HasProto(stg_newArrayzh) \
SymI_HasProto(stg_cloneArrayzh) \
SymI_HasProto(stg_cloneMutableArrayzh) \
SymI_HasProto(stg_freezzeArrayzh) \
SymI_HasProto(stg_thawArrayzh) \
SymI_HasProto(stg_newArrayArrayzh) \
SymI_HasProto(stg_casArrayzh) \
SymI_HasProto(stg_newBCOzh) \
......
......@@ -225,6 +225,27 @@ stg_unsafeThawArrayzh ( gcptr arr )
}
}
stg_cloneArrayzh ( gcptr src, W_ offset, W_ n )
{
cloneArray(stg_MUT_ARR_PTRS_FROZEN_info, src, offset, n)
}
stg_cloneMutableArrayzh ( gcptr src, W_ offset, W_ n )
{
cloneArray(stg_MUT_ARR_PTRS_DIRTY_info, src, offset, n)
}
// We have to escape the "z" in the name.
stg_freezzeArrayzh ( gcptr src, W_ offset, W_ n )
{
cloneArray(stg_MUT_ARR_PTRS_FROZEN_info, src, offset, n)
}
stg_thawArrayzh ( gcptr src, W_ offset, W_ n )
{
cloneArray(stg_MUT_ARR_PTRS_DIRTY_info, src, offset, n)
}
// RRN: Uses the ticketed approach; see casMutVar
stg_casArrayzh ( gcptr arr, W_ ind, gcptr old, gcptr new )
/* MutableArray# s a -> Int# -> a -> a -> State# s -> (# State# s, Int#, Any a #) */
......
......@@ -9,15 +9,20 @@ import GHC.Exts hiding (IsList(..))
import GHC.Prim
import GHC.ST
main :: IO ()
main = putStr
(test_copyArray
++ "\n" ++ test_copyMutableArray
++ "\n" ++ test_copyMutableArrayOverlap
++ "\n" ++ test_cloneArray
++ "\n" ++ test_cloneArrayStatic
++ "\n" ++ test_cloneMutableArray
++ "\n" ++ test_cloneMutableArrayEmpty
++ "\n" ++ test_cloneMutableArrayStatic
++ "\n" ++ test_freezeArray
++ "\n" ++ test_freezeArrayStatic
++ "\n" ++ test_thawArray
++ "\n" ++ test_thawArrayStatic
++ "\n"
)
......@@ -32,6 +37,10 @@ len = 130
copied :: Int
copied = len - 2
copiedStatic :: Int
copiedStatic = 16
{-# INLINE copiedStatic #-} -- to make sure optimization triggers
------------------------------------------------------------------------
-- copyArray#
......@@ -90,9 +99,20 @@ test_cloneArray =
fill src 0 len
src <- unsafeFreezeArray src
-- Don't include the first and last element.
return $ cloneArray src 1 copied
return $! cloneArray src 1 copied
in shows (toList dst copied) "\n"
-- Check that the static-size optimization works.
test_cloneArrayStatic :: String
test_cloneArrayStatic =
let dst = runST $ do
src <- newArray len 0
fill src 0 len
src <- unsafeFreezeArray src
-- Don't include the first and last element.
return $! cloneArray src 1 copiedStatic
in shows (toList dst copiedStatic) "\n"
------------------------------------------------------------------------
-- cloneMutableArray#
......@@ -117,6 +137,17 @@ test_cloneMutableArrayEmpty =
unsafeFreezeArray dst
in shows (toList dst 0) "\n"
-- Check that the static-size optimization works.
test_cloneMutableArrayStatic :: String
test_cloneMutableArrayStatic =
let dst = runST $ do
src <- newArray len 0
fill src 0 len
-- Don't include the first and last element.
dst <- cloneMutableArray src 1 copiedStatic
unsafeFreezeArray dst
in shows (toList dst copiedStatic) "\n"
------------------------------------------------------------------------
-- freezeArray#
......@@ -131,6 +162,16 @@ test_freezeArray =
freezeArray src 1 copied
in shows (toList dst copied) "\n"
-- Check that the static-size optimization works.
test_freezeArrayStatic :: String
test_freezeArrayStatic =
let dst = runST $ do
src <- newArray len 0
fill src 0 len
-- Don't include the first and last element.
freezeArray src 1 copiedStatic
in shows (toList dst copiedStatic) "\n"
------------------------------------------------------------------------
-- thawArray#
......@@ -147,6 +188,18 @@ test_thawArray =
unsafeFreezeArray dst
in shows (toList dst copied) "\n"
-- Check that the static-size optimization works.
test_thawArrayStatic :: String
test_thawArrayStatic =
let dst = runST $ do
src <- newArray len 0
fill src 0 len
src <- unsafeFreezeArray src
-- Don't include the first and last element.
dst <- thawArray src 1 copiedStatic
unsafeFreezeArray dst
in shows (toList dst copiedStatic) "\n"
------------------------------------------------------------------------
-- Test helpers
......@@ -181,13 +234,27 @@ newArray (I# n#) a = ST $ \s# -> case newArray# n# a s# of
(# s2#, marr# #) -> (# s2#, MArray marr# #)
indexArray :: Array a -> Int -> a
indexArray arr (I# i#) = case indexArray# (unArray arr) i# of
(# a #) -> a
indexArray arr i@(I# i#)
| i < 0 || i >= len =
error $ "bounds error, offset " ++ show i ++ ", length " ++ show len
| otherwise = case indexArray# (unArray arr) i# of
(# a #) -> a
where len = lengthArray arr
writeArray :: MArray s a -> Int -> a -> ST s ()
writeArray marr (I# i#) a = ST $ \ s# ->
writeArray marr i@(I# i#) a
| i < 0 || i >= len =
error $ "bounds error, offset " ++ show i ++ ", length " ++ show len
| otherwise = ST $ \ s# ->
case writeArray# (unMArray marr) i# a s# of
s2# -> (# s2#, () #)
where len = lengthMArray marr
lengthArray :: Array a -> Int
lengthArray arr = I# (sizeofArray# (unArray arr))
lengthMArray :: MArray s a -> Int
lengthMArray marr = I# (sizeofMutableArray# (unMArray marr))
unsafeFreezeArray :: MArray s a -> ST s (Array a)
unsafeFreezeArray marr = ST $ \ s# ->
......@@ -206,21 +273,25 @@ copyMutableArray src (I# six#) dst (I# dix#) (I# n#) = ST $ \ s# ->
cloneArray :: Array a -> Int -> Int -> Array a
cloneArray src (I# six#) (I# n#) = Array (cloneArray# (unArray src) six# n#)
{-# INLINE cloneArray #-} -- to make sure optimization triggers
cloneMutableArray :: MArray s a -> Int -> Int -> ST s (MArray s a)
cloneMutableArray src (I# six#) (I# n#) = ST $ \ s# ->
case cloneMutableArray# (unMArray src) six# n# s# of
(# s2#, marr# #) -> (# s2#, MArray marr# #)
{-# INLINE cloneMutableArray #-} -- to make sure optimization triggers
freezeArray :: MArray s a -> Int -> Int -> ST s (Array a)
freezeArray src (I# six#) (I# n#) = ST $ \ s# ->
case freezeArray# (unMArray src) six# n# s# of
(# s2#, arr# #) -> (# s2#, Array arr# #)
{-# INLINE freezeArray #-} -- to make sure optimization triggers
thawArray :: Array a -> Int -> Int -> ST s (MArray s a)
thawArray src (I# six#) (I# n#) = ST $ \ s# ->
case thawArray# (unArray src) six# n# s# of
(# s2#, marr# #) -> (# s2#, MArray marr# #)
{-# INLINE thawArray #-} -- to make sure optimization triggers
toList :: Array a -> Int -> [a]
toList arr n = go 0
...