Commit 9c23f06f authored by tibbe's avatar tibbe Committed by Simon Marlow

Make array copy primops inline

parent a6cc4146
......@@ -4,7 +4,7 @@ module CmmExpr
, CmmReg(..), cmmRegType
, CmmLit(..), cmmLitType
, LocalReg(..), localRegType
, GlobalReg(..), globalRegType, spReg, hpReg, spLimReg, nodeReg, node
, GlobalReg(..), globalRegType, spReg, hpReg, spLimReg, nodeReg, node, baseReg
, VGcPtr(..), vgcFlag -- Temporary!
, DefinerOfLocalRegs, UserOfLocalRegs, foldRegsDefd, foldRegsUsed, filterRegsUsed
, DefinerOfSlots, UserOfSlots, foldSlotsDefd, foldSlotsUsed
......@@ -425,7 +425,8 @@ instance Ord GlobalReg where
compare _ EagerBlackholeInfo = GT
-- convenient aliases
spReg, hpReg, spLimReg, nodeReg :: CmmReg
baseReg, spReg, hpReg, spLimReg, nodeReg :: CmmReg
baseReg = CmmGlobal BaseReg
spReg = CmmGlobal Sp
hpReg = CmmGlobal Hp
spLimReg = CmmGlobal SpLim
......
......@@ -10,13 +10,17 @@ module CgPrimOp (
cgPrimOp
) where
import BasicTypes
import ForeignCall
import ClosureInfo
import StgSyn
import CgForeignCall
import CgBindery
import CgMonad
import CgHeapery
import CgInfoTbls
import CgTicky
import CgProf
import CgUtils
import OldCmm
import CLabel
......@@ -205,6 +209,19 @@ emitPrimOp [res] UnsafeFreezeArrayOp [arg] _
emitPrimOp [res] UnsafeFreezeByteArrayOp [arg] _
= stmtC (CmmAssign (CmmLocal res) arg)
emitPrimOp [] CopyArrayOp [src,src_off,dst,dst_off,n] live =
doCopyArrayOp src src_off dst dst_off n live
emitPrimOp [] CopyMutableArrayOp [src,src_off,dst,dst_off,n] live =
doCopyMutableArrayOp src src_off dst dst_off n live
emitPrimOp [res] CloneArrayOp [src,src_off,n] live =
emitCloneArray mkMAP_FROZEN_infoLabel res src src_off n live
emitPrimOp [res] CloneMutableArrayOp [src,src_off,n] live =
emitCloneArray mkMAP_DIRTY_infoLabel res src src_off n live
emitPrimOp [res] FreezeArrayOp [src,src_off,n] live =
emitCloneArray mkMAP_FROZEN_infoLabel res src src_off n live
emitPrimOp [res] ThawArrayOp [src,src_off,n] live =
emitCloneArray mkMAP_DIRTY_infoLabel res src src_off n live
-- Reading/writing pointer arrays
emitPrimOp [r] ReadArrayOp [obj,ix] _ = doReadPtrArrayOp r obj ix
......@@ -618,3 +635,198 @@ cmmLoadIndexOffExpr off rep base idx
setInfo :: CmmExpr -> CmmExpr -> CmmStmt
setInfo closure_ptr info_ptr = CmmStore closure_ptr info_ptr
-- ----------------------------------------------------------------------------
-- Copying pointer arrays
-- | Takes a source 'Array#', an offset in the source array, a
-- destination 'MutableArray#', an offset into the destination array,
-- and the number of elements to copy. Copies the given number of
-- elements from the source array to the destination array.
doCopyArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
-> StgLiveVars -> Code
doCopyArrayOp = emitCopyArray copy
where
-- Copy data (we assume the arrays aren't overlapping since
-- they're of different types)
copy _src _dst = emitMemcpyCall
-- | Takes a source 'MutableArray#', an offset in the source array, a
-- destination 'MutableArray#', an offset into the destination array,
-- and the number of elements to copy. Copies the given number of
-- elements from the source array to the destination array.
doCopyMutableArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
-> StgLiveVars -> Code
doCopyMutableArrayOp = emitCopyArray copy
where
-- The only time the memory might overlap is when the two arrays
-- we were provided are the same array!
-- TODO: Optimize branch for common case of no aliasing.
copy src dst dst_p src_p bytes live =
emitIfThenElse (cmmEqWord src dst)
(emitMemmoveCall dst_p src_p bytes live)
(emitMemcpyCall dst_p src_p bytes live)
emitCopyArray :: (CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
-> StgLiveVars -> Code)
-> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
-> StgLiveVars
-> Code
emitCopyArray copy src0 src_off0 dst0 dst_off0 n0 live = do
-- Assign the arguments to temporaries so the code generator can
-- calculate liveness for us.
src <- assignTemp_ src0
src_off <- assignTemp_ src_off0
dst <- assignTemp_ dst0
dst_off <- assignTemp_ dst_off0
n <- assignTemp_ n0
-- Set the dirty bit in the header.
stmtC (setInfo dst (CmmLit (CmmLabel mkMAP_DIRTY_infoLabel)))
dst_elems_p <- assignTemp $ cmmOffsetB dst arrPtrsHdrSize
dst_p <- assignTemp $ cmmOffsetExprW dst_elems_p dst_off
src_p <- assignTemp $ cmmOffsetExprW (cmmOffsetB src arrPtrsHdrSize) src_off
bytes <- assignTemp $ cmmMulWord n (CmmLit (mkIntCLit wORD_SIZE))
copy src dst dst_p src_p bytes live
-- The base address of the destination card table
dst_cards_p <- assignTemp $ cmmOffsetExprW dst_elems_p (loadArrPtrsSize dst)
emitSetCards dst_off dst_cards_p n live
-- | Takes an info table label, a register to return the newly
-- allocated array in, a source array, an offset in the source array,
-- and the number of elements to copy. Allocates a new array and
-- initializes it form the source array.
emitCloneArray :: CLabel -> CmmFormal -> CmmExpr -> CmmExpr -> CmmExpr
-> StgLiveVars -> Code
emitCloneArray info_p res_r src0 src_off0 n0 live = do
-- Assign the arguments to temporaries so the code generator can
-- calculate liveness for us.
src <- assignTemp_ src0
src_off <- assignTemp_ src_off0
n <- assignTemp_ n0
card_words <- assignTemp $ (n `cmmUShrWord`
(CmmLit (mkIntCLit mUT_ARR_PTRS_CARD_BITS)))
`cmmAddWord` CmmLit (mkIntCLit 1)
size <- assignTemp $ n `cmmAddWord` card_words
words <- assignTemp $ arrPtrsHdrSizeW `cmmAddWord` size
arr_r <- newTemp bWord
emitAllocateCall arr_r myCapability words live
tickyAllocPrim (CmmLit (mkIntCLit arrPtrsHdrSize)) (n `cmmMulWord` wordSize)
(CmmLit $ mkIntCLit 0)
let arr = CmmReg (CmmLocal arr_r)
emitSetDynHdr arr (CmmLit (CmmLabel info_p)) curCCSAddr
stmtC $ CmmStore (cmmOffsetB arr (fixedHdrSize * wORD_SIZE +
oFFSET_StgMutArrPtrs_ptrs)) n
stmtC $ CmmStore (cmmOffsetB arr (fixedHdrSize * wORD_SIZE +
oFFSET_StgMutArrPtrs_size)) size
dst_p <- assignTemp $ cmmOffsetB arr arrPtrsHdrSize
src_p <- assignTemp $ cmmOffsetExprW (cmmOffsetB src arrPtrsHdrSize)
src_off
emitMemcpyCall dst_p src_p (n `cmmMulWord` wordSize) live
emitMemsetCall (cmmOffsetExprW dst_p n)
(CmmLit (CmmInt (toInteger (1 :: Int)) W8))
(card_words `cmmMulWord` wordSize)
live
stmtC $ CmmAssign (CmmLocal res_r) arr
where
arrPtrsHdrSizeW = CmmLit $ mkIntCLit $ fixedHdrSize +
(sIZEOF_StgMutArrPtrs_NoHdr `div` wORD_SIZE)
wordSize = CmmLit (mkIntCLit wORD_SIZE)
myCapability = CmmReg baseReg `cmmSubWord`
CmmLit (mkIntCLit oFFSET_Capability_r)
-- | Takes and offset in the destination array, the base address of
-- the card table, and the number of elements affected (*not* the
-- number of cards). Marks the relevant cards as dirty.
emitSetCards :: CmmExpr -> CmmExpr -> CmmExpr -> StgLiveVars -> Code
emitSetCards dst_start dst_cards_start n live = do
start_card <- assignTemp $ card dst_start
emitMemsetCall (dst_cards_start `cmmAddWord` start_card)
(CmmLit (CmmInt (toInteger (1 :: Int)) W8))
((card (dst_start `cmmAddWord` n) `cmmSubWord` start_card)
`cmmAddWord` CmmLit (mkIntCLit 1))
live
where
-- Convert an element index to a card index
card i = i `cmmUShrWord` (CmmLit (mkIntCLit mUT_ARR_PTRS_CARD_BITS))
-- | Emit a call to @memcpy@.
emitMemcpyCall :: CmmExpr -> CmmExpr -> CmmExpr -> StgLiveVars -> Code
emitMemcpyCall dst src n live = do
vols <- getVolatileRegs live
emitForeignCall' PlayRisky
[{-no results-}]
(CmmCallee memcpy CCallConv)
[ (CmmHinted dst AddrHint)
, (CmmHinted src AddrHint)
, (CmmHinted n NoHint)
]
(Just vols)
NoC_SRT -- No SRT b/c we do PlayRisky
CmmMayReturn
where
memcpy = CmmLit (CmmLabel (mkForeignLabel (fsLit "memcpy") Nothing
ForeignLabelInExternalPackage IsFunction))
-- | Emit a call to @memmove@.
emitMemmoveCall :: CmmExpr -> CmmExpr -> CmmExpr -> StgLiveVars -> Code
emitMemmoveCall dst src n live = do
vols <- getVolatileRegs live
emitForeignCall' PlayRisky
[{-no results-}]
(CmmCallee memmove CCallConv)
[ (CmmHinted dst AddrHint)
, (CmmHinted src AddrHint)
, (CmmHinted n NoHint)
]
(Just vols)
NoC_SRT -- No SRT b/c we do PlayRisky
CmmMayReturn
where
memmove = CmmLit (CmmLabel (mkForeignLabel (fsLit "memmove") Nothing
ForeignLabelInExternalPackage IsFunction))
-- | Emit a call to @memset@. The second argument must be of type
-- 'W8'.
emitMemsetCall :: CmmExpr -> CmmExpr -> CmmExpr -> StgLiveVars -> Code
emitMemsetCall dst c n live = do
vols <- getVolatileRegs live
emitForeignCall' PlayRisky
[{-no results-}]
(CmmCallee memset CCallConv)
[ (CmmHinted dst AddrHint)
, (CmmHinted c NoHint)
, (CmmHinted n NoHint)
]
(Just vols)
NoC_SRT -- No SRT b/c we do PlayRisky
CmmMayReturn
where
memset = CmmLit (CmmLabel (mkForeignLabel (fsLit "memset") Nothing
ForeignLabelInExternalPackage IsFunction))
-- | Emit a call to @allocate@.
emitAllocateCall :: LocalReg -> CmmExpr -> CmmExpr -> StgLiveVars -> Code
emitAllocateCall res cap n live = do
vols <- getVolatileRegs live
emitForeignCall' PlayRisky
[CmmHinted res AddrHint]
(CmmCallee allocate CCallConv)
[ (CmmHinted cap AddrHint)
, (CmmHinted n NoHint)
]
(Just vols)
NoC_SRT -- No SRT b/c we do PlayRisky
CmmMayReturn
where
allocate = CmmLit (CmmLabel (mkForeignLabel (fsLit "allocate") Nothing
ForeignLabelInExternalPackage IsFunction))
......@@ -20,7 +20,7 @@ module CgUtils (
emitRODataLits, mkRODataLits,
emitIf, emitIfThenElse,
emitRtsCall, emitRtsCallWithVols, emitRtsCallWithResult,
assignTemp, newTemp,
assignTemp, assignTemp_, newTemp,
emitSimultaneously,
emitSwitch, emitLitSwitch,
tagToClosure,
......@@ -29,7 +29,7 @@ module CgUtils (
activeStgRegs, fixStgRegisters,
cmmAndWord, cmmOrWord, cmmNegate, cmmEqWord, cmmNeWord,
cmmUGtWord,
cmmUGtWord, cmmSubWord, cmmMulWord, cmmAddWord, cmmUShrWord,
cmmOffsetExprW, cmmOffsetExprB,
cmmRegOffW, cmmRegOffB,
cmmLabelOffW, cmmLabelOffB,
......@@ -180,8 +180,10 @@ cmmULtWord e1 e2 = CmmMachOp mo_wordULt [e1, e2]
cmmUGeWord e1 e2 = CmmMachOp mo_wordUGe [e1, e2]
cmmUGtWord e1 e2 = CmmMachOp mo_wordUGt [e1, e2]
--cmmShlWord e1 e2 = CmmMachOp mo_wordShl [e1, e2]
--cmmUShrWord e1 e2 = CmmMachOp mo_wordUShr [e1, e2]
cmmUShrWord e1 e2 = CmmMachOp mo_wordUShr [e1, e2]
cmmAddWord e1 e2 = CmmMachOp mo_wordAdd [e1, e2]
cmmSubWord e1 e2 = CmmMachOp mo_wordSub [e1, e2]
cmmMulWord e1 e2 = CmmMachOp mo_wordMul [e1, e2]
cmmNegate :: CmmExpr -> CmmExpr
cmmNegate (CmmLit (CmmInt n rep)) = CmmLit (CmmInt (-n) rep)
......@@ -587,6 +589,9 @@ mkByteStringCLit bytes
--
-------------------------------------------------------------------------
-- | If the expression is trivial, return it. Otherwise, assign the
-- expression to a temporary register and return an expression
-- referring to this register.
assignTemp :: CmmExpr -> FCode CmmExpr
-- For a non-trivial expression, e, create a local
-- variable and assign the expression to it
......@@ -596,6 +601,14 @@ assignTemp e
; stmtC (CmmAssign (CmmLocal reg) e)
; return (CmmReg (CmmLocal reg)) }
-- | Assign the expression to a temporary register and return an
-- expression referring to this register.
assignTemp_ :: CmmExpr -> FCode CmmExpr
assignTemp_ e = do
reg <- newTemp (cmmExprType e)
stmtC (CmmAssign (CmmLocal reg) e)
return (CmmReg (CmmLocal reg))
newTemp :: CmmType -> FCode LocalReg
newTemp rep = do { uniq <- newUnique; return (LocalReg uniq rep) }
......
......@@ -632,7 +632,6 @@ primop CopyArrayOp "copyArray#" GenPrimOp
Both arrays must fully contain the specified ranges, but this is not checked.
The two arrays must not be the same array in different states, but this is not checked either.}
with
out_of_line = True
has_side_effects = True
primop CopyMutableArrayOp "copyMutableArray#" GenPrimOp
......@@ -640,7 +639,6 @@ primop CopyMutableArrayOp "copyMutableArray#" GenPrimOp
{Copy a range of the first MutableArray# to the specified region in the second MutableArray#.
Both arrays must fully contain the specified ranges, but this is not checked.}
with
out_of_line = True
has_side_effects = True
primop CloneArrayOp "cloneArray#" GenPrimOp
......@@ -648,7 +646,6 @@ primop CloneArrayOp "cloneArray#" GenPrimOp
{Return a newly allocated Array# with the specified subrange of the provided Array#.
The provided Array# should contain the full subrange specified by the two Int#s, but this is not checked.}
with
out_of_line = True
has_side_effects = True
primop CloneMutableArrayOp "cloneMutableArray#" GenPrimOp
......@@ -656,7 +653,6 @@ primop CloneMutableArrayOp "cloneMutableArray#" GenPrimOp
{Return a newly allocated Array# with the specified subrange of the provided Array#.
The provided MutableArray# should contain the full subrange specified by the two Int#s, but this is not checked.}
with
out_of_line = True
has_side_effects = True
primop FreezeArrayOp "freezeArray#" GenPrimOp
......@@ -664,7 +660,6 @@ primop FreezeArrayOp "freezeArray#" GenPrimOp
{Return a newly allocated Array# with the specified subrange of the provided MutableArray#.
The provided MutableArray# should contain the full subrange specified by the two Int#s, but this is not checked.}
with
out_of_line = True
has_side_effects = True
primop ThawArrayOp "thawArray#" GenPrimOp
......@@ -672,7 +667,6 @@ primop ThawArrayOp "thawArray#" GenPrimOp
{Return a newly allocated Array# with the specified subrange of the provided MutableArray#.
The provided Array# should contain the full subrange specified by the two Int#s, but this is not checked.}
with
out_of_line = True
has_side_effects = True
------------------------------------------------------------------------
......
......@@ -380,12 +380,6 @@ RTS_FUN_DECL(stg_newByteArrayzh);
RTS_FUN_DECL(stg_newPinnedByteArrayzh);
RTS_FUN_DECL(stg_newAlignedPinnedByteArrayzh);
RTS_FUN_DECL(stg_newArrayzh);
RTS_FUN_DECL(stg_copyArrayzh);
RTS_FUN_DECL(stg_copyMutableArrayzh);
RTS_FUN_DECL(stg_cloneArrayzh);
RTS_FUN_DECL(stg_cloneMutableArrayzh);
RTS_FUN_DECL(stg_freezzeArrayzh);
RTS_FUN_DECL(stg_thawArrayzh);
RTS_FUN_DECL(stg_newMutVarzh);
RTS_FUN_DECL(stg_atomicModifyMutVarzh);
......
......@@ -826,12 +826,6 @@ typedef struct _RtsSymbolVal {
SymI_HasProto(stg_myThreadIdzh) \
SymI_HasProto(stg_labelThreadzh) \
SymI_HasProto(stg_newArrayzh) \
SymI_HasProto(stg_copyArrayzh) \
SymI_HasProto(stg_copyMutableArrayzh) \
SymI_HasProto(stg_cloneArrayzh) \
SymI_HasProto(stg_cloneMutableArrayzh) \
SymI_HasProto(stg_freezzeArrayzh) \
SymI_HasProto(stg_thawArrayzh) \
SymI_HasProto(stg_newBCOzh) \
SymI_HasProto(stg_newByteArrayzh) \
SymI_HasProto_redirect(newCAF, newDynCAF) \
......
......@@ -212,111 +212,6 @@ stg_unsafeThawArrayzh
}
}
#define COPY_CARDS(src_start, src_cards_start, dst_start, dst_cards_start, n, copy) \
if (src_start & mutArrCardMask == dst_start & mutArrCardMask) { \
foreign "C" copy(dst_cards_start + mutArrPtrCardUp(dst_start), src_cards_start + mutArrPtrCardUp(src_start), mutArrPtrCardDown(n)); \
\
I8[dst_cards_start + mutArrPtrCardDown(dst_start)] = I8[dst_cards_start + mutArrPtrCardDown(dst_start)] | I8[src_cards_start + mutArrPtrCardDown(src_start)]; \
I8[dst_cards_start + mutArrPtrCardUp(n)] = I8[dst_cards_start + mutArrPtrCardUp(dst_start + n)] | I8[src_cards_start + mutArrPtrCardUp(src_start + n)]; \
} else { \
foreign "C" memset(dst_cards_start "ptr", 1, mutArrPtrCardDown(n)); \
}
stg_copyArrayzh
{
W_ bytes, n, src, dst, src_start, dst_start, src_start_ptr, dst_start_ptr;
W_ src_cards_start, dst_cards_start;
src = R1;
src_start = R2;
dst = R3;
dst_start = R4;
n = R5;
MAYBE_GC(R1_PTR & R3_PTR, stg_copyArrayzh);
bytes = WDS(n);
src_start_ptr = src + SIZEOF_StgMutArrPtrs + WDS(src_start);
dst_start_ptr = dst + SIZEOF_StgMutArrPtrs + WDS(dst_start);
// Copy data (we assume the arrays aren't overlapping since they're of different types)
foreign "C" memcpy(dst_start_ptr "ptr", src_start_ptr "ptr", bytes);
// The base address of both source and destination card tables
src_cards_start = src + SIZEOF_StgMutArrPtrs + WDS(StgMutArrPtrs_ptrs(src));
dst_cards_start = dst + SIZEOF_StgMutArrPtrs + WDS(StgMutArrPtrs_ptrs(dst));
COPY_CARDS(src_start, src_cards_start, dst_start, dst_cards_start, n, memcpy);
jump %ENTRY_CODE(Sp(0));
}
stg_copyMutableArrayzh
{
W_ bytes, n, src, dst, src_start, dst_start, src_start_ptr, dst_start_ptr;
W_ src_cards_start, dst_cards_start;
src = R1;
src_start = R2;
dst = R3;
dst_start = R4;
n = R5;
MAYBE_GC(R1_PTR & R3_PTR, stg_copyMutableArrayzh);
bytes = WDS(n);
src_start_ptr = src + SIZEOF_StgMutArrPtrs + WDS(src_start);
dst_start_ptr = dst + SIZEOF_StgMutArrPtrs + WDS(dst_start);
src_cards_start = src + SIZEOF_StgMutArrPtrs + WDS(StgMutArrPtrs_ptrs(src));
dst_cards_start = dst + SIZEOF_StgMutArrPtrs + WDS(StgMutArrPtrs_ptrs(dst));
// The only time the memory might overlap is when the two arrays we were provided are the same array!
if (src == dst) {
foreign "C" memmove(dst_start_ptr "ptr", src_start_ptr "ptr", bytes);
COPY_CARDS(src_start, src_cards_start, dst_start, dst_cards_start, n, memmove);
} else {
foreign "C" memcpy(dst_start_ptr "ptr", src_start_ptr "ptr", bytes);
COPY_CARDS(src_start, src_cards_start, dst_start, dst_cards_start, n, memcpy);
}
jump %ENTRY_CODE(Sp(0));
}
#define ARRAY_CLONE(name, type) \
name \
{ \
W_ src, src_off, words, n, init, arr, src_p, dst_p, size; \
\
src = R1; \
src_off = R2; \
n = R3; \
\
MAYBE_GC(R1_PTR, name); \
\
size = n + mutArrPtrsCardWords(n); \
words = BYTES_TO_WDS(SIZEOF_StgMutArrPtrs) + size; \
("ptr" arr) = foreign "C" allocate(MyCapability() "ptr", words) [R2]; \
TICK_ALLOC_PRIM(SIZEOF_StgMutArrPtrs, WDS(n), 0); \
\
SET_HDR(arr, type, W_[CCCS]); \
StgMutArrPtrs_ptrs(arr) = n; \
StgMutArrPtrs_size(arr) = size; \
\
dst_p = arr + SIZEOF_StgMutArrPtrs; \
src_p = src + SIZEOF_StgMutArrPtrs + WDS(src_off); \
\
foreign "C" memcpy(dst_p "ptr", src_p "ptr", WDS(n)); \
\
foreign "C" memset(dst_p + WDS(n), 0, WDS(mutArrPtrsCardWords(n))); \
RET_P(arr); \
}
ARRAY_CLONE(stg_cloneArrayzh, stg_MUT_ARR_PTRS_FROZEN0_info)
ARRAY_CLONE(stg_cloneMutableArrayzh, stg_MUT_ARR_PTRS_DIRTY_info)
ARRAY_CLONE(stg_freezzeArrayzh, stg_MUT_ARR_PTRS_FROZEN0_info)
ARRAY_CLONE(stg_thawArrayzh, stg_MUT_ARR_PTRS_DIRTY_info)
/* -----------------------------------------------------------------------------
MutVar primitives
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment