diff --git a/compiler/codeGen/StgCmmPrim.hs b/compiler/codeGen/StgCmmPrim.hs index a86caf1a9d94f96b36363a32c22f4c8da0eddb27..e208318e17a084cf14f0541e5d609c2bc5b15ece 100644 --- a/compiler/codeGen/StgCmmPrim.hs +++ b/compiler/codeGen/StgCmmPrim.hs @@ -735,21 +735,25 @@ emitPrimOp dflags res (VecWriteScalarOffAddrOp vcat n w) args = do ty = vecCmmCat vcat w -- Prefetch -emitPrimOp _ res PrefetchByteArrayOp3 args = doPrefetchByteArrayOp 3 res args -emitPrimOp _ res PrefetchMutableByteArrayOp3 args = doPrefetchByteArrayOp 3 res args -emitPrimOp _ res PrefetchAddrOp3 args = doPrefetchAddrOp 3 res args - -emitPrimOp _ res PrefetchByteArrayOp2 args = doPrefetchByteArrayOp 2 res args -emitPrimOp _ res PrefetchMutableByteArrayOp2 args = doPrefetchByteArrayOp 2 res args -emitPrimOp _ res PrefetchAddrOp2 args = doPrefetchAddrOp 2 res args - -emitPrimOp _ res PrefetchByteArrayOp1 args = doPrefetchByteArrayOp 1 res args -emitPrimOp _ res PrefetchMutableByteArrayOp1 args = doPrefetchByteArrayOp 1 res args -emitPrimOp _ res PrefetchAddrOp1 args = doPrefetchAddrOp 1 res args - -emitPrimOp _ res PrefetchByteArrayOp0 args = doPrefetchByteArrayOp 0 res args -emitPrimOp _ res PrefetchMutableByteArrayOp0 args = doPrefetchByteArrayOp 0 res args -emitPrimOp _ res PrefetchAddrOp0 args = doPrefetchAddrOp 0 res args +emitPrimOp _ [] PrefetchByteArrayOp3 args = doPrefetchByteArrayOp 3 args +emitPrimOp _ [] PrefetchMutableByteArrayOp3 args = doPrefetchMutableByteArrayOp 3 args +emitPrimOp _ [] PrefetchAddrOp3 args = doPrefetchAddrOp 3 args +emitPrimOp _ [] PrefetchValueOp3 args = doPrefetchValueOp 3 args + +emitPrimOp _ [] PrefetchByteArrayOp2 args = doPrefetchByteArrayOp 2 args +emitPrimOp _ [] PrefetchMutableByteArrayOp2 args = doPrefetchMutableByteArrayOp 2 args +emitPrimOp _ [] PrefetchAddrOp2 args = doPrefetchAddrOp 2 args +emitPrimOp _ [] PrefetchValueOp2 args = doPrefetchValueOp 2 args + +emitPrimOp _ [] PrefetchByteArrayOp1 args = doPrefetchByteArrayOp 1 args +emitPrimOp _ [] PrefetchMutableByteArrayOp1 args = doPrefetchMutableByteArrayOp 1 args +emitPrimOp _ [] PrefetchAddrOp1 args = doPrefetchAddrOp 1 args +emitPrimOp _ [] PrefetchValueOp1 args = doPrefetchValueOp 1 args + +emitPrimOp _ [] PrefetchByteArrayOp0 args = doPrefetchByteArrayOp 0 args +emitPrimOp _ [] PrefetchMutableByteArrayOp0 args = doPrefetchMutableByteArrayOp 0 args +emitPrimOp _ [] PrefetchAddrOp0 args = doPrefetchAddrOp 0 args +emitPrimOp _ [] PrefetchValueOp0 args = doPrefetchValueOp 0 args -- Atomic read-modify-write emitPrimOp dflags [res] FetchAddByteArrayOp_Int [mba, ix, n] = @@ -1549,38 +1553,56 @@ doVecInsertOp maybe_pre_write_cast ty src e idx res = do ------------------------------------------------------------------------------ -- Helpers for translating prefetching. + +-- | Translate byte array prefetch operations into proper primcalls. doPrefetchByteArrayOp :: Int - -> [LocalReg] -> [CmmExpr] -> FCode () -doPrefetchByteArrayOp locality res [addr,idx] +doPrefetchByteArrayOp locality [addr,idx] + = do dflags <- getDynFlags + mkBasicPrefetch locality (arrWordsHdrSize dflags) addr idx +doPrefetchByteArrayOp _ _ + = panic "StgCmmPrim: doPrefetchByteArrayOp" + +-- | Translate mutable byte array prefetch operations into proper primcalls. +doPrefetchMutableByteArrayOp :: Int + -> [CmmExpr] + -> FCode () +doPrefetchMutableByteArrayOp locality [addr,idx] = do dflags <- getDynFlags - mkBasicPrefetch locality (arrWordsHdrSize dflags) res addr idx -doPrefetchByteArrayOp _ _ _ + mkBasicPrefetch locality (arrWordsHdrSize dflags) addr idx +doPrefetchMutableByteArrayOp _ _ = panic "StgCmmPrim: doPrefetchByteArrayOp" +-- | Translate address prefetch operations into proper primcalls. doPrefetchAddrOp ::Int - -> [LocalReg] -> [CmmExpr] -> FCode () -doPrefetchAddrOp locality res [addr,idx] - = mkBasicPrefetch locality 0 res addr idx -doPrefetchAddrOp _ _ _ +doPrefetchAddrOp locality [addr,idx] + = mkBasicPrefetch locality 0 addr idx +doPrefetchAddrOp _ _ = panic "StgCmmPrim: doPrefetchAddrOp" +-- | Translate value prefetch operations into proper primcalls. +doPrefetchValueOp :: Int + -> [CmmExpr] + -> FCode () +doPrefetchValueOp locality [addr] + = do dflags <- getDynFlags + mkBasicPrefetch locality 0 addr (CmmLit (CmmInt 0 (wordWidth dflags))) +doPrefetchValueOp _ _ + = panic "StgCmmPrim: doPrefetchValueOp" + +-- | helper to generate prefetch primcalls mkBasicPrefetch :: Int -- Locality level 0-3 -> ByteOff -- Initial offset in bytes - -> [LocalReg] -- Destination -> CmmExpr -- Base address -> CmmExpr -- Index -> FCode () -mkBasicPrefetch locality off res base idx +mkBasicPrefetch locality off base idx = do dflags <- getDynFlags emitPrimCall [] (MO_Prefetch_Data locality) [cmmIndexExpr dflags W8 (cmmOffsetB dflags base off) idx] - case res of - [] -> return () - [reg] -> emitAssign (CmmLocal reg) base - _ -> panic "StgCmmPrim: mkBasicPrefetch" + return () -- ---------------------------------------------------------------------------- -- Allocating byte arrays diff --git a/compiler/prelude/primops.txt.pp b/compiler/prelude/primops.txt.pp index a3c15a9c460c927f7016749f5937e56e7e86e18f..909b17b7f15bbf0d5ac7e56284718fe37e94041c 100644 --- a/compiler/prelude/primops.txt.pp +++ b/compiler/prelude/primops.txt.pp @@ -2933,22 +2933,23 @@ section "Prefetch" architectures or vendor hardware. The manual can be found at http://www.intel.com/content/www/us/en/architecture-and-technology/64-ia-32-architectures-optimization-manual.html . - The {\tt prefetchMutableByteArray} family of operations has the order of operations + The {\tt prefetch*} family of operations has the order of operations determined by passing around the {\tt State#} token. - For the {\tt prefetchByteArray} - and {\tt prefetchAddr} families of operations, consider the following example: - - {\tt let a1 = prefetchByteArray2# a n in ...a1... } - - In the above fragement, {\tt a} is the input variable for the prefetch - and {\tt a1 == a} will be true. To ensure that the prefetch is not treated as deadcode, - the body of the let should only use {\tt a1} and NOT {\tt a}. The same principle - applies for uses of prefetch in a loop. + To get a "pure" version of these operations, use {\tt inlinePerformIO} which is quite safe in this context. + It is important to note that while the prefetch operations will never change the + answer to a pure computation, They CAN change the memory locations resident + in a CPU cache and that may change the performance and timing characteristics + of an application. The prefetch operations are marked has_side_effects=True + to reflect that these operations have side effects with respect to the runtime + performance characteristics of the resulting code. Additionally, if the prefetchValue + operations did not have this attribute, GHC does a float out transformation that + results in a let/app violation, at least with the current design. } + ------------------------------------------------------------------------ @@ -2956,48 +2957,75 @@ section "Prefetch" --- primop PrefetchByteArrayOp3 "prefetchByteArray3#" GenPrimOp - ByteArray# -> Int# -> ByteArray# + ByteArray# -> Int# -> State# s -> State# s + with has_side_effects = True primop PrefetchMutableByteArrayOp3 "prefetchMutableByteArray3#" GenPrimOp - MutableByteArray# s -> Int# -> State# s -> State# s + MutableByteArray# s -> Int# -> State# s -> State# s + with has_side_effects = True primop PrefetchAddrOp3 "prefetchAddr3#" GenPrimOp - Addr# -> Int# -> Addr# + Addr# -> Int# -> State# s -> State# s + with has_side_effects = True +primop PrefetchValueOp3 "prefetchValue3#" GenPrimOp + a -> State# s -> State# s + with strictness = { \ _arity -> mkClosedStrictSig [botDmd, topDmd] topRes } + has_side_effects = True ---- primop PrefetchByteArrayOp2 "prefetchByteArray2#" GenPrimOp - ByteArray# -> Int# -> ByteArray# + ByteArray# -> Int# -> State# s -> State# s + with has_side_effects = True primop PrefetchMutableByteArrayOp2 "prefetchMutableByteArray2#" GenPrimOp - MutableByteArray# s -> Int# -> State# s -> State# s + MutableByteArray# s -> Int# -> State# s -> State# s + with has_side_effects = True primop PrefetchAddrOp2 "prefetchAddr2#" GenPrimOp - Addr# -> Int# -> Addr# + Addr# -> Int# -> State# s -> State# s + with has_side_effects = True +primop PrefetchValueOp2 "prefetchValue2#" GenPrimOp + a -> State# s -> State# s + with strictness = { \ _arity -> mkClosedStrictSig [botDmd, topDmd] topRes } + has_side_effects = True ---- primop PrefetchByteArrayOp1 "prefetchByteArray1#" GenPrimOp - ByteArray# -> Int# -> ByteArray# + ByteArray# -> Int# -> State# s -> State# s + with has_side_effects = True primop PrefetchMutableByteArrayOp1 "prefetchMutableByteArray1#" GenPrimOp - MutableByteArray# s -> Int# -> State# s -> State# s + MutableByteArray# s -> Int# -> State# s -> State# s + with has_side_effects = True primop PrefetchAddrOp1 "prefetchAddr1#" GenPrimOp - Addr# -> Int# -> Addr# + Addr# -> Int# -> State# s -> State# s + with has_side_effects = True +primop PrefetchValueOp1 "prefetchValue1#" GenPrimOp + a -> State# s -> State# s + with strictness = { \ _arity -> mkClosedStrictSig [botDmd, topDmd] topRes } + has_side_effects = True ---- primop PrefetchByteArrayOp0 "prefetchByteArray0#" GenPrimOp - ByteArray# -> Int# -> ByteArray# + ByteArray# -> Int# -> State# s -> State# s + with has_side_effects = True primop PrefetchMutableByteArrayOp0 "prefetchMutableByteArray0#" GenPrimOp - MutableByteArray# s -> Int# -> State# s -> State# s + MutableByteArray# s -> Int# -> State# s -> State# s + with has_side_effects = True primop PrefetchAddrOp0 "prefetchAddr0#" GenPrimOp - Addr# -> Int# -> Addr# - + Addr# -> Int# -> State# s -> State# s + with has_side_effects = True +primop PrefetchValueOp0 "prefetchValue0#" GenPrimOp + a -> State# s -> State# s + with strictness = { \ _arity -> mkClosedStrictSig [botDmd, topDmd] topRes } + has_side_effects = True ------------------------------------------------------------------------ --- --- diff --git a/testsuite/tests/codeGen/should_run/T8256.hs b/testsuite/tests/codeGen/should_run/T8256.hs index 7f8314c8df343d484aec59ce8679ffa133798037..d9dbd25b9cfdb87bfb0e2b8036fa9315eea747ee 100644 --- a/testsuite/tests/codeGen/should_run/T8256.hs +++ b/testsuite/tests/codeGen/should_run/T8256.hs @@ -1,48 +1,33 @@ -{-# LANGUAGE MagicHash, UnboxedTuples #-} +{-# LANGUAGE MagicHash, UnboxedTuples , ScopedTypeVariables #-} +module Main where import GHC.Prim - -import Data.Vector.Storable.Mutable +import GHC.Types +import Foreign import Foreign.Ptr -import GHC.ST -import Data.Primitive.ByteArray -import Control.Monad.Primitive - - - -sameByteArray :: Control.Monad.Primitive.PrimMonad m => ByteArray -> ByteArray -> m Bool -sameByteArray ar1 ar2 = - do v1 <- unsafeThawByteArray ar1 - v2 <- unsafeThawByteArray ar2 - return $ sameMutableByteArray v1 v2 - -pf0 (ByteArray by) = ByteArray ( prefetchByteArray0# by 1#) +import GHC.Ptr -pf1 (ByteArray by) = ByteArray (prefetchByteArray1# by 1#) +wrapFetch :: (a -> State# RealWorld -> State# RealWorld) -> (a -> IO ()) +wrapFetch prefetch a = IO (\ s -> (# prefetch a s, ()#)) -pf2 (ByteArray by) = ByteArray ( prefetchByteArray2# by 1#) -pf3 (ByteArray by) = ByteArray ( prefetchByteArray3# by 1#) -monoSame v f = sameByteArray v (f v) - main :: IO () -main = do - mv1 <- newByteArray 17 - v1 <- unsafeFreezeByteArray mv1 - return () - t0<- monoSame v1 pf0 - t1 <- monoSame v1 pf1 - t2 <- monoSame v1 pf2 - t3 <- monoSame v1 pf3 - if t0 && t1 && t2 && t3 then putStrLn "success" else error "bad prefetch operation! please report" - - - - +main = do + (ptr :: Ptr Int) <- malloc + wrapFetch (\ (Ptr adr)-> prefetchAddr3# adr 0# ) ptr + wrapFetch prefetchValue1# (1 ::Int) + wrapFetch prefetchValue2# "hiiii" + wrapFetch prefetchValue3# (Just "testing") + wrapFetch prefetchValue0# (error "this shouldn't get evaluated") + -- -- ^^ this is to make sure it doesn't force thunks! + --incontrast, + --wrapFetch prefetchValue0# $! (error "this shouldn't get evaluated") + -- would trigger an exception + putStrLn "success" diff --git a/testsuite/tests/codeGen/should_run/all.T b/testsuite/tests/codeGen/should_run/all.T index 89f62781eb5a1b982dfd549bff66d567a80c6772..d193834c6ba68c42d11c2d5041d46ea120639ad2 100644 --- a/testsuite/tests/codeGen/should_run/all.T +++ b/testsuite/tests/codeGen/should_run/all.T @@ -112,7 +112,7 @@ test('T7361', normal, compile_and_run, ['']) test('T7600', normal, compile_and_run, ['']) test('T8103', only_ways(['normal']), compile_and_run, ['']) test('T7953', reqlib('random'), compile_and_run, ['']) -test('T8256', reqlib('vector'), compile_and_run, ['']) +test('T8256', normal, compile_and_run, ['-dcore-lint -O1']) test('T6084',normal, compile_and_run, ['-O2']) test('CgStaticPointers', [ when(compiler_lt('ghc', '7.9'), skip) ],