Commit f44333ea authored by Carter Schonwald's avatar Carter Schonwald Committed by Austin Seipp

Changing prefetch primops to have a `seq`-like interface

Summary:
The current primops for prefetching do not properly work in pure code;
namely, the primops are not 'hoisted' into the correct call sites based
on when arguments are evaluated. Instead, they should use a `seq`-like
interface, which will cause it to be evaluated when the needed term is.

See #9353 for the full discussion.

Test Plan: updated tests for pure prefetch in T8256 to reflect the design changes in #9353

Reviewers: simonmar, hvr, ekmett, austin

Reviewed By: ekmett, austin

Subscribers: merijn, thomie, carter, simonmar

Differential Revision: https://phabricator.haskell.org/D350

GHC Trac Issues: #9353
parent 8afdf274
...@@ -735,21 +735,25 @@ emitPrimOp dflags res (VecWriteScalarOffAddrOp vcat n w) args = do ...@@ -735,21 +735,25 @@ emitPrimOp dflags res (VecWriteScalarOffAddrOp vcat n w) args = do
ty = vecCmmCat vcat w ty = vecCmmCat vcat w
-- Prefetch -- Prefetch
emitPrimOp _ res PrefetchByteArrayOp3 args = doPrefetchByteArrayOp 3 res args emitPrimOp _ [] PrefetchByteArrayOp3 args = doPrefetchByteArrayOp 3 args
emitPrimOp _ res PrefetchMutableByteArrayOp3 args = doPrefetchByteArrayOp 3 res args emitPrimOp _ [] PrefetchMutableByteArrayOp3 args = doPrefetchMutableByteArrayOp 3 args
emitPrimOp _ res PrefetchAddrOp3 args = doPrefetchAddrOp 3 res args emitPrimOp _ [] PrefetchAddrOp3 args = doPrefetchAddrOp 3 args
emitPrimOp _ [] PrefetchValueOp3 args = doPrefetchValueOp 3 args
emitPrimOp _ res PrefetchByteArrayOp2 args = doPrefetchByteArrayOp 2 res args
emitPrimOp _ res PrefetchMutableByteArrayOp2 args = doPrefetchByteArrayOp 2 res args emitPrimOp _ [] PrefetchByteArrayOp2 args = doPrefetchByteArrayOp 2 args
emitPrimOp _ res PrefetchAddrOp2 args = doPrefetchAddrOp 2 res args emitPrimOp _ [] PrefetchMutableByteArrayOp2 args = doPrefetchMutableByteArrayOp 2 args
emitPrimOp _ [] PrefetchAddrOp2 args = doPrefetchAddrOp 2 args
emitPrimOp _ res PrefetchByteArrayOp1 args = doPrefetchByteArrayOp 1 res args emitPrimOp _ [] PrefetchValueOp2 args = doPrefetchValueOp 2 args
emitPrimOp _ res PrefetchMutableByteArrayOp1 args = doPrefetchByteArrayOp 1 res args
emitPrimOp _ res PrefetchAddrOp1 args = doPrefetchAddrOp 1 res args emitPrimOp _ [] PrefetchByteArrayOp1 args = doPrefetchByteArrayOp 1 args
emitPrimOp _ [] PrefetchMutableByteArrayOp1 args = doPrefetchMutableByteArrayOp 1 args
emitPrimOp _ res PrefetchByteArrayOp0 args = doPrefetchByteArrayOp 0 res args emitPrimOp _ [] PrefetchAddrOp1 args = doPrefetchAddrOp 1 args
emitPrimOp _ res PrefetchMutableByteArrayOp0 args = doPrefetchByteArrayOp 0 res args emitPrimOp _ [] PrefetchValueOp1 args = doPrefetchValueOp 1 args
emitPrimOp _ res PrefetchAddrOp0 args = doPrefetchAddrOp 0 res args
emitPrimOp _ [] PrefetchByteArrayOp0 args = doPrefetchByteArrayOp 0 args
emitPrimOp _ [] PrefetchMutableByteArrayOp0 args = doPrefetchMutableByteArrayOp 0 args
emitPrimOp _ [] PrefetchAddrOp0 args = doPrefetchAddrOp 0 args
emitPrimOp _ [] PrefetchValueOp0 args = doPrefetchValueOp 0 args
-- Atomic read-modify-write -- Atomic read-modify-write
emitPrimOp dflags [res] FetchAddByteArrayOp_Int [mba, ix, n] = emitPrimOp dflags [res] FetchAddByteArrayOp_Int [mba, ix, n] =
...@@ -1549,38 +1553,56 @@ doVecInsertOp maybe_pre_write_cast ty src e idx res = do ...@@ -1549,38 +1553,56 @@ doVecInsertOp maybe_pre_write_cast ty src e idx res = do
------------------------------------------------------------------------------ ------------------------------------------------------------------------------
-- Helpers for translating prefetching. -- Helpers for translating prefetching.
-- | Translate byte array prefetch operations into proper primcalls.
doPrefetchByteArrayOp :: Int doPrefetchByteArrayOp :: Int
-> [LocalReg]
-> [CmmExpr] -> [CmmExpr]
-> FCode () -> FCode ()
doPrefetchByteArrayOp locality res [addr,idx] doPrefetchByteArrayOp locality [addr,idx]
= do dflags <- getDynFlags
mkBasicPrefetch locality (arrWordsHdrSize dflags) addr idx
doPrefetchByteArrayOp _ _
= panic "StgCmmPrim: doPrefetchByteArrayOp"
-- | Translate mutable byte array prefetch operations into proper primcalls.
doPrefetchMutableByteArrayOp :: Int
-> [CmmExpr]
-> FCode ()
doPrefetchMutableByteArrayOp locality [addr,idx]
= do dflags <- getDynFlags = do dflags <- getDynFlags
mkBasicPrefetch locality (arrWordsHdrSize dflags) res addr idx mkBasicPrefetch locality (arrWordsHdrSize dflags) addr idx
doPrefetchByteArrayOp _ _ _ doPrefetchMutableByteArrayOp _ _
= panic "StgCmmPrim: doPrefetchByteArrayOp" = panic "StgCmmPrim: doPrefetchByteArrayOp"
-- | Translate address prefetch operations into proper primcalls.
doPrefetchAddrOp ::Int doPrefetchAddrOp ::Int
-> [LocalReg]
-> [CmmExpr] -> [CmmExpr]
-> FCode () -> FCode ()
doPrefetchAddrOp locality res [addr,idx] doPrefetchAddrOp locality [addr,idx]
= mkBasicPrefetch locality 0 res addr idx = mkBasicPrefetch locality 0 addr idx
doPrefetchAddrOp _ _ _ doPrefetchAddrOp _ _
= panic "StgCmmPrim: doPrefetchAddrOp" = panic "StgCmmPrim: doPrefetchAddrOp"
-- | Translate value prefetch operations into proper primcalls.
doPrefetchValueOp :: Int
-> [CmmExpr]
-> FCode ()
doPrefetchValueOp locality [addr]
= do dflags <- getDynFlags
mkBasicPrefetch locality 0 addr (CmmLit (CmmInt 0 (wordWidth dflags)))
doPrefetchValueOp _ _
= panic "StgCmmPrim: doPrefetchValueOp"
-- | helper to generate prefetch primcalls
mkBasicPrefetch :: Int -- Locality level 0-3 mkBasicPrefetch :: Int -- Locality level 0-3
-> ByteOff -- Initial offset in bytes -> ByteOff -- Initial offset in bytes
-> [LocalReg] -- Destination
-> CmmExpr -- Base address -> CmmExpr -- Base address
-> CmmExpr -- Index -> CmmExpr -- Index
-> FCode () -> FCode ()
mkBasicPrefetch locality off res base idx mkBasicPrefetch locality off base idx
= do dflags <- getDynFlags = do dflags <- getDynFlags
emitPrimCall [] (MO_Prefetch_Data locality) [cmmIndexExpr dflags W8 (cmmOffsetB dflags base off) idx] emitPrimCall [] (MO_Prefetch_Data locality) [cmmIndexExpr dflags W8 (cmmOffsetB dflags base off) idx]
case res of return ()
[] -> return ()
[reg] -> emitAssign (CmmLocal reg) base
_ -> panic "StgCmmPrim: mkBasicPrefetch"
-- ---------------------------------------------------------------------------- -- ----------------------------------------------------------------------------
-- Allocating byte arrays -- Allocating byte arrays
......
...@@ -2933,22 +2933,23 @@ section "Prefetch" ...@@ -2933,22 +2933,23 @@ section "Prefetch"
architectures or vendor hardware. The manual can be found at architectures or vendor hardware. The manual can be found at
http://www.intel.com/content/www/us/en/architecture-and-technology/64-ia-32-architectures-optimization-manual.html . http://www.intel.com/content/www/us/en/architecture-and-technology/64-ia-32-architectures-optimization-manual.html .
The {\tt prefetchMutableByteArray} family of operations has the order of operations The {\tt prefetch*} family of operations has the order of operations
determined by passing around the {\tt State#} token. determined by passing around the {\tt State#} token.
For the {\tt prefetchByteArray} To get a "pure" version of these operations, use {\tt inlinePerformIO} which is quite safe in this context.
and {\tt prefetchAddr} families of operations, consider the following example:
{\tt let a1 = prefetchByteArray2# a n in ...a1... }
In the above fragement, {\tt a} is the input variable for the prefetch
and {\tt a1 == a} will be true. To ensure that the prefetch is not treated as deadcode,
the body of the let should only use {\tt a1} and NOT {\tt a}. The same principle
applies for uses of prefetch in a loop.
It is important to note that while the prefetch operations will never change the
answer to a pure computation, They CAN change the memory locations resident
in a CPU cache and that may change the performance and timing characteristics
of an application. The prefetch operations are marked has_side_effects=True
to reflect that these operations have side effects with respect to the runtime
performance characteristics of the resulting code. Additionally, if the prefetchValue
operations did not have this attribute, GHC does a float out transformation that
results in a let/app violation, at least with the current design.
} }
------------------------------------------------------------------------ ------------------------------------------------------------------------
...@@ -2956,48 +2957,75 @@ section "Prefetch" ...@@ -2956,48 +2957,75 @@ section "Prefetch"
--- ---
primop PrefetchByteArrayOp3 "prefetchByteArray3#" GenPrimOp primop PrefetchByteArrayOp3 "prefetchByteArray3#" GenPrimOp
ByteArray# -> Int# -> ByteArray# ByteArray# -> Int# -> State# s -> State# s
with has_side_effects = True
primop PrefetchMutableByteArrayOp3 "prefetchMutableByteArray3#" GenPrimOp primop PrefetchMutableByteArrayOp3 "prefetchMutableByteArray3#" GenPrimOp
MutableByteArray# s -> Int# -> State# s -> State# s MutableByteArray# s -> Int# -> State# s -> State# s
with has_side_effects = True
primop PrefetchAddrOp3 "prefetchAddr3#" GenPrimOp primop PrefetchAddrOp3 "prefetchAddr3#" GenPrimOp
Addr# -> Int# -> Addr# Addr# -> Int# -> State# s -> State# s
with has_side_effects = True
primop PrefetchValueOp3 "prefetchValue3#" GenPrimOp
a -> State# s -> State# s
with strictness = { \ _arity -> mkClosedStrictSig [botDmd, topDmd] topRes }
has_side_effects = True
---- ----
primop PrefetchByteArrayOp2 "prefetchByteArray2#" GenPrimOp primop PrefetchByteArrayOp2 "prefetchByteArray2#" GenPrimOp
ByteArray# -> Int# -> ByteArray# ByteArray# -> Int# -> State# s -> State# s
with has_side_effects = True
primop PrefetchMutableByteArrayOp2 "prefetchMutableByteArray2#" GenPrimOp primop PrefetchMutableByteArrayOp2 "prefetchMutableByteArray2#" GenPrimOp
MutableByteArray# s -> Int# -> State# s -> State# s MutableByteArray# s -> Int# -> State# s -> State# s
with has_side_effects = True
primop PrefetchAddrOp2 "prefetchAddr2#" GenPrimOp primop PrefetchAddrOp2 "prefetchAddr2#" GenPrimOp
Addr# -> Int# -> Addr# Addr# -> Int# -> State# s -> State# s
with has_side_effects = True
primop PrefetchValueOp2 "prefetchValue2#" GenPrimOp
a -> State# s -> State# s
with strictness = { \ _arity -> mkClosedStrictSig [botDmd, topDmd] topRes }
has_side_effects = True
---- ----
primop PrefetchByteArrayOp1 "prefetchByteArray1#" GenPrimOp primop PrefetchByteArrayOp1 "prefetchByteArray1#" GenPrimOp
ByteArray# -> Int# -> ByteArray# ByteArray# -> Int# -> State# s -> State# s
with has_side_effects = True
primop PrefetchMutableByteArrayOp1 "prefetchMutableByteArray1#" GenPrimOp primop PrefetchMutableByteArrayOp1 "prefetchMutableByteArray1#" GenPrimOp
MutableByteArray# s -> Int# -> State# s -> State# s MutableByteArray# s -> Int# -> State# s -> State# s
with has_side_effects = True
primop PrefetchAddrOp1 "prefetchAddr1#" GenPrimOp primop PrefetchAddrOp1 "prefetchAddr1#" GenPrimOp
Addr# -> Int# -> Addr# Addr# -> Int# -> State# s -> State# s
with has_side_effects = True
primop PrefetchValueOp1 "prefetchValue1#" GenPrimOp
a -> State# s -> State# s
with strictness = { \ _arity -> mkClosedStrictSig [botDmd, topDmd] topRes }
has_side_effects = True
---- ----
primop PrefetchByteArrayOp0 "prefetchByteArray0#" GenPrimOp primop PrefetchByteArrayOp0 "prefetchByteArray0#" GenPrimOp
ByteArray# -> Int# -> ByteArray# ByteArray# -> Int# -> State# s -> State# s
with has_side_effects = True
primop PrefetchMutableByteArrayOp0 "prefetchMutableByteArray0#" GenPrimOp primop PrefetchMutableByteArrayOp0 "prefetchMutableByteArray0#" GenPrimOp
MutableByteArray# s -> Int# -> State# s -> State# s MutableByteArray# s -> Int# -> State# s -> State# s
with has_side_effects = True
primop PrefetchAddrOp0 "prefetchAddr0#" GenPrimOp primop PrefetchAddrOp0 "prefetchAddr0#" GenPrimOp
Addr# -> Int# -> Addr# Addr# -> Int# -> State# s -> State# s
with has_side_effects = True
primop PrefetchValueOp0 "prefetchValue0#" GenPrimOp
a -> State# s -> State# s
with strictness = { \ _arity -> mkClosedStrictSig [botDmd, topDmd] topRes }
has_side_effects = True
------------------------------------------------------------------------ ------------------------------------------------------------------------
--- --- --- ---
......
{-# LANGUAGE MagicHash, UnboxedTuples #-} {-# LANGUAGE MagicHash, UnboxedTuples , ScopedTypeVariables #-}
module Main where
import GHC.Prim import GHC.Prim
import GHC.Types
import Data.Vector.Storable.Mutable import Foreign
import Foreign.Ptr import Foreign.Ptr
import GHC.ST import GHC.Ptr
import Data.Primitive.ByteArray
import Control.Monad.Primitive
sameByteArray :: Control.Monad.Primitive.PrimMonad m => ByteArray -> ByteArray -> m Bool
sameByteArray ar1 ar2 =
do v1 <- unsafeThawByteArray ar1
v2 <- unsafeThawByteArray ar2
return $ sameMutableByteArray v1 v2
pf0 (ByteArray by) = ByteArray ( prefetchByteArray0# by 1#)
pf1 (ByteArray by) = ByteArray (prefetchByteArray1# by 1#) wrapFetch :: (a -> State# RealWorld -> State# RealWorld) -> (a -> IO ())
wrapFetch prefetch a = IO (\ s -> (# prefetch a s, ()#))
pf2 (ByteArray by) = ByteArray ( prefetchByteArray2# by 1#)
pf3 (ByteArray by) = ByteArray ( prefetchByteArray3# by 1#)
monoSame v f = sameByteArray v (f v)
main :: IO () main :: IO ()
main = do main = do
mv1 <- newByteArray 17 (ptr :: Ptr Int) <- malloc
v1 <- unsafeFreezeByteArray mv1 wrapFetch (\ (Ptr adr)-> prefetchAddr3# adr 0# ) ptr
return () wrapFetch prefetchValue1# (1 ::Int)
t0<- monoSame v1 pf0 wrapFetch prefetchValue2# "hiiii"
t1 <- monoSame v1 pf1 wrapFetch prefetchValue3# (Just "testing")
t2 <- monoSame v1 pf2 wrapFetch prefetchValue0# (error "this shouldn't get evaluated")
t3 <- monoSame v1 pf3 -- -- ^^ this is to make sure it doesn't force thunks!
if t0 && t1 && t2 && t3 then putStrLn "success" else error "bad prefetch operation! please report" --incontrast,
--wrapFetch prefetchValue0# $! (error "this shouldn't get evaluated")
-- would trigger an exception
putStrLn "success"
...@@ -112,7 +112,7 @@ test('T7361', normal, compile_and_run, ['']) ...@@ -112,7 +112,7 @@ test('T7361', normal, compile_and_run, [''])
test('T7600', normal, compile_and_run, ['']) test('T7600', normal, compile_and_run, [''])
test('T8103', only_ways(['normal']), compile_and_run, ['']) test('T8103', only_ways(['normal']), compile_and_run, [''])
test('T7953', reqlib('random'), compile_and_run, ['']) test('T7953', reqlib('random'), compile_and_run, [''])
test('T8256', reqlib('vector'), compile_and_run, ['']) test('T8256', normal, compile_and_run, ['-dcore-lint -O1'])
test('T6084',normal, compile_and_run, ['-O2']) test('T6084',normal, compile_and_run, ['-O2'])
test('CgStaticPointers', test('CgStaticPointers',
[ when(compiler_lt('ghc', '7.9'), skip) ], [ when(compiler_lt('ghc', '7.9'), skip) ],
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment