Commit f44333ea authored by Carter Schonwald's avatar Carter Schonwald Committed by Austin Seipp

Changing prefetch primops to have a `seq`-like interface

Summary:
The current primops for prefetching do not properly work in pure code;
namely, the primops are not 'hoisted' into the correct call sites based
on when arguments are evaluated. Instead, they should use a `seq`-like
interface, which will cause it to be evaluated when the needed term is.

See #9353 for the full discussion.

Test Plan: updated tests for pure prefetch in T8256 to reflect the design changes in #9353

Reviewers: simonmar, hvr, ekmett, austin

Reviewed By: ekmett, austin

Subscribers: merijn, thomie, carter, simonmar

Differential Revision: https://phabricator.haskell.org/D350

GHC Trac Issues: #9353
parent 8afdf274
......@@ -735,21 +735,25 @@ emitPrimOp dflags res (VecWriteScalarOffAddrOp vcat n w) args = do
ty = vecCmmCat vcat w
-- Prefetch
emitPrimOp _ res PrefetchByteArrayOp3 args = doPrefetchByteArrayOp 3 res args
emitPrimOp _ res PrefetchMutableByteArrayOp3 args = doPrefetchByteArrayOp 3 res args
emitPrimOp _ res PrefetchAddrOp3 args = doPrefetchAddrOp 3 res args
emitPrimOp _ res PrefetchByteArrayOp2 args = doPrefetchByteArrayOp 2 res args
emitPrimOp _ res PrefetchMutableByteArrayOp2 args = doPrefetchByteArrayOp 2 res args
emitPrimOp _ res PrefetchAddrOp2 args = doPrefetchAddrOp 2 res args
emitPrimOp _ res PrefetchByteArrayOp1 args = doPrefetchByteArrayOp 1 res args
emitPrimOp _ res PrefetchMutableByteArrayOp1 args = doPrefetchByteArrayOp 1 res args
emitPrimOp _ res PrefetchAddrOp1 args = doPrefetchAddrOp 1 res args
emitPrimOp _ res PrefetchByteArrayOp0 args = doPrefetchByteArrayOp 0 res args
emitPrimOp _ res PrefetchMutableByteArrayOp0 args = doPrefetchByteArrayOp 0 res args
emitPrimOp _ res PrefetchAddrOp0 args = doPrefetchAddrOp 0 res args
emitPrimOp _ [] PrefetchByteArrayOp3 args = doPrefetchByteArrayOp 3 args
emitPrimOp _ [] PrefetchMutableByteArrayOp3 args = doPrefetchMutableByteArrayOp 3 args
emitPrimOp _ [] PrefetchAddrOp3 args = doPrefetchAddrOp 3 args
emitPrimOp _ [] PrefetchValueOp3 args = doPrefetchValueOp 3 args
emitPrimOp _ [] PrefetchByteArrayOp2 args = doPrefetchByteArrayOp 2 args
emitPrimOp _ [] PrefetchMutableByteArrayOp2 args = doPrefetchMutableByteArrayOp 2 args
emitPrimOp _ [] PrefetchAddrOp2 args = doPrefetchAddrOp 2 args
emitPrimOp _ [] PrefetchValueOp2 args = doPrefetchValueOp 2 args
emitPrimOp _ [] PrefetchByteArrayOp1 args = doPrefetchByteArrayOp 1 args
emitPrimOp _ [] PrefetchMutableByteArrayOp1 args = doPrefetchMutableByteArrayOp 1 args
emitPrimOp _ [] PrefetchAddrOp1 args = doPrefetchAddrOp 1 args
emitPrimOp _ [] PrefetchValueOp1 args = doPrefetchValueOp 1 args
emitPrimOp _ [] PrefetchByteArrayOp0 args = doPrefetchByteArrayOp 0 args
emitPrimOp _ [] PrefetchMutableByteArrayOp0 args = doPrefetchMutableByteArrayOp 0 args
emitPrimOp _ [] PrefetchAddrOp0 args = doPrefetchAddrOp 0 args
emitPrimOp _ [] PrefetchValueOp0 args = doPrefetchValueOp 0 args
-- Atomic read-modify-write
emitPrimOp dflags [res] FetchAddByteArrayOp_Int [mba, ix, n] =
......@@ -1549,38 +1553,56 @@ doVecInsertOp maybe_pre_write_cast ty src e idx res = do
------------------------------------------------------------------------------
-- Helpers for translating prefetching.
-- | Translate byte array prefetch operations into proper primcalls.
doPrefetchByteArrayOp :: Int
-> [LocalReg]
-> [CmmExpr]
-> FCode ()
doPrefetchByteArrayOp locality res [addr,idx]
doPrefetchByteArrayOp locality [addr,idx]
= do dflags <- getDynFlags
mkBasicPrefetch locality (arrWordsHdrSize dflags) addr idx
doPrefetchByteArrayOp _ _
= panic "StgCmmPrim: doPrefetchByteArrayOp"
-- | Translate mutable byte array prefetch operations into proper primcalls.
doPrefetchMutableByteArrayOp :: Int
-> [CmmExpr]
-> FCode ()
doPrefetchMutableByteArrayOp locality [addr,idx]
= do dflags <- getDynFlags
mkBasicPrefetch locality (arrWordsHdrSize dflags) res addr idx
doPrefetchByteArrayOp _ _ _
mkBasicPrefetch locality (arrWordsHdrSize dflags) addr idx
doPrefetchMutableByteArrayOp _ _
= panic "StgCmmPrim: doPrefetchByteArrayOp"
-- | Translate address prefetch operations into proper primcalls.
doPrefetchAddrOp ::Int
-> [LocalReg]
-> [CmmExpr]
-> FCode ()
doPrefetchAddrOp locality res [addr,idx]
= mkBasicPrefetch locality 0 res addr idx
doPrefetchAddrOp _ _ _
doPrefetchAddrOp locality [addr,idx]
= mkBasicPrefetch locality 0 addr idx
doPrefetchAddrOp _ _
= panic "StgCmmPrim: doPrefetchAddrOp"
-- | Translate value prefetch operations into proper primcalls.
doPrefetchValueOp :: Int
-> [CmmExpr]
-> FCode ()
doPrefetchValueOp locality [addr]
= do dflags <- getDynFlags
mkBasicPrefetch locality 0 addr (CmmLit (CmmInt 0 (wordWidth dflags)))
doPrefetchValueOp _ _
= panic "StgCmmPrim: doPrefetchValueOp"
-- | helper to generate prefetch primcalls
mkBasicPrefetch :: Int -- Locality level 0-3
-> ByteOff -- Initial offset in bytes
-> [LocalReg] -- Destination
-> CmmExpr -- Base address
-> CmmExpr -- Index
-> FCode ()
mkBasicPrefetch locality off res base idx
mkBasicPrefetch locality off base idx
= do dflags <- getDynFlags
emitPrimCall [] (MO_Prefetch_Data locality) [cmmIndexExpr dflags W8 (cmmOffsetB dflags base off) idx]
case res of
[] -> return ()
[reg] -> emitAssign (CmmLocal reg) base
_ -> panic "StgCmmPrim: mkBasicPrefetch"
return ()
-- ----------------------------------------------------------------------------
-- Allocating byte arrays
......
......@@ -2933,22 +2933,23 @@ section "Prefetch"
architectures or vendor hardware. The manual can be found at
http://www.intel.com/content/www/us/en/architecture-and-technology/64-ia-32-architectures-optimization-manual.html .
The {\tt prefetchMutableByteArray} family of operations has the order of operations
The {\tt prefetch*} family of operations has the order of operations
determined by passing around the {\tt State#} token.
For the {\tt prefetchByteArray}
and {\tt prefetchAddr} families of operations, consider the following example:
{\tt let a1 = prefetchByteArray2# a n in ...a1... }
In the above fragement, {\tt a} is the input variable for the prefetch
and {\tt a1 == a} will be true. To ensure that the prefetch is not treated as deadcode,
the body of the let should only use {\tt a1} and NOT {\tt a}. The same principle
applies for uses of prefetch in a loop.
To get a "pure" version of these operations, use {\tt inlinePerformIO} which is quite safe in this context.
It is important to note that while the prefetch operations will never change the
answer to a pure computation, They CAN change the memory locations resident
in a CPU cache and that may change the performance and timing characteristics
of an application. The prefetch operations are marked has_side_effects=True
to reflect that these operations have side effects with respect to the runtime
performance characteristics of the resulting code. Additionally, if the prefetchValue
operations did not have this attribute, GHC does a float out transformation that
results in a let/app violation, at least with the current design.
}
------------------------------------------------------------------------
......@@ -2956,48 +2957,75 @@ section "Prefetch"
---
primop PrefetchByteArrayOp3 "prefetchByteArray3#" GenPrimOp
ByteArray# -> Int# -> ByteArray#
ByteArray# -> Int# -> State# s -> State# s
with has_side_effects = True
primop PrefetchMutableByteArrayOp3 "prefetchMutableByteArray3#" GenPrimOp
MutableByteArray# s -> Int# -> State# s -> State# s
with has_side_effects = True
primop PrefetchAddrOp3 "prefetchAddr3#" GenPrimOp
Addr# -> Int# -> Addr#
Addr# -> Int# -> State# s -> State# s
with has_side_effects = True
primop PrefetchValueOp3 "prefetchValue3#" GenPrimOp
a -> State# s -> State# s
with strictness = { \ _arity -> mkClosedStrictSig [botDmd, topDmd] topRes }
has_side_effects = True
----
primop PrefetchByteArrayOp2 "prefetchByteArray2#" GenPrimOp
ByteArray# -> Int# -> ByteArray#
ByteArray# -> Int# -> State# s -> State# s
with has_side_effects = True
primop PrefetchMutableByteArrayOp2 "prefetchMutableByteArray2#" GenPrimOp
MutableByteArray# s -> Int# -> State# s -> State# s
with has_side_effects = True
primop PrefetchAddrOp2 "prefetchAddr2#" GenPrimOp
Addr# -> Int# -> Addr#
Addr# -> Int# -> State# s -> State# s
with has_side_effects = True
primop PrefetchValueOp2 "prefetchValue2#" GenPrimOp
a -> State# s -> State# s
with strictness = { \ _arity -> mkClosedStrictSig [botDmd, topDmd] topRes }
has_side_effects = True
----
primop PrefetchByteArrayOp1 "prefetchByteArray1#" GenPrimOp
ByteArray# -> Int# -> ByteArray#
ByteArray# -> Int# -> State# s -> State# s
with has_side_effects = True
primop PrefetchMutableByteArrayOp1 "prefetchMutableByteArray1#" GenPrimOp
MutableByteArray# s -> Int# -> State# s -> State# s
with has_side_effects = True
primop PrefetchAddrOp1 "prefetchAddr1#" GenPrimOp
Addr# -> Int# -> Addr#
Addr# -> Int# -> State# s -> State# s
with has_side_effects = True
primop PrefetchValueOp1 "prefetchValue1#" GenPrimOp
a -> State# s -> State# s
with strictness = { \ _arity -> mkClosedStrictSig [botDmd, topDmd] topRes }
has_side_effects = True
----
primop PrefetchByteArrayOp0 "prefetchByteArray0#" GenPrimOp
ByteArray# -> Int# -> ByteArray#
ByteArray# -> Int# -> State# s -> State# s
with has_side_effects = True
primop PrefetchMutableByteArrayOp0 "prefetchMutableByteArray0#" GenPrimOp
MutableByteArray# s -> Int# -> State# s -> State# s
with has_side_effects = True
primop PrefetchAddrOp0 "prefetchAddr0#" GenPrimOp
Addr# -> Int# -> Addr#
Addr# -> Int# -> State# s -> State# s
with has_side_effects = True
primop PrefetchValueOp0 "prefetchValue0#" GenPrimOp
a -> State# s -> State# s
with strictness = { \ _arity -> mkClosedStrictSig [botDmd, topDmd] topRes }
has_side_effects = True
------------------------------------------------------------------------
--- ---
......
{-# LANGUAGE MagicHash, UnboxedTuples #-}
{-# LANGUAGE MagicHash, UnboxedTuples , ScopedTypeVariables #-}
module Main where
import GHC.Prim
import Data.Vector.Storable.Mutable
import GHC.Types
import Foreign
import Foreign.Ptr
import GHC.ST
import Data.Primitive.ByteArray
import Control.Monad.Primitive
sameByteArray :: Control.Monad.Primitive.PrimMonad m => ByteArray -> ByteArray -> m Bool
sameByteArray ar1 ar2 =
do v1 <- unsafeThawByteArray ar1
v2 <- unsafeThawByteArray ar2
return $ sameMutableByteArray v1 v2
pf0 (ByteArray by) = ByteArray ( prefetchByteArray0# by 1#)
import GHC.Ptr
pf1 (ByteArray by) = ByteArray (prefetchByteArray1# by 1#)
wrapFetch :: (a -> State# RealWorld -> State# RealWorld) -> (a -> IO ())
wrapFetch prefetch a = IO (\ s -> (# prefetch a s, ()#))
pf2 (ByteArray by) = ByteArray ( prefetchByteArray2# by 1#)
pf3 (ByteArray by) = ByteArray ( prefetchByteArray3# by 1#)
monoSame v f = sameByteArray v (f v)
main :: IO ()
main = do
mv1 <- newByteArray 17
v1 <- unsafeFreezeByteArray mv1
return ()
t0<- monoSame v1 pf0
t1 <- monoSame v1 pf1
t2 <- monoSame v1 pf2
t3 <- monoSame v1 pf3
if t0 && t1 && t2 && t3 then putStrLn "success" else error "bad prefetch operation! please report"
(ptr :: Ptr Int) <- malloc
wrapFetch (\ (Ptr adr)-> prefetchAddr3# adr 0# ) ptr
wrapFetch prefetchValue1# (1 ::Int)
wrapFetch prefetchValue2# "hiiii"
wrapFetch prefetchValue3# (Just "testing")
wrapFetch prefetchValue0# (error "this shouldn't get evaluated")
-- -- ^^ this is to make sure it doesn't force thunks!
--incontrast,
--wrapFetch prefetchValue0# $! (error "this shouldn't get evaluated")
-- would trigger an exception
putStrLn "success"
......@@ -112,7 +112,7 @@ test('T7361', normal, compile_and_run, [''])
test('T7600', normal, compile_and_run, [''])
test('T8103', only_ways(['normal']), compile_and_run, [''])
test('T7953', reqlib('random'), compile_and_run, [''])
test('T8256', reqlib('vector'), compile_and_run, [''])
test('T8256', normal, compile_and_run, ['-dcore-lint -O1'])
test('T6084',normal, compile_and_run, ['-O2'])
test('CgStaticPointers',
[ when(compiler_lt('ghc', '7.9'), skip) ],
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment