diff --git a/compiler/codeGen/StgCmmPrim.hs b/compiler/codeGen/StgCmmPrim.hs
index a86caf1a9d94f96b36363a32c22f4c8da0eddb27..e208318e17a084cf14f0541e5d609c2bc5b15ece 100644
--- a/compiler/codeGen/StgCmmPrim.hs
+++ b/compiler/codeGen/StgCmmPrim.hs
@@ -735,21 +735,25 @@ emitPrimOp dflags res (VecWriteScalarOffAddrOp vcat n w) args = do
ty = vecCmmCat vcat w
-- Prefetch
-emitPrimOp _ res PrefetchByteArrayOp3 args = doPrefetchByteArrayOp 3 res args
-emitPrimOp _ res PrefetchMutableByteArrayOp3 args = doPrefetchByteArrayOp 3 res args
-emitPrimOp _ res PrefetchAddrOp3 args = doPrefetchAddrOp 3 res args
-
-emitPrimOp _ res PrefetchByteArrayOp2 args = doPrefetchByteArrayOp 2 res args
-emitPrimOp _ res PrefetchMutableByteArrayOp2 args = doPrefetchByteArrayOp 2 res args
-emitPrimOp _ res PrefetchAddrOp2 args = doPrefetchAddrOp 2 res args
-
-emitPrimOp _ res PrefetchByteArrayOp1 args = doPrefetchByteArrayOp 1 res args
-emitPrimOp _ res PrefetchMutableByteArrayOp1 args = doPrefetchByteArrayOp 1 res args
-emitPrimOp _ res PrefetchAddrOp1 args = doPrefetchAddrOp 1 res args
-
-emitPrimOp _ res PrefetchByteArrayOp0 args = doPrefetchByteArrayOp 0 res args
-emitPrimOp _ res PrefetchMutableByteArrayOp0 args = doPrefetchByteArrayOp 0 res args
-emitPrimOp _ res PrefetchAddrOp0 args = doPrefetchAddrOp 0 res args
+emitPrimOp _ [] PrefetchByteArrayOp3 args = doPrefetchByteArrayOp 3 args
+emitPrimOp _ [] PrefetchMutableByteArrayOp3 args = doPrefetchMutableByteArrayOp 3 args
+emitPrimOp _ [] PrefetchAddrOp3 args = doPrefetchAddrOp 3 args
+emitPrimOp _ [] PrefetchValueOp3 args = doPrefetchValueOp 3 args
+
+emitPrimOp _ [] PrefetchByteArrayOp2 args = doPrefetchByteArrayOp 2 args
+emitPrimOp _ [] PrefetchMutableByteArrayOp2 args = doPrefetchMutableByteArrayOp 2 args
+emitPrimOp _ [] PrefetchAddrOp2 args = doPrefetchAddrOp 2 args
+emitPrimOp _ [] PrefetchValueOp2 args = doPrefetchValueOp 2 args
+
+emitPrimOp _ [] PrefetchByteArrayOp1 args = doPrefetchByteArrayOp 1 args
+emitPrimOp _ [] PrefetchMutableByteArrayOp1 args = doPrefetchMutableByteArrayOp 1 args
+emitPrimOp _ [] PrefetchAddrOp1 args = doPrefetchAddrOp 1 args
+emitPrimOp _ [] PrefetchValueOp1 args = doPrefetchValueOp 1 args
+
+emitPrimOp _ [] PrefetchByteArrayOp0 args = doPrefetchByteArrayOp 0 args
+emitPrimOp _ [] PrefetchMutableByteArrayOp0 args = doPrefetchMutableByteArrayOp 0 args
+emitPrimOp _ [] PrefetchAddrOp0 args = doPrefetchAddrOp 0 args
+emitPrimOp _ [] PrefetchValueOp0 args = doPrefetchValueOp 0 args
-- Atomic read-modify-write
emitPrimOp dflags [res] FetchAddByteArrayOp_Int [mba, ix, n] =
@@ -1549,38 +1553,56 @@ doVecInsertOp maybe_pre_write_cast ty src e idx res = do
------------------------------------------------------------------------------
-- Helpers for translating prefetching.
+
+-- | Translate byte array prefetch operations into proper primcalls.
doPrefetchByteArrayOp :: Int
- -> [LocalReg]
-> [CmmExpr]
-> FCode ()
-doPrefetchByteArrayOp locality res [addr,idx]
+doPrefetchByteArrayOp locality [addr,idx]
+ = do dflags <- getDynFlags
+ mkBasicPrefetch locality (arrWordsHdrSize dflags) addr idx
+doPrefetchByteArrayOp _ _
+ = panic "StgCmmPrim: doPrefetchByteArrayOp"
+
+-- | Translate mutable byte array prefetch operations into proper primcalls.
+doPrefetchMutableByteArrayOp :: Int
+ -> [CmmExpr]
+ -> FCode ()
+doPrefetchMutableByteArrayOp locality [addr,idx]
= do dflags <- getDynFlags
- mkBasicPrefetch locality (arrWordsHdrSize dflags) res addr idx
-doPrefetchByteArrayOp _ _ _
+ mkBasicPrefetch locality (arrWordsHdrSize dflags) addr idx
+doPrefetchMutableByteArrayOp _ _
= panic "StgCmmPrim: doPrefetchByteArrayOp"
+-- | Translate address prefetch operations into proper primcalls.
doPrefetchAddrOp ::Int
- -> [LocalReg]
-> [CmmExpr]
-> FCode ()
-doPrefetchAddrOp locality res [addr,idx]
- = mkBasicPrefetch locality 0 res addr idx
-doPrefetchAddrOp _ _ _
+doPrefetchAddrOp locality [addr,idx]
+ = mkBasicPrefetch locality 0 addr idx
+doPrefetchAddrOp _ _
= panic "StgCmmPrim: doPrefetchAddrOp"
+-- | Translate value prefetch operations into proper primcalls.
+doPrefetchValueOp :: Int
+ -> [CmmExpr]
+ -> FCode ()
+doPrefetchValueOp locality [addr]
+ = do dflags <- getDynFlags
+ mkBasicPrefetch locality 0 addr (CmmLit (CmmInt 0 (wordWidth dflags)))
+doPrefetchValueOp _ _
+ = panic "StgCmmPrim: doPrefetchValueOp"
+
+-- | helper to generate prefetch primcalls
mkBasicPrefetch :: Int -- Locality level 0-3
-> ByteOff -- Initial offset in bytes
- -> [LocalReg] -- Destination
-> CmmExpr -- Base address
-> CmmExpr -- Index
-> FCode ()
-mkBasicPrefetch locality off res base idx
+mkBasicPrefetch locality off base idx
= do dflags <- getDynFlags
emitPrimCall [] (MO_Prefetch_Data locality) [cmmIndexExpr dflags W8 (cmmOffsetB dflags base off) idx]
- case res of
- [] -> return ()
- [reg] -> emitAssign (CmmLocal reg) base
- _ -> panic "StgCmmPrim: mkBasicPrefetch"
+ return ()
-- ----------------------------------------------------------------------------
-- Allocating byte arrays
diff --git a/compiler/prelude/primops.txt.pp b/compiler/prelude/primops.txt.pp
index a3c15a9c460c927f7016749f5937e56e7e86e18f..909b17b7f15bbf0d5ac7e56284718fe37e94041c 100644
--- a/compiler/prelude/primops.txt.pp
+++ b/compiler/prelude/primops.txt.pp
@@ -2933,22 +2933,23 @@ section "Prefetch"
architectures or vendor hardware. The manual can be found at
http://www.intel.com/content/www/us/en/architecture-and-technology/64-ia-32-architectures-optimization-manual.html .
- The {\tt prefetchMutableByteArray} family of operations has the order of operations
+ The {\tt prefetch*} family of operations has the order of operations
determined by passing around the {\tt State#} token.
- For the {\tt prefetchByteArray}
- and {\tt prefetchAddr} families of operations, consider the following example:
-
- {\tt let a1 = prefetchByteArray2# a n in ...a1... }
-
- In the above fragement, {\tt a} is the input variable for the prefetch
- and {\tt a1 == a} will be true. To ensure that the prefetch is not treated as deadcode,
- the body of the let should only use {\tt a1} and NOT {\tt a}. The same principle
- applies for uses of prefetch in a loop.
+ To get a "pure" version of these operations, use {\tt inlinePerformIO} which is quite safe in this context.
+ It is important to note that while the prefetch operations will never change the
+ answer to a pure computation, They CAN change the memory locations resident
+ in a CPU cache and that may change the performance and timing characteristics
+ of an application. The prefetch operations are marked has_side_effects=True
+ to reflect that these operations have side effects with respect to the runtime
+ performance characteristics of the resulting code. Additionally, if the prefetchValue
+ operations did not have this attribute, GHC does a float out transformation that
+ results in a let/app violation, at least with the current design.
}
+
------------------------------------------------------------------------
@@ -2956,48 +2957,75 @@ section "Prefetch"
---
primop PrefetchByteArrayOp3 "prefetchByteArray3#" GenPrimOp
- ByteArray# -> Int# -> ByteArray#
+ ByteArray# -> Int# -> State# s -> State# s
+ with has_side_effects = True
primop PrefetchMutableByteArrayOp3 "prefetchMutableByteArray3#" GenPrimOp
- MutableByteArray# s -> Int# -> State# s -> State# s
+ MutableByteArray# s -> Int# -> State# s -> State# s
+ with has_side_effects = True
primop PrefetchAddrOp3 "prefetchAddr3#" GenPrimOp
- Addr# -> Int# -> Addr#
+ Addr# -> Int# -> State# s -> State# s
+ with has_side_effects = True
+primop PrefetchValueOp3 "prefetchValue3#" GenPrimOp
+ a -> State# s -> State# s
+ with strictness = { \ _arity -> mkClosedStrictSig [botDmd, topDmd] topRes }
+ has_side_effects = True
----
primop PrefetchByteArrayOp2 "prefetchByteArray2#" GenPrimOp
- ByteArray# -> Int# -> ByteArray#
+ ByteArray# -> Int# -> State# s -> State# s
+ with has_side_effects = True
primop PrefetchMutableByteArrayOp2 "prefetchMutableByteArray2#" GenPrimOp
- MutableByteArray# s -> Int# -> State# s -> State# s
+ MutableByteArray# s -> Int# -> State# s -> State# s
+ with has_side_effects = True
primop PrefetchAddrOp2 "prefetchAddr2#" GenPrimOp
- Addr# -> Int# -> Addr#
+ Addr# -> Int# -> State# s -> State# s
+ with has_side_effects = True
+primop PrefetchValueOp2 "prefetchValue2#" GenPrimOp
+ a -> State# s -> State# s
+ with strictness = { \ _arity -> mkClosedStrictSig [botDmd, topDmd] topRes }
+ has_side_effects = True
----
primop PrefetchByteArrayOp1 "prefetchByteArray1#" GenPrimOp
- ByteArray# -> Int# -> ByteArray#
+ ByteArray# -> Int# -> State# s -> State# s
+ with has_side_effects = True
primop PrefetchMutableByteArrayOp1 "prefetchMutableByteArray1#" GenPrimOp
- MutableByteArray# s -> Int# -> State# s -> State# s
+ MutableByteArray# s -> Int# -> State# s -> State# s
+ with has_side_effects = True
primop PrefetchAddrOp1 "prefetchAddr1#" GenPrimOp
- Addr# -> Int# -> Addr#
+ Addr# -> Int# -> State# s -> State# s
+ with has_side_effects = True
+primop PrefetchValueOp1 "prefetchValue1#" GenPrimOp
+ a -> State# s -> State# s
+ with strictness = { \ _arity -> mkClosedStrictSig [botDmd, topDmd] topRes }
+ has_side_effects = True
----
primop PrefetchByteArrayOp0 "prefetchByteArray0#" GenPrimOp
- ByteArray# -> Int# -> ByteArray#
+ ByteArray# -> Int# -> State# s -> State# s
+ with has_side_effects = True
primop PrefetchMutableByteArrayOp0 "prefetchMutableByteArray0#" GenPrimOp
- MutableByteArray# s -> Int# -> State# s -> State# s
+ MutableByteArray# s -> Int# -> State# s -> State# s
+ with has_side_effects = True
primop PrefetchAddrOp0 "prefetchAddr0#" GenPrimOp
- Addr# -> Int# -> Addr#
-
+ Addr# -> Int# -> State# s -> State# s
+ with has_side_effects = True
+primop PrefetchValueOp0 "prefetchValue0#" GenPrimOp
+ a -> State# s -> State# s
+ with strictness = { \ _arity -> mkClosedStrictSig [botDmd, topDmd] topRes }
+ has_side_effects = True
------------------------------------------------------------------------
--- ---
diff --git a/testsuite/tests/codeGen/should_run/T8256.hs b/testsuite/tests/codeGen/should_run/T8256.hs
index 7f8314c8df343d484aec59ce8679ffa133798037..d9dbd25b9cfdb87bfb0e2b8036fa9315eea747ee 100644
--- a/testsuite/tests/codeGen/should_run/T8256.hs
+++ b/testsuite/tests/codeGen/should_run/T8256.hs
@@ -1,48 +1,33 @@
-{-# LANGUAGE MagicHash, UnboxedTuples #-}
+{-# LANGUAGE MagicHash, UnboxedTuples , ScopedTypeVariables #-}
+module Main where
import GHC.Prim
-
-import Data.Vector.Storable.Mutable
+import GHC.Types
+import Foreign
import Foreign.Ptr
-import GHC.ST
-import Data.Primitive.ByteArray
-import Control.Monad.Primitive
-
-
-
-sameByteArray :: Control.Monad.Primitive.PrimMonad m => ByteArray -> ByteArray -> m Bool
-sameByteArray ar1 ar2 =
- do v1 <- unsafeThawByteArray ar1
- v2 <- unsafeThawByteArray ar2
- return $ sameMutableByteArray v1 v2
-
-pf0 (ByteArray by) = ByteArray ( prefetchByteArray0# by 1#)
+import GHC.Ptr
-pf1 (ByteArray by) = ByteArray (prefetchByteArray1# by 1#)
+wrapFetch :: (a -> State# RealWorld -> State# RealWorld) -> (a -> IO ())
+wrapFetch prefetch a = IO (\ s -> (# prefetch a s, ()#))
-pf2 (ByteArray by) = ByteArray ( prefetchByteArray2# by 1#)
-pf3 (ByteArray by) = ByteArray ( prefetchByteArray3# by 1#)
-monoSame v f = sameByteArray v (f v)
-
main :: IO ()
-main = do
- mv1 <- newByteArray 17
- v1 <- unsafeFreezeByteArray mv1
- return ()
- t0<- monoSame v1 pf0
- t1 <- monoSame v1 pf1
- t2 <- monoSame v1 pf2
- t3 <- monoSame v1 pf3
- if t0 && t1 && t2 && t3 then putStrLn "success" else error "bad prefetch operation! please report"
-
-
-
-
+main = do
+ (ptr :: Ptr Int) <- malloc
+ wrapFetch (\ (Ptr adr)-> prefetchAddr3# adr 0# ) ptr
+ wrapFetch prefetchValue1# (1 ::Int)
+ wrapFetch prefetchValue2# "hiiii"
+ wrapFetch prefetchValue3# (Just "testing")
+ wrapFetch prefetchValue0# (error "this shouldn't get evaluated")
+ -- -- ^^ this is to make sure it doesn't force thunks!
+ --incontrast,
+ --wrapFetch prefetchValue0# $! (error "this shouldn't get evaluated")
+ -- would trigger an exception
+ putStrLn "success"
diff --git a/testsuite/tests/codeGen/should_run/all.T b/testsuite/tests/codeGen/should_run/all.T
index 89f62781eb5a1b982dfd549bff66d567a80c6772..d193834c6ba68c42d11c2d5041d46ea120639ad2 100644
--- a/testsuite/tests/codeGen/should_run/all.T
+++ b/testsuite/tests/codeGen/should_run/all.T
@@ -112,7 +112,7 @@ test('T7361', normal, compile_and_run, [''])
test('T7600', normal, compile_and_run, [''])
test('T8103', only_ways(['normal']), compile_and_run, [''])
test('T7953', reqlib('random'), compile_and_run, [''])
-test('T8256', reqlib('vector'), compile_and_run, [''])
+test('T8256', normal, compile_and_run, ['-dcore-lint -O1'])
test('T6084',normal, compile_and_run, ['-O2'])
test('CgStaticPointers',
[ when(compiler_lt('ghc', '7.9'), skip) ],