Commit a2f828a3 authored by rwbarton's avatar rwbarton

Be aware of overlapping global STG registers in CmmSink (#10521)

Summary:
On x86_64, commit e2f6bbd3 assigned
the STG registers F1 and D1 the same hardware register (xmm1), and
the same for the registers F2 and D2, etc. When mixing calls to
functions involving Float#s and Double#s, this can cause wrong Cmm
optimizations that assume the F1 and D1 registers are independent.

Reviewers: simonpj, austin

Reviewed By: austin

Subscribers: simonpj, thomie, bgamari

Differential Revision: https://phabricator.haskell.org/D993

GHC Trac Issues: #10521
parent c7b6fb59
......@@ -20,7 +20,6 @@ module CmmExpr
, emptyRegSet, elemRegSet, extendRegSet, deleteFromRegSet, mkRegSet
, plusRegSet, minusRegSet, timesRegSet, sizeRegSet, nullRegSet
, regSetToList
, regUsedIn
, Area(..)
, module CmmMachOp
......@@ -374,17 +373,6 @@ instance DefinerOfRegs r a => DefinerOfRegs r (Maybe a) where
foldRegsDefd _ _ set Nothing = set
foldRegsDefd dflags f set (Just x) = foldRegsDefd dflags f set x
-----------------------------------------------------------------------------
-- Another reg utility
regUsedIn :: CmmReg -> CmmExpr -> Bool
_ `regUsedIn` CmmLit _ = False
reg `regUsedIn` CmmLoad e _ = reg `regUsedIn` e
reg `regUsedIn` CmmReg reg' = reg == reg'
reg `regUsedIn` CmmRegOff reg' _ = reg == reg'
reg `regUsedIn` CmmMachOp _ es = any (reg `regUsedIn`) es
_ `regUsedIn` CmmStackSlot _ _ = False
-----------------------------------------------------------------------------
-- Global STG registers
-----------------------------------------------------------------------------
......@@ -399,6 +387,31 @@ vgcFlag :: CmmType -> VGcPtr
vgcFlag ty | isGcPtrType ty = VGcPtr
| otherwise = VNonGcPtr
{-
Note [Overlapping global registers]
The backend might not faithfully implement the abstraction of the STG
machine with independent registers for different values of type
GlobalReg. Specifically, certain pairs of registers (r1, r2) may
overlap in the sense that a store to r1 invalidates the value in r2,
and vice versa.
Currently this occurs only on the x86_64 architecture where FloatReg n
and DoubleReg n are assigned the same microarchitectural register, in
order to allow functions to receive more Float# or Double# arguments
in registers (as opposed to on the stack).
There are no specific rules about which registers might overlap with
which other registers, but presumably it's safe to assume that nothing
will overlap with special registers like Sp or BaseReg.
Use CmmUtils.regsOverlap to determine whether two GlobalRegs overlap
on a particular platform. The instance Eq GlobalReg is syntactic
equality of STG registers and does not take overlap into
account. However it is still used in UserOfRegs/DefinerOfRegs and
there are likely still bugs there, beware!
-}
data GlobalReg
-- Argument and return registers
= VanillaReg -- pointers, unboxed ints and chars
......
......@@ -510,11 +510,8 @@ okToInline _ _ _ = True
-- -----------------------------------------------------------------------------
-- | @conflicts (r,e) stmt@ is @False@ if and only if the assignment
-- @r = e@ can be safely commuted past @stmt@.
--
-- We only sink "r = G" assignments right now, so conflicts is very simple:
--
-- | @conflicts (r,e) node@ is @False@ if and only if the assignment
-- @r = e@ can be safely commuted past statement @node@.
conflicts :: DynFlags -> Assignment -> CmmNode O x -> Bool
conflicts dflags (r, rhs, addr) node
......@@ -548,13 +545,15 @@ conflicts dflags (r, rhs, addr) node
-- Cmm expression
globalRegistersConflict :: DynFlags -> CmmExpr -> CmmNode e x -> Bool
globalRegistersConflict dflags expr node =
foldRegsDefd dflags (\b r -> b || (CmmGlobal r) `regUsedIn` expr) False node
foldRegsDefd dflags (\b r -> b || regUsedIn dflags (CmmGlobal r) expr)
False node
-- Returns True if node defines any local registers that are used in the
-- Cmm expression
localRegistersConflict :: DynFlags -> CmmExpr -> CmmNode e x -> Bool
localRegistersConflict dflags expr node =
foldRegsDefd dflags (\b r -> b || (CmmLocal r) `regUsedIn` expr) False node
foldRegsDefd dflags (\b r -> b || regUsedIn dflags (CmmLocal r) expr)
False node
-- Note [Sinking and calls]
-- ~~~~~~~~~~~~~~~~~~~~~~~~
......@@ -693,7 +692,7 @@ loadAddr dflags e w =
case e of
CmmReg r -> regAddr dflags r 0 w
CmmRegOff r i -> regAddr dflags r i w
_other | CmmGlobal Sp `regUsedIn` e -> StackMem
_other | regUsedIn dflags (CmmGlobal Sp) e -> StackMem
| otherwise -> AnyMem
regAddr :: DynFlags -> CmmReg -> Int -> Width -> AbsMem
......
......@@ -44,6 +44,9 @@ module CmmUtils(
cmmTagMask, cmmPointerMask, cmmUntag, cmmIsTagged,
cmmConstrTag1,
-- Overlap and usage
regsOverlap, regUsedIn,
-- Liveness and bitmaps
mkLiveness,
......@@ -77,6 +80,7 @@ import Unique
import UniqSupply
import DynFlags
import Util
import CodeGen.Platform
import Data.Word
import Data.Maybe
......@@ -392,6 +396,38 @@ cmmConstrTag1 :: DynFlags -> CmmExpr -> CmmExpr
cmmConstrTag1 dflags e = cmmAndWord dflags e (cmmTagMask dflags)
-----------------------------------------------------------------------------
-- Overlap and usage
-- | Returns True if the two STG registers overlap on the specified
-- platform, in the sense that writing to one will clobber the
-- other. This includes the case that the two registers are the same
-- STG register. See Note [Overlapping global registers] for details.
regsOverlap :: DynFlags -> CmmReg -> CmmReg -> Bool
regsOverlap dflags (CmmGlobal g) (CmmGlobal g')
| Just real <- globalRegMaybe (targetPlatform dflags) g,
Just real' <- globalRegMaybe (targetPlatform dflags) g',
real == real'
= True
regsOverlap _ reg reg' = reg == reg'
-- | Returns True if the STG register is used by the expression, in
-- the sense that a store to the register might affect the value of
-- the expression.
--
-- We must check for overlapping registers and not just equal
-- registers here, otherwise CmmSink may incorrectly reorder
-- assignments that conflict due to overlap. See Trac #10521 and Note
-- [Overlapping global registers].
regUsedIn :: DynFlags -> CmmReg -> CmmExpr -> Bool
regUsedIn dflags = regUsedIn_ where
_ `regUsedIn_` CmmLit _ = False
reg `regUsedIn_` CmmLoad e _ = reg `regUsedIn_` e
reg `regUsedIn_` CmmReg reg' = regsOverlap dflags reg reg'
reg `regUsedIn_` CmmRegOff reg' _ = regsOverlap dflags reg reg'
reg `regUsedIn_` CmmMachOp _ es = any (reg `regUsedIn_`) es
_ `regUsedIn_` CmmStackSlot _ _ = False
--------------------------------------------
--
-- mkLiveness
......
......@@ -393,11 +393,13 @@ type Stmt = (LocalReg, CmmExpr) -- r := e
emitMultiAssign [] [] = return ()
emitMultiAssign [reg] [rhs] = emitAssign (CmmLocal reg) rhs
emitMultiAssign regs rhss = ASSERT( equalLength regs rhss )
unscramble ([1..] `zip` (regs `zip` rhss))
emitMultiAssign regs rhss = do
dflags <- getDynFlags
ASSERT( equalLength regs rhss )
unscramble dflags ([1..] `zip` (regs `zip` rhss))
unscramble :: [Vrtx] -> FCode ()
unscramble vertices = mapM_ do_component components
unscramble :: DynFlags -> [Vrtx] -> FCode ()
unscramble dflags vertices = mapM_ do_component components
where
edges :: [ (Vrtx, Key, [Key]) ]
edges = [ (vertex, key1, edges_from stmt1)
......@@ -424,7 +426,7 @@ unscramble vertices = mapM_ do_component components
u <- newUnique
let (to_tmp, from_tmp) = split dflags u first_stmt
mk_graph to_tmp
unscramble rest
unscramble dflags rest
mk_graph from_tmp
split :: DynFlags -> Unique -> Stmt -> (Stmt, Stmt)
......@@ -437,8 +439,8 @@ unscramble vertices = mapM_ do_component components
mk_graph :: Stmt -> FCode ()
mk_graph (reg, rhs) = emitAssign (CmmLocal reg) rhs
mustFollow :: Stmt -> Stmt -> Bool
(reg, _) `mustFollow` (_, rhs) = CmmLocal reg `regUsedIn` rhs
mustFollow :: Stmt -> Stmt -> Bool
(reg, _) `mustFollow` (_, rhs) = regUsedIn dflags (CmmLocal reg) rhs
-------------------------------------------------------------------------
-- mkSwitch
......
......@@ -170,6 +170,12 @@
#define REG_R6 r9
#define REG_SpLim r15
/*
Map both Fn and Dn to register xmmn so that we can pass a function any
combination of up to six Float# or Double# arguments without touching
the stack. See Note [Overlapping global registers] for implications.
*/
#define REG_F1 xmm1
#define REG_F2 xmm2
#define REG_F3 xmm3
......
......@@ -164,6 +164,8 @@ mk/ghcconfig*_inplace_bin_ghc-stage2.exe.mk
/tests/codeGen/should_run/SizeOfSmallArray
/tests/codeGen/should_run/StaticArraySize
/tests/codeGen/should_run/StaticByteArraySize
/tests/codeGen/should_run/T10521
/tests/codeGen/should_run/T10521b
/tests/codeGen/should_run/T1852
/tests/codeGen/should_run/T1861
/tests/codeGen/should_run/T2080
......
import Data.Word( Word8 )
toV :: Float -> Word8
toV d = let coeff = significand d * 255.9999 / d
a = truncate $ d * coeff
b = exponent d
in a `seq` (b `seq` a)
main :: IO ()
main =
print $ map toV [ 3.56158e-2, 0.7415215, 0.5383201, 0.1289829, 0.45520145 ]
{-# LANGUAGE MagicHash #-}
import GHC.Exts
f :: Float# -> Float#
f x = x
{-# NOINLINE f #-}
g :: Double# -> Double#
g x = x
{-# NOINLINE g #-}
h :: Float -> Float
h (F# x) = let a = F# (f x)
b = D# (g (2.0##))
in a `seq` (b `seq` a)
main = print (h 1.0)
......@@ -133,3 +133,5 @@ test('cgrun074', normal, compile_and_run, [''])
test('CmmSwitchTest', when(fast(), skip), compile_and_run, [''])
test('T10245', expect_broken(10246), compile_and_run, [''])
test('T10246', expect_broken(10246), compile_and_run, [''])
test('T10521', normal, compile_and_run, [''])
test('T10521b', normal, compile_and_run, [''])
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment