Commit 7707e54c authored by rwbarton's avatar rwbarton Committed by Austin Seipp

Be aware of overlapping global STG registers in CmmSink (#10521)

Summary:
On x86_64, commit e2f6bbd3 assigned
the STG registers F1 and D1 the same hardware register (xmm1), and
the same for the registers F2 and D2, etc. When mixing calls to
functions involving Float#s and Double#s, this can cause wrong Cmm
optimizations that assume the F1 and D1 registers are independent.

Reviewers: simonpj, austin

Reviewed By: austin

Subscribers: simonpj, thomie, bgamari

Differential Revision: https://phabricator.haskell.org/D993

GHC Trac Issues: #10521

(cherry picked from commit a2f828a3)
parent 18e0e95f
...@@ -20,7 +20,6 @@ module CmmExpr ...@@ -20,7 +20,6 @@ module CmmExpr
, emptyRegSet, elemRegSet, extendRegSet, deleteFromRegSet, mkRegSet , emptyRegSet, elemRegSet, extendRegSet, deleteFromRegSet, mkRegSet
, plusRegSet, minusRegSet, timesRegSet, sizeRegSet, nullRegSet , plusRegSet, minusRegSet, timesRegSet, sizeRegSet, nullRegSet
, regSetToList , regSetToList
, regUsedIn
, Area(..) , Area(..)
, module CmmMachOp , module CmmMachOp
...@@ -372,17 +371,6 @@ instance DefinerOfRegs r a => DefinerOfRegs r (Maybe a) where ...@@ -372,17 +371,6 @@ instance DefinerOfRegs r a => DefinerOfRegs r (Maybe a) where
foldRegsDefd _ _ set Nothing = set foldRegsDefd _ _ set Nothing = set
foldRegsDefd dflags f set (Just x) = foldRegsDefd dflags f set x foldRegsDefd dflags f set (Just x) = foldRegsDefd dflags f set x
-----------------------------------------------------------------------------
-- Another reg utility
regUsedIn :: CmmReg -> CmmExpr -> Bool
_ `regUsedIn` CmmLit _ = False
reg `regUsedIn` CmmLoad e _ = reg `regUsedIn` e
reg `regUsedIn` CmmReg reg' = reg == reg'
reg `regUsedIn` CmmRegOff reg' _ = reg == reg'
reg `regUsedIn` CmmMachOp _ es = any (reg `regUsedIn`) es
_ `regUsedIn` CmmStackSlot _ _ = False
----------------------------------------------------------------------------- -----------------------------------------------------------------------------
-- Global STG registers -- Global STG registers
----------------------------------------------------------------------------- -----------------------------------------------------------------------------
...@@ -397,6 +385,31 @@ vgcFlag :: CmmType -> VGcPtr ...@@ -397,6 +385,31 @@ vgcFlag :: CmmType -> VGcPtr
vgcFlag ty | isGcPtrType ty = VGcPtr vgcFlag ty | isGcPtrType ty = VGcPtr
| otherwise = VNonGcPtr | otherwise = VNonGcPtr
{-
Note [Overlapping global registers]
The backend might not faithfully implement the abstraction of the STG
machine with independent registers for different values of type
GlobalReg. Specifically, certain pairs of registers (r1, r2) may
overlap in the sense that a store to r1 invalidates the value in r2,
and vice versa.
Currently this occurs only on the x86_64 architecture where FloatReg n
and DoubleReg n are assigned the same microarchitectural register, in
order to allow functions to receive more Float# or Double# arguments
in registers (as opposed to on the stack).
There are no specific rules about which registers might overlap with
which other registers, but presumably it's safe to assume that nothing
will overlap with special registers like Sp or BaseReg.
Use CmmUtils.regsOverlap to determine whether two GlobalRegs overlap
on a particular platform. The instance Eq GlobalReg is syntactic
equality of STG registers and does not take overlap into
account. However it is still used in UserOfRegs/DefinerOfRegs and
there are likely still bugs there, beware!
-}
data GlobalReg data GlobalReg
-- Argument and return registers -- Argument and return registers
= VanillaReg -- pointers, unboxed ints and chars = VanillaReg -- pointers, unboxed ints and chars
......
...@@ -510,11 +510,8 @@ okToInline _ _ _ = True ...@@ -510,11 +510,8 @@ okToInline _ _ _ = True
-- ----------------------------------------------------------------------------- -- -----------------------------------------------------------------------------
-- | @conflicts (r,e) stmt@ is @False@ if and only if the assignment -- | @conflicts (r,e) node@ is @False@ if and only if the assignment
-- @r = e@ can be safely commuted past @stmt@. -- @r = e@ can be safely commuted past statement @node@.
--
-- We only sink "r = G" assignments right now, so conflicts is very simple:
--
conflicts :: DynFlags -> Assignment -> CmmNode O x -> Bool conflicts :: DynFlags -> Assignment -> CmmNode O x -> Bool
conflicts dflags (r, rhs, addr) node conflicts dflags (r, rhs, addr) node
...@@ -548,13 +545,15 @@ conflicts dflags (r, rhs, addr) node ...@@ -548,13 +545,15 @@ conflicts dflags (r, rhs, addr) node
-- Cmm expression -- Cmm expression
globalRegistersConflict :: DynFlags -> CmmExpr -> CmmNode e x -> Bool globalRegistersConflict :: DynFlags -> CmmExpr -> CmmNode e x -> Bool
globalRegistersConflict dflags expr node = globalRegistersConflict dflags expr node =
foldRegsDefd dflags (\b r -> b || (CmmGlobal r) `regUsedIn` expr) False node foldRegsDefd dflags (\b r -> b || regUsedIn dflags (CmmGlobal r) expr)
False node
-- Returns True if node defines any local registers that are used in the -- Returns True if node defines any local registers that are used in the
-- Cmm expression -- Cmm expression
localRegistersConflict :: DynFlags -> CmmExpr -> CmmNode e x -> Bool localRegistersConflict :: DynFlags -> CmmExpr -> CmmNode e x -> Bool
localRegistersConflict dflags expr node = localRegistersConflict dflags expr node =
foldRegsDefd dflags (\b r -> b || (CmmLocal r) `regUsedIn` expr) False node foldRegsDefd dflags (\b r -> b || regUsedIn dflags (CmmLocal r) expr)
False node
-- Note [Sinking and calls] -- Note [Sinking and calls]
-- ~~~~~~~~~~~~~~~~~~~~~~~~ -- ~~~~~~~~~~~~~~~~~~~~~~~~
...@@ -693,7 +692,7 @@ loadAddr dflags e w = ...@@ -693,7 +692,7 @@ loadAddr dflags e w =
case e of case e of
CmmReg r -> regAddr dflags r 0 w CmmReg r -> regAddr dflags r 0 w
CmmRegOff r i -> regAddr dflags r i w CmmRegOff r i -> regAddr dflags r i w
_other | CmmGlobal Sp `regUsedIn` e -> StackMem _other | regUsedIn dflags (CmmGlobal Sp) e -> StackMem
| otherwise -> AnyMem | otherwise -> AnyMem
regAddr :: DynFlags -> CmmReg -> Int -> Width -> AbsMem regAddr :: DynFlags -> CmmReg -> Int -> Width -> AbsMem
......
...@@ -42,6 +42,9 @@ module CmmUtils( ...@@ -42,6 +42,9 @@ module CmmUtils(
cmmTagMask, cmmPointerMask, cmmUntag, cmmIsTagged, cmmTagMask, cmmPointerMask, cmmUntag, cmmIsTagged,
cmmConstrTag1, cmmConstrTag1,
-- Overlap and usage
regsOverlap, regUsedIn,
-- Liveness and bitmaps -- Liveness and bitmaps
mkLiveness, mkLiveness,
...@@ -75,6 +78,7 @@ import Unique ...@@ -75,6 +78,7 @@ import Unique
import UniqSupply import UniqSupply
import DynFlags import DynFlags
import Util import Util
import CodeGen.Platform
import Data.Word import Data.Word
import Data.Maybe import Data.Maybe
...@@ -394,6 +398,38 @@ cmmConstrTag1 :: DynFlags -> CmmExpr -> CmmExpr ...@@ -394,6 +398,38 @@ cmmConstrTag1 :: DynFlags -> CmmExpr -> CmmExpr
cmmConstrTag1 dflags e = cmmAndWord dflags e (cmmTagMask dflags) cmmConstrTag1 dflags e = cmmAndWord dflags e (cmmTagMask dflags)
-----------------------------------------------------------------------------
-- Overlap and usage
-- | Returns True if the two STG registers overlap on the specified
-- platform, in the sense that writing to one will clobber the
-- other. This includes the case that the two registers are the same
-- STG register. See Note [Overlapping global registers] for details.
regsOverlap :: DynFlags -> CmmReg -> CmmReg -> Bool
regsOverlap dflags (CmmGlobal g) (CmmGlobal g')
| Just real <- globalRegMaybe (targetPlatform dflags) g,
Just real' <- globalRegMaybe (targetPlatform dflags) g',
real == real'
= True
regsOverlap _ reg reg' = reg == reg'
-- | Returns True if the STG register is used by the expression, in
-- the sense that a store to the register might affect the value of
-- the expression.
--
-- We must check for overlapping registers and not just equal
-- registers here, otherwise CmmSink may incorrectly reorder
-- assignments that conflict due to overlap. See Trac #10521 and Note
-- [Overlapping global registers].
regUsedIn :: DynFlags -> CmmReg -> CmmExpr -> Bool
regUsedIn dflags = regUsedIn_ where
_ `regUsedIn_` CmmLit _ = False
reg `regUsedIn_` CmmLoad e _ = reg `regUsedIn_` e
reg `regUsedIn_` CmmReg reg' = regsOverlap dflags reg reg'
reg `regUsedIn_` CmmRegOff reg' _ = regsOverlap dflags reg reg'
reg `regUsedIn_` CmmMachOp _ es = any (reg `regUsedIn_`) es
_ `regUsedIn_` CmmStackSlot _ _ = False
-------------------------------------------- --------------------------------------------
-- --
-- mkLiveness -- mkLiveness
......
...@@ -401,11 +401,13 @@ type Stmt = (LocalReg, CmmExpr) -- r := e ...@@ -401,11 +401,13 @@ type Stmt = (LocalReg, CmmExpr) -- r := e
emitMultiAssign [] [] = return () emitMultiAssign [] [] = return ()
emitMultiAssign [reg] [rhs] = emitAssign (CmmLocal reg) rhs emitMultiAssign [reg] [rhs] = emitAssign (CmmLocal reg) rhs
emitMultiAssign regs rhss = ASSERT( equalLength regs rhss ) emitMultiAssign regs rhss = do
unscramble ([1..] `zip` (regs `zip` rhss)) dflags <- getDynFlags
ASSERT( equalLength regs rhss )
unscramble dflags ([1..] `zip` (regs `zip` rhss))
unscramble :: [Vrtx] -> FCode () unscramble :: DynFlags -> [Vrtx] -> FCode ()
unscramble vertices = mapM_ do_component components unscramble dflags vertices = mapM_ do_component components
where where
edges :: [ (Vrtx, Key, [Key]) ] edges :: [ (Vrtx, Key, [Key]) ]
edges = [ (vertex, key1, edges_from stmt1) edges = [ (vertex, key1, edges_from stmt1)
...@@ -432,7 +434,7 @@ unscramble vertices = mapM_ do_component components ...@@ -432,7 +434,7 @@ unscramble vertices = mapM_ do_component components
u <- newUnique u <- newUnique
let (to_tmp, from_tmp) = split dflags u first_stmt let (to_tmp, from_tmp) = split dflags u first_stmt
mk_graph to_tmp mk_graph to_tmp
unscramble rest unscramble dflags rest
mk_graph from_tmp mk_graph from_tmp
split :: DynFlags -> Unique -> Stmt -> (Stmt, Stmt) split :: DynFlags -> Unique -> Stmt -> (Stmt, Stmt)
...@@ -445,8 +447,8 @@ unscramble vertices = mapM_ do_component components ...@@ -445,8 +447,8 @@ unscramble vertices = mapM_ do_component components
mk_graph :: Stmt -> FCode () mk_graph :: Stmt -> FCode ()
mk_graph (reg, rhs) = emitAssign (CmmLocal reg) rhs mk_graph (reg, rhs) = emitAssign (CmmLocal reg) rhs
mustFollow :: Stmt -> Stmt -> Bool mustFollow :: Stmt -> Stmt -> Bool
(reg, _) `mustFollow` (_, rhs) = CmmLocal reg `regUsedIn` rhs (reg, _) `mustFollow` (_, rhs) = regUsedIn dflags (CmmLocal reg) rhs
------------------------------------------------------------------------- -------------------------------------------------------------------------
-- mkSwitch -- mkSwitch
......
...@@ -170,6 +170,12 @@ ...@@ -170,6 +170,12 @@
#define REG_R6 r9 #define REG_R6 r9
#define REG_SpLim r15 #define REG_SpLim r15
/*
Map both Fn and Dn to register xmmn so that we can pass a function any
combination of up to six Float# or Double# arguments without touching
the stack. See Note [Overlapping global registers] for implications.
*/
#define REG_F1 xmm1 #define REG_F1 xmm1
#define REG_F2 xmm2 #define REG_F2 xmm2
#define REG_F3 xmm3 #define REG_F3 xmm3
......
...@@ -165,6 +165,8 @@ mk/ghcconfig*_inplace_bin_ghc-stage2.exe.mk ...@@ -165,6 +165,8 @@ mk/ghcconfig*_inplace_bin_ghc-stage2.exe.mk
/tests/codeGen/should_run/SizeOfSmallArray /tests/codeGen/should_run/SizeOfSmallArray
/tests/codeGen/should_run/StaticArraySize /tests/codeGen/should_run/StaticArraySize
/tests/codeGen/should_run/StaticByteArraySize /tests/codeGen/should_run/StaticByteArraySize
/tests/codeGen/should_run/T10521
/tests/codeGen/should_run/T10521b
/tests/codeGen/should_run/T1852 /tests/codeGen/should_run/T1852
/tests/codeGen/should_run/T1861 /tests/codeGen/should_run/T1861
/tests/codeGen/should_run/T2080 /tests/codeGen/should_run/T2080
......
import Data.Word( Word8 )
toV :: Float -> Word8
toV d = let coeff = significand d * 255.9999 / d
a = truncate $ d * coeff
b = exponent d
in a `seq` (b `seq` a)
main :: IO ()
main =
print $ map toV [ 3.56158e-2, 0.7415215, 0.5383201, 0.1289829, 0.45520145 ]
{-# LANGUAGE MagicHash #-}
import GHC.Exts
f :: Float# -> Float#
f x = x
{-# NOINLINE f #-}
g :: Double# -> Double#
g x = x
{-# NOINLINE g #-}
h :: Float -> Float
h (F# x) = let a = F# (f x)
b = D# (g (2.0##))
in a `seq` (b `seq` a)
main = print (h 1.0)
...@@ -129,3 +129,5 @@ test('T9013', omit_ways(['ghci']), # ghci doesn't support unboxed tuples ...@@ -129,3 +129,5 @@ test('T9013', omit_ways(['ghci']), # ghci doesn't support unboxed tuples
compile_and_run, ['']) compile_and_run, [''])
test('T9340', normal, compile_and_run, ['']) test('T9340', normal, compile_and_run, [''])
test('cgrun074', normal, compile_and_run, ['']) test('cgrun074', normal, compile_and_run, [''])
test('T10521', normal, compile_and_run, [''])
test('T10521b', normal, compile_and_run, [''])
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment