diff --git a/compiler/cmm/CmmExpr.hs b/compiler/cmm/CmmExpr.hs index 1d6c97f41eea4a2881fa06a0824a827feea33d15..3d21ebce2eb0555de3ed0aeca3e39a73aef3b023 100644 --- a/compiler/cmm/CmmExpr.hs +++ b/compiler/cmm/CmmExpr.hs @@ -20,7 +20,6 @@ module CmmExpr , emptyRegSet, elemRegSet, extendRegSet, deleteFromRegSet, mkRegSet , plusRegSet, minusRegSet, timesRegSet, sizeRegSet, nullRegSet , regSetToList - , regUsedIn , Area(..) , module CmmMachOp @@ -372,17 +371,6 @@ instance DefinerOfRegs r a => DefinerOfRegs r (Maybe a) where foldRegsDefd _ _ set Nothing = set foldRegsDefd dflags f set (Just x) = foldRegsDefd dflags f set x ------------------------------------------------------------------------------ --- Another reg utility - -regUsedIn :: CmmReg -> CmmExpr -> Bool -_ `regUsedIn` CmmLit _ = False -reg `regUsedIn` CmmLoad e _ = reg `regUsedIn` e -reg `regUsedIn` CmmReg reg' = reg == reg' -reg `regUsedIn` CmmRegOff reg' _ = reg == reg' -reg `regUsedIn` CmmMachOp _ es = any (reg `regUsedIn`) es -_ `regUsedIn` CmmStackSlot _ _ = False - ----------------------------------------------------------------------------- -- Global STG registers ----------------------------------------------------------------------------- @@ -397,6 +385,31 @@ vgcFlag :: CmmType -> VGcPtr vgcFlag ty | isGcPtrType ty = VGcPtr | otherwise = VNonGcPtr +{- +Note [Overlapping global registers] + +The backend might not faithfully implement the abstraction of the STG +machine with independent registers for different values of type +GlobalReg. Specifically, certain pairs of registers (r1, r2) may +overlap in the sense that a store to r1 invalidates the value in r2, +and vice versa. + +Currently this occurs only on the x86_64 architecture where FloatReg n +and DoubleReg n are assigned the same microarchitectural register, in +order to allow functions to receive more Float# or Double# arguments +in registers (as opposed to on the stack). + +There are no specific rules about which registers might overlap with +which other registers, but presumably it's safe to assume that nothing +will overlap with special registers like Sp or BaseReg. + +Use CmmUtils.regsOverlap to determine whether two GlobalRegs overlap +on a particular platform. The instance Eq GlobalReg is syntactic +equality of STG registers and does not take overlap into +account. However it is still used in UserOfRegs/DefinerOfRegs and +there are likely still bugs there, beware! +-} + data GlobalReg -- Argument and return registers = VanillaReg -- pointers, unboxed ints and chars diff --git a/compiler/cmm/CmmSink.hs b/compiler/cmm/CmmSink.hs index 22f4d2ec920417206168763049de9dafd7390efb..7279013e60df82f5d4da43967d4ffa0d8e769654 100644 --- a/compiler/cmm/CmmSink.hs +++ b/compiler/cmm/CmmSink.hs @@ -510,11 +510,8 @@ okToInline _ _ _ = True -- ----------------------------------------------------------------------------- --- | @conflicts (r,e) stmt@ is @False@ if and only if the assignment --- @r = e@ can be safely commuted past @stmt@. --- --- We only sink "r = G" assignments right now, so conflicts is very simple: --- +-- | @conflicts (r,e) node@ is @False@ if and only if the assignment +-- @r = e@ can be safely commuted past statement @node@. conflicts :: DynFlags -> Assignment -> CmmNode O x -> Bool conflicts dflags (r, rhs, addr) node @@ -548,13 +545,15 @@ conflicts dflags (r, rhs, addr) node -- Cmm expression globalRegistersConflict :: DynFlags -> CmmExpr -> CmmNode e x -> Bool globalRegistersConflict dflags expr node = - foldRegsDefd dflags (\b r -> b || (CmmGlobal r) `regUsedIn` expr) False node + foldRegsDefd dflags (\b r -> b || regUsedIn dflags (CmmGlobal r) expr) + False node -- Returns True if node defines any local registers that are used in the -- Cmm expression localRegistersConflict :: DynFlags -> CmmExpr -> CmmNode e x -> Bool localRegistersConflict dflags expr node = - foldRegsDefd dflags (\b r -> b || (CmmLocal r) `regUsedIn` expr) False node + foldRegsDefd dflags (\b r -> b || regUsedIn dflags (CmmLocal r) expr) + False node -- Note [Sinking and calls] -- ~~~~~~~~~~~~~~~~~~~~~~~~ @@ -693,7 +692,7 @@ loadAddr dflags e w = case e of CmmReg r -> regAddr dflags r 0 w CmmRegOff r i -> regAddr dflags r i w - _other | CmmGlobal Sp `regUsedIn` e -> StackMem + _other | regUsedIn dflags (CmmGlobal Sp) e -> StackMem | otherwise -> AnyMem regAddr :: DynFlags -> CmmReg -> Int -> Width -> AbsMem diff --git a/compiler/cmm/CmmUtils.hs b/compiler/cmm/CmmUtils.hs index 65d633e6b7340bf37b938fc1f58823a60c889143..8e7a2dc448cf1654dd7ab0c30d1d16fe3fbb23e8 100644 --- a/compiler/cmm/CmmUtils.hs +++ b/compiler/cmm/CmmUtils.hs @@ -42,6 +42,9 @@ module CmmUtils( cmmTagMask, cmmPointerMask, cmmUntag, cmmIsTagged, cmmConstrTag1, + -- Overlap and usage + regsOverlap, regUsedIn, + -- Liveness and bitmaps mkLiveness, @@ -75,6 +78,7 @@ import Unique import UniqSupply import DynFlags import Util +import CodeGen.Platform import Data.Word import Data.Maybe @@ -394,6 +398,38 @@ cmmConstrTag1 :: DynFlags -> CmmExpr -> CmmExpr cmmConstrTag1 dflags e = cmmAndWord dflags e (cmmTagMask dflags) +----------------------------------------------------------------------------- +-- Overlap and usage + +-- | Returns True if the two STG registers overlap on the specified +-- platform, in the sense that writing to one will clobber the +-- other. This includes the case that the two registers are the same +-- STG register. See Note [Overlapping global registers] for details. +regsOverlap :: DynFlags -> CmmReg -> CmmReg -> Bool +regsOverlap dflags (CmmGlobal g) (CmmGlobal g') + | Just real <- globalRegMaybe (targetPlatform dflags) g, + Just real' <- globalRegMaybe (targetPlatform dflags) g', + real == real' + = True +regsOverlap _ reg reg' = reg == reg' + +-- | Returns True if the STG register is used by the expression, in +-- the sense that a store to the register might affect the value of +-- the expression. +-- +-- We must check for overlapping registers and not just equal +-- registers here, otherwise CmmSink may incorrectly reorder +-- assignments that conflict due to overlap. See Trac #10521 and Note +-- [Overlapping global registers]. +regUsedIn :: DynFlags -> CmmReg -> CmmExpr -> Bool +regUsedIn dflags = regUsedIn_ where + _ `regUsedIn_` CmmLit _ = False + reg `regUsedIn_` CmmLoad e _ = reg `regUsedIn_` e + reg `regUsedIn_` CmmReg reg' = regsOverlap dflags reg reg' + reg `regUsedIn_` CmmRegOff reg' _ = regsOverlap dflags reg reg' + reg `regUsedIn_` CmmMachOp _ es = any (reg `regUsedIn_`) es + _ `regUsedIn_` CmmStackSlot _ _ = False + -------------------------------------------- -- -- mkLiveness diff --git a/compiler/codeGen/StgCmmUtils.hs b/compiler/codeGen/StgCmmUtils.hs index 5e8944df4a9bd7303a55a52d4a5c77a4f5e34fb1..8d8c8a034d9090986c025492bea7cb5ffee29392 100644 --- a/compiler/codeGen/StgCmmUtils.hs +++ b/compiler/codeGen/StgCmmUtils.hs @@ -401,11 +401,13 @@ type Stmt = (LocalReg, CmmExpr) -- r := e emitMultiAssign [] [] = return () emitMultiAssign [reg] [rhs] = emitAssign (CmmLocal reg) rhs -emitMultiAssign regs rhss = ASSERT( equalLength regs rhss ) - unscramble ([1..] `zip` (regs `zip` rhss)) +emitMultiAssign regs rhss = do + dflags <- getDynFlags + ASSERT( equalLength regs rhss ) + unscramble dflags ([1..] `zip` (regs `zip` rhss)) -unscramble :: [Vrtx] -> FCode () -unscramble vertices = mapM_ do_component components +unscramble :: DynFlags -> [Vrtx] -> FCode () +unscramble dflags vertices = mapM_ do_component components where edges :: [ (Vrtx, Key, [Key]) ] edges = [ (vertex, key1, edges_from stmt1) @@ -432,7 +434,7 @@ unscramble vertices = mapM_ do_component components u <- newUnique let (to_tmp, from_tmp) = split dflags u first_stmt mk_graph to_tmp - unscramble rest + unscramble dflags rest mk_graph from_tmp split :: DynFlags -> Unique -> Stmt -> (Stmt, Stmt) @@ -445,8 +447,8 @@ unscramble vertices = mapM_ do_component components mk_graph :: Stmt -> FCode () mk_graph (reg, rhs) = emitAssign (CmmLocal reg) rhs -mustFollow :: Stmt -> Stmt -> Bool -(reg, _) `mustFollow` (_, rhs) = CmmLocal reg `regUsedIn` rhs + mustFollow :: Stmt -> Stmt -> Bool + (reg, _) `mustFollow` (_, rhs) = regUsedIn dflags (CmmLocal reg) rhs ------------------------------------------------------------------------- -- mkSwitch diff --git a/includes/stg/MachRegs.h b/includes/stg/MachRegs.h index 3eeb697604775107892cc841bb517c0f497b04d2..b7090275ab72b54b740d2d17d92ca0a1ac10a6d8 100644 --- a/includes/stg/MachRegs.h +++ b/includes/stg/MachRegs.h @@ -170,6 +170,12 @@ #define REG_R6 r9 #define REG_SpLim r15 +/* +Map both Fn and Dn to register xmmn so that we can pass a function any +combination of up to six Float# or Double# arguments without touching +the stack. See Note [Overlapping global registers] for implications. +*/ + #define REG_F1 xmm1 #define REG_F2 xmm2 #define REG_F3 xmm3 diff --git a/testsuite/.gitignore b/testsuite/.gitignore index 2623c60f4695302b9e0ff3c6f5a4b82e75917c24..b1ed88791cd65411d7e552aab3f665f5d82d7195 100644 --- a/testsuite/.gitignore +++ b/testsuite/.gitignore @@ -165,6 +165,8 @@ mk/ghcconfig*_inplace_bin_ghc-stage2.exe.mk /tests/codeGen/should_run/SizeOfSmallArray /tests/codeGen/should_run/StaticArraySize /tests/codeGen/should_run/StaticByteArraySize +/tests/codeGen/should_run/T10521 +/tests/codeGen/should_run/T10521b /tests/codeGen/should_run/T1852 /tests/codeGen/should_run/T1861 /tests/codeGen/should_run/T2080 diff --git a/testsuite/tests/codeGen/should_run/T10521.hs b/testsuite/tests/codeGen/should_run/T10521.hs new file mode 100644 index 0000000000000000000000000000000000000000..e770ba315c73c0795caf708e9f51c68fd9d3fa0e --- /dev/null +++ b/testsuite/tests/codeGen/should_run/T10521.hs @@ -0,0 +1,11 @@ +import Data.Word( Word8 ) + +toV :: Float -> Word8 +toV d = let coeff = significand d * 255.9999 / d + a = truncate $ d * coeff + b = exponent d + in a `seq` (b `seq` a) + +main :: IO () +main = + print $ map toV [ 3.56158e-2, 0.7415215, 0.5383201, 0.1289829, 0.45520145 ] diff --git a/testsuite/tests/codeGen/should_run/T10521.stdout b/testsuite/tests/codeGen/should_run/T10521.stdout new file mode 100644 index 0000000000000000000000000000000000000000..9843a1725d1081cad2100aed1b08fa84a82adee3 --- /dev/null +++ b/testsuite/tests/codeGen/should_run/T10521.stdout @@ -0,0 +1 @@ +[145,189,137,132,233] diff --git a/testsuite/tests/codeGen/should_run/T10521b.hs b/testsuite/tests/codeGen/should_run/T10521b.hs new file mode 100644 index 0000000000000000000000000000000000000000..d0433f9b763138bf832b50bd6e0e9bcc4a678731 --- /dev/null +++ b/testsuite/tests/codeGen/should_run/T10521b.hs @@ -0,0 +1,18 @@ +{-# LANGUAGE MagicHash #-} + +import GHC.Exts + +f :: Float# -> Float# +f x = x +{-# NOINLINE f #-} + +g :: Double# -> Double# +g x = x +{-# NOINLINE g #-} + +h :: Float -> Float +h (F# x) = let a = F# (f x) + b = D# (g (2.0##)) + in a `seq` (b `seq` a) + +main = print (h 1.0) diff --git a/testsuite/tests/codeGen/should_run/T10521b.stdout b/testsuite/tests/codeGen/should_run/T10521b.stdout new file mode 100644 index 0000000000000000000000000000000000000000..d3827e75a5cadb9fe4a27e1cb9b6d192e7323120 --- /dev/null +++ b/testsuite/tests/codeGen/should_run/T10521b.stdout @@ -0,0 +1 @@ +1.0 diff --git a/testsuite/tests/codeGen/should_run/all.T b/testsuite/tests/codeGen/should_run/all.T index 12418f0aadc610392478b8198186bd001acabd3c..b2970a2eba74c25cfbfb0ee864503356b8af84cd 100644 --- a/testsuite/tests/codeGen/should_run/all.T +++ b/testsuite/tests/codeGen/should_run/all.T @@ -129,3 +129,5 @@ test('T9013', omit_ways(['ghci']), # ghci doesn't support unboxed tuples compile_and_run, ['']) test('T9340', normal, compile_and_run, ['']) test('cgrun074', normal, compile_and_run, ['']) +test('T10521', normal, compile_and_run, ['']) +test('T10521b', normal, compile_and_run, [''])