Skip to content
Commits on Source (3)
  • Joachim Breitner's avatar
    GHC no longer ever defines TABLES_NEXT_TO_CODE on its own · fc3f421b
    Joachim Breitner authored and Marge Bot's avatar Marge Bot committed
    It should be entirely the responsibility of make/Hadrian to ensure that
    everything that needs this flag gets it. GHC shouldn't be hardcoded to
    assist with bootstrapping since it builds other things besides itself.
    
    Reviewers:
    
    Subscribers: TerrorJack, rwbarton, carter
    
    GHC Trac Issues: #15548 -- progress towards, but not a fix
    
    Differential Revision: https://phabricator.haskell.org/D5082 -- extract
    from that
    fc3f421b
  • Ryan Scott's avatar
    Use ghc-prim < 0.7, not <= 0.6.1, as upper version bounds · be0dde8e
    Ryan Scott authored and Marge Bot's avatar Marge Bot committed
    Using `ghc-prim <= 0.6.1` is somewhat dodgy from a PVP point of view,
    as it makes it awkward to support new minor releases of `ghc-prim`.
    Let's instead use `< 0.7`, which is the idiomatic way of expressing
    PVP-compliant upper version bounds.
    be0dde8e
  • Carter Schonwald's avatar
    removing x87 register support from native code gen · fbf7331b
    Carter Schonwald authored
    * simplifies registers to have GPR, Float and Double, by removing the SSE2 and X87 Constructors
    * makes -msse2 assumed/default for x86 platforms, fixing a long standing nondeterminism in rounding
    behavior in 32bit haskell code
    * removes the 80bit floating point representation from the supported float sizes
    * there's still 1 tiny bit of x87 support needed,
    for handling float and double return values in FFI calls w.r.t. the C ABI on x86_32,
    but this one piece does not leak into the rest of the NCG.
    * Lots of code that hasn't been touched in a long time got deleted as a
    consequence of all of this.
    
    all in all, this change paves the way towards a lot of future further
    improvements in how GHC handles floating point computations, along with
    making the native code gen more accessible to a larger pool of contributors.
    fbf7331b
...@@ -81,7 +81,6 @@ assignArgumentsPos dflags off conv arg_ty reps = (stk_off, assignments) ...@@ -81,7 +81,6 @@ assignArgumentsPos dflags off conv arg_ty reps = (stk_off, assignments)
| passFloatInXmm -> k (RegisterParam (DoubleReg s), (vs, fs, ds, ls, ss)) | passFloatInXmm -> k (RegisterParam (DoubleReg s), (vs, fs, ds, ls, ss))
(W64, (vs, fs, d:ds, ls, ss)) (W64, (vs, fs, d:ds, ls, ss))
| not passFloatInXmm -> k (RegisterParam d, (vs, fs, ds, ls, ss)) | not passFloatInXmm -> k (RegisterParam d, (vs, fs, ds, ls, ss))
(W80, _) -> panic "F80 unsupported register type"
_ -> (assts, (r:rs)) _ -> (assts, (r:rs))
int = case (w, regs) of int = case (w, regs) of
(W128, _) -> panic "W128 unsupported register type" (W128, _) -> panic "W128 unsupported register type"
...@@ -100,6 +99,7 @@ assignArgumentsPos dflags off conv arg_ty reps = (stk_off, assignments) ...@@ -100,6 +99,7 @@ assignArgumentsPos dflags off conv arg_ty reps = (stk_off, assignments)
passFloatArgsInXmm :: DynFlags -> Bool passFloatArgsInXmm :: DynFlags -> Bool
passFloatArgsInXmm dflags = case platformArch (targetPlatform dflags) of passFloatArgsInXmm dflags = case platformArch (targetPlatform dflags) of
ArchX86_64 -> True ArchX86_64 -> True
ArchX86 -> False
_ -> False _ -> False
-- We used to spill vector registers to the stack since the LLVM backend didn't -- We used to spill vector registers to the stack since the LLVM backend didn't
......
...@@ -474,6 +474,9 @@ instance Eq GlobalReg where ...@@ -474,6 +474,9 @@ instance Eq GlobalReg where
FloatReg i == FloatReg j = i==j FloatReg i == FloatReg j = i==j
DoubleReg i == DoubleReg j = i==j DoubleReg i == DoubleReg j = i==j
LongReg i == LongReg j = i==j LongReg i == LongReg j = i==j
-- NOTE: The XMM, YMM and ZMM registers are actually the same registers,
-- at least with respect to storing at YMM i and then reading from XMM i,
-- and similarly for ZMM etc.
XmmReg i == XmmReg j = i==j XmmReg i == XmmReg j = i==j
YmmReg i == YmmReg j = i==j YmmReg i == YmmReg j = i==j
ZmmReg i == ZmmReg j = i==j ZmmReg i == ZmmReg j = i==j
...@@ -584,6 +587,9 @@ globalRegType dflags (VanillaReg _ VNonGcPtr) = bWord dflags ...@@ -584,6 +587,9 @@ globalRegType dflags (VanillaReg _ VNonGcPtr) = bWord dflags
globalRegType _ (FloatReg _) = cmmFloat W32 globalRegType _ (FloatReg _) = cmmFloat W32
globalRegType _ (DoubleReg _) = cmmFloat W64 globalRegType _ (DoubleReg _) = cmmFloat W64
globalRegType _ (LongReg _) = cmmBits W64 globalRegType _ (LongReg _) = cmmBits W64
-- TODO: improve the internal model of SIMD/vectorized registers
-- the right design SHOULD improve handling of float and double code too.
-- see remarks in "NOTE [SIMD Design for the future]" in StgCmmPrim
globalRegType _ (XmmReg _) = cmmVec 4 (cmmBits W32) globalRegType _ (XmmReg _) = cmmVec 4 (cmmBits W32)
globalRegType _ (YmmReg _) = cmmVec 8 (cmmBits W32) globalRegType _ (YmmReg _) = cmmVec 8 (cmmBits W32)
globalRegType _ (ZmmReg _) = cmmVec 16 (cmmBits W32) globalRegType _ (ZmmReg _) = cmmVec 16 (cmmBits W32)
......
...@@ -166,9 +166,6 @@ isFloat64 _other = False ...@@ -166,9 +166,6 @@ isFloat64 _other = False
----------------------------------------------------------------------------- -----------------------------------------------------------------------------
data Width = W8 | W16 | W32 | W64 data Width = W8 | W16 | W32 | W64
| W80 -- Extended double-precision float,
-- used in x86 native codegen only.
-- (we use Ord, so it'd better be in this order)
| W128 | W128
| W256 | W256
| W512 | W512
...@@ -185,7 +182,7 @@ mrStr W64 = sLit("W64") ...@@ -185,7 +182,7 @@ mrStr W64 = sLit("W64")
mrStr W128 = sLit("W128") mrStr W128 = sLit("W128")
mrStr W256 = sLit("W256") mrStr W256 = sLit("W256")
mrStr W512 = sLit("W512") mrStr W512 = sLit("W512")
mrStr W80 = sLit("W80")
-------- Common Widths ------------ -------- Common Widths ------------
...@@ -222,7 +219,7 @@ widthInBits W64 = 64 ...@@ -222,7 +219,7 @@ widthInBits W64 = 64
widthInBits W128 = 128 widthInBits W128 = 128
widthInBits W256 = 256 widthInBits W256 = 256
widthInBits W512 = 512 widthInBits W512 = 512
widthInBits W80 = 80
widthInBytes :: Width -> Int widthInBytes :: Width -> Int
widthInBytes W8 = 1 widthInBytes W8 = 1
...@@ -232,7 +229,7 @@ widthInBytes W64 = 8 ...@@ -232,7 +229,7 @@ widthInBytes W64 = 8
widthInBytes W128 = 16 widthInBytes W128 = 16
widthInBytes W256 = 32 widthInBytes W256 = 32
widthInBytes W512 = 64 widthInBytes W512 = 64
widthInBytes W80 = 10
widthFromBytes :: Int -> Width widthFromBytes :: Int -> Width
widthFromBytes 1 = W8 widthFromBytes 1 = W8
...@@ -242,7 +239,7 @@ widthFromBytes 8 = W64 ...@@ -242,7 +239,7 @@ widthFromBytes 8 = W64
widthFromBytes 16 = W128 widthFromBytes 16 = W128
widthFromBytes 32 = W256 widthFromBytes 32 = W256
widthFromBytes 64 = W512 widthFromBytes 64 = W512
widthFromBytes 10 = W80
widthFromBytes n = pprPanic "no width for given number of bytes" (ppr n) widthFromBytes n = pprPanic "no width for given number of bytes" (ppr n)
-- log_2 of the width in bytes, useful for generating shifts. -- log_2 of the width in bytes, useful for generating shifts.
...@@ -254,7 +251,7 @@ widthInLog W64 = 3 ...@@ -254,7 +251,7 @@ widthInLog W64 = 3
widthInLog W128 = 4 widthInLog W128 = 4
widthInLog W256 = 5 widthInLog W256 = 5
widthInLog W512 = 6 widthInLog W512 = 6
widthInLog W80 = panic "widthInLog: F80"
-- widening / narrowing -- widening / narrowing
......
...@@ -1727,8 +1727,38 @@ vecElemProjectCast dflags WordVec W32 = Just (mo_u_32ToWord dflags) ...@@ -1727,8 +1727,38 @@ vecElemProjectCast dflags WordVec W32 = Just (mo_u_32ToWord dflags)
vecElemProjectCast _ WordVec W64 = Nothing vecElemProjectCast _ WordVec W64 = Nothing
vecElemProjectCast _ _ _ = Nothing vecElemProjectCast _ _ _ = Nothing
-- NOTE [SIMD Design for the future]
-- Check to make sure that we can generate code for the specified vector type -- Check to make sure that we can generate code for the specified vector type
-- given the current set of dynamic flags. -- given the current set of dynamic flags.
-- Currently these checks are specific to x86 and x86_64 architecture.
-- This should be fixed!
-- In particular,
-- 1) Add better support for other architectures! (this may require a redesign)
-- 2) Decouple design choices from LLVM's pseudo SIMD model!
-- The high level LLVM naive rep makes per-CPU-family SIMD generation its own
-- optimization problem, and hides important differences in e.g. ARM vs x86_64 SIMD
-- 3) Depending on the architecture, the SIMD registers may also support general
-- computations on Float/Double/Word/Int scalars, but currently on
-- for example x86_64, we always put Word/Int (or sized) in GPR
-- (general purpose) registers. Would relaxing that allow for
-- useful optimization opportunities?
-- Phrased differently, it is worth experimenting with supporting
-- different register mapping strategies than we currently have, especially if
-- someday we want SIMD to be a first class denizen in GHC along with scalar
-- values!
-- The current design with respect to register mapping of scalars could
-- very well be the best, but exploring the design space and doing careful
-- measurements is the only way to validate that.
-- In some next generation CPU ISAs, notably RISC V, the SIMD extension
-- includes support for a sort of run time CPU dependent vectorization parameter,
-- where a loop may act upon a single scalar each iteration OR some 2,4,8 ...
-- element chunk! Time will tell if that direction sees wide adoption,
-- but it is from that context that unifying our handling of simd and scalars
-- may benefit. It is not likely to benefit current architectures, though
-- it may very well be a design perspective that helps guide improving the NCG.
checkVecCompatibility :: DynFlags -> PrimOpVecCat -> Length -> Width -> FCode () checkVecCompatibility :: DynFlags -> PrimOpVecCat -> Length -> Width -> FCode ()
checkVecCompatibility dflags vcat l w = do checkVecCompatibility dflags vcat l w = do
when (hscTarget dflags /= HscLlvm) $ do when (hscTarget dflags /= HscLlvm) $ do
......
...@@ -97,7 +97,6 @@ cmmToLlvmType ty | isVecType ty = LMVector (vecLength ty) (cmmToLlvmType (vecE ...@@ -97,7 +97,6 @@ cmmToLlvmType ty | isVecType ty = LMVector (vecLength ty) (cmmToLlvmType (vecE
widthToLlvmFloat :: Width -> LlvmType widthToLlvmFloat :: Width -> LlvmType
widthToLlvmFloat W32 = LMFloat widthToLlvmFloat W32 = LMFloat
widthToLlvmFloat W64 = LMDouble widthToLlvmFloat W64 = LMDouble
widthToLlvmFloat W80 = LMFloat80
widthToLlvmFloat W128 = LMFloat128 widthToLlvmFloat W128 = LMFloat128
widthToLlvmFloat w = panic $ "widthToLlvmFloat: Bad float size: " ++ show w widthToLlvmFloat w = panic $ "widthToLlvmFloat: Bad float size: " ++ show w
......
...@@ -58,7 +58,7 @@ module DynFlags ( ...@@ -58,7 +58,7 @@ module DynFlags (
fFlags, fLangFlags, xFlags, fFlags, fLangFlags, xFlags,
wWarningFlags, wWarningFlags,
dynFlagDependencies, dynFlagDependencies,
tablesNextToCode, mkTablesNextToCode, tablesNextToCode,
makeDynFlagsConsistent, makeDynFlagsConsistent,
shouldUseColor, shouldUseColor,
shouldUseHexWordLiterals, shouldUseHexWordLiterals,
...@@ -5833,20 +5833,24 @@ data SseVersion = SSE1 ...@@ -5833,20 +5833,24 @@ data SseVersion = SSE1
isSseEnabled :: DynFlags -> Bool isSseEnabled :: DynFlags -> Bool
isSseEnabled dflags = case platformArch (targetPlatform dflags) of isSseEnabled dflags = case platformArch (targetPlatform dflags) of
ArchX86_64 -> True ArchX86_64 -> True
ArchX86 -> sseVersion dflags >= Just SSE1 ArchX86 -> True
_ -> False _ -> False
isSse2Enabled :: DynFlags -> Bool isSse2Enabled :: DynFlags -> Bool
isSse2Enabled dflags = case platformArch (targetPlatform dflags) of isSse2Enabled dflags = case platformArch (targetPlatform dflags) of
ArchX86_64 -> -- SSE2 is fixed on for x86_64. It would be -- We Assume SSE1 and SSE2 operations are available on both
-- possible to make it optional, but we'd need to -- x86 and x86_64. Historically we didn't default to SSE2 and
-- fix at least the foreign call code where the -- SSE1 on x86, which results in defacto nondeterminism for how
-- calling convention specifies the use of xmm regs, -- rounding behaves in the associated x87 floating point instructions
-- and possibly other places. -- because variations in the spill/fpu stack placement of arguments for
True -- operations would change the precision and final result of what
ArchX86 -> sseVersion dflags >= Just SSE2 -- would otherwise be the same expressions with respect to single or
-- double precision IEEE floating point computations.
ArchX86_64 -> True
ArchX86 -> True
_ -> False _ -> False
isSse4_2Enabled :: DynFlags -> Bool isSse4_2Enabled :: DynFlags -> Bool
isSse4_2Enabled dflags = sseVersion dflags >= Just SSE42 isSse4_2Enabled dflags = sseVersion dflags >= Just SSE42
......
...@@ -199,15 +199,9 @@ initSysTools top_dir ...@@ -199,15 +199,9 @@ initSysTools top_dir
let unreg_gcc_args = if targetUnregisterised let unreg_gcc_args = if targetUnregisterised
then ["-DNO_REGS", "-DUSE_MINIINTERPRETER"] then ["-DNO_REGS", "-DUSE_MINIINTERPRETER"]
else [] else []
-- TABLES_NEXT_TO_CODE affects the info table layout.
tntc_gcc_args
| mkTablesNextToCode targetUnregisterised
= ["-DTABLES_NEXT_TO_CODE"]
| otherwise = []
cpp_args= map Option (words cpp_args_str) cpp_args= map Option (words cpp_args_str)
gcc_args = map Option (words gcc_args_str gcc_args = map Option (words gcc_args_str
++ unreg_gcc_args ++ unreg_gcc_args)
++ tntc_gcc_args)
ldSupportsCompactUnwind <- getBooleanSetting "ld supports compact unwind" ldSupportsCompactUnwind <- getBooleanSetting "ld supports compact unwind"
ldSupportsBuildId <- getBooleanSetting "ld supports build-id" ldSupportsBuildId <- getBooleanSetting "ld supports build-id"
ldSupportsFilelist <- getBooleanSetting "ld supports filelist" ldSupportsFilelist <- getBooleanSetting "ld supports filelist"
......
...@@ -179,7 +179,7 @@ nativeCodeGen dflags this_mod modLoc h us cmms ...@@ -179,7 +179,7 @@ nativeCodeGen dflags this_mod modLoc h us cmms
x86NcgImpl :: DynFlags -> NcgImpl (Alignment, CmmStatics) x86NcgImpl :: DynFlags -> NcgImpl (Alignment, CmmStatics)
X86.Instr.Instr X86.Instr.JumpDest X86.Instr.Instr X86.Instr.JumpDest
x86NcgImpl dflags x86NcgImpl dflags
= (x86_64NcgImpl dflags) { ncg_x86fp_kludge = map x86fp_kludge } = (x86_64NcgImpl dflags)
x86_64NcgImpl :: DynFlags -> NcgImpl (Alignment, CmmStatics) x86_64NcgImpl :: DynFlags -> NcgImpl (Alignment, CmmStatics)
X86.Instr.Instr X86.Instr.JumpDest X86.Instr.Instr X86.Instr.JumpDest
...@@ -194,7 +194,6 @@ x86_64NcgImpl dflags ...@@ -194,7 +194,6 @@ x86_64NcgImpl dflags
,pprNatCmmDecl = X86.Ppr.pprNatCmmDecl ,pprNatCmmDecl = X86.Ppr.pprNatCmmDecl
,maxSpillSlots = X86.Instr.maxSpillSlots dflags ,maxSpillSlots = X86.Instr.maxSpillSlots dflags
,allocatableRegs = X86.Regs.allocatableRegs platform ,allocatableRegs = X86.Regs.allocatableRegs platform
,ncg_x86fp_kludge = id
,ncgAllocMoreStack = X86.Instr.allocMoreStack platform ,ncgAllocMoreStack = X86.Instr.allocMoreStack platform
,ncgExpandTop = id ,ncgExpandTop = id
,ncgMakeFarBranches = const id ,ncgMakeFarBranches = const id
...@@ -215,7 +214,6 @@ ppcNcgImpl dflags ...@@ -215,7 +214,6 @@ ppcNcgImpl dflags
,pprNatCmmDecl = PPC.Ppr.pprNatCmmDecl ,pprNatCmmDecl = PPC.Ppr.pprNatCmmDecl
,maxSpillSlots = PPC.Instr.maxSpillSlots dflags ,maxSpillSlots = PPC.Instr.maxSpillSlots dflags
,allocatableRegs = PPC.Regs.allocatableRegs platform ,allocatableRegs = PPC.Regs.allocatableRegs platform
,ncg_x86fp_kludge = id
,ncgAllocMoreStack = PPC.Instr.allocMoreStack platform ,ncgAllocMoreStack = PPC.Instr.allocMoreStack platform
,ncgExpandTop = id ,ncgExpandTop = id
,ncgMakeFarBranches = PPC.Instr.makeFarBranches ,ncgMakeFarBranches = PPC.Instr.makeFarBranches
...@@ -236,7 +234,6 @@ sparcNcgImpl dflags ...@@ -236,7 +234,6 @@ sparcNcgImpl dflags
,pprNatCmmDecl = SPARC.Ppr.pprNatCmmDecl ,pprNatCmmDecl = SPARC.Ppr.pprNatCmmDecl
,maxSpillSlots = SPARC.Instr.maxSpillSlots dflags ,maxSpillSlots = SPARC.Instr.maxSpillSlots dflags
,allocatableRegs = SPARC.Regs.allocatableRegs ,allocatableRegs = SPARC.Regs.allocatableRegs
,ncg_x86fp_kludge = id
,ncgAllocMoreStack = noAllocMoreStack ,ncgAllocMoreStack = noAllocMoreStack
,ncgExpandTop = map SPARC.CodeGen.Expand.expandTop ,ncgExpandTop = map SPARC.CodeGen.Expand.expandTop
,ncgMakeFarBranches = const id ,ncgMakeFarBranches = const id
...@@ -680,19 +677,10 @@ cmmNativeGen dflags this_mod modLoc ncgImpl us fileIds dbgMap cmm count ...@@ -680,19 +677,10 @@ cmmNativeGen dflags this_mod modLoc ncgImpl us fileIds dbgMap cmm count
foldl' (\m (from,to) -> addImmediateSuccessor from to m ) foldl' (\m (from,to) -> addImmediateSuccessor from to m )
cfgWithFixupBlks stack_updt_blks cfgWithFixupBlks stack_updt_blks
---- x86fp_kludge. This pass inserts ffree instructions to clear
---- the FPU stack on x86. The x86 ABI requires that the FPU stack
---- is clear, and library functions can return odd results if it
---- isn't.
----
---- NB. must happen before shortcutBranches, because that
---- generates JXX_GBLs which we can't fix up in x86fp_kludge.
let kludged = {-# SCC "x86fp_kludge" #-} ncg_x86fp_kludge ncgImpl alloced
---- generate jump tables ---- generate jump tables
let tabled = let tabled =
{-# SCC "generateJumpTables" #-} {-# SCC "generateJumpTables" #-}
generateJumpTables ncgImpl kludged generateJumpTables ncgImpl alloced
dumpIfSet_dyn dflags dumpIfSet_dyn dflags
Opt_D_dump_cfg_weights "CFG Update information" Opt_D_dump_cfg_weights "CFG Update information"
...@@ -787,12 +775,6 @@ checkLayout procsUnsequenced procsSequenced = ...@@ -787,12 +775,6 @@ checkLayout procsUnsequenced procsSequenced =
getBlockIds (CmmProc _ _ _ (ListGraph blocks)) = getBlockIds (CmmProc _ _ _ (ListGraph blocks)) =
setFromList $ map blockId blocks setFromList $ map blockId blocks
x86fp_kludge :: NatCmmDecl (Alignment, CmmStatics) X86.Instr.Instr -> NatCmmDecl (Alignment, CmmStatics) X86.Instr.Instr
x86fp_kludge top@(CmmData _ _) = top
x86fp_kludge (CmmProc info lbl live (ListGraph code)) =
CmmProc info lbl live (ListGraph $ X86.Instr.i386_insert_ffrees code)
-- | Compute unwinding tables for the blocks of a procedure -- | Compute unwinding tables for the blocks of a procedure
computeUnwinding :: Instruction instr computeUnwinding :: Instruction instr
=> DynFlags -> NcgImpl statics instr jumpDest => DynFlags -> NcgImpl statics instr jumpDest
......
...@@ -47,7 +47,6 @@ data Format ...@@ -47,7 +47,6 @@ data Format
| II64 | II64
| FF32 | FF32
| FF64 | FF64
| FF80
deriving (Show, Eq) deriving (Show, Eq)
...@@ -70,7 +69,7 @@ floatFormat width ...@@ -70,7 +69,7 @@ floatFormat width
= case width of = case width of
W32 -> FF32 W32 -> FF32
W64 -> FF64 W64 -> FF64
W80 -> FF80
other -> pprPanic "Format.floatFormat" (ppr other) other -> pprPanic "Format.floatFormat" (ppr other)
...@@ -80,7 +79,6 @@ isFloatFormat format ...@@ -80,7 +79,6 @@ isFloatFormat format
= case format of = case format of
FF32 -> True FF32 -> True
FF64 -> True FF64 -> True
FF80 -> True
_ -> False _ -> False
...@@ -101,7 +99,7 @@ formatToWidth format ...@@ -101,7 +99,7 @@ formatToWidth format
II64 -> W64 II64 -> W64
FF32 -> W32 FF32 -> W32
FF64 -> W64 FF64 -> W64
FF80 -> W80
formatInBytes :: Format -> Int formatInBytes :: Format -> Int
formatInBytes = widthInBytes . formatToWidth formatInBytes = widthInBytes . formatToWidth
...@@ -76,7 +76,6 @@ data NcgImpl statics instr jumpDest = NcgImpl { ...@@ -76,7 +76,6 @@ data NcgImpl statics instr jumpDest = NcgImpl {
pprNatCmmDecl :: NatCmmDecl statics instr -> SDoc, pprNatCmmDecl :: NatCmmDecl statics instr -> SDoc,
maxSpillSlots :: Int, maxSpillSlots :: Int,
allocatableRegs :: [RealReg], allocatableRegs :: [RealReg],
ncg_x86fp_kludge :: [NatCmmDecl statics instr] -> [NatCmmDecl statics instr],
ncgExpandTop :: [NatCmmDecl statics instr] -> [NatCmmDecl statics instr], ncgExpandTop :: [NatCmmDecl statics instr] -> [NatCmmDecl statics instr],
ncgAllocMoreStack :: Int -> NatCmmDecl statics instr ncgAllocMoreStack :: Int -> NatCmmDecl statics instr
-> UniqSM (NatCmmDecl statics instr, [(BlockId,BlockId)]), -> UniqSM (NatCmmDecl statics instr, [(BlockId,BlockId)]),
......
...@@ -1593,7 +1593,7 @@ genCCall' ...@@ -1593,7 +1593,7 @@ genCCall'
-> [CmmActual] -- arguments (of mixed type) -> [CmmActual] -- arguments (of mixed type)
-> NatM InstrBlock -> NatM InstrBlock
{- {-
PowerPC Linux uses the System V Release 4 Calling Convention PowerPC Linux uses the System V Release 4 Calling Convention
for PowerPC. It is described in the for PowerPC. It is described in the
"System V Application Binary Interface PowerPC Processor Supplement". "System V Application Binary Interface PowerPC Processor Supplement".
...@@ -1906,7 +1906,7 @@ genCCall' dflags gcp target dest_regs args ...@@ -1906,7 +1906,7 @@ genCCall' dflags gcp target dest_regs args
FF32 -> (1, 1, 4, fprs) FF32 -> (1, 1, 4, fprs)
FF64 -> (2, 1, 8, fprs) FF64 -> (2, 1, 8, fprs)
II64 -> panic "genCCall' passArguments II64" II64 -> panic "genCCall' passArguments II64"
FF80 -> panic "genCCall' passArguments FF80"
GCP32ELF -> GCP32ELF ->
case cmmTypeFormat rep of case cmmTypeFormat rep of
II8 -> (1, 0, 4, gprs) II8 -> (1, 0, 4, gprs)
...@@ -1916,7 +1916,6 @@ genCCall' dflags gcp target dest_regs args ...@@ -1916,7 +1916,6 @@ genCCall' dflags gcp target dest_regs args
FF32 -> (0, 1, 4, fprs) FF32 -> (0, 1, 4, fprs)
FF64 -> (0, 1, 8, fprs) FF64 -> (0, 1, 8, fprs)
II64 -> panic "genCCall' passArguments II64" II64 -> panic "genCCall' passArguments II64"
FF80 -> panic "genCCall' passArguments FF80"
GCP64ELF _ -> GCP64ELF _ ->
case cmmTypeFormat rep of case cmmTypeFormat rep of
II8 -> (1, 0, 8, gprs) II8 -> (1, 0, 8, gprs)
...@@ -1928,7 +1927,6 @@ genCCall' dflags gcp target dest_regs args ...@@ -1928,7 +1927,6 @@ genCCall' dflags gcp target dest_regs args
-- the FPRs. -- the FPRs.
FF32 -> (1, 1, 8, fprs) FF32 -> (1, 1, 8, fprs)
FF64 -> (1, 1, 8, fprs) FF64 -> (1, 1, 8, fprs)
FF80 -> panic "genCCall' passArguments FF80"
moveResult reduceToFF32 = moveResult reduceToFF32 =
case dest_regs of case dest_regs of
......
...@@ -161,7 +161,7 @@ pprReg r ...@@ -161,7 +161,7 @@ pprReg r
RegVirtual (VirtualRegHi u) -> text "%vHi_" <> pprUniqueAlways u RegVirtual (VirtualRegHi u) -> text "%vHi_" <> pprUniqueAlways u
RegVirtual (VirtualRegF u) -> text "%vF_" <> pprUniqueAlways u RegVirtual (VirtualRegF u) -> text "%vF_" <> pprUniqueAlways u
RegVirtual (VirtualRegD u) -> text "%vD_" <> pprUniqueAlways u RegVirtual (VirtualRegD u) -> text "%vD_" <> pprUniqueAlways u
RegVirtual (VirtualRegSSE u) -> text "%vSSE_" <> pprUniqueAlways u
where where
ppr_reg_no :: Int -> SDoc ppr_reg_no :: Int -> SDoc
ppr_reg_no i ppr_reg_no i
...@@ -179,8 +179,7 @@ pprFormat x ...@@ -179,8 +179,7 @@ pprFormat x
II32 -> sLit "w" II32 -> sLit "w"
II64 -> sLit "d" II64 -> sLit "d"
FF32 -> sLit "fs" FF32 -> sLit "fs"
FF64 -> sLit "fd" FF64 -> sLit "fd")
_ -> panic "PPC.Ppr.pprFormat: no match")
pprCond :: Cond -> SDoc pprCond :: Cond -> SDoc
...@@ -365,7 +364,6 @@ pprInstr (LD fmt reg addr) = hcat [ ...@@ -365,7 +364,6 @@ pprInstr (LD fmt reg addr) = hcat [
II64 -> sLit "d" II64 -> sLit "d"
FF32 -> sLit "fs" FF32 -> sLit "fs"
FF64 -> sLit "fd" FF64 -> sLit "fd"
_ -> panic "PPC.Ppr.pprInstr: no match"
), ),
case addr of AddrRegImm _ _ -> empty case addr of AddrRegImm _ _ -> empty
AddrRegReg _ _ -> char 'x', AddrRegReg _ _ -> char 'x',
...@@ -405,7 +403,6 @@ pprInstr (LA fmt reg addr) = hcat [ ...@@ -405,7 +403,6 @@ pprInstr (LA fmt reg addr) = hcat [
II64 -> sLit "d" II64 -> sLit "d"
FF32 -> sLit "fs" FF32 -> sLit "fs"
FF64 -> sLit "fd" FF64 -> sLit "fd"
_ -> panic "PPC.Ppr.pprInstr: no match"
), ),
case addr of AddrRegImm _ _ -> empty case addr of AddrRegImm _ _ -> empty
AddrRegReg _ _ -> char 'x', AddrRegReg _ _ -> char 'x',
......
...@@ -131,7 +131,7 @@ regDotColor reg ...@@ -131,7 +131,7 @@ regDotColor reg
RcInteger -> text "blue" RcInteger -> text "blue"
RcFloat -> text "red" RcFloat -> text "red"
RcDouble -> text "green" RcDouble -> text "green"
RcDoubleSSE -> text "yellow"
-- immediates ------------------------------------------------------------------ -- immediates ------------------------------------------------------------------
......
...@@ -56,7 +56,7 @@ data VirtualReg ...@@ -56,7 +56,7 @@ data VirtualReg
| VirtualRegHi {-# UNPACK #-} !Unique -- High part of 2-word register | VirtualRegHi {-# UNPACK #-} !Unique -- High part of 2-word register
| VirtualRegF {-# UNPACK #-} !Unique | VirtualRegF {-# UNPACK #-} !Unique
| VirtualRegD {-# UNPACK #-} !Unique | VirtualRegD {-# UNPACK #-} !Unique
| VirtualRegSSE {-# UNPACK #-} !Unique
deriving (Eq, Show) deriving (Eq, Show)
-- This is laborious, but necessary. We can't derive Ord because -- This is laborious, but necessary. We can't derive Ord because
...@@ -69,15 +69,14 @@ instance Ord VirtualReg where ...@@ -69,15 +69,14 @@ instance Ord VirtualReg where
compare (VirtualRegHi a) (VirtualRegHi b) = nonDetCmpUnique a b compare (VirtualRegHi a) (VirtualRegHi b) = nonDetCmpUnique a b
compare (VirtualRegF a) (VirtualRegF b) = nonDetCmpUnique a b compare (VirtualRegF a) (VirtualRegF b) = nonDetCmpUnique a b
compare (VirtualRegD a) (VirtualRegD b) = nonDetCmpUnique a b compare (VirtualRegD a) (VirtualRegD b) = nonDetCmpUnique a b
compare (VirtualRegSSE a) (VirtualRegSSE b) = nonDetCmpUnique a b
compare VirtualRegI{} _ = LT compare VirtualRegI{} _ = LT
compare _ VirtualRegI{} = GT compare _ VirtualRegI{} = GT
compare VirtualRegHi{} _ = LT compare VirtualRegHi{} _ = LT
compare _ VirtualRegHi{} = GT compare _ VirtualRegHi{} = GT
compare VirtualRegF{} _ = LT compare VirtualRegF{} _ = LT
compare _ VirtualRegF{} = GT compare _ VirtualRegF{} = GT
compare VirtualRegD{} _ = LT
compare _ VirtualRegD{} = GT
instance Uniquable VirtualReg where instance Uniquable VirtualReg where
...@@ -87,16 +86,18 @@ instance Uniquable VirtualReg where ...@@ -87,16 +86,18 @@ instance Uniquable VirtualReg where
VirtualRegHi u -> u VirtualRegHi u -> u
VirtualRegF u -> u VirtualRegF u -> u
VirtualRegD u -> u VirtualRegD u -> u
VirtualRegSSE u -> u
instance Outputable VirtualReg where instance Outputable VirtualReg where
ppr reg ppr reg
= case reg of = case reg of
VirtualRegI u -> text "%vI_" <> pprUniqueAlways u VirtualRegI u -> text "%vI_" <> pprUniqueAlways u
VirtualRegHi u -> text "%vHi_" <> pprUniqueAlways u VirtualRegHi u -> text "%vHi_" <> pprUniqueAlways u
VirtualRegF u -> text "%vF_" <> pprUniqueAlways u -- this code is kinda wrong on x86
VirtualRegD u -> text "%vD_" <> pprUniqueAlways u -- because float and double occupy the same register set
VirtualRegSSE u -> text "%vSSE_" <> pprUniqueAlways u -- namely SSE2 register xmm0 .. xmm15
VirtualRegF u -> text "%vFloat_" <> pprUniqueAlways u
VirtualRegD u -> text "%vDouble_" <> pprUniqueAlways u
renameVirtualReg :: Unique -> VirtualReg -> VirtualReg renameVirtualReg :: Unique -> VirtualReg -> VirtualReg
...@@ -106,7 +107,6 @@ renameVirtualReg u r ...@@ -106,7 +107,6 @@ renameVirtualReg u r
VirtualRegHi _ -> VirtualRegHi u VirtualRegHi _ -> VirtualRegHi u
VirtualRegF _ -> VirtualRegF u VirtualRegF _ -> VirtualRegF u
VirtualRegD _ -> VirtualRegD u VirtualRegD _ -> VirtualRegD u
VirtualRegSSE _ -> VirtualRegSSE u
classOfVirtualReg :: VirtualReg -> RegClass classOfVirtualReg :: VirtualReg -> RegClass
...@@ -116,7 +116,7 @@ classOfVirtualReg vr ...@@ -116,7 +116,7 @@ classOfVirtualReg vr
VirtualRegHi{} -> RcInteger VirtualRegHi{} -> RcInteger
VirtualRegF{} -> RcFloat VirtualRegF{} -> RcFloat
VirtualRegD{} -> RcDouble VirtualRegD{} -> RcDouble
VirtualRegSSE{} -> RcDoubleSSE
-- Determine the upper-half vreg for a 64-bit quantity on a 32-bit platform -- Determine the upper-half vreg for a 64-bit quantity on a 32-bit platform
......
...@@ -134,6 +134,10 @@ trivColorable platform virtualRegSqueeze realRegSqueeze RcInteger conflicts excl ...@@ -134,6 +134,10 @@ trivColorable platform virtualRegSqueeze realRegSqueeze RcInteger conflicts excl
trivColorable platform virtualRegSqueeze realRegSqueeze RcFloat conflicts exclusions trivColorable platform virtualRegSqueeze realRegSqueeze RcFloat conflicts exclusions
| let cALLOCATABLE_REGS_FLOAT | let cALLOCATABLE_REGS_FLOAT
= (case platformArch platform of = (case platformArch platform of
-- On x86_64 and x86, RcFloat and RcDouble
-- use the same registers,
-- so we only use RcDouble to represent the
-- register allocation problem for those types.
ArchX86 -> 0 ArchX86 -> 0
ArchX86_64 -> 0 ArchX86_64 -> 0
ArchPPC -> 0 ArchPPC -> 0
...@@ -160,8 +164,14 @@ trivColorable platform virtualRegSqueeze realRegSqueeze RcFloat conflicts exclus ...@@ -160,8 +164,14 @@ trivColorable platform virtualRegSqueeze realRegSqueeze RcFloat conflicts exclus
trivColorable platform virtualRegSqueeze realRegSqueeze RcDouble conflicts exclusions trivColorable platform virtualRegSqueeze realRegSqueeze RcDouble conflicts exclusions
| let cALLOCATABLE_REGS_DOUBLE | let cALLOCATABLE_REGS_DOUBLE
= (case platformArch platform of = (case platformArch platform of
ArchX86 -> 6 ArchX86 -> 8
ArchX86_64 -> 0 -- in x86 32bit mode sse2 there are only
-- 8 XMM registers xmm0 ... xmm7
ArchX86_64 -> 10
-- in x86_64 there are 16 XMM registers
-- xmm0 .. xmm15, here 10 is a
-- "dont need to solve conflicts" count that
-- was chosen at some point in the past.
ArchPPC -> 26 ArchPPC -> 26
ArchSPARC -> 11 ArchSPARC -> 11
ArchSPARC64 -> panic "trivColorable ArchSPARC64" ArchSPARC64 -> panic "trivColorable ArchSPARC64"
...@@ -183,31 +193,7 @@ trivColorable platform virtualRegSqueeze realRegSqueeze RcDouble conflicts exclu ...@@ -183,31 +193,7 @@ trivColorable platform virtualRegSqueeze realRegSqueeze RcDouble conflicts exclu
= count3 < cALLOCATABLE_REGS_DOUBLE = count3 < cALLOCATABLE_REGS_DOUBLE
trivColorable platform virtualRegSqueeze realRegSqueeze RcDoubleSSE conflicts exclusions
| let cALLOCATABLE_REGS_SSE
= (case platformArch platform of
ArchX86 -> 8
ArchX86_64 -> 10
ArchPPC -> 0
ArchSPARC -> 0
ArchSPARC64 -> panic "trivColorable ArchSPARC64"
ArchPPC_64 _ -> 0
ArchARM _ _ _ -> panic "trivColorable ArchARM"
ArchARM64 -> panic "trivColorable ArchARM64"
ArchAlpha -> panic "trivColorable ArchAlpha"
ArchMipseb -> panic "trivColorable ArchMipseb"
ArchMipsel -> panic "trivColorable ArchMipsel"
ArchJavaScript-> panic "trivColorable ArchJavaScript"
ArchUnknown -> panic "trivColorable ArchUnknown")
, count2 <- accSqueeze 0 cALLOCATABLE_REGS_SSE
(virtualRegSqueeze RcDoubleSSE)
conflicts
, count3 <- accSqueeze count2 cALLOCATABLE_REGS_SSE
(realRegSqueeze RcDoubleSSE)
exclusions
= count3 < cALLOCATABLE_REGS_SSE
-- Specification Code ---------------------------------------------------------- -- Specification Code ----------------------------------------------------------
......
...@@ -18,7 +18,6 @@ data RegClass ...@@ -18,7 +18,6 @@ data RegClass
= RcInteger = RcInteger
| RcFloat | RcFloat
| RcDouble | RcDouble
| RcDoubleSSE -- x86 only: the SSE regs are a separate class
deriving Eq deriving Eq
...@@ -26,10 +25,8 @@ instance Uniquable RegClass where ...@@ -26,10 +25,8 @@ instance Uniquable RegClass where
getUnique RcInteger = mkRegClassUnique 0 getUnique RcInteger = mkRegClassUnique 0
getUnique RcFloat = mkRegClassUnique 1 getUnique RcFloat = mkRegClassUnique 1
getUnique RcDouble = mkRegClassUnique 2 getUnique RcDouble = mkRegClassUnique 2
getUnique RcDoubleSSE = mkRegClassUnique 3
instance Outputable RegClass where instance Outputable RegClass where
ppr RcInteger = Outputable.text "I" ppr RcInteger = Outputable.text "I"
ppr RcFloat = Outputable.text "F" ppr RcFloat = Outputable.text "F"
ppr RcDouble = Outputable.text "D" ppr RcDouble = Outputable.text "D"
ppr RcDoubleSSE = Outputable.text "S"
...@@ -384,7 +384,6 @@ sparc_mkSpillInstr dflags reg _ slot ...@@ -384,7 +384,6 @@ sparc_mkSpillInstr dflags reg _ slot
RcInteger -> II32 RcInteger -> II32
RcFloat -> FF32 RcFloat -> FF32
RcDouble -> FF64 RcDouble -> FF64
_ -> panic "sparc_mkSpillInstr"
in ST fmt reg (fpRel (negate off_w)) in ST fmt reg (fpRel (negate off_w))
...@@ -405,7 +404,6 @@ sparc_mkLoadInstr dflags reg _ slot ...@@ -405,7 +404,6 @@ sparc_mkLoadInstr dflags reg _ slot
RcInteger -> II32 RcInteger -> II32
RcFloat -> FF32 RcFloat -> FF32
RcDouble -> FF64 RcDouble -> FF64
_ -> panic "sparc_mkLoadInstr"
in LD fmt (fpRel (- off_w)) reg in LD fmt (fpRel (- off_w)) reg
...@@ -454,7 +452,6 @@ sparc_mkRegRegMoveInstr platform src dst ...@@ -454,7 +452,6 @@ sparc_mkRegRegMoveInstr platform src dst
RcInteger -> ADD False False src (RIReg g0) dst RcInteger -> ADD False False src (RIReg g0) dst
RcDouble -> FMOV FF64 src dst RcDouble -> FMOV FF64 src dst
RcFloat -> FMOV FF32 src dst RcFloat -> FMOV FF32 src dst
_ -> panic "sparc_mkRegRegMoveInstr"
| otherwise | otherwise
= panic "SPARC.Instr.mkRegRegMoveInstr: classes of src and dest not the same" = panic "SPARC.Instr.mkRegRegMoveInstr: classes of src and dest not the same"
......
...@@ -143,7 +143,7 @@ pprReg reg ...@@ -143,7 +143,7 @@ pprReg reg
VirtualRegHi u -> text "%vHi_" <> pprUniqueAlways u VirtualRegHi u -> text "%vHi_" <> pprUniqueAlways u
VirtualRegF u -> text "%vF_" <> pprUniqueAlways u VirtualRegF u -> text "%vF_" <> pprUniqueAlways u
VirtualRegD u -> text "%vD_" <> pprUniqueAlways u VirtualRegD u -> text "%vD_" <> pprUniqueAlways u
VirtualRegSSE u -> text "%vSSE_" <> pprUniqueAlways u
RegReal rr RegReal rr
-> case rr of -> case rr of
...@@ -211,8 +211,7 @@ pprFormat x ...@@ -211,8 +211,7 @@ pprFormat x
II32 -> sLit "" II32 -> sLit ""
II64 -> sLit "d" II64 -> sLit "d"
FF32 -> sLit "" FF32 -> sLit ""
FF64 -> sLit "d" FF64 -> sLit "d")
_ -> panic "SPARC.Ppr.pprFormat: no match")
-- | Pretty print a format for an instruction suffix. -- | Pretty print a format for an instruction suffix.
...@@ -226,8 +225,8 @@ pprStFormat x ...@@ -226,8 +225,8 @@ pprStFormat x
II32 -> sLit "" II32 -> sLit ""
II64 -> sLit "x" II64 -> sLit "x"
FF32 -> sLit "" FF32 -> sLit ""
FF64 -> sLit "d" FF64 -> sLit "d")
_ -> panic "SPARC.Ppr.pprFormat: no match")
-- | Pretty print a condition code. -- | Pretty print a condition code.
......
...@@ -104,7 +104,6 @@ virtualRegSqueeze cls vr ...@@ -104,7 +104,6 @@ virtualRegSqueeze cls vr
VirtualRegD{} -> 1 VirtualRegD{} -> 1
_other -> 0 _other -> 0
_other -> 0
{-# INLINE realRegSqueeze #-} {-# INLINE realRegSqueeze #-}
realRegSqueeze :: RegClass -> RealReg -> Int realRegSqueeze :: RegClass -> RealReg -> Int
...@@ -135,7 +134,6 @@ realRegSqueeze cls rr ...@@ -135,7 +134,6 @@ realRegSqueeze cls rr
RealRegPair{} -> 1 RealRegPair{} -> 1
_other -> 0
-- | All the allocatable registers in the machine, -- | All the allocatable registers in the machine,
-- including register pairs. -- including register pairs.
......
...@@ -98,17 +98,25 @@ is32BitPlatform = do ...@@ -98,17 +98,25 @@ is32BitPlatform = do
sse2Enabled :: NatM Bool sse2Enabled :: NatM Bool
sse2Enabled = do sse2Enabled = do
dflags <- getDynFlags dflags <- getDynFlags
return (isSse2Enabled dflags) case platformArch (targetPlatform dflags) of
-- We Assume SSE1 and SSE2 operations are available on both
-- x86 and x86_64. Historically we didn't default to SSE2 and
-- SSE1 on x86, which results in defacto nondeterminism for how
-- rounding behaves in the associated x87 floating point instructions
-- because variations in the spill/fpu stack placement of arguments for
-- operations would change the precision and final result of what
-- would otherwise be the same expressions with respect to single or
-- double precision IEEE floating point computations.
ArchX86_64 -> return True
ArchX86 -> return True
_ -> panic "trying to generate x86/x86_64 on the wrong platform"
sse4_2Enabled :: NatM Bool sse4_2Enabled :: NatM Bool
sse4_2Enabled = do sse4_2Enabled = do
dflags <- getDynFlags dflags <- getDynFlags
return (isSse4_2Enabled dflags) return (isSse4_2Enabled dflags)
if_sse2 :: NatM a -> NatM a -> NatM a
if_sse2 sse2 x87 = do
b <- sse2Enabled
if b then sse2 else x87
cmmTopCodeGen cmmTopCodeGen
:: RawCmmDecl :: RawCmmDecl
...@@ -284,15 +292,14 @@ swizzleRegisterRep (Any _ codefn) format = Any format codefn ...@@ -284,15 +292,14 @@ swizzleRegisterRep (Any _ codefn) format = Any format codefn
-- | Grab the Reg for a CmmReg -- | Grab the Reg for a CmmReg
getRegisterReg :: Platform -> Bool -> CmmReg -> Reg getRegisterReg :: Platform -> CmmReg -> Reg
getRegisterReg _ use_sse2 (CmmLocal (LocalReg u pk)) getRegisterReg _ (CmmLocal (LocalReg u pk))
= let fmt = cmmTypeFormat pk in = -- by Assuming SSE2, Int,Word,Float,Double all can be register allocated
if isFloatFormat fmt && not use_sse2 let fmt = cmmTypeFormat pk in
then RegVirtual (mkVirtualReg u FF80) RegVirtual (mkVirtualReg u fmt)
else RegVirtual (mkVirtualReg u fmt)
getRegisterReg platform _ (CmmGlobal mid) getRegisterReg platform (CmmGlobal mid)
= case globalRegMaybe platform mid of = case globalRegMaybe platform mid of
Just reg -> RegReal $ reg Just reg -> RegReal $ reg
Nothing -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid) Nothing -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid)
...@@ -513,15 +520,14 @@ getRegister' dflags is32Bit (CmmReg reg) ...@@ -513,15 +520,14 @@ getRegister' dflags is32Bit (CmmReg reg)
do reg' <- getPicBaseNat (archWordFormat is32Bit) do reg' <- getPicBaseNat (archWordFormat is32Bit)
return (Fixed (archWordFormat is32Bit) reg' nilOL) return (Fixed (archWordFormat is32Bit) reg' nilOL)
_ -> _ ->
do use_sse2 <- sse2Enabled do
let let
fmt = cmmTypeFormat (cmmRegType dflags reg) fmt = cmmTypeFormat (cmmRegType dflags reg)
format | not use_sse2 && isFloatFormat fmt = FF80 format = fmt
| otherwise = fmt
-- --
let platform = targetPlatform dflags let platform = targetPlatform dflags
return (Fixed format return (Fixed format
(getRegisterReg platform use_sse2 reg) (getRegisterReg platform reg)
nilOL) nilOL)
...@@ -557,8 +563,7 @@ getRegister' _ is32Bit (CmmMachOp (MO_SS_Conv W64 W32) [x]) ...@@ -557,8 +563,7 @@ getRegister' _ is32Bit (CmmMachOp (MO_SS_Conv W64 W32) [x])
return $ Fixed II32 rlo code return $ Fixed II32 rlo code
getRegister' _ _ (CmmLit lit@(CmmFloat f w)) = getRegister' _ _ (CmmLit lit@(CmmFloat f w)) =
if_sse2 float_const_sse2 float_const_x87 float_const_sse2 where
where
float_const_sse2 float_const_sse2
| f == 0.0 = do | f == 0.0 = do
let let
...@@ -570,21 +575,7 @@ getRegister' _ _ (CmmLit lit@(CmmFloat f w)) = ...@@ -570,21 +575,7 @@ getRegister' _ _ (CmmLit lit@(CmmFloat f w)) =
| otherwise = do | otherwise = do
Amode addr code <- memConstant (mkAlignment $ widthInBytes w) lit Amode addr code <- memConstant (mkAlignment $ widthInBytes w) lit
loadFloatAmode True w addr code loadFloatAmode w addr code
float_const_x87 = case w of
W64
| f == 0.0 ->
let code dst = unitOL (GLDZ dst)
in return (Any FF80 code)
| f == 1.0 ->
let code dst = unitOL (GLD1 dst)
in return (Any FF80 code)
_otherwise -> do
Amode addr code <- memConstant (mkAlignment $ widthInBytes w) lit
loadFloatAmode False w addr code
-- catch simple cases of zero- or sign-extended load -- catch simple cases of zero- or sign-extended load
getRegister' _ _ (CmmMachOp (MO_UU_Conv W8 W32) [CmmLoad addr _]) = do getRegister' _ _ (CmmMachOp (MO_UU_Conv W8 W32) [CmmLoad addr _]) = do
...@@ -641,11 +632,9 @@ getRegister' _ is32Bit (CmmMachOp (MO_Add W64) [CmmReg (CmmGlobal PicBaseReg), ...@@ -641,11 +632,9 @@ getRegister' _ is32Bit (CmmMachOp (MO_Add W64) [CmmReg (CmmGlobal PicBaseReg),
LEA II64 (OpAddr (ripRel (litToImm displacement))) (OpReg dst)) LEA II64 (OpAddr (ripRel (litToImm displacement))) (OpReg dst))
getRegister' dflags is32Bit (CmmMachOp mop [x]) = do -- unary MachOps getRegister' dflags is32Bit (CmmMachOp mop [x]) = do -- unary MachOps
sse2 <- sse2Enabled
case mop of case mop of
MO_F_Neg w MO_F_Neg w -> sse2NegCode w x
| sse2 -> sse2NegCode w x
| otherwise -> trivialUFCode FF80 (GNEG FF80) x
MO_S_Neg w -> triv_ucode NEGI (intFormat w) MO_S_Neg w -> triv_ucode NEGI (intFormat w)
MO_Not w -> triv_ucode NOT (intFormat w) MO_Not w -> triv_ucode NOT (intFormat w)
...@@ -711,9 +700,8 @@ getRegister' dflags is32Bit (CmmMachOp mop [x]) = do -- unary MachOps ...@@ -711,9 +700,8 @@ getRegister' dflags is32Bit (CmmMachOp mop [x]) = do -- unary MachOps
MO_XX_Conv W16 W64 | not is32Bit -> integerExtend W16 W64 MOV x MO_XX_Conv W16 W64 | not is32Bit -> integerExtend W16 W64 MOV x
MO_XX_Conv W32 W64 | not is32Bit -> integerExtend W32 W64 MOV x MO_XX_Conv W32 W64 | not is32Bit -> integerExtend W32 W64 MOV x
MO_FF_Conv W32 W64 MO_FF_Conv W32 W64 -> coerceFP2FP W64 x
| sse2 -> coerceFP2FP W64 x
| otherwise -> conversionNop FF80 x
MO_FF_Conv W64 W32 -> coerceFP2FP W32 x MO_FF_Conv W64 W32 -> coerceFP2FP W32 x
...@@ -776,7 +764,6 @@ getRegister' dflags is32Bit (CmmMachOp mop [x]) = do -- unary MachOps ...@@ -776,7 +764,6 @@ getRegister' dflags is32Bit (CmmMachOp mop [x]) = do -- unary MachOps
getRegister' _ is32Bit (CmmMachOp mop [x, y]) = do -- dyadic MachOps getRegister' _ is32Bit (CmmMachOp mop [x, y]) = do -- dyadic MachOps
sse2 <- sse2Enabled
case mop of case mop of
MO_F_Eq _ -> condFltReg is32Bit EQQ x y MO_F_Eq _ -> condFltReg is32Bit EQQ x y
MO_F_Ne _ -> condFltReg is32Bit NE x y MO_F_Ne _ -> condFltReg is32Bit NE x y
...@@ -800,14 +787,14 @@ getRegister' _ is32Bit (CmmMachOp mop [x, y]) = do -- dyadic MachOps ...@@ -800,14 +787,14 @@ getRegister' _ is32Bit (CmmMachOp mop [x, y]) = do -- dyadic MachOps
MO_U_Lt _ -> condIntReg LU x y MO_U_Lt _ -> condIntReg LU x y
MO_U_Le _ -> condIntReg LEU x y MO_U_Le _ -> condIntReg LEU x y
MO_F_Add w | sse2 -> trivialFCode_sse2 w ADD x y MO_F_Add w -> trivialFCode_sse2 w ADD x y
| otherwise -> trivialFCode_x87 GADD x y
MO_F_Sub w | sse2 -> trivialFCode_sse2 w SUB x y MO_F_Sub w -> trivialFCode_sse2 w SUB x y
| otherwise -> trivialFCode_x87 GSUB x y
MO_F_Quot w | sse2 -> trivialFCode_sse2 w FDIV x y MO_F_Quot w -> trivialFCode_sse2 w FDIV x y
| otherwise -> trivialFCode_x87 GDIV x y
MO_F_Mul w | sse2 -> trivialFCode_sse2 w MUL x y MO_F_Mul w -> trivialFCode_sse2 w MUL x y
| otherwise -> trivialFCode_x87 GMUL x y
MO_Add rep -> add_code rep x y MO_Add rep -> add_code rep x y
MO_Sub rep -> sub_code rep x y MO_Sub rep -> sub_code rep x y
...@@ -1001,8 +988,7 @@ getRegister' _ _ (CmmLoad mem pk) ...@@ -1001,8 +988,7 @@ getRegister' _ _ (CmmLoad mem pk)
| isFloatType pk | isFloatType pk
= do = do
Amode addr mem_code <- getAmode mem Amode addr mem_code <- getAmode mem
use_sse2 <- sse2Enabled loadFloatAmode (typeWidth pk) addr mem_code
loadFloatAmode use_sse2 (typeWidth pk) addr mem_code
getRegister' _ is32Bit (CmmLoad mem pk) getRegister' _ is32Bit (CmmLoad mem pk)
| is32Bit && not (isWord64 pk) | is32Bit && not (isWord64 pk)
...@@ -1132,9 +1118,7 @@ getNonClobberedReg expr = do ...@@ -1132,9 +1118,7 @@ getNonClobberedReg expr = do
return (reg, code) return (reg, code)
reg2reg :: Format -> Reg -> Reg -> Instr reg2reg :: Format -> Reg -> Reg -> Instr
reg2reg format src dst reg2reg format src dst = MOV format (OpReg src) (OpReg dst)
| format == FF80 = GMOV src dst
| otherwise = MOV format (OpReg src) (OpReg dst)
-------------------------------------------------------------------------------- --------------------------------------------------------------------------------
...@@ -1243,8 +1227,7 @@ x86_complex_amode base index shift offset ...@@ -1243,8 +1227,7 @@ x86_complex_amode base index shift offset
getNonClobberedOperand :: CmmExpr -> NatM (Operand, InstrBlock) getNonClobberedOperand :: CmmExpr -> NatM (Operand, InstrBlock)
getNonClobberedOperand (CmmLit lit) = do getNonClobberedOperand (CmmLit lit) = do
use_sse2 <- sse2Enabled if isSuitableFloatingPointLit lit
if use_sse2 && isSuitableFloatingPointLit lit
then do then do
let CmmFloat _ w = lit let CmmFloat _ w = lit
Amode addr code <- memConstant (mkAlignment $ widthInBytes w) lit Amode addr code <- memConstant (mkAlignment $ widthInBytes w) lit
...@@ -1259,9 +1242,12 @@ getNonClobberedOperand (CmmLit lit) = do ...@@ -1259,9 +1242,12 @@ getNonClobberedOperand (CmmLit lit) = do
getNonClobberedOperand (CmmLoad mem pk) = do getNonClobberedOperand (CmmLoad mem pk) = do
is32Bit <- is32BitPlatform is32Bit <- is32BitPlatform
use_sse2 <- sse2Enabled -- this logic could be simplified
if (not (isFloatType pk) || use_sse2) -- TODO FIXME
&& (if is32Bit then not (isWord64 pk) else True) if (if is32Bit then not (isWord64 pk) else True)
-- if 32bit and pk is at float/double/simd value
-- or if 64bit
-- this could use some eyeballs or i'll need to stare at it more later
then do then do
dflags <- getDynFlags dflags <- getDynFlags
let platform = targetPlatform dflags let platform = targetPlatform dflags
...@@ -1278,6 +1264,7 @@ getNonClobberedOperand (CmmLoad mem pk) = do ...@@ -1278,6 +1264,7 @@ getNonClobberedOperand (CmmLoad mem pk) = do
return (src, nilOL) return (src, nilOL)
return (OpAddr src', mem_code `appOL` save_code) return (OpAddr src', mem_code `appOL` save_code)
else do else do
-- if its a word or gcptr on 32bit?
getNonClobberedOperand_generic (CmmLoad mem pk) getNonClobberedOperand_generic (CmmLoad mem pk)
getNonClobberedOperand e = getNonClobberedOperand_generic e getNonClobberedOperand e = getNonClobberedOperand_generic e
...@@ -1370,14 +1357,13 @@ memConstant align lit = do ...@@ -1370,14 +1357,13 @@ memConstant align lit = do
return (Amode addr code) return (Amode addr code)
loadFloatAmode :: Bool -> Width -> AddrMode -> InstrBlock -> NatM Register loadFloatAmode :: Width -> AddrMode -> InstrBlock -> NatM Register
loadFloatAmode use_sse2 w addr addr_code = do loadFloatAmode w addr addr_code = do
let format = floatFormat w let format = floatFormat w
code dst = addr_code `snocOL` code dst = addr_code `snocOL`
if use_sse2 MOV format (OpAddr addr) (OpReg dst)
then MOV format (OpAddr addr) (OpReg dst)
else GLD format addr dst return (Any format code)
return (Any (if use_sse2 then format else FF80) code)
-- if we want a floating-point literal as an operand, we can -- if we want a floating-point literal as an operand, we can
...@@ -1538,19 +1524,9 @@ condIntCode' _ cond x y = do ...@@ -1538,19 +1524,9 @@ condIntCode' _ cond x y = do
condFltCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode condFltCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode
condFltCode cond x y condFltCode cond x y
= if_sse2 condFltCode_sse2 condFltCode_x87 = condFltCode_sse2
where where
condFltCode_x87
= ASSERT(cond `elem` ([EQQ, NE, LE, LTT, GE, GTT])) do
(x_reg, x_code) <- getNonClobberedReg x
(y_reg, y_code) <- getSomeReg y
let
code = x_code `appOL` y_code `snocOL`
GCMP cond x_reg y_reg
-- The GCMP insn does the test and sets the zero flag if comparable
-- and true. Hence we always supply EQQ as the condition to test.
return (CondCode True EQQ code)
-- in the SSE2 comparison ops (ucomiss, ucomisd) the left arg may be -- in the SSE2 comparison ops (ucomiss, ucomisd) the left arg may be
-- an operand, but the right must be a reg. We can probably do better -- an operand, but the right must be a reg. We can probably do better
...@@ -1634,35 +1610,33 @@ assignReg_IntCode pk reg (CmmLoad src _) = do ...@@ -1634,35 +1610,33 @@ assignReg_IntCode pk reg (CmmLoad src _) = do
load_code <- intLoadCode (MOV pk) src load_code <- intLoadCode (MOV pk) src
dflags <- getDynFlags dflags <- getDynFlags
let platform = targetPlatform dflags let platform = targetPlatform dflags
return (load_code (getRegisterReg platform False{-no sse2-} reg)) return (load_code (getRegisterReg platform reg))
-- dst is a reg, but src could be anything -- dst is a reg, but src could be anything
assignReg_IntCode _ reg src = do assignReg_IntCode _ reg src = do
dflags <- getDynFlags dflags <- getDynFlags
let platform = targetPlatform dflags let platform = targetPlatform dflags
code <- getAnyReg src code <- getAnyReg src
return (code (getRegisterReg platform False{-no sse2-} reg)) return (code (getRegisterReg platform reg))
-- Floating point assignment to memory -- Floating point assignment to memory
assignMem_FltCode pk addr src = do assignMem_FltCode pk addr src = do
(src_reg, src_code) <- getNonClobberedReg src (src_reg, src_code) <- getNonClobberedReg src
Amode addr addr_code <- getAmode addr Amode addr addr_code <- getAmode addr
use_sse2 <- sse2Enabled
let let
code = src_code `appOL` code = src_code `appOL`
addr_code `snocOL` addr_code `snocOL`
if use_sse2 then MOV pk (OpReg src_reg) (OpAddr addr) MOV pk (OpReg src_reg) (OpAddr addr)
else GST pk src_reg addr
return code return code
-- Floating point assignment to a register/temporary -- Floating point assignment to a register/temporary
assignReg_FltCode _ reg src = do assignReg_FltCode _ reg src = do
use_sse2 <- sse2Enabled
src_code <- getAnyReg src src_code <- getAnyReg src
dflags <- getDynFlags dflags <- getDynFlags
let platform = targetPlatform dflags let platform = targetPlatform dflags
return (src_code (getRegisterReg platform use_sse2 reg)) return (src_code (getRegisterReg platform reg))
genJump :: CmmExpr{-the branch target-} -> [Reg] -> NatM InstrBlock genJump :: CmmExpr{-the branch target-} -> [Reg] -> NatM InstrBlock
...@@ -1945,7 +1919,7 @@ genCCall _ is32bit (PrimTarget (MO_Prefetch_Data n )) _ [src] _ = ...@@ -1945,7 +1919,7 @@ genCCall _ is32bit (PrimTarget (MO_Prefetch_Data n )) _ [src] _ =
genCCall dflags is32Bit (PrimTarget (MO_BSwap width)) [dst] [src] _ = do genCCall dflags is32Bit (PrimTarget (MO_BSwap width)) [dst] [src] _ = do
let platform = targetPlatform dflags let platform = targetPlatform dflags
let dst_r = getRegisterReg platform False (CmmLocal dst) let dst_r = getRegisterReg platform (CmmLocal dst)
case width of case width of
W64 | is32Bit -> do W64 | is32Bit -> do
ChildCode64 vcode rlo <- iselExpr64 src ChildCode64 vcode rlo <- iselExpr64 src
...@@ -1972,7 +1946,7 @@ genCCall dflags is32Bit (PrimTarget (MO_PopCnt width)) dest_regs@[dst] ...@@ -1972,7 +1946,7 @@ genCCall dflags is32Bit (PrimTarget (MO_PopCnt width)) dest_regs@[dst]
if sse4_2 if sse4_2
then do code_src <- getAnyReg src then do code_src <- getAnyReg src
src_r <- getNewRegNat format src_r <- getNewRegNat format
let dst_r = getRegisterReg platform False (CmmLocal dst) let dst_r = getRegisterReg platform (CmmLocal dst)
return $ code_src src_r `appOL` return $ code_src src_r `appOL`
(if width == W8 then (if width == W8 then
-- The POPCNT instruction doesn't take a r/m8 -- The POPCNT instruction doesn't take a r/m8
...@@ -2004,7 +1978,7 @@ genCCall dflags is32Bit (PrimTarget (MO_Pdep width)) dest_regs@[dst] ...@@ -2004,7 +1978,7 @@ genCCall dflags is32Bit (PrimTarget (MO_Pdep width)) dest_regs@[dst]
code_mask <- getAnyReg mask code_mask <- getAnyReg mask
src_r <- getNewRegNat format src_r <- getNewRegNat format
mask_r <- getNewRegNat format mask_r <- getNewRegNat format
let dst_r = getRegisterReg platform False (CmmLocal dst) let dst_r = getRegisterReg platform (CmmLocal dst)
return $ code_src src_r `appOL` code_mask mask_r `appOL` return $ code_src src_r `appOL` code_mask mask_r `appOL`
(if width == W8 then (if width == W8 then
-- The PDEP instruction doesn't take a r/m8 -- The PDEP instruction doesn't take a r/m8
...@@ -2037,7 +2011,7 @@ genCCall dflags is32Bit (PrimTarget (MO_Pext width)) dest_regs@[dst] ...@@ -2037,7 +2011,7 @@ genCCall dflags is32Bit (PrimTarget (MO_Pext width)) dest_regs@[dst]
code_mask <- getAnyReg mask code_mask <- getAnyReg mask
src_r <- getNewRegNat format src_r <- getNewRegNat format
mask_r <- getNewRegNat format mask_r <- getNewRegNat format
let dst_r = getRegisterReg platform False (CmmLocal dst) let dst_r = getRegisterReg platform (CmmLocal dst)
return $ code_src src_r `appOL` code_mask mask_r `appOL` return $ code_src src_r `appOL` code_mask mask_r `appOL`
(if width == W8 then (if width == W8 then
-- The PEXT instruction doesn't take a r/m8 -- The PEXT instruction doesn't take a r/m8
...@@ -2073,7 +2047,7 @@ genCCall dflags is32Bit (PrimTarget (MO_Clz width)) dest_regs@[dst] args@[src] b ...@@ -2073,7 +2047,7 @@ genCCall dflags is32Bit (PrimTarget (MO_Clz width)) dest_regs@[dst] args@[src] b
| otherwise = do | otherwise = do
code_src <- getAnyReg src code_src <- getAnyReg src
let dst_r = getRegisterReg platform False (CmmLocal dst) let dst_r = getRegisterReg platform (CmmLocal dst)
if isBmi2Enabled dflags if isBmi2Enabled dflags
then do then do
src_r <- getNewRegNat (intFormat width) src_r <- getNewRegNat (intFormat width)
...@@ -2110,7 +2084,7 @@ genCCall dflags is32Bit (PrimTarget (MO_Ctz width)) [dst] [src] bid ...@@ -2110,7 +2084,7 @@ genCCall dflags is32Bit (PrimTarget (MO_Ctz width)) [dst] [src] bid
| is32Bit, width == W64 = do | is32Bit, width == W64 = do
ChildCode64 vcode rlo <- iselExpr64 src ChildCode64 vcode rlo <- iselExpr64 src
let rhi = getHiVRegFromLo rlo let rhi = getHiVRegFromLo rlo
dst_r = getRegisterReg platform False (CmmLocal dst) dst_r = getRegisterReg platform (CmmLocal dst)
lbl1 <- getBlockIdNat lbl1 <- getBlockIdNat
lbl2 <- getBlockIdNat lbl2 <- getBlockIdNat
let format = if width == W8 then II16 else intFormat width let format = if width == W8 then II16 else intFormat width
...@@ -2150,7 +2124,7 @@ genCCall dflags is32Bit (PrimTarget (MO_Ctz width)) [dst] [src] bid ...@@ -2150,7 +2124,7 @@ genCCall dflags is32Bit (PrimTarget (MO_Ctz width)) [dst] [src] bid
| otherwise = do | otherwise = do
code_src <- getAnyReg src code_src <- getAnyReg src
let dst_r = getRegisterReg platform False (CmmLocal dst) let dst_r = getRegisterReg platform (CmmLocal dst)
if isBmi2Enabled dflags if isBmi2Enabled dflags
then do then do
...@@ -2201,9 +2175,8 @@ genCCall dflags is32Bit (PrimTarget (MO_AtomicRMW width amop)) ...@@ -2201,9 +2175,8 @@ genCCall dflags is32Bit (PrimTarget (MO_AtomicRMW width amop))
else getSimpleAmode dflags is32Bit addr -- See genCCall for MO_Cmpxchg else getSimpleAmode dflags is32Bit addr -- See genCCall for MO_Cmpxchg
arg <- getNewRegNat format arg <- getNewRegNat format
arg_code <- getAnyReg n arg_code <- getAnyReg n
use_sse2 <- sse2Enabled
let platform = targetPlatform dflags let platform = targetPlatform dflags
dst_r = getRegisterReg platform use_sse2 (CmmLocal dst) dst_r = getRegisterReg platform (CmmLocal dst)
code <- op_code dst_r arg amode code <- op_code dst_r arg amode
return $ addr_code `appOL` arg_code arg `appOL` code return $ addr_code `appOL` arg_code arg `appOL` code
where where
...@@ -2260,8 +2233,8 @@ genCCall dflags is32Bit (PrimTarget (MO_AtomicRMW width amop)) ...@@ -2260,8 +2233,8 @@ genCCall dflags is32Bit (PrimTarget (MO_AtomicRMW width amop))
genCCall dflags _ (PrimTarget (MO_AtomicRead width)) [dst] [addr] _ = do genCCall dflags _ (PrimTarget (MO_AtomicRead width)) [dst] [addr] _ = do
load_code <- intLoadCode (MOV (intFormat width)) addr load_code <- intLoadCode (MOV (intFormat width)) addr
let platform = targetPlatform dflags let platform = targetPlatform dflags
use_sse2 <- sse2Enabled
return (load_code (getRegisterReg platform use_sse2 (CmmLocal dst))) return (load_code (getRegisterReg platform (CmmLocal dst)))
genCCall _ _ (PrimTarget (MO_AtomicWrite width)) [] [addr, val] _ = do genCCall _ _ (PrimTarget (MO_AtomicWrite width)) [] [addr, val] _ = do
code <- assignMem_IntCode (intFormat width) addr val code <- assignMem_IntCode (intFormat width) addr val
...@@ -2276,9 +2249,8 @@ genCCall dflags is32Bit (PrimTarget (MO_Cmpxchg width)) [dst] [addr, old, new] _ ...@@ -2276,9 +2249,8 @@ genCCall dflags is32Bit (PrimTarget (MO_Cmpxchg width)) [dst] [addr, old, new] _
newval_code <- getAnyReg new newval_code <- getAnyReg new
oldval <- getNewRegNat format oldval <- getNewRegNat format
oldval_code <- getAnyReg old oldval_code <- getAnyReg old
use_sse2 <- sse2Enabled
let platform = targetPlatform dflags let platform = targetPlatform dflags
dst_r = getRegisterReg platform use_sse2 (CmmLocal dst) dst_r = getRegisterReg platform (CmmLocal dst)
code = toOL code = toOL
[ MOV format (OpReg oldval) (OpReg eax) [ MOV format (OpReg oldval) (OpReg eax)
, LOCK (CMPXCHG format (OpReg newval) (OpAddr amode)) , LOCK (CMPXCHG format (OpReg newval) (OpAddr amode))
...@@ -2292,14 +2264,12 @@ genCCall dflags is32Bit (PrimTarget (MO_Cmpxchg width)) [dst] [addr, old, new] _ ...@@ -2292,14 +2264,12 @@ genCCall dflags is32Bit (PrimTarget (MO_Cmpxchg width)) [dst] [addr, old, new] _
genCCall _ is32Bit target dest_regs args bid = do genCCall _ is32Bit target dest_regs args bid = do
dflags <- getDynFlags dflags <- getDynFlags
let platform = targetPlatform dflags let platform = targetPlatform dflags
sse2 = isSse2Enabled dflags
case (target, dest_regs) of case (target, dest_regs) of
-- void return type prim op -- void return type prim op
(PrimTarget op, []) -> (PrimTarget op, []) ->
outOfLineCmmOp bid op Nothing args outOfLineCmmOp bid op Nothing args
-- we only cope with a single result for foreign calls -- we only cope with a single result for foreign calls
(PrimTarget op, [r]) (PrimTarget op, [r]) -> case op of
| sse2 -> case op of
MO_F32_Fabs -> case args of MO_F32_Fabs -> case args of
[x] -> sse2FabsCode W32 x [x] -> sse2FabsCode W32 x
_ -> panic "genCCall: Wrong number of arguments for fabs" _ -> panic "genCCall: Wrong number of arguments for fabs"
...@@ -2310,36 +2280,16 @@ genCCall _ is32Bit target dest_regs args bid = do ...@@ -2310,36 +2280,16 @@ genCCall _ is32Bit target dest_regs args bid = do
MO_F32_Sqrt -> actuallyInlineSSE2Op (\fmt r -> SQRT fmt (OpReg r)) FF32 args MO_F32_Sqrt -> actuallyInlineSSE2Op (\fmt r -> SQRT fmt (OpReg r)) FF32 args
MO_F64_Sqrt -> actuallyInlineSSE2Op (\fmt r -> SQRT fmt (OpReg r)) FF64 args MO_F64_Sqrt -> actuallyInlineSSE2Op (\fmt r -> SQRT fmt (OpReg r)) FF64 args
_other_op -> outOfLineCmmOp bid op (Just r) args _other_op -> outOfLineCmmOp bid op (Just r) args
| otherwise -> do
l1 <- getNewLabelNat
l2 <- getNewLabelNat
if sse2
then outOfLineCmmOp bid op (Just r) args
else case op of
MO_F32_Sqrt -> actuallyInlineFloatOp GSQRT FF32 args
MO_F64_Sqrt -> actuallyInlineFloatOp GSQRT FF64 args
MO_F32_Sin -> actuallyInlineFloatOp (\s -> GSIN s l1 l2) FF32 args
MO_F64_Sin -> actuallyInlineFloatOp (\s -> GSIN s l1 l2) FF64 args
MO_F32_Cos -> actuallyInlineFloatOp (\s -> GCOS s l1 l2) FF32 args
MO_F64_Cos -> actuallyInlineFloatOp (\s -> GCOS s l1 l2) FF64 args
MO_F32_Tan -> actuallyInlineFloatOp (\s -> GTAN s l1 l2) FF32 args
MO_F64_Tan -> actuallyInlineFloatOp (\s -> GTAN s l1 l2) FF64 args
_other_op -> outOfLineCmmOp bid op (Just r) args
where where
actuallyInlineFloatOp = actuallyInlineFloatOp' False actuallyInlineSSE2Op = actuallyInlineFloatOp'
actuallyInlineSSE2Op = actuallyInlineFloatOp' True
actuallyInlineFloatOp' usesSSE instr format [x] actuallyInlineFloatOp' instr format [x]
= do res <- trivialUFCode format (instr format) x = do res <- trivialUFCode format (instr format) x
any <- anyReg res any <- anyReg res
return (any (getRegisterReg platform usesSSE (CmmLocal r))) return (any (getRegisterReg platform (CmmLocal r)))
actuallyInlineFloatOp' _ _ _ args actuallyInlineFloatOp' _ _ args
= panic $ "genCCall.actuallyInlineFloatOp': bad number of arguments! (" = panic $ "genCCall.actuallyInlineFloatOp': bad number of arguments! ("
++ show (length args) ++ ")" ++ show (length args) ++ ")"
...@@ -2358,7 +2308,7 @@ genCCall _ is32Bit target dest_regs args bid = do ...@@ -2358,7 +2308,7 @@ genCCall _ is32Bit target dest_regs args bid = do
AND fmt (OpReg tmp) (OpReg dst) AND fmt (OpReg tmp) (OpReg dst)
] ]
return $ code (getRegisterReg platform True (CmmLocal r)) return $ code (getRegisterReg platform (CmmLocal r))
(PrimTarget (MO_S_QuotRem width), _) -> divOp1 platform True width dest_regs args (PrimTarget (MO_S_QuotRem width), _) -> divOp1 platform True width dest_regs args
(PrimTarget (MO_U_QuotRem width), _) -> divOp1 platform False width dest_regs args (PrimTarget (MO_U_QuotRem width), _) -> divOp1 platform False width dest_regs args
...@@ -2370,8 +2320,8 @@ genCCall _ is32Bit target dest_regs args bid = do ...@@ -2370,8 +2320,8 @@ genCCall _ is32Bit target dest_regs args bid = do
let format = intFormat width let format = intFormat width
lCode <- anyReg =<< trivialCode width (ADD_CC format) lCode <- anyReg =<< trivialCode width (ADD_CC format)
(Just (ADD_CC format)) arg_x arg_y (Just (ADD_CC format)) arg_x arg_y
let reg_l = getRegisterReg platform True (CmmLocal res_l) let reg_l = getRegisterReg platform (CmmLocal res_l)
reg_h = getRegisterReg platform True (CmmLocal res_h) reg_h = getRegisterReg platform (CmmLocal res_h)
code = hCode reg_h `appOL` code = hCode reg_h `appOL`
lCode reg_l `snocOL` lCode reg_l `snocOL`
ADC format (OpImm (ImmInteger 0)) (OpReg reg_h) ADC format (OpImm (ImmInteger 0)) (OpReg reg_h)
...@@ -2391,8 +2341,8 @@ genCCall _ is32Bit target dest_regs args bid = do ...@@ -2391,8 +2341,8 @@ genCCall _ is32Bit target dest_regs args bid = do
do (y_reg, y_code) <- getRegOrMem arg_y do (y_reg, y_code) <- getRegOrMem arg_y
x_code <- getAnyReg arg_x x_code <- getAnyReg arg_x
let format = intFormat width let format = intFormat width
reg_h = getRegisterReg platform True (CmmLocal res_h) reg_h = getRegisterReg platform (CmmLocal res_h)
reg_l = getRegisterReg platform True (CmmLocal res_l) reg_l = getRegisterReg platform (CmmLocal res_l)
code = y_code `appOL` code = y_code `appOL`
x_code rax `appOL` x_code rax `appOL`
toOL [MUL2 format y_reg, toOL [MUL2 format y_reg,
...@@ -2428,8 +2378,8 @@ genCCall _ is32Bit target dest_regs args bid = do ...@@ -2428,8 +2378,8 @@ genCCall _ is32Bit target dest_regs args bid = do
divOp platform signed width [res_q, res_r] divOp platform signed width [res_q, res_r]
m_arg_x_high arg_x_low arg_y m_arg_x_high arg_x_low arg_y
= do let format = intFormat width = do let format = intFormat width
reg_q = getRegisterReg platform True (CmmLocal res_q) reg_q = getRegisterReg platform (CmmLocal res_q)
reg_r = getRegisterReg platform True (CmmLocal res_r) reg_r = getRegisterReg platform (CmmLocal res_r)
widen | signed = CLTD format widen | signed = CLTD format
| otherwise = XOR format (OpReg rdx) (OpReg rdx) | otherwise = XOR format (OpReg rdx) (OpReg rdx)
instr | signed = IDIV instr | signed = IDIV
...@@ -2456,8 +2406,8 @@ genCCall _ is32Bit target dest_regs args bid = do ...@@ -2456,8 +2406,8 @@ genCCall _ is32Bit target dest_regs args bid = do
rCode <- anyReg =<< trivialCode width (instr format) rCode <- anyReg =<< trivialCode width (instr format)
(mrevinstr format) arg_x arg_y (mrevinstr format) arg_x arg_y
reg_tmp <- getNewRegNat II8 reg_tmp <- getNewRegNat II8
let reg_c = getRegisterReg platform True (CmmLocal res_c) let reg_c = getRegisterReg platform (CmmLocal res_c)
reg_r = getRegisterReg platform True (CmmLocal res_r) reg_r = getRegisterReg platform (CmmLocal res_r)
code = rCode reg_r `snocOL` code = rCode reg_r `snocOL`
SETCC cond (OpReg reg_tmp) `snocOL` SETCC cond (OpReg reg_tmp) `snocOL`
MOVZxL II8 (OpReg reg_tmp) (OpReg reg_c) MOVZxL II8 (OpReg reg_tmp) (OpReg reg_c)
...@@ -2501,8 +2451,7 @@ genCCall32' dflags target dest_regs args = do ...@@ -2501,8 +2451,7 @@ genCCall32' dflags target dest_regs args = do
delta0 <- getDeltaNat delta0 <- getDeltaNat
setDeltaNat (delta0 - arg_pad_size) setDeltaNat (delta0 - arg_pad_size)
use_sse2 <- sse2Enabled push_codes <- mapM push_arg (reverse prom_args)
push_codes <- mapM (push_arg use_sse2) (reverse prom_args)
delta <- getDeltaNat delta <- getDeltaNat
MASSERT(delta == delta0 - tot_arg_size) MASSERT(delta == delta0 - tot_arg_size)
...@@ -2555,18 +2504,21 @@ genCCall32' dflags target dest_regs args = do ...@@ -2555,18 +2504,21 @@ genCCall32' dflags target dest_regs args = do
assign_code [] = nilOL assign_code [] = nilOL
assign_code [dest] assign_code [dest]
| isFloatType ty = | isFloatType ty =
if use_sse2 -- we assume SSE2
then let tmp_amode = AddrBaseIndex (EABaseReg esp) let tmp_amode = AddrBaseIndex (EABaseReg esp)
EAIndexNone EAIndexNone
(ImmInt 0) (ImmInt 0)
fmt = floatFormat w fmt = floatFormat w
in toOL [ SUB II32 (OpImm (ImmInt b)) (OpReg esp), in toOL [ SUB II32 (OpImm (ImmInt b)) (OpReg esp),
DELTA (delta0 - b), DELTA (delta0 - b),
GST fmt fake0 tmp_amode, X87Store fmt tmp_amode,
-- X87Store only supported for the CDECL ABI
-- NB: This code will need to be
-- revisted once GHC does more work around
-- SIGFPE f
MOV fmt (OpAddr tmp_amode) (OpReg r_dest), MOV fmt (OpAddr tmp_amode) (OpReg r_dest),
ADD II32 (OpImm (ImmInt b)) (OpReg esp), ADD II32 (OpImm (ImmInt b)) (OpReg esp),
DELTA delta0] DELTA delta0]
else unitOL (GMOV fake0 r_dest)
| isWord64 ty = toOL [MOV II32 (OpReg eax) (OpReg r_dest), | isWord64 ty = toOL [MOV II32 (OpReg eax) (OpReg r_dest),
MOV II32 (OpReg edx) (OpReg r_dest_hi)] MOV II32 (OpReg edx) (OpReg r_dest_hi)]
| otherwise = unitOL (MOV (intFormat w) | otherwise = unitOL (MOV (intFormat w)
...@@ -2577,7 +2529,7 @@ genCCall32' dflags target dest_regs args = do ...@@ -2577,7 +2529,7 @@ genCCall32' dflags target dest_regs args = do
w = typeWidth ty w = typeWidth ty
b = widthInBytes w b = widthInBytes w
r_dest_hi = getHiVRegFromLo r_dest r_dest_hi = getHiVRegFromLo r_dest
r_dest = getRegisterReg platform use_sse2 (CmmLocal dest) r_dest = getRegisterReg platform (CmmLocal dest)
assign_code many = pprPanic "genCCall.assign_code - too many return values:" (ppr many) assign_code many = pprPanic "genCCall.assign_code - too many return values:" (ppr many)
return (push_code `appOL` return (push_code `appOL`
...@@ -2592,10 +2544,10 @@ genCCall32' dflags target dest_regs args = do ...@@ -2592,10 +2544,10 @@ genCCall32' dflags target dest_regs args = do
roundTo a x | x `mod` a == 0 = x roundTo a x | x `mod` a == 0 = x
| otherwise = x + a - (x `mod` a) | otherwise = x + a - (x `mod` a)
push_arg :: Bool -> CmmActual {-current argument-} push_arg :: CmmActual {-current argument-}
-> NatM InstrBlock -- code -> NatM InstrBlock -- code
push_arg use_sse2 arg -- we don't need the hints on x86 push_arg arg -- we don't need the hints on x86
| isWord64 arg_ty = do | isWord64 arg_ty = do
ChildCode64 code r_lo <- iselExpr64 arg ChildCode64 code r_lo <- iselExpr64 arg
delta <- getDeltaNat delta <- getDeltaNat
...@@ -2619,9 +2571,10 @@ genCCall32' dflags target dest_regs args = do ...@@ -2619,9 +2571,10 @@ genCCall32' dflags target dest_regs args = do
(ImmInt 0) (ImmInt 0)
format = floatFormat (typeWidth arg_ty) format = floatFormat (typeWidth arg_ty)
in in
if use_sse2
then MOV format (OpReg reg) (OpAddr addr) -- assume SSE2
else GST format reg addr MOV format (OpReg reg) (OpAddr addr)
] ]
) )
...@@ -2749,7 +2702,7 @@ genCCall64' dflags target dest_regs args = do ...@@ -2749,7 +2702,7 @@ genCCall64' dflags target dest_regs args = do
_ -> unitOL (MOV (cmmTypeFormat rep) (OpReg rax) (OpReg r_dest)) _ -> unitOL (MOV (cmmTypeFormat rep) (OpReg rax) (OpReg r_dest))
where where
rep = localRegType dest rep = localRegType dest
r_dest = getRegisterReg platform True (CmmLocal dest) r_dest = getRegisterReg platform (CmmLocal dest)
assign_code _many = panic "genCCall.assign_code many" assign_code _many = panic "genCCall.assign_code many"
return (adjust_rsp `appOL` return (adjust_rsp `appOL`
...@@ -3162,17 +3115,9 @@ condIntReg cond x y = do ...@@ -3162,17 +3115,9 @@ condIntReg cond x y = do
-- and plays better with the uOP cache. -- and plays better with the uOP cache.
condFltReg :: Bool -> Cond -> CmmExpr -> CmmExpr -> NatM Register condFltReg :: Bool -> Cond -> CmmExpr -> CmmExpr -> NatM Register
condFltReg is32Bit cond x y = if_sse2 condFltReg_sse2 condFltReg_x87 condFltReg is32Bit cond x y = condFltReg_sse2
where where
condFltReg_x87 = do
CondCode _ cond cond_code <- condFltCode cond x y
tmp <- getNewRegNat II8
let
code dst = cond_code `appOL` toOL [
SETCC cond (OpReg tmp),
MOVZxL II8 (OpReg tmp) (OpReg dst)
]
return (Any II32 code)
condFltReg_sse2 = do condFltReg_sse2 = do
CondCode _ cond cond_code <- condFltCode cond x y CondCode _ cond cond_code <- condFltCode cond x y
...@@ -3336,18 +3281,6 @@ trivialUCode rep instr x = do ...@@ -3336,18 +3281,6 @@ trivialUCode rep instr x = do
----------- -----------
trivialFCode_x87 :: (Format -> Reg -> Reg -> Reg -> Instr)
-> CmmExpr -> CmmExpr -> NatM Register
trivialFCode_x87 instr x y = do
(x_reg, x_code) <- getNonClobberedReg x -- these work for float regs too
(y_reg, y_code) <- getSomeReg y
let
format = FF80 -- always, on x87
code dst =
x_code `appOL`
y_code `snocOL`
instr format x_reg y_reg dst
return (Any format code)
trivialFCode_sse2 :: Width -> (Format -> Operand -> Operand -> Instr) trivialFCode_sse2 :: Width -> (Format -> Operand -> Operand -> Instr)
-> CmmExpr -> CmmExpr -> NatM Register -> CmmExpr -> CmmExpr -> NatM Register
...@@ -3368,17 +3301,8 @@ trivialUFCode format instr x = do ...@@ -3368,17 +3301,8 @@ trivialUFCode format instr x = do
-------------------------------------------------------------------------------- --------------------------------------------------------------------------------
coerceInt2FP :: Width -> Width -> CmmExpr -> NatM Register coerceInt2FP :: Width -> Width -> CmmExpr -> NatM Register
coerceInt2FP from to x = if_sse2 coerce_sse2 coerce_x87 coerceInt2FP from to x = coerce_sse2
where where
coerce_x87 = do
(x_reg, x_code) <- getSomeReg x
let
opc = case to of W32 -> GITOF; W64 -> GITOD;
n -> panic $ "coerceInt2FP.x87: unhandled width ("
++ show n ++ ")"
code dst = x_code `snocOL` opc x_reg dst
-- ToDo: works for non-II32 reps?
return (Any FF80 code)
coerce_sse2 = do coerce_sse2 = do
(x_op, x_code) <- getOperand x -- ToDo: could be a safe operand (x_op, x_code) <- getOperand x -- ToDo: could be a safe operand
...@@ -3392,18 +3316,8 @@ coerceInt2FP from to x = if_sse2 coerce_sse2 coerce_x87 ...@@ -3392,18 +3316,8 @@ coerceInt2FP from to x = if_sse2 coerce_sse2 coerce_x87
-------------------------------------------------------------------------------- --------------------------------------------------------------------------------
coerceFP2Int :: Width -> Width -> CmmExpr -> NatM Register coerceFP2Int :: Width -> Width -> CmmExpr -> NatM Register
coerceFP2Int from to x = if_sse2 coerceFP2Int_sse2 coerceFP2Int_x87 coerceFP2Int from to x = coerceFP2Int_sse2
where where
coerceFP2Int_x87 = do
(x_reg, x_code) <- getSomeReg x
let
opc = case from of W32 -> GFTOI; W64 -> GDTOI
n -> panic $ "coerceFP2Int.x87: unhandled width ("
++ show n ++ ")"
code dst = x_code `snocOL` opc x_reg dst
-- ToDo: works for non-II32 reps?
return (Any (intFormat to) code)
coerceFP2Int_sse2 = do coerceFP2Int_sse2 = do
(x_op, x_code) <- getOperand x -- ToDo: could be a safe operand (x_op, x_code) <- getOperand x -- ToDo: could be a safe operand
let let
...@@ -3418,15 +3332,13 @@ coerceFP2Int from to x = if_sse2 coerceFP2Int_sse2 coerceFP2Int_x87 ...@@ -3418,15 +3332,13 @@ coerceFP2Int from to x = if_sse2 coerceFP2Int_sse2 coerceFP2Int_x87
-------------------------------------------------------------------------------- --------------------------------------------------------------------------------
coerceFP2FP :: Width -> CmmExpr -> NatM Register coerceFP2FP :: Width -> CmmExpr -> NatM Register
coerceFP2FP to x = do coerceFP2FP to x = do
use_sse2 <- sse2Enabled
(x_reg, x_code) <- getSomeReg x (x_reg, x_code) <- getSomeReg x
let let
opc | use_sse2 = case to of W32 -> CVTSD2SS; W64 -> CVTSS2SD; opc = case to of W32 -> CVTSD2SS; W64 -> CVTSS2SD;
n -> panic $ "coerceFP2FP: unhandled width (" n -> panic $ "coerceFP2FP: unhandled width ("
++ show n ++ ")" ++ show n ++ ")"
| otherwise = GDTOF
code dst = x_code `snocOL` opc x_reg dst code dst = x_code `snocOL` opc x_reg dst
return (Any (if use_sse2 then floatFormat to else FF80) code) return (Any ( floatFormat to) code)
-------------------------------------------------------------------------------- --------------------------------------------------------------------------------
...@@ -3443,7 +3355,7 @@ sse2NegCode w x = do ...@@ -3443,7 +3355,7 @@ sse2NegCode w x = do
x@II16 -> wrongFmt x x@II16 -> wrongFmt x
x@II32 -> wrongFmt x x@II32 -> wrongFmt x
x@II64 -> wrongFmt x x@II64 -> wrongFmt x
x@FF80 -> wrongFmt x
where where
wrongFmt x = panic $ "sse2NegCode: " ++ show x wrongFmt x = panic $ "sse2NegCode: " ++ show x
Amode amode amode_code <- memConstant (mkAlignment $ widthInBytes w) const Amode amode amode_code <- memConstant (mkAlignment $ widthInBytes w) const
......