Skip to content
Commits on Source (3)
  • Joachim Breitner's avatar
    GHC no longer ever defines TABLES_NEXT_TO_CODE on its own · fc3f421b
    Joachim Breitner authored and Marge Bot's avatar Marge Bot committed
    It should be entirely the responsibility of make/Hadrian to ensure that
    everything that needs this flag gets it. GHC shouldn't be hardcoded to
    assist with bootstrapping since it builds other things besides itself.
    
    Reviewers:
    
    Subscribers: TerrorJack, rwbarton, carter
    
    GHC Trac Issues: #15548 -- progress towards but not fix
    
    Differential Revision: https://phabricator.haskell.org/D5082 -- extract
    from that
    fc3f421b
  • Ryan Scott's avatar
    Use ghc-prim < 0.7, not <= 0.6.1, as upper version bounds · be0dde8e
    Ryan Scott authored and Marge Bot's avatar Marge Bot committed
    Using `ghc-prim <= 0.6.1` is somewhat dodgy from a PVP point of view,
    as it makes it awkward to support new minor releases of `ghc-prim`.
    Let's instead use `< 0.7`, which is the idiomatic way of expressing
    PVP-compliant upper version bounds.
    be0dde8e
  • Carter Schonwald's avatar
    removing x87 register support from native code gen · fbf7331b
    Carter Schonwald authored
    * simplifies registers to have GPR, Float and Double, by removing the SSE2 and X87 Constructors
    * makes -msse2 assumed/default for x86 platforms, fixing a long standing nondeterminism in rounding
    behavior in 32bit haskell code
    * removes the 80bit floating point representation from the supported float sizes
    * theres still 1 tiny bit of x87 support needed,
    for handling float and double return values in FFI calls  wrt the C ABI on x86_32,
    but this one piece does not leak into the rest of NCG.
    * Lots of code thats not been touched in a long time got deleted as a
    consequence of all of this
    
    all in all, this change paves the way towards a lot of future further
    improvements in how GHC handles floating point computations, along with
    making the native code gen more accessible to a larger pool of contributors.
    fbf7331b
......@@ -81,7 +81,6 @@ assignArgumentsPos dflags off conv arg_ty reps = (stk_off, assignments)
| passFloatInXmm -> k (RegisterParam (DoubleReg s), (vs, fs, ds, ls, ss))
(W64, (vs, fs, d:ds, ls, ss))
| not passFloatInXmm -> k (RegisterParam d, (vs, fs, ds, ls, ss))
(W80, _) -> panic "F80 unsupported register type"
_ -> (assts, (r:rs))
int = case (w, regs) of
(W128, _) -> panic "W128 unsupported register type"
......@@ -100,6 +99,7 @@ assignArgumentsPos dflags off conv arg_ty reps = (stk_off, assignments)
passFloatArgsInXmm :: DynFlags -> Bool
passFloatArgsInXmm dflags = case platformArch (targetPlatform dflags) of
ArchX86_64 -> True
ArchX86 -> False
_ -> False
-- We used to spill vector registers to the stack since the LLVM backend didn't
......
......@@ -474,6 +474,9 @@ instance Eq GlobalReg where
FloatReg i == FloatReg j = i==j
DoubleReg i == DoubleReg j = i==j
LongReg i == LongReg j = i==j
-- NOTE: XMM, YMM, ZMM registers actually are the same registers
-- at least with respect to store at YMM i and then read from XMM i
-- and similarly for ZMM etc.
XmmReg i == XmmReg j = i==j
YmmReg i == YmmReg j = i==j
ZmmReg i == ZmmReg j = i==j
......@@ -584,6 +587,9 @@ globalRegType dflags (VanillaReg _ VNonGcPtr) = bWord dflags
globalRegType _ (FloatReg _) = cmmFloat W32
globalRegType _ (DoubleReg _) = cmmFloat W64
globalRegType _ (LongReg _) = cmmBits W64
-- TODO: improve the internal model of SIMD/vectorized registers
-- the right design SHOULd improve handling of float and double code too.
-- see remarks in "NOTE [SIMD Design for the future]"" in StgCmmPrim
globalRegType _ (XmmReg _) = cmmVec 4 (cmmBits W32)
globalRegType _ (YmmReg _) = cmmVec 8 (cmmBits W32)
globalRegType _ (ZmmReg _) = cmmVec 16 (cmmBits W32)
......
......@@ -166,9 +166,6 @@ isFloat64 _other = False
-----------------------------------------------------------------------------
data Width = W8 | W16 | W32 | W64
| W80 -- Extended double-precision float,
-- used in x86 native codegen only.
-- (we use Ord, so it'd better be in this order)
| W128
| W256
| W512
......@@ -185,7 +182,7 @@ mrStr W64 = sLit("W64")
mrStr W128 = sLit("W128")
mrStr W256 = sLit("W256")
mrStr W512 = sLit("W512")
mrStr W80 = sLit("W80")
-------- Common Widths ------------
......@@ -222,7 +219,7 @@ widthInBits W64 = 64
widthInBits W128 = 128
widthInBits W256 = 256
widthInBits W512 = 512
widthInBits W80 = 80
widthInBytes :: Width -> Int
widthInBytes W8 = 1
......@@ -232,7 +229,7 @@ widthInBytes W64 = 8
widthInBytes W128 = 16
widthInBytes W256 = 32
widthInBytes W512 = 64
widthInBytes W80 = 10
widthFromBytes :: Int -> Width
widthFromBytes 1 = W8
......@@ -242,7 +239,7 @@ widthFromBytes 8 = W64
widthFromBytes 16 = W128
widthFromBytes 32 = W256
widthFromBytes 64 = W512
widthFromBytes 10 = W80
widthFromBytes n = pprPanic "no width for given number of bytes" (ppr n)
-- log_2 of the width in bytes, useful for generating shifts.
......@@ -254,7 +251,7 @@ widthInLog W64 = 3
widthInLog W128 = 4
widthInLog W256 = 5
widthInLog W512 = 6
widthInLog W80 = panic "widthInLog: F80"
-- widening / narrowing
......
......@@ -1727,8 +1727,38 @@ vecElemProjectCast dflags WordVec W32 = Just (mo_u_32ToWord dflags)
vecElemProjectCast _ WordVec W64 = Nothing
vecElemProjectCast _ _ _ = Nothing
-- NOTE [SIMD Design for the future]
-- Check to make sure that we can generate code for the specified vector type
-- given the current set of dynamic flags.
-- Currently these checks are specific to x86 and x86_64 architecture.
-- This should be fixed!
-- In particular,
-- 1) Add better support for other architectures! (this may require a redesign)
-- 2) Decouple design choices from LLVM's pseudo SIMD model!
-- The high level LLVM naive rep makes per CPU family SIMD generation is own
-- optimization problem, and hides important differences in eg ARM vs x86_64 simd
-- 3) Depending on the architecture, the SIMD registers may also support general
-- computations on Float/Double/Word/Int scalars, but currently on
-- for example x86_64, we always put Word/Int (or sized) in GPR
-- (general purpose) registers. Would relaxing that allow for
-- useful optimization opportunities?
-- Phrased differently, it is worth experimenting with supporting
-- different register mapping strategies than we currently have, especially if
-- someday we want SIMD to be a first class denizen in GHC along with scalar
-- values!
-- The current design with respect to register mapping of scalars could
-- very well be the best,but exploring the design space and doing careful
-- measurments is the only only way to validate that.
-- In some next generation CPU ISAs, notably RISC V, the SIMD extension
-- includes support for a sort of run time CPU dependent vectorization parameter,
-- where a loop may act upon a single scalar each iteration OR some 2,4,8 ...
-- element chunk! Time will tell if that direction sees wide adoption,
-- but it is from that context that unifying our handling of simd and scalars
-- may benefit. It is not likely to benefit current architectures, though
-- it may very well be a design perspective that helps guide improving the NCG.
checkVecCompatibility :: DynFlags -> PrimOpVecCat -> Length -> Width -> FCode ()
checkVecCompatibility dflags vcat l w = do
when (hscTarget dflags /= HscLlvm) $ do
......
......@@ -97,7 +97,6 @@ cmmToLlvmType ty | isVecType ty = LMVector (vecLength ty) (cmmToLlvmType (vecE
widthToLlvmFloat :: Width -> LlvmType
widthToLlvmFloat W32 = LMFloat
widthToLlvmFloat W64 = LMDouble
widthToLlvmFloat W80 = LMFloat80
widthToLlvmFloat W128 = LMFloat128
widthToLlvmFloat w = panic $ "widthToLlvmFloat: Bad float size: " ++ show w
......
......@@ -58,7 +58,7 @@ module DynFlags (
fFlags, fLangFlags, xFlags,
wWarningFlags,
dynFlagDependencies,
tablesNextToCode, mkTablesNextToCode,
tablesNextToCode,
makeDynFlagsConsistent,
shouldUseColor,
shouldUseHexWordLiterals,
......@@ -5833,20 +5833,24 @@ data SseVersion = SSE1
isSseEnabled :: DynFlags -> Bool
isSseEnabled dflags = case platformArch (targetPlatform dflags) of
ArchX86_64 -> True
ArchX86 -> sseVersion dflags >= Just SSE1
ArchX86 -> True
_ -> False
isSse2Enabled :: DynFlags -> Bool
isSse2Enabled dflags = case platformArch (targetPlatform dflags) of
ArchX86_64 -> -- SSE2 is fixed on for x86_64. It would be
-- possible to make it optional, but we'd need to
-- fix at least the foreign call code where the
-- calling convention specifies the use of xmm regs,
-- and possibly other places.
True
ArchX86 -> sseVersion dflags >= Just SSE2
-- We Assume SSE1 and SSE2 operations are available on both
-- x86 and x86_64. Historically we didn't default to SSE2 and
-- SSE1 on x86, which results in defacto nondeterminism for how
-- rounding behaves in the associated x87 floating point instructions
-- because variations in the spill/fpu stack placement of arguments for
-- operations would change the precision and final result of what
-- would otherwise be the same expressions with respect to single or
-- double precision IEEE floating point computations.
ArchX86_64 -> True
ArchX86 -> True
_ -> False
isSse4_2Enabled :: DynFlags -> Bool
isSse4_2Enabled dflags = sseVersion dflags >= Just SSE42
......
......@@ -199,15 +199,9 @@ initSysTools top_dir
let unreg_gcc_args = if targetUnregisterised
then ["-DNO_REGS", "-DUSE_MINIINTERPRETER"]
else []
-- TABLES_NEXT_TO_CODE affects the info table layout.
tntc_gcc_args
| mkTablesNextToCode targetUnregisterised
= ["-DTABLES_NEXT_TO_CODE"]
| otherwise = []
cpp_args= map Option (words cpp_args_str)
gcc_args = map Option (words gcc_args_str
++ unreg_gcc_args
++ tntc_gcc_args)
++ unreg_gcc_args)
ldSupportsCompactUnwind <- getBooleanSetting "ld supports compact unwind"
ldSupportsBuildId <- getBooleanSetting "ld supports build-id"
ldSupportsFilelist <- getBooleanSetting "ld supports filelist"
......
......@@ -179,7 +179,7 @@ nativeCodeGen dflags this_mod modLoc h us cmms
x86NcgImpl :: DynFlags -> NcgImpl (Alignment, CmmStatics)
X86.Instr.Instr X86.Instr.JumpDest
x86NcgImpl dflags
= (x86_64NcgImpl dflags) { ncg_x86fp_kludge = map x86fp_kludge }
= (x86_64NcgImpl dflags)
x86_64NcgImpl :: DynFlags -> NcgImpl (Alignment, CmmStatics)
X86.Instr.Instr X86.Instr.JumpDest
......@@ -194,7 +194,6 @@ x86_64NcgImpl dflags
,pprNatCmmDecl = X86.Ppr.pprNatCmmDecl
,maxSpillSlots = X86.Instr.maxSpillSlots dflags
,allocatableRegs = X86.Regs.allocatableRegs platform
,ncg_x86fp_kludge = id
,ncgAllocMoreStack = X86.Instr.allocMoreStack platform
,ncgExpandTop = id
,ncgMakeFarBranches = const id
......@@ -215,7 +214,6 @@ ppcNcgImpl dflags
,pprNatCmmDecl = PPC.Ppr.pprNatCmmDecl
,maxSpillSlots = PPC.Instr.maxSpillSlots dflags
,allocatableRegs = PPC.Regs.allocatableRegs platform
,ncg_x86fp_kludge = id
,ncgAllocMoreStack = PPC.Instr.allocMoreStack platform
,ncgExpandTop = id
,ncgMakeFarBranches = PPC.Instr.makeFarBranches
......@@ -236,7 +234,6 @@ sparcNcgImpl dflags
,pprNatCmmDecl = SPARC.Ppr.pprNatCmmDecl
,maxSpillSlots = SPARC.Instr.maxSpillSlots dflags
,allocatableRegs = SPARC.Regs.allocatableRegs
,ncg_x86fp_kludge = id
,ncgAllocMoreStack = noAllocMoreStack
,ncgExpandTop = map SPARC.CodeGen.Expand.expandTop
,ncgMakeFarBranches = const id
......@@ -680,19 +677,10 @@ cmmNativeGen dflags this_mod modLoc ncgImpl us fileIds dbgMap cmm count
foldl' (\m (from,to) -> addImmediateSuccessor from to m )
cfgWithFixupBlks stack_updt_blks
---- x86fp_kludge. This pass inserts ffree instructions to clear
---- the FPU stack on x86. The x86 ABI requires that the FPU stack
---- is clear, and library functions can return odd results if it
---- isn't.
----
---- NB. must happen before shortcutBranches, because that
---- generates JXX_GBLs which we can't fix up in x86fp_kludge.
let kludged = {-# SCC "x86fp_kludge" #-} ncg_x86fp_kludge ncgImpl alloced
---- generate jump tables
let tabled =
{-# SCC "generateJumpTables" #-}
generateJumpTables ncgImpl kludged
generateJumpTables ncgImpl alloced
dumpIfSet_dyn dflags
Opt_D_dump_cfg_weights "CFG Update information"
......@@ -787,12 +775,6 @@ checkLayout procsUnsequenced procsSequenced =
getBlockIds (CmmProc _ _ _ (ListGraph blocks)) =
setFromList $ map blockId blocks
x86fp_kludge :: NatCmmDecl (Alignment, CmmStatics) X86.Instr.Instr -> NatCmmDecl (Alignment, CmmStatics) X86.Instr.Instr
x86fp_kludge top@(CmmData _ _) = top
x86fp_kludge (CmmProc info lbl live (ListGraph code)) =
CmmProc info lbl live (ListGraph $ X86.Instr.i386_insert_ffrees code)
-- | Compute unwinding tables for the blocks of a procedure
computeUnwinding :: Instruction instr
=> DynFlags -> NcgImpl statics instr jumpDest
......
......@@ -47,7 +47,6 @@ data Format
| II64
| FF32
| FF64
| FF80
deriving (Show, Eq)
......@@ -70,7 +69,7 @@ floatFormat width
= case width of
W32 -> FF32
W64 -> FF64
W80 -> FF80
other -> pprPanic "Format.floatFormat" (ppr other)
......@@ -80,7 +79,6 @@ isFloatFormat format
= case format of
FF32 -> True
FF64 -> True
FF80 -> True
_ -> False
......@@ -101,7 +99,7 @@ formatToWidth format
II64 -> W64
FF32 -> W32
FF64 -> W64
FF80 -> W80
formatInBytes :: Format -> Int
formatInBytes = widthInBytes . formatToWidth
......@@ -76,7 +76,6 @@ data NcgImpl statics instr jumpDest = NcgImpl {
pprNatCmmDecl :: NatCmmDecl statics instr -> SDoc,
maxSpillSlots :: Int,
allocatableRegs :: [RealReg],
ncg_x86fp_kludge :: [NatCmmDecl statics instr] -> [NatCmmDecl statics instr],
ncgExpandTop :: [NatCmmDecl statics instr] -> [NatCmmDecl statics instr],
ncgAllocMoreStack :: Int -> NatCmmDecl statics instr
-> UniqSM (NatCmmDecl statics instr, [(BlockId,BlockId)]),
......
......@@ -1593,7 +1593,7 @@ genCCall'
-> [CmmActual] -- arguments (of mixed type)
-> NatM InstrBlock
{-
{-
PowerPC Linux uses the System V Release 4 Calling Convention
for PowerPC. It is described in the
"System V Application Binary Interface PowerPC Processor Supplement".
......@@ -1906,7 +1906,7 @@ genCCall' dflags gcp target dest_regs args
FF32 -> (1, 1, 4, fprs)
FF64 -> (2, 1, 8, fprs)
II64 -> panic "genCCall' passArguments II64"
FF80 -> panic "genCCall' passArguments FF80"
GCP32ELF ->
case cmmTypeFormat rep of
II8 -> (1, 0, 4, gprs)
......@@ -1916,7 +1916,6 @@ genCCall' dflags gcp target dest_regs args
FF32 -> (0, 1, 4, fprs)
FF64 -> (0, 1, 8, fprs)
II64 -> panic "genCCall' passArguments II64"
FF80 -> panic "genCCall' passArguments FF80"
GCP64ELF _ ->
case cmmTypeFormat rep of
II8 -> (1, 0, 8, gprs)
......@@ -1928,7 +1927,6 @@ genCCall' dflags gcp target dest_regs args
-- the FPRs.
FF32 -> (1, 1, 8, fprs)
FF64 -> (1, 1, 8, fprs)
FF80 -> panic "genCCall' passArguments FF80"
moveResult reduceToFF32 =
case dest_regs of
......
......@@ -161,7 +161,7 @@ pprReg r
RegVirtual (VirtualRegHi u) -> text "%vHi_" <> pprUniqueAlways u
RegVirtual (VirtualRegF u) -> text "%vF_" <> pprUniqueAlways u
RegVirtual (VirtualRegD u) -> text "%vD_" <> pprUniqueAlways u
RegVirtual (VirtualRegSSE u) -> text "%vSSE_" <> pprUniqueAlways u
where
ppr_reg_no :: Int -> SDoc
ppr_reg_no i
......@@ -179,8 +179,7 @@ pprFormat x
II32 -> sLit "w"
II64 -> sLit "d"
FF32 -> sLit "fs"
FF64 -> sLit "fd"
_ -> panic "PPC.Ppr.pprFormat: no match")
FF64 -> sLit "fd")
pprCond :: Cond -> SDoc
......@@ -365,7 +364,6 @@ pprInstr (LD fmt reg addr) = hcat [
II64 -> sLit "d"
FF32 -> sLit "fs"
FF64 -> sLit "fd"
_ -> panic "PPC.Ppr.pprInstr: no match"
),
case addr of AddrRegImm _ _ -> empty
AddrRegReg _ _ -> char 'x',
......@@ -405,7 +403,6 @@ pprInstr (LA fmt reg addr) = hcat [
II64 -> sLit "d"
FF32 -> sLit "fs"
FF64 -> sLit "fd"
_ -> panic "PPC.Ppr.pprInstr: no match"
),
case addr of AddrRegImm _ _ -> empty
AddrRegReg _ _ -> char 'x',
......
......@@ -131,7 +131,7 @@ regDotColor reg
RcInteger -> text "blue"
RcFloat -> text "red"
RcDouble -> text "green"
RcDoubleSSE -> text "yellow"
-- immediates ------------------------------------------------------------------
......
......@@ -56,7 +56,7 @@ data VirtualReg
| VirtualRegHi {-# UNPACK #-} !Unique -- High part of 2-word register
| VirtualRegF {-# UNPACK #-} !Unique
| VirtualRegD {-# UNPACK #-} !Unique
| VirtualRegSSE {-# UNPACK #-} !Unique
deriving (Eq, Show)
-- This is laborious, but necessary. We can't derive Ord because
......@@ -69,15 +69,14 @@ instance Ord VirtualReg where
compare (VirtualRegHi a) (VirtualRegHi b) = nonDetCmpUnique a b
compare (VirtualRegF a) (VirtualRegF b) = nonDetCmpUnique a b
compare (VirtualRegD a) (VirtualRegD b) = nonDetCmpUnique a b
compare (VirtualRegSSE a) (VirtualRegSSE b) = nonDetCmpUnique a b
compare VirtualRegI{} _ = LT
compare _ VirtualRegI{} = GT
compare VirtualRegHi{} _ = LT
compare _ VirtualRegHi{} = GT
compare VirtualRegF{} _ = LT
compare _ VirtualRegF{} = GT
compare VirtualRegD{} _ = LT
compare _ VirtualRegD{} = GT
instance Uniquable VirtualReg where
......@@ -87,16 +86,18 @@ instance Uniquable VirtualReg where
VirtualRegHi u -> u
VirtualRegF u -> u
VirtualRegD u -> u
VirtualRegSSE u -> u
instance Outputable VirtualReg where
ppr reg
= case reg of
VirtualRegI u -> text "%vI_" <> pprUniqueAlways u
VirtualRegHi u -> text "%vHi_" <> pprUniqueAlways u
VirtualRegF u -> text "%vF_" <> pprUniqueAlways u
VirtualRegD u -> text "%vD_" <> pprUniqueAlways u
VirtualRegSSE u -> text "%vSSE_" <> pprUniqueAlways u
-- this code is kinda wrong on x86
-- because float and double occupy the same register set
-- namely SSE2 register xmm0 .. xmm15
VirtualRegF u -> text "%vFloat_" <> pprUniqueAlways u
VirtualRegD u -> text "%vDouble_" <> pprUniqueAlways u
renameVirtualReg :: Unique -> VirtualReg -> VirtualReg
......@@ -106,7 +107,6 @@ renameVirtualReg u r
VirtualRegHi _ -> VirtualRegHi u
VirtualRegF _ -> VirtualRegF u
VirtualRegD _ -> VirtualRegD u
VirtualRegSSE _ -> VirtualRegSSE u
classOfVirtualReg :: VirtualReg -> RegClass
......@@ -116,7 +116,7 @@ classOfVirtualReg vr
VirtualRegHi{} -> RcInteger
VirtualRegF{} -> RcFloat
VirtualRegD{} -> RcDouble
VirtualRegSSE{} -> RcDoubleSSE
-- Determine the upper-half vreg for a 64-bit quantity on a 32-bit platform
......
......@@ -134,6 +134,10 @@ trivColorable platform virtualRegSqueeze realRegSqueeze RcInteger conflicts excl
trivColorable platform virtualRegSqueeze realRegSqueeze RcFloat conflicts exclusions
| let cALLOCATABLE_REGS_FLOAT
= (case platformArch platform of
-- On x86_64 and x86, Float and RcDouble
-- use the same registers,
-- so we only use RcDouble to represent the
-- register allocation problem on those types.
ArchX86 -> 0
ArchX86_64 -> 0
ArchPPC -> 0
......@@ -160,8 +164,14 @@ trivColorable platform virtualRegSqueeze realRegSqueeze RcFloat conflicts exclus
trivColorable platform virtualRegSqueeze realRegSqueeze RcDouble conflicts exclusions
| let cALLOCATABLE_REGS_DOUBLE
= (case platformArch platform of
ArchX86 -> 6
ArchX86_64 -> 0
ArchX86 -> 8
-- in x86 32bit mode sse2 there are only
-- 8 XMM registers xmm0 ... xmm7
ArchX86_64 -> 10
-- in x86_64 there are 16 XMM registers
-- xmm0 .. xmm15, here 10 is a
-- "dont need to solve conflicts" count that
-- was chosen at some point in the past.
ArchPPC -> 26
ArchSPARC -> 11
ArchSPARC64 -> panic "trivColorable ArchSPARC64"
......@@ -183,31 +193,7 @@ trivColorable platform virtualRegSqueeze realRegSqueeze RcDouble conflicts exclu
= count3 < cALLOCATABLE_REGS_DOUBLE
trivColorable platform virtualRegSqueeze realRegSqueeze RcDoubleSSE conflicts exclusions
| let cALLOCATABLE_REGS_SSE
= (case platformArch platform of
ArchX86 -> 8
ArchX86_64 -> 10
ArchPPC -> 0
ArchSPARC -> 0
ArchSPARC64 -> panic "trivColorable ArchSPARC64"
ArchPPC_64 _ -> 0
ArchARM _ _ _ -> panic "trivColorable ArchARM"
ArchARM64 -> panic "trivColorable ArchARM64"
ArchAlpha -> panic "trivColorable ArchAlpha"
ArchMipseb -> panic "trivColorable ArchMipseb"
ArchMipsel -> panic "trivColorable ArchMipsel"
ArchJavaScript-> panic "trivColorable ArchJavaScript"
ArchUnknown -> panic "trivColorable ArchUnknown")
, count2 <- accSqueeze 0 cALLOCATABLE_REGS_SSE
(virtualRegSqueeze RcDoubleSSE)
conflicts
, count3 <- accSqueeze count2 cALLOCATABLE_REGS_SSE
(realRegSqueeze RcDoubleSSE)
exclusions
= count3 < cALLOCATABLE_REGS_SSE
-- Specification Code ----------------------------------------------------------
......
......@@ -18,7 +18,6 @@ data RegClass
= RcInteger
| RcFloat
| RcDouble
| RcDoubleSSE -- x86 only: the SSE regs are a separate class
deriving Eq
......@@ -26,10 +25,8 @@ instance Uniquable RegClass where
getUnique RcInteger = mkRegClassUnique 0
getUnique RcFloat = mkRegClassUnique 1
getUnique RcDouble = mkRegClassUnique 2
getUnique RcDoubleSSE = mkRegClassUnique 3
instance Outputable RegClass where
ppr RcInteger = Outputable.text "I"
ppr RcFloat = Outputable.text "F"
ppr RcDouble = Outputable.text "D"
ppr RcDoubleSSE = Outputable.text "S"
......@@ -384,7 +384,6 @@ sparc_mkSpillInstr dflags reg _ slot
RcInteger -> II32
RcFloat -> FF32
RcDouble -> FF64
_ -> panic "sparc_mkSpillInstr"
in ST fmt reg (fpRel (negate off_w))
......@@ -405,7 +404,6 @@ sparc_mkLoadInstr dflags reg _ slot
RcInteger -> II32
RcFloat -> FF32
RcDouble -> FF64
_ -> panic "sparc_mkLoadInstr"
in LD fmt (fpRel (- off_w)) reg
......@@ -454,7 +452,6 @@ sparc_mkRegRegMoveInstr platform src dst
RcInteger -> ADD False False src (RIReg g0) dst
RcDouble -> FMOV FF64 src dst
RcFloat -> FMOV FF32 src dst
_ -> panic "sparc_mkRegRegMoveInstr"
| otherwise
= panic "SPARC.Instr.mkRegRegMoveInstr: classes of src and dest not the same"
......
......@@ -143,7 +143,7 @@ pprReg reg
VirtualRegHi u -> text "%vHi_" <> pprUniqueAlways u
VirtualRegF u -> text "%vF_" <> pprUniqueAlways u
VirtualRegD u -> text "%vD_" <> pprUniqueAlways u
VirtualRegSSE u -> text "%vSSE_" <> pprUniqueAlways u
RegReal rr
-> case rr of
......@@ -211,8 +211,7 @@ pprFormat x
II32 -> sLit ""
II64 -> sLit "d"
FF32 -> sLit ""
FF64 -> sLit "d"
_ -> panic "SPARC.Ppr.pprFormat: no match")
FF64 -> sLit "d")
-- | Pretty print a format for an instruction suffix.
......@@ -226,8 +225,8 @@ pprStFormat x
II32 -> sLit ""
II64 -> sLit "x"
FF32 -> sLit ""
FF64 -> sLit "d"
_ -> panic "SPARC.Ppr.pprFormat: no match")
FF64 -> sLit "d")
-- | Pretty print a condition code.
......
......@@ -104,7 +104,6 @@ virtualRegSqueeze cls vr
VirtualRegD{} -> 1
_other -> 0
_other -> 0
{-# INLINE realRegSqueeze #-}
realRegSqueeze :: RegClass -> RealReg -> Int
......@@ -135,7 +134,6 @@ realRegSqueeze cls rr
RealRegPair{} -> 1
_other -> 0
-- | All the allocatable registers in the machine,
-- including register pairs.
......
......@@ -98,17 +98,25 @@ is32BitPlatform = do
sse2Enabled :: NatM Bool
sse2Enabled = do
dflags <- getDynFlags
return (isSse2Enabled dflags)
case platformArch (targetPlatform dflags) of
-- We Assume SSE1 and SSE2 operations are available on both
-- x86 and x86_64. Historically we didn't default to SSE2 and
-- SSE1 on x86, which results in defacto nondeterminism for how
-- rounding behaves in the associated x87 floating point instructions
-- because variations in the spill/fpu stack placement of arguments for
-- operations would change the precision and final result of what
-- would otherwise be the same expressions with respect to single or
-- double precision IEEE floating point computations.
ArchX86_64 -> return True
ArchX86 -> return True
_ -> panic "trying to generate x86/x86_64 on the wrong platform"
sse4_2Enabled :: NatM Bool
sse4_2Enabled = do
dflags <- getDynFlags
return (isSse4_2Enabled dflags)
if_sse2 :: NatM a -> NatM a -> NatM a
if_sse2 sse2 x87 = do
b <- sse2Enabled
if b then sse2 else x87
cmmTopCodeGen
:: RawCmmDecl
......@@ -284,15 +292,14 @@ swizzleRegisterRep (Any _ codefn) format = Any format codefn
-- | Grab the Reg for a CmmReg
getRegisterReg :: Platform -> Bool -> CmmReg -> Reg
getRegisterReg :: Platform -> CmmReg -> Reg
getRegisterReg _ use_sse2 (CmmLocal (LocalReg u pk))
= let fmt = cmmTypeFormat pk in
if isFloatFormat fmt && not use_sse2
then RegVirtual (mkVirtualReg u FF80)
else RegVirtual (mkVirtualReg u fmt)
getRegisterReg _ (CmmLocal (LocalReg u pk))
= -- by Assuming SSE2, Int,Word,Float,Double all can be register allocated
let fmt = cmmTypeFormat pk in
RegVirtual (mkVirtualReg u fmt)
getRegisterReg platform _ (CmmGlobal mid)
getRegisterReg platform (CmmGlobal mid)
= case globalRegMaybe platform mid of
Just reg -> RegReal $ reg
Nothing -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid)
......@@ -513,15 +520,14 @@ getRegister' dflags is32Bit (CmmReg reg)
do reg' <- getPicBaseNat (archWordFormat is32Bit)
return (Fixed (archWordFormat is32Bit) reg' nilOL)
_ ->
do use_sse2 <- sse2Enabled
do
let
fmt = cmmTypeFormat (cmmRegType dflags reg)
format | not use_sse2 && isFloatFormat fmt = FF80
| otherwise = fmt
format = fmt
--
let platform = targetPlatform dflags
return (Fixed format
(getRegisterReg platform use_sse2 reg)
(getRegisterReg platform reg)
nilOL)
......@@ -557,8 +563,7 @@ getRegister' _ is32Bit (CmmMachOp (MO_SS_Conv W64 W32) [x])
return $ Fixed II32 rlo code
getRegister' _ _ (CmmLit lit@(CmmFloat f w)) =
if_sse2 float_const_sse2 float_const_x87
where
float_const_sse2 where
float_const_sse2
| f == 0.0 = do
let
......@@ -570,21 +575,7 @@ getRegister' _ _ (CmmLit lit@(CmmFloat f w)) =
| otherwise = do
Amode addr code <- memConstant (mkAlignment $ widthInBytes w) lit
loadFloatAmode True w addr code
float_const_x87 = case w of
W64
| f == 0.0 ->
let code dst = unitOL (GLDZ dst)
in return (Any FF80 code)
| f == 1.0 ->
let code dst = unitOL (GLD1 dst)
in return (Any FF80 code)
_otherwise -> do
Amode addr code <- memConstant (mkAlignment $ widthInBytes w) lit
loadFloatAmode False w addr code
loadFloatAmode w addr code
-- catch simple cases of zero- or sign-extended load
getRegister' _ _ (CmmMachOp (MO_UU_Conv W8 W32) [CmmLoad addr _]) = do
......@@ -641,11 +632,9 @@ getRegister' _ is32Bit (CmmMachOp (MO_Add W64) [CmmReg (CmmGlobal PicBaseReg),
LEA II64 (OpAddr (ripRel (litToImm displacement))) (OpReg dst))
getRegister' dflags is32Bit (CmmMachOp mop [x]) = do -- unary MachOps
sse2 <- sse2Enabled
case mop of
MO_F_Neg w
| sse2 -> sse2NegCode w x
| otherwise -> trivialUFCode FF80 (GNEG FF80) x
MO_F_Neg w -> sse2NegCode w x
MO_S_Neg w -> triv_ucode NEGI (intFormat w)
MO_Not w -> triv_ucode NOT (intFormat w)
......@@ -711,9 +700,8 @@ getRegister' dflags is32Bit (CmmMachOp mop [x]) = do -- unary MachOps
MO_XX_Conv W16 W64 | not is32Bit -> integerExtend W16 W64 MOV x
MO_XX_Conv W32 W64 | not is32Bit -> integerExtend W32 W64 MOV x
MO_FF_Conv W32 W64
| sse2 -> coerceFP2FP W64 x
| otherwise -> conversionNop FF80 x
MO_FF_Conv W32 W64 -> coerceFP2FP W64 x
MO_FF_Conv W64 W32 -> coerceFP2FP W32 x
......@@ -776,7 +764,6 @@ getRegister' dflags is32Bit (CmmMachOp mop [x]) = do -- unary MachOps
getRegister' _ is32Bit (CmmMachOp mop [x, y]) = do -- dyadic MachOps
sse2 <- sse2Enabled
case mop of
MO_F_Eq _ -> condFltReg is32Bit EQQ x y
MO_F_Ne _ -> condFltReg is32Bit NE x y
......@@ -800,14 +787,14 @@ getRegister' _ is32Bit (CmmMachOp mop [x, y]) = do -- dyadic MachOps
MO_U_Lt _ -> condIntReg LU x y
MO_U_Le _ -> condIntReg LEU x y
MO_F_Add w | sse2 -> trivialFCode_sse2 w ADD x y
| otherwise -> trivialFCode_x87 GADD x y
MO_F_Sub w | sse2 -> trivialFCode_sse2 w SUB x y
| otherwise -> trivialFCode_x87 GSUB x y
MO_F_Quot w | sse2 -> trivialFCode_sse2 w FDIV x y
| otherwise -> trivialFCode_x87 GDIV x y
MO_F_Mul w | sse2 -> trivialFCode_sse2 w MUL x y
| otherwise -> trivialFCode_x87 GMUL x y
MO_F_Add w -> trivialFCode_sse2 w ADD x y
MO_F_Sub w -> trivialFCode_sse2 w SUB x y
MO_F_Quot w -> trivialFCode_sse2 w FDIV x y
MO_F_Mul w -> trivialFCode_sse2 w MUL x y
MO_Add rep -> add_code rep x y
MO_Sub rep -> sub_code rep x y
......@@ -1001,8 +988,7 @@ getRegister' _ _ (CmmLoad mem pk)
| isFloatType pk
= do
Amode addr mem_code <- getAmode mem
use_sse2 <- sse2Enabled
loadFloatAmode use_sse2 (typeWidth pk) addr mem_code
loadFloatAmode (typeWidth pk) addr mem_code
getRegister' _ is32Bit (CmmLoad mem pk)
| is32Bit && not (isWord64 pk)
......@@ -1132,9 +1118,7 @@ getNonClobberedReg expr = do
return (reg, code)
reg2reg :: Format -> Reg -> Reg -> Instr
reg2reg format src dst
| format == FF80 = GMOV src dst
| otherwise = MOV format (OpReg src) (OpReg dst)
reg2reg format src dst = MOV format (OpReg src) (OpReg dst)
--------------------------------------------------------------------------------
......@@ -1243,8 +1227,7 @@ x86_complex_amode base index shift offset
getNonClobberedOperand :: CmmExpr -> NatM (Operand, InstrBlock)
getNonClobberedOperand (CmmLit lit) = do
use_sse2 <- sse2Enabled
if use_sse2 && isSuitableFloatingPointLit lit
if isSuitableFloatingPointLit lit
then do
let CmmFloat _ w = lit
Amode addr code <- memConstant (mkAlignment $ widthInBytes w) lit
......@@ -1259,9 +1242,12 @@ getNonClobberedOperand (CmmLit lit) = do
getNonClobberedOperand (CmmLoad mem pk) = do
is32Bit <- is32BitPlatform
use_sse2 <- sse2Enabled
if (not (isFloatType pk) || use_sse2)
&& (if is32Bit then not (isWord64 pk) else True)
-- this logic could be simplified
-- TODO FIXME
if (if is32Bit then not (isWord64 pk) else True)
-- if 32bit and pk is at float/double/simd value
-- or if 64bit
-- this could use some eyeballs or i'll need to stare at it more later
then do
dflags <- getDynFlags
let platform = targetPlatform dflags
......@@ -1278,6 +1264,7 @@ getNonClobberedOperand (CmmLoad mem pk) = do
return (src, nilOL)
return (OpAddr src', mem_code `appOL` save_code)
else do
-- if its a word or gcptr on 32bit?
getNonClobberedOperand_generic (CmmLoad mem pk)
getNonClobberedOperand e = getNonClobberedOperand_generic e
......@@ -1370,14 +1357,13 @@ memConstant align lit = do
return (Amode addr code)
loadFloatAmode :: Bool -> Width -> AddrMode -> InstrBlock -> NatM Register
loadFloatAmode use_sse2 w addr addr_code = do
loadFloatAmode :: Width -> AddrMode -> InstrBlock -> NatM Register
loadFloatAmode w addr addr_code = do
let format = floatFormat w
code dst = addr_code `snocOL`
if use_sse2
then MOV format (OpAddr addr) (OpReg dst)
else GLD format addr dst
return (Any (if use_sse2 then format else FF80) code)
MOV format (OpAddr addr) (OpReg dst)
return (Any format code)
-- if we want a floating-point literal as an operand, we can
......@@ -1538,19 +1524,9 @@ condIntCode' _ cond x y = do
condFltCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode
condFltCode cond x y
= if_sse2 condFltCode_sse2 condFltCode_x87
= condFltCode_sse2
where
condFltCode_x87
= ASSERT(cond `elem` ([EQQ, NE, LE, LTT, GE, GTT])) do
(x_reg, x_code) <- getNonClobberedReg x
(y_reg, y_code) <- getSomeReg y
let
code = x_code `appOL` y_code `snocOL`
GCMP cond x_reg y_reg
-- The GCMP insn does the test and sets the zero flag if comparable
-- and true. Hence we always supply EQQ as the condition to test.
return (CondCode True EQQ code)
-- in the SSE2 comparison ops (ucomiss, ucomisd) the left arg may be
-- an operand, but the right must be a reg. We can probably do better
......@@ -1634,35 +1610,33 @@ assignReg_IntCode pk reg (CmmLoad src _) = do
load_code <- intLoadCode (MOV pk) src
dflags <- getDynFlags
let platform = targetPlatform dflags
return (load_code (getRegisterReg platform False{-no sse2-} reg))
return (load_code (getRegisterReg platform reg))
-- dst is a reg, but src could be anything
assignReg_IntCode _ reg src = do
dflags <- getDynFlags
let platform = targetPlatform dflags
code <- getAnyReg src
return (code (getRegisterReg platform False{-no sse2-} reg))
return (code (getRegisterReg platform reg))
-- Floating point assignment to memory
assignMem_FltCode pk addr src = do
(src_reg, src_code) <- getNonClobberedReg src
Amode addr addr_code <- getAmode addr
use_sse2 <- sse2Enabled
let
code = src_code `appOL`
addr_code `snocOL`
if use_sse2 then MOV pk (OpReg src_reg) (OpAddr addr)
else GST pk src_reg addr
MOV pk (OpReg src_reg) (OpAddr addr)
return code
-- Floating point assignment to a register/temporary
assignReg_FltCode _ reg src = do
use_sse2 <- sse2Enabled
src_code <- getAnyReg src
dflags <- getDynFlags
let platform = targetPlatform dflags
return (src_code (getRegisterReg platform use_sse2 reg))
return (src_code (getRegisterReg platform reg))
genJump :: CmmExpr{-the branch target-} -> [Reg] -> NatM InstrBlock
......@@ -1945,7 +1919,7 @@ genCCall _ is32bit (PrimTarget (MO_Prefetch_Data n )) _ [src] _ =
genCCall dflags is32Bit (PrimTarget (MO_BSwap width)) [dst] [src] _ = do
let platform = targetPlatform dflags
let dst_r = getRegisterReg platform False (CmmLocal dst)
let dst_r = getRegisterReg platform (CmmLocal dst)
case width of
W64 | is32Bit -> do
ChildCode64 vcode rlo <- iselExpr64 src
......@@ -1972,7 +1946,7 @@ genCCall dflags is32Bit (PrimTarget (MO_PopCnt width)) dest_regs@[dst]
if sse4_2
then do code_src <- getAnyReg src
src_r <- getNewRegNat format
let dst_r = getRegisterReg platform False (CmmLocal dst)
let dst_r = getRegisterReg platform (CmmLocal dst)
return $ code_src src_r `appOL`
(if width == W8 then
-- The POPCNT instruction doesn't take a r/m8
......@@ -2004,7 +1978,7 @@ genCCall dflags is32Bit (PrimTarget (MO_Pdep width)) dest_regs@[dst]
code_mask <- getAnyReg mask
src_r <- getNewRegNat format
mask_r <- getNewRegNat format
let dst_r = getRegisterReg platform False (CmmLocal dst)
let dst_r = getRegisterReg platform (CmmLocal dst)
return $ code_src src_r `appOL` code_mask mask_r `appOL`
(if width == W8 then
-- The PDEP instruction doesn't take a r/m8
......@@ -2037,7 +2011,7 @@ genCCall dflags is32Bit (PrimTarget (MO_Pext width)) dest_regs@[dst]
code_mask <- getAnyReg mask
src_r <- getNewRegNat format
mask_r <- getNewRegNat format
let dst_r = getRegisterReg platform False (CmmLocal dst)
let dst_r = getRegisterReg platform (CmmLocal dst)
return $ code_src src_r `appOL` code_mask mask_r `appOL`
(if width == W8 then
-- The PEXT instruction doesn't take a r/m8
......@@ -2073,7 +2047,7 @@ genCCall dflags is32Bit (PrimTarget (MO_Clz width)) dest_regs@[dst] args@[src] b
| otherwise = do
code_src <- getAnyReg src
let dst_r = getRegisterReg platform False (CmmLocal dst)
let dst_r = getRegisterReg platform (CmmLocal dst)
if isBmi2Enabled dflags
then do
src_r <- getNewRegNat (intFormat width)
......@@ -2110,7 +2084,7 @@ genCCall dflags is32Bit (PrimTarget (MO_Ctz width)) [dst] [src] bid
| is32Bit, width == W64 = do
ChildCode64 vcode rlo <- iselExpr64 src
let rhi = getHiVRegFromLo rlo
dst_r = getRegisterReg platform False (CmmLocal dst)
dst_r = getRegisterReg platform (CmmLocal dst)
lbl1 <- getBlockIdNat
lbl2 <- getBlockIdNat
let format = if width == W8 then II16 else intFormat width
......@@ -2150,7 +2124,7 @@ genCCall dflags is32Bit (PrimTarget (MO_Ctz width)) [dst] [src] bid
| otherwise = do
code_src <- getAnyReg src
let dst_r = getRegisterReg platform False (CmmLocal dst)
let dst_r = getRegisterReg platform (CmmLocal dst)
if isBmi2Enabled dflags
then do
......@@ -2201,9 +2175,8 @@ genCCall dflags is32Bit (PrimTarget (MO_AtomicRMW width amop))
else getSimpleAmode dflags is32Bit addr -- See genCCall for MO_Cmpxchg
arg <- getNewRegNat format
arg_code <- getAnyReg n
use_sse2 <- sse2Enabled
let platform = targetPlatform dflags
dst_r = getRegisterReg platform use_sse2 (CmmLocal dst)
dst_r = getRegisterReg platform (CmmLocal dst)
code <- op_code dst_r arg amode
return $ addr_code `appOL` arg_code arg `appOL` code
where
......@@ -2260,8 +2233,8 @@ genCCall dflags is32Bit (PrimTarget (MO_AtomicRMW width amop))
genCCall dflags _ (PrimTarget (MO_AtomicRead width)) [dst] [addr] _ = do
load_code <- intLoadCode (MOV (intFormat width)) addr
let platform = targetPlatform dflags
use_sse2 <- sse2Enabled
return (load_code (getRegisterReg platform use_sse2 (CmmLocal dst)))
return (load_code (getRegisterReg platform (CmmLocal dst)))
genCCall _ _ (PrimTarget (MO_AtomicWrite width)) [] [addr, val] _ = do
code <- assignMem_IntCode (intFormat width) addr val
......@@ -2276,9 +2249,8 @@ genCCall dflags is32Bit (PrimTarget (MO_Cmpxchg width)) [dst] [addr, old, new] _
newval_code <- getAnyReg new
oldval <- getNewRegNat format
oldval_code <- getAnyReg old
use_sse2 <- sse2Enabled
let platform = targetPlatform dflags
dst_r = getRegisterReg platform use_sse2 (CmmLocal dst)
dst_r = getRegisterReg platform (CmmLocal dst)
code = toOL
[ MOV format (OpReg oldval) (OpReg eax)
, LOCK (CMPXCHG format (OpReg newval) (OpAddr amode))
......@@ -2292,14 +2264,12 @@ genCCall dflags is32Bit (PrimTarget (MO_Cmpxchg width)) [dst] [addr, old, new] _
genCCall _ is32Bit target dest_regs args bid = do
dflags <- getDynFlags
let platform = targetPlatform dflags
sse2 = isSse2Enabled dflags
case (target, dest_regs) of
-- void return type prim op
(PrimTarget op, []) ->
outOfLineCmmOp bid op Nothing args
-- we only cope with a single result for foreign calls
(PrimTarget op, [r])
| sse2 -> case op of
(PrimTarget op, [r]) -> case op of
MO_F32_Fabs -> case args of
[x] -> sse2FabsCode W32 x
_ -> panic "genCCall: Wrong number of arguments for fabs"
......@@ -2310,36 +2280,16 @@ genCCall _ is32Bit target dest_regs args bid = do
MO_F32_Sqrt -> actuallyInlineSSE2Op (\fmt r -> SQRT fmt (OpReg r)) FF32 args
MO_F64_Sqrt -> actuallyInlineSSE2Op (\fmt r -> SQRT fmt (OpReg r)) FF64 args
_other_op -> outOfLineCmmOp bid op (Just r) args
| otherwise -> do
l1 <- getNewLabelNat
l2 <- getNewLabelNat
if sse2
then outOfLineCmmOp bid op (Just r) args
else case op of
MO_F32_Sqrt -> actuallyInlineFloatOp GSQRT FF32 args
MO_F64_Sqrt -> actuallyInlineFloatOp GSQRT FF64 args
MO_F32_Sin -> actuallyInlineFloatOp (\s -> GSIN s l1 l2) FF32 args
MO_F64_Sin -> actuallyInlineFloatOp (\s -> GSIN s l1 l2) FF64 args
MO_F32_Cos -> actuallyInlineFloatOp (\s -> GCOS s l1 l2) FF32 args
MO_F64_Cos -> actuallyInlineFloatOp (\s -> GCOS s l1 l2) FF64 args
MO_F32_Tan -> actuallyInlineFloatOp (\s -> GTAN s l1 l2) FF32 args
MO_F64_Tan -> actuallyInlineFloatOp (\s -> GTAN s l1 l2) FF64 args
_other_op -> outOfLineCmmOp bid op (Just r) args
where
actuallyInlineFloatOp = actuallyInlineFloatOp' False
actuallyInlineSSE2Op = actuallyInlineFloatOp' True
actuallyInlineSSE2Op = actuallyInlineFloatOp'
actuallyInlineFloatOp' usesSSE instr format [x]
actuallyInlineFloatOp' instr format [x]
= do res <- trivialUFCode format (instr format) x
any <- anyReg res
return (any (getRegisterReg platform usesSSE (CmmLocal r)))
return (any (getRegisterReg platform (CmmLocal r)))
actuallyInlineFloatOp' _ _ _ args
actuallyInlineFloatOp' _ _ args
= panic $ "genCCall.actuallyInlineFloatOp': bad number of arguments! ("
++ show (length args) ++ ")"
......@@ -2358,7 +2308,7 @@ genCCall _ is32Bit target dest_regs args bid = do
AND fmt (OpReg tmp) (OpReg dst)
]
return $ code (getRegisterReg platform True (CmmLocal r))
return $ code (getRegisterReg platform (CmmLocal r))
(PrimTarget (MO_S_QuotRem width), _) -> divOp1 platform True width dest_regs args
(PrimTarget (MO_U_QuotRem width), _) -> divOp1 platform False width dest_regs args
......@@ -2370,8 +2320,8 @@ genCCall _ is32Bit target dest_regs args bid = do
let format = intFormat width
lCode <- anyReg =<< trivialCode width (ADD_CC format)
(Just (ADD_CC format)) arg_x arg_y
let reg_l = getRegisterReg platform True (CmmLocal res_l)
reg_h = getRegisterReg platform True (CmmLocal res_h)
let reg_l = getRegisterReg platform (CmmLocal res_l)
reg_h = getRegisterReg platform (CmmLocal res_h)
code = hCode reg_h `appOL`
lCode reg_l `snocOL`
ADC format (OpImm (ImmInteger 0)) (OpReg reg_h)
......@@ -2391,8 +2341,8 @@ genCCall _ is32Bit target dest_regs args bid = do
do (y_reg, y_code) <- getRegOrMem arg_y
x_code <- getAnyReg arg_x
let format = intFormat width
reg_h = getRegisterReg platform True (CmmLocal res_h)
reg_l = getRegisterReg platform True (CmmLocal res_l)
reg_h = getRegisterReg platform (CmmLocal res_h)
reg_l = getRegisterReg platform (CmmLocal res_l)
code = y_code `appOL`
x_code rax `appOL`
toOL [MUL2 format y_reg,
......@@ -2428,8 +2378,8 @@ genCCall _ is32Bit target dest_regs args bid = do
divOp platform signed width [res_q, res_r]
m_arg_x_high arg_x_low arg_y
= do let format = intFormat width
reg_q = getRegisterReg platform True (CmmLocal res_q)
reg_r = getRegisterReg platform True (CmmLocal res_r)
reg_q = getRegisterReg platform (CmmLocal res_q)
reg_r = getRegisterReg platform (CmmLocal res_r)
widen | signed = CLTD format
| otherwise = XOR format (OpReg rdx) (OpReg rdx)
instr | signed = IDIV
......@@ -2456,8 +2406,8 @@ genCCall _ is32Bit target dest_regs args bid = do
rCode <- anyReg =<< trivialCode width (instr format)
(mrevinstr format) arg_x arg_y
reg_tmp <- getNewRegNat II8
let reg_c = getRegisterReg platform True (CmmLocal res_c)
reg_r = getRegisterReg platform True (CmmLocal res_r)
let reg_c = getRegisterReg platform (CmmLocal res_c)
reg_r = getRegisterReg platform (CmmLocal res_r)
code = rCode reg_r `snocOL`
SETCC cond (OpReg reg_tmp) `snocOL`
MOVZxL II8 (OpReg reg_tmp) (OpReg reg_c)
......@@ -2501,8 +2451,7 @@ genCCall32' dflags target dest_regs args = do
delta0 <- getDeltaNat
setDeltaNat (delta0 - arg_pad_size)
use_sse2 <- sse2Enabled
push_codes <- mapM (push_arg use_sse2) (reverse prom_args)
push_codes <- mapM push_arg (reverse prom_args)
delta <- getDeltaNat
MASSERT(delta == delta0 - tot_arg_size)
......@@ -2555,18 +2504,21 @@ genCCall32' dflags target dest_regs args = do
assign_code [] = nilOL
assign_code [dest]
| isFloatType ty =
if use_sse2
then let tmp_amode = AddrBaseIndex (EABaseReg esp)
-- we assume SSE2
let tmp_amode = AddrBaseIndex (EABaseReg esp)
EAIndexNone
(ImmInt 0)
fmt = floatFormat w
fmt = floatFormat w
in toOL [ SUB II32 (OpImm (ImmInt b)) (OpReg esp),
DELTA (delta0 - b),
GST fmt fake0 tmp_amode,
X87Store fmt tmp_amode,
-- X87Store only supported for the CDECL ABI
-- NB: This code will need to be
-- revisted once GHC does more work around
-- SIGFPE f
MOV fmt (OpAddr tmp_amode) (OpReg r_dest),
ADD II32 (OpImm (ImmInt b)) (OpReg esp),
DELTA delta0]
else unitOL (GMOV fake0 r_dest)
| isWord64 ty = toOL [MOV II32 (OpReg eax) (OpReg r_dest),
MOV II32 (OpReg edx) (OpReg r_dest_hi)]
| otherwise = unitOL (MOV (intFormat w)
......@@ -2577,7 +2529,7 @@ genCCall32' dflags target dest_regs args = do
w = typeWidth ty
b = widthInBytes w
r_dest_hi = getHiVRegFromLo r_dest
r_dest = getRegisterReg platform use_sse2 (CmmLocal dest)
r_dest = getRegisterReg platform (CmmLocal dest)
assign_code many = pprPanic "genCCall.assign_code - too many return values:" (ppr many)
return (push_code `appOL`
......@@ -2592,10 +2544,10 @@ genCCall32' dflags target dest_regs args = do
roundTo a x | x `mod` a == 0 = x
| otherwise = x + a - (x `mod` a)
push_arg :: Bool -> CmmActual {-current argument-}
push_arg :: CmmActual {-current argument-}
-> NatM InstrBlock -- code
push_arg use_sse2 arg -- we don't need the hints on x86
push_arg arg -- we don't need the hints on x86
| isWord64 arg_ty = do
ChildCode64 code r_lo <- iselExpr64 arg
delta <- getDeltaNat
......@@ -2619,9 +2571,10 @@ genCCall32' dflags target dest_regs args = do
(ImmInt 0)
format = floatFormat (typeWidth arg_ty)
in
if use_sse2
then MOV format (OpReg reg) (OpAddr addr)
else GST format reg addr
-- assume SSE2
MOV format (OpReg reg) (OpAddr addr)
]
)
......@@ -2749,7 +2702,7 @@ genCCall64' dflags target dest_regs args = do
_ -> unitOL (MOV (cmmTypeFormat rep) (OpReg rax) (OpReg r_dest))
where
rep = localRegType dest
r_dest = getRegisterReg platform True (CmmLocal dest)
r_dest = getRegisterReg platform (CmmLocal dest)
assign_code _many = panic "genCCall.assign_code many"
return (adjust_rsp `appOL`
......@@ -3162,17 +3115,9 @@ condIntReg cond x y = do
-- and plays better with the uOP cache.
condFltReg :: Bool -> Cond -> CmmExpr -> CmmExpr -> NatM Register
condFltReg is32Bit cond x y = if_sse2 condFltReg_sse2 condFltReg_x87
condFltReg is32Bit cond x y = condFltReg_sse2
where
condFltReg_x87 = do
CondCode _ cond cond_code <- condFltCode cond x y
tmp <- getNewRegNat II8
let
code dst = cond_code `appOL` toOL [
SETCC cond (OpReg tmp),
MOVZxL II8 (OpReg tmp) (OpReg dst)
]
return (Any II32 code)
condFltReg_sse2 = do
CondCode _ cond cond_code <- condFltCode cond x y
......@@ -3336,18 +3281,6 @@ trivialUCode rep instr x = do
-----------
trivialFCode_x87 :: (Format -> Reg -> Reg -> Reg -> Instr)
-> CmmExpr -> CmmExpr -> NatM Register
trivialFCode_x87 instr x y = do
(x_reg, x_code) <- getNonClobberedReg x -- these work for float regs too
(y_reg, y_code) <- getSomeReg y
let
format = FF80 -- always, on x87
code dst =
x_code `appOL`
y_code `snocOL`
instr format x_reg y_reg dst
return (Any format code)
trivialFCode_sse2 :: Width -> (Format -> Operand -> Operand -> Instr)
-> CmmExpr -> CmmExpr -> NatM Register
......@@ -3368,17 +3301,8 @@ trivialUFCode format instr x = do
--------------------------------------------------------------------------------
coerceInt2FP :: Width -> Width -> CmmExpr -> NatM Register
coerceInt2FP from to x = if_sse2 coerce_sse2 coerce_x87
coerceInt2FP from to x = coerce_sse2
where
coerce_x87 = do
(x_reg, x_code) <- getSomeReg x
let
opc = case to of W32 -> GITOF; W64 -> GITOD;
n -> panic $ "coerceInt2FP.x87: unhandled width ("
++ show n ++ ")"
code dst = x_code `snocOL` opc x_reg dst
-- ToDo: works for non-II32 reps?
return (Any FF80 code)
coerce_sse2 = do
(x_op, x_code) <- getOperand x -- ToDo: could be a safe operand
......@@ -3392,18 +3316,8 @@ coerceInt2FP from to x = if_sse2 coerce_sse2 coerce_x87
--------------------------------------------------------------------------------
coerceFP2Int :: Width -> Width -> CmmExpr -> NatM Register
coerceFP2Int from to x = if_sse2 coerceFP2Int_sse2 coerceFP2Int_x87
coerceFP2Int from to x = coerceFP2Int_sse2
where
coerceFP2Int_x87 = do
(x_reg, x_code) <- getSomeReg x
let
opc = case from of W32 -> GFTOI; W64 -> GDTOI
n -> panic $ "coerceFP2Int.x87: unhandled width ("
++ show n ++ ")"
code dst = x_code `snocOL` opc x_reg dst
-- ToDo: works for non-II32 reps?
return (Any (intFormat to) code)
coerceFP2Int_sse2 = do
(x_op, x_code) <- getOperand x -- ToDo: could be a safe operand
let
......@@ -3418,15 +3332,13 @@ coerceFP2Int from to x = if_sse2 coerceFP2Int_sse2 coerceFP2Int_x87
--------------------------------------------------------------------------------
coerceFP2FP :: Width -> CmmExpr -> NatM Register
coerceFP2FP to x = do
use_sse2 <- sse2Enabled
(x_reg, x_code) <- getSomeReg x
let
opc | use_sse2 = case to of W32 -> CVTSD2SS; W64 -> CVTSS2SD;
opc = case to of W32 -> CVTSD2SS; W64 -> CVTSS2SD;
n -> panic $ "coerceFP2FP: unhandled width ("
++ show n ++ ")"
| otherwise = GDTOF
code dst = x_code `snocOL` opc x_reg dst
return (Any (if use_sse2 then floatFormat to else FF80) code)
return (Any ( floatFormat to) code)
--------------------------------------------------------------------------------
......@@ -3443,7 +3355,7 @@ sse2NegCode w x = do
x@II16 -> wrongFmt x
x@II32 -> wrongFmt x
x@II64 -> wrongFmt x
x@FF80 -> wrongFmt x
where
wrongFmt x = panic $ "sse2NegCode: " ++ show x
Amode amode amode_code <- memConstant (mkAlignment $ widthInBytes w) const
......