Commit 335b9f36 authored by Simon Marlow's avatar Simon Marlow
Browse files

Implement SSE2 floating-point support in the x86 native code generator (#594)

The new flag -msse2 enables code generation for SSE2 on x86.  It
results in substantially faster floating-point performance; the main
reason for doing this was that our x87 code generation is appallingly
bad, and since we plan to drop -fvia-C soon, we need a way to generate
half-decent floating-point code.

The catch is that SSE2 is only available on CPUs that support it (P4+,
AMD K8+).  We'll have to think hard about whether we should enable it
by default for the libraries we ship.  In the meantime, at least
-msse2 should be an acceptable replacement for "-fvia-C
-optc-ffast-math -fexcess-precision".

SSE2 also has the advantage of performing all operations at the
correct precision, so floating-point results are consistent with other
platforms.

I also tweaked the x87 code generation a bit while I was here, now
it's slighlty less bad than before.
parent d9f71774
...@@ -312,6 +312,7 @@ data DynFlag ...@@ -312,6 +312,7 @@ data DynFlag
| Opt_EmitExternalCore | Opt_EmitExternalCore
| Opt_SharedImplib | Opt_SharedImplib
| Opt_BuildingCabalPackage | Opt_BuildingCabalPackage
| Opt_SSE2
-- temporary flags -- temporary flags
| Opt_RunCPS | Opt_RunCPS
...@@ -1265,6 +1266,9 @@ dynamic_flags = [ ...@@ -1265,6 +1266,9 @@ dynamic_flags = [
, Flag "monly-4-regs" (NoArg (upd (\s -> s{stolen_x86_regs = 4}) )) , Flag "monly-4-regs" (NoArg (upd (\s -> s{stolen_x86_regs = 4}) ))
Supported Supported
, Flag "msse2" (NoArg (setDynFlag Opt_SSE2))
Supported
------ Warning opts ------------------------------------------------- ------ Warning opts -------------------------------------------------
, Flag "W" (NoArg (mapM_ setDynFlag minusWOpts)) , Flag "W" (NoArg (mapM_ setDynFlag minusWOpts))
Supported Supported
......
...@@ -171,6 +171,7 @@ pprReg r ...@@ -171,6 +171,7 @@ pprReg r
RegVirtual (VirtualRegHi u) -> text "%vHi_" <> asmSDoc (pprUnique u) RegVirtual (VirtualRegHi u) -> text "%vHi_" <> asmSDoc (pprUnique u)
RegVirtual (VirtualRegF u) -> text "%vF_" <> asmSDoc (pprUnique u) RegVirtual (VirtualRegF u) -> text "%vF_" <> asmSDoc (pprUnique u)
RegVirtual (VirtualRegD u) -> text "%vD_" <> asmSDoc (pprUnique u) RegVirtual (VirtualRegD u) -> text "%vD_" <> asmSDoc (pprUnique u)
RegVirtual (VirtualRegSSE u) -> text "%vSSE_" <> asmSDoc (pprUnique u)
where where
#if darwin_TARGET_OS #if darwin_TARGET_OS
ppr_reg_no :: Int -> Doc ppr_reg_no :: Int -> Doc
......
...@@ -87,25 +87,15 @@ virtualRegSqueeze cls vr ...@@ -87,25 +87,15 @@ virtualRegSqueeze cls vr
-> case vr of -> case vr of
VirtualRegI{} -> _ILIT(1) VirtualRegI{} -> _ILIT(1)
VirtualRegHi{} -> _ILIT(1) VirtualRegHi{} -> _ILIT(1)
VirtualRegD{} -> _ILIT(0) _other -> _ILIT(0)
VirtualRegF{} -> _ILIT(0)
-- We don't use floats on this arch, but we can't
-- return error because the return type is unboxed...
RcFloat
-> case vr of
VirtualRegI{} -> _ILIT(0)
VirtualRegHi{} -> _ILIT(0)
VirtualRegD{} -> _ILIT(0)
VirtualRegF{} -> _ILIT(0)
RcDouble RcDouble
-> case vr of -> case vr of
VirtualRegI{} -> _ILIT(0)
VirtualRegHi{} -> _ILIT(0)
VirtualRegD{} -> _ILIT(1) VirtualRegD{} -> _ILIT(1)
VirtualRegF{} -> _ILIT(0) VirtualRegF{} -> _ILIT(0)
_other -> _ILIT(0)
_other -> _ILIT(0)
{-# INLINE realRegSqueeze #-} {-# INLINE realRegSqueeze #-}
realRegSqueeze :: RegClass -> RealReg -> FastInt realRegSqueeze :: RegClass -> RealReg -> FastInt
...@@ -119,16 +109,6 @@ realRegSqueeze cls rr ...@@ -119,16 +109,6 @@ realRegSqueeze cls rr
RealRegPair{} -> _ILIT(0) RealRegPair{} -> _ILIT(0)
-- We don't use floats on this arch, but we can't
-- return error because the return type is unboxed...
RcFloat
-> case rr of
RealRegSingle regNo
| regNo < 32 -> _ILIT(0)
| otherwise -> _ILIT(0)
RealRegPair{} -> _ILIT(0)
RcDouble RcDouble
-> case rr of -> case rr of
RealRegSingle regNo RealRegSingle regNo
...@@ -137,6 +117,8 @@ realRegSqueeze cls rr ...@@ -137,6 +117,8 @@ realRegSqueeze cls rr
RealRegPair{} -> _ILIT(0) RealRegPair{} -> _ILIT(0)
_other -> _ILIT(0)
mkVirtualReg :: Unique -> Size -> VirtualReg mkVirtualReg :: Unique -> Size -> VirtualReg
mkVirtualReg u size mkVirtualReg u size
| not (isFloatSize size) = VirtualRegI u | not (isFloatSize size) = VirtualRegI u
...@@ -152,6 +134,7 @@ regDotColor reg ...@@ -152,6 +134,7 @@ regDotColor reg
RcInteger -> Outputable.text "blue" RcInteger -> Outputable.text "blue"
RcFloat -> Outputable.text "red" RcFloat -> Outputable.text "red"
RcDouble -> Outputable.text "green" RcDouble -> Outputable.text "green"
RcDoubleSSE -> Outputable.text "yellow"
-- immediates ------------------------------------------------------------------ -- immediates ------------------------------------------------------------------
......
...@@ -55,6 +55,7 @@ data VirtualReg ...@@ -55,6 +55,7 @@ data VirtualReg
| VirtualRegHi {-# UNPACK #-} !Unique -- High part of 2-word register | VirtualRegHi {-# UNPACK #-} !Unique -- High part of 2-word register
| VirtualRegF {-# UNPACK #-} !Unique | VirtualRegF {-# UNPACK #-} !Unique
| VirtualRegD {-# UNPACK #-} !Unique | VirtualRegD {-# UNPACK #-} !Unique
| VirtualRegSSE {-# UNPACK #-} !Unique
deriving (Eq, Show, Ord) deriving (Eq, Show, Ord)
instance Uniquable VirtualReg where instance Uniquable VirtualReg where
...@@ -64,6 +65,7 @@ instance Uniquable VirtualReg where ...@@ -64,6 +65,7 @@ instance Uniquable VirtualReg where
VirtualRegHi u -> u VirtualRegHi u -> u
VirtualRegF u -> u VirtualRegF u -> u
VirtualRegD u -> u VirtualRegD u -> u
VirtualRegSSE u -> u
instance Outputable VirtualReg where instance Outputable VirtualReg where
ppr reg ppr reg
...@@ -72,6 +74,7 @@ instance Outputable VirtualReg where ...@@ -72,6 +74,7 @@ instance Outputable VirtualReg where
VirtualRegHi u -> text "%vHi_" <> pprUnique u VirtualRegHi u -> text "%vHi_" <> pprUnique u
VirtualRegF u -> text "%vF_" <> pprUnique u VirtualRegF u -> text "%vF_" <> pprUnique u
VirtualRegD u -> text "%vD_" <> pprUnique u VirtualRegD u -> text "%vD_" <> pprUnique u
VirtualRegSSE u -> text "%vSSE_" <> pprUnique u
renameVirtualReg :: Unique -> VirtualReg -> VirtualReg renameVirtualReg :: Unique -> VirtualReg -> VirtualReg
...@@ -81,6 +84,7 @@ renameVirtualReg u r ...@@ -81,6 +84,7 @@ renameVirtualReg u r
VirtualRegHi _ -> VirtualRegHi u VirtualRegHi _ -> VirtualRegHi u
VirtualRegF _ -> VirtualRegF u VirtualRegF _ -> VirtualRegF u
VirtualRegD _ -> VirtualRegD u VirtualRegD _ -> VirtualRegD u
VirtualRegSSE _ -> VirtualRegSSE u
classOfVirtualReg :: VirtualReg -> RegClass classOfVirtualReg :: VirtualReg -> RegClass
...@@ -90,6 +94,7 @@ classOfVirtualReg vr ...@@ -90,6 +94,7 @@ classOfVirtualReg vr
VirtualRegHi{} -> RcInteger VirtualRegHi{} -> RcInteger
VirtualRegF{} -> RcFloat VirtualRegF{} -> RcFloat
VirtualRegD{} -> RcDouble VirtualRegD{} -> RcDouble
VirtualRegSSE{} -> RcDoubleSSE
-- Determine the upper-half vreg for a 64-bit quantity on a 32-bit platform -- Determine the upper-half vreg for a 64-bit quantity on a 32-bit platform
......
...@@ -380,25 +380,13 @@ seqNode node ...@@ -380,25 +380,13 @@ seqNode node
`seq` (seqVirtualRegList (uniqSetToList (Color.nodeCoalesce node))) `seq` (seqVirtualRegList (uniqSetToList (Color.nodeCoalesce node)))
seqVirtualReg :: VirtualReg -> () seqVirtualReg :: VirtualReg -> ()
seqVirtualReg reg seqVirtualReg reg = reg `seq` ()
= case reg of
VirtualRegI _ -> ()
VirtualRegHi _ -> ()
VirtualRegF _ -> ()
VirtualRegD _ -> ()
seqRealReg :: RealReg -> () seqRealReg :: RealReg -> ()
seqRealReg reg seqRealReg reg = reg `seq` ()
= case reg of
RealRegSingle _ -> ()
RealRegPair _ _ -> ()
seqRegClass :: RegClass -> () seqRegClass :: RegClass -> ()
seqRegClass c seqRegClass c = c `seq` ()
= case c of
RcInteger -> ()
RcFloat -> ()
RcDouble -> ()
seqMaybeRealReg :: Maybe RealReg -> () seqMaybeRealReg :: Maybe RealReg -> ()
seqMaybeRealReg mr seqMaybeRealReg mr
......
...@@ -50,24 +50,27 @@ import FastTypes ...@@ -50,24 +50,27 @@ import FastTypes
#define ALLOCATABLE_REGS_INTEGER (_ILIT(3)) #define ALLOCATABLE_REGS_INTEGER (_ILIT(3))
#define ALLOCATABLE_REGS_DOUBLE (_ILIT(6)) #define ALLOCATABLE_REGS_DOUBLE (_ILIT(6))
#define ALLOCATABLE_REGS_FLOAT (_ILIT(0)) #define ALLOCATABLE_REGS_FLOAT (_ILIT(0))
#define ALLOCATABLE_REGS_SSE (_ILIT(16))
#elif x86_64_TARGET_ARCH #elif x86_64_TARGET_ARCH
#define ALLOCATABLE_REGS_INTEGER (_ILIT(5)) #define ALLOCATABLE_REGS_INTEGER (_ILIT(5))
#define ALLOCATABLE_REGS_DOUBLE (_ILIT(2)) #define ALLOCATABLE_REGS_DOUBLE (_ILIT(0))
#define ALLOCATABLE_REGS_FLOAT (_ILIT(0)) #define ALLOCATABLE_REGS_FLOAT (_ILIT(0))
#define ALLOCATABLE_REGS_SSE (_ILIT(10))
#elif powerpc_TARGET_ARCH #elif powerpc_TARGET_ARCH
#define ALLOCATABLE_REGS_INTEGER (_ILIT(16)) #define ALLOCATABLE_REGS_INTEGER (_ILIT(16))
#define ALLOCATABLE_REGS_DOUBLE (_ILIT(26)) #define ALLOCATABLE_REGS_DOUBLE (_ILIT(26))
#define ALLOCATABLE_REGS_FLOAT (_ILIT(0)) #define ALLOCATABLE_REGS_FLOAT (_ILIT(0))
#define ALLOCATABLE_REGS_SSE (_ILIT(0))
#elif sparc_TARGET_ARCH #elif sparc_TARGET_ARCH
#define ALLOCATABLE_REGS_INTEGER (_ILIT(14)) #define ALLOCATABLE_REGS_INTEGER (_ILIT(14))
#define ALLOCATABLE_REGS_DOUBLE (_ILIT(11)) #define ALLOCATABLE_REGS_DOUBLE (_ILIT(11))
#define ALLOCATABLE_REGS_FLOAT (_ILIT(22)) #define ALLOCATABLE_REGS_FLOAT (_ILIT(22))
#define ALLOCATABLE_REGS_SSE (_ILIT(0))
#else #else
...@@ -139,6 +142,17 @@ trivColorable virtualRegSqueeze realRegSqueeze RcDouble conflicts exclusions ...@@ -139,6 +142,17 @@ trivColorable virtualRegSqueeze realRegSqueeze RcDouble conflicts exclusions
= count3 <# ALLOCATABLE_REGS_DOUBLE = count3 <# ALLOCATABLE_REGS_DOUBLE
trivColorable virtualRegSqueeze realRegSqueeze RcDoubleSSE conflicts exclusions
| count2 <- accSqueeze (_ILIT(0)) ALLOCATABLE_REGS_SSE
(virtualRegSqueeze RcDoubleSSE)
conflicts
, count3 <- accSqueeze count2 ALLOCATABLE_REGS_SSE
(realRegSqueeze RcDoubleSSE)
exclusions
= count3 <# ALLOCATABLE_REGS_SSE
-- Specification Code ---------------------------------------------------------- -- Specification Code ----------------------------------------------------------
-- --
......
...@@ -17,6 +17,7 @@ data RegClass ...@@ -17,6 +17,7 @@ data RegClass
= RcInteger = RcInteger
| RcFloat | RcFloat
| RcDouble | RcDouble
| RcDoubleSSE -- x86 only: the SSE regs are a separate class
deriving Eq deriving Eq
...@@ -24,8 +25,10 @@ instance Uniquable RegClass where ...@@ -24,8 +25,10 @@ instance Uniquable RegClass where
getUnique RcInteger = mkRegClassUnique 0 getUnique RcInteger = mkRegClassUnique 0
getUnique RcFloat = mkRegClassUnique 1 getUnique RcFloat = mkRegClassUnique 1
getUnique RcDouble = mkRegClassUnique 2 getUnique RcDouble = mkRegClassUnique 2
getUnique RcDoubleSSE = mkRegClassUnique 3
instance Outputable RegClass where instance Outputable RegClass where
ppr RcInteger = Outputable.text "I" ppr RcInteger = Outputable.text "I"
ppr RcFloat = Outputable.text "F" ppr RcFloat = Outputable.text "F"
ppr RcDouble = Outputable.text "D" ppr RcDouble = Outputable.text "D"
ppr RcDoubleSSE = Outputable.text "S"
...@@ -373,6 +373,7 @@ sparc_mkSpillInstr reg _ slot ...@@ -373,6 +373,7 @@ sparc_mkSpillInstr reg _ slot
RcInteger -> II32 RcInteger -> II32
RcFloat -> FF32 RcFloat -> FF32
RcDouble -> FF64 RcDouble -> FF64
_ -> panic "sparc_mkSpillInstr"
in ST sz reg (fpRel (negate off_w)) in ST sz reg (fpRel (negate off_w))
...@@ -391,6 +392,7 @@ sparc_mkLoadInstr reg _ slot ...@@ -391,6 +392,7 @@ sparc_mkLoadInstr reg _ slot
RcInteger -> II32 RcInteger -> II32
RcFloat -> FF32 RcFloat -> FF32
RcDouble -> FF64 RcDouble -> FF64
_ -> panic "sparc_mkLoadInstr"
in LD sz (fpRel (- off_w)) reg in LD sz (fpRel (- off_w)) reg
...@@ -438,6 +440,7 @@ sparc_mkRegRegMoveInstr src dst ...@@ -438,6 +440,7 @@ sparc_mkRegRegMoveInstr src dst
RcInteger -> ADD False False src (RIReg g0) dst RcInteger -> ADD False False src (RIReg g0) dst
RcDouble -> FMOV FF64 src dst RcDouble -> FMOV FF64 src dst
RcFloat -> FMOV FF32 src dst RcFloat -> FMOV FF32 src dst
_ -> panic "sparc_mkRegRegMoveInstr"
| otherwise | otherwise
= panic "SPARC.Instr.mkRegRegMoveInstr: classes of src and dest not the same" = panic "SPARC.Instr.mkRegRegMoveInstr: classes of src and dest not the same"
......
...@@ -156,6 +156,7 @@ pprReg reg ...@@ -156,6 +156,7 @@ pprReg reg
VirtualRegHi u -> text "%vHi_" <> asmSDoc (pprUnique u) VirtualRegHi u -> text "%vHi_" <> asmSDoc (pprUnique u)
VirtualRegF u -> text "%vF_" <> asmSDoc (pprUnique u) VirtualRegF u -> text "%vF_" <> asmSDoc (pprUnique u)
VirtualRegD u -> text "%vD_" <> asmSDoc (pprUnique u) VirtualRegD u -> text "%vD_" <> asmSDoc (pprUnique u)
VirtualRegSSE u -> text "%vSSE_" <> asmSDoc (pprUnique u)
RegReal rr RegReal rr
-> case rr of -> case rr of
......
...@@ -95,22 +95,21 @@ virtualRegSqueeze cls vr ...@@ -95,22 +95,21 @@ virtualRegSqueeze cls vr
-> case vr of -> case vr of
VirtualRegI{} -> _ILIT(1) VirtualRegI{} -> _ILIT(1)
VirtualRegHi{} -> _ILIT(1) VirtualRegHi{} -> _ILIT(1)
VirtualRegF{} -> _ILIT(0) _other -> _ILIT(0)
VirtualRegD{} -> _ILIT(0)
RcFloat RcFloat
-> case vr of -> case vr of
VirtualRegI{} -> _ILIT(0)
VirtualRegHi{} -> _ILIT(0)
VirtualRegF{} -> _ILIT(1) VirtualRegF{} -> _ILIT(1)
VirtualRegD{} -> _ILIT(2) VirtualRegD{} -> _ILIT(2)
_other -> _ILIT(0)
RcDouble RcDouble
-> case vr of -> case vr of
VirtualRegI{} -> _ILIT(0)
VirtualRegHi{} -> _ILIT(0)
VirtualRegF{} -> _ILIT(1) VirtualRegF{} -> _ILIT(1)
VirtualRegD{} -> _ILIT(1) VirtualRegD{} -> _ILIT(1)
_other -> _ILIT(0)
_other -> _ILIT(0)
{-# INLINE realRegSqueeze #-} {-# INLINE realRegSqueeze #-}
realRegSqueeze :: RegClass -> RealReg -> FastInt realRegSqueeze :: RegClass -> RealReg -> FastInt
...@@ -141,6 +140,7 @@ realRegSqueeze cls rr ...@@ -141,6 +140,7 @@ realRegSqueeze cls rr
RealRegPair{} -> _ILIT(1) RealRegPair{} -> _ILIT(1)
_other -> _ILIT(0)
-- | All the allocatable registers in the machine, -- | All the allocatable registers in the machine,
-- including register pairs. -- including register pairs.
...@@ -283,7 +283,7 @@ regDotColor reg ...@@ -283,7 +283,7 @@ regDotColor reg
= case classOfRealReg reg of = case classOfRealReg reg of
RcInteger -> text "blue" RcInteger -> text "blue"
RcFloat -> text "red" RcFloat -> text "red"
RcDouble -> text "green" _other -> text "green"
......
This diff is collapsed.
...@@ -253,10 +253,10 @@ data Instr ...@@ -253,10 +253,10 @@ data Instr
-- use MOV for moving (either movss or movsd (movlpd better?)) -- use MOV for moving (either movss or movsd (movlpd better?))
| CVTSS2SD Reg Reg -- F32 to F64 | CVTSS2SD Reg Reg -- F32 to F64
| CVTSD2SS Reg Reg -- F64 to F32 | CVTSD2SS Reg Reg -- F64 to F32
| CVTTSS2SIQ Operand Reg -- F32 to I32/I64 (with truncation) | CVTTSS2SIQ Size Operand Reg -- F32 to I32/I64 (with truncation)
| CVTTSD2SIQ Operand Reg -- F64 to I32/I64 (with truncation) | CVTTSD2SIQ Size Operand Reg -- F64 to I32/I64 (with truncation)
| CVTSI2SS Operand Reg -- I32/I64 to F32 | CVTSI2SS Size Operand Reg -- I32/I64 to F32
| CVTSI2SD Operand Reg -- I32/I64 to F64 | CVTSI2SD Size Operand Reg -- I32/I64 to F64
-- use ADD & SUB for arithmetic. In both cases, operands -- use ADD & SUB for arithmetic. In both cases, operands
-- are Operand Reg. -- are Operand Reg.
...@@ -353,7 +353,6 @@ x86_regUsageOfInstr instr ...@@ -353,7 +353,6 @@ x86_regUsageOfInstr instr
CLTD _ -> mkRU [eax] [edx] CLTD _ -> mkRU [eax] [edx]
NOP -> mkRU [] [] NOP -> mkRU [] []
#if i386_TARGET_ARCH
GMOV src dst -> mkRU [src] [dst] GMOV src dst -> mkRU [src] [dst]
GLD _ src dst -> mkRU (use_EA src) [dst] GLD _ src dst -> mkRU (use_EA src) [dst]
GST _ src dst -> mkRUR (src : use_EA dst) GST _ src dst -> mkRUR (src : use_EA dst)
...@@ -379,17 +378,14 @@ x86_regUsageOfInstr instr ...@@ -379,17 +378,14 @@ x86_regUsageOfInstr instr
GSIN _ _ _ src dst -> mkRU [src] [dst] GSIN _ _ _ src dst -> mkRU [src] [dst]
GCOS _ _ _ src dst -> mkRU [src] [dst] GCOS _ _ _ src dst -> mkRU [src] [dst]
GTAN _ _ _ src dst -> mkRU [src] [dst] GTAN _ _ _ src dst -> mkRU [src] [dst]
#endif
#if x86_64_TARGET_ARCH
CVTSS2SD src dst -> mkRU [src] [dst] CVTSS2SD src dst -> mkRU [src] [dst]
CVTSD2SS src dst -> mkRU [src] [dst] CVTSD2SS src dst -> mkRU [src] [dst]
CVTTSS2SIQ src dst -> mkRU (use_R src) [dst] CVTTSS2SIQ _ src dst -> mkRU (use_R src) [dst]
CVTTSD2SIQ src dst -> mkRU (use_R src) [dst] CVTTSD2SIQ _ src dst -> mkRU (use_R src) [dst]
CVTSI2SS src dst -> mkRU (use_R src) [dst] CVTSI2SS _ src dst -> mkRU (use_R src) [dst]
CVTSI2SD src dst -> mkRU (use_R src) [dst] CVTSI2SD _ src dst -> mkRU (use_R src) [dst]
FDIV _ src dst -> usageRM src dst FDIV _ src dst -> usageRM src dst
#endif
FETCHGOT reg -> mkRU [] [reg] FETCHGOT reg -> mkRU [] [reg]
FETCHPC reg -> mkRU [] [reg] FETCHPC reg -> mkRU [] [reg]
...@@ -483,7 +479,6 @@ x86_patchRegsOfInstr instr env ...@@ -483,7 +479,6 @@ x86_patchRegsOfInstr instr env
JMP op -> patch1 JMP op JMP op -> patch1 JMP op
JMP_TBL op ids -> patch1 JMP_TBL op $ ids JMP_TBL op ids -> patch1 JMP_TBL op $ ids
#if i386_TARGET_ARCH
GMOV src dst -> GMOV (env src) (env dst) GMOV src dst -> GMOV (env src) (env dst)
GLD sz src dst -> GLD sz (lookupAddr src) (env dst) GLD sz src dst -> GLD sz (lookupAddr src) (env dst)
GST sz src dst -> GST sz (env src) (lookupAddr dst) GST sz src dst -> GST sz (env src) (lookupAddr dst)
...@@ -509,17 +504,14 @@ x86_patchRegsOfInstr instr env ...@@ -509,17 +504,14 @@ x86_patchRegsOfInstr instr env
GSIN sz l1 l2 src dst -> GSIN sz l1 l2 (env src) (env dst) GSIN sz l1 l2 src dst -> GSIN sz l1 l2 (env src) (env dst)
GCOS sz l1 l2 src dst -> GCOS sz l1 l2 (env src) (env dst) GCOS sz l1 l2 src dst -> GCOS sz l1 l2 (env src) (env dst)
GTAN sz l1 l2 src dst -> GTAN sz l1 l2 (env src) (env dst) GTAN sz l1 l2 src dst -> GTAN sz l1 l2 (env src) (env dst)
#endif
#if x86_64_TARGET_ARCH
CVTSS2SD src dst -> CVTSS2SD (env src) (env dst) CVTSS2SD src dst -> CVTSS2SD (env src) (env dst)
CVTSD2SS src dst -> CVTSD2SS (env src) (env dst) CVTSD2SS src dst -> CVTSD2SS (env src) (env dst)
CVTTSS2SIQ src dst -> CVTTSS2SIQ (patchOp src) (env dst) CVTTSS2SIQ sz src dst -> CVTTSS2SIQ sz (patchOp src) (env dst)
CVTTSD2SIQ src dst -> CVTTSD2SIQ (patchOp src) (env dst) CVTTSD2SIQ sz src dst -> CVTTSD2SIQ sz (patchOp src) (env dst)
CVTSI2SS src dst -> CVTSI2SS (patchOp src) (env dst) CVTSI2SS sz src dst -> CVTSI2SS sz (patchOp src) (env dst)
CVTSI2SD src dst -> CVTSI2SD (patchOp src) (env dst) CVTSI2SD sz src dst -> CVTSI2SD sz (patchOp src) (env dst)
FDIV sz src dst -> FDIV sz (patchOp src) (patchOp dst) FDIV sz src dst -> FDIV sz (patchOp src) (patchOp dst)
#endif
CALL (Left _) _ -> instr CALL (Left _) _ -> instr
CALL (Right reg) p -> CALL (Right (env reg)) p CALL (Right reg) p -> CALL (Right (env reg)) p
...@@ -602,30 +594,16 @@ x86_mkSpillInstr ...@@ -602,30 +594,16 @@ x86_mkSpillInstr
-> Int -- spill slot to use -> Int -- spill slot to use
-> Instr -> Instr
#if i386_TARGET_ARCH
x86_mkSpillInstr reg delta slot
= let off = spillSlotToOffset slot
in
let off_w = (off-delta) `div` 4
in case targetClassOfReg reg of
RcInteger -> MOV II32 (OpReg reg) (OpAddr (spRel off_w))
_ -> GST FF80 reg (spRel off_w) {- RcFloat/RcDouble -}
#elif x86_64_TARGET_ARCH
x86_mkSpillInstr reg delta slot x86_mkSpillInstr reg delta slot
= let off = spillSlotToOffset slot = let off = spillSlotToOffset slot
in in
let off_w = (off-delta) `div` 8 let off_w = (off-delta) `div` IF_ARCH_i386(4,8)
in case targetClassOfReg reg of in case targetClassOfReg reg of
RcInteger -> MOV II64 (OpReg reg) (OpAddr (spRel off_w)) RcInteger -> MOV IF_ARCH_i386(II32,II64)
RcDouble -> MOV FF64 (OpReg reg) (OpAddr (spRel off_w)) (OpReg reg) (OpAddr (spRel off_w))
RcDouble -> GST FF80 reg (spRel off_w) {- RcFloat/RcDouble -}
RcDoubleSSE -> MOV FF64 (OpReg reg) (OpAddr (spRel off_w))
_ -> panic "X86.mkSpillInstr: no match" _ -> panic "X86.mkSpillInstr: no match"
-- ToDo: will it work to always spill as a double?
-- does that cause a stall if the data was a float?
#else
x86_mkSpillInstr _ _ _
= panic "X86.RegInfo.mkSpillInstr: not defined for this architecture."
#endif
-- | Make a spill reload instruction. -- | Make a spill reload instruction.
...@@ -635,26 +613,16 @@ x86_mkLoadInstr ...@@ -635,26 +613,16 @@ x86_mkLoadInstr
-> Int -- spill slot to use