Commit 335b9f36 authored by Simon Marlow's avatar Simon Marlow
Browse files

Implement SSE2 floating-point support in the x86 native code generator (#594)

The new flag -msse2 enables code generation for SSE2 on x86.  It
results in substantially faster floating-point performance; the main
reason for doing this was that our x87 code generation is appallingly
bad, and since we plan to drop -fvia-C soon, we need a way to generate
half-decent floating-point code.

The catch is that SSE2 is only available on CPUs that support it (P4+,
AMD K8+).  We'll have to think hard about whether we should enable it
by default for the libraries we ship.  In the meantime, at least
-msse2 should be an acceptable replacement for "-fvia-C
-optc-ffast-math -fexcess-precision".

SSE2 also has the advantage of performing all operations at the
correct precision, so floating-point results are consistent with other
platforms.

I also tweaked the x87 code generation a bit while I was here, now
it's slighlty less bad than before.
parent d9f71774
......@@ -312,6 +312,7 @@ data DynFlag
| Opt_EmitExternalCore
| Opt_SharedImplib
| Opt_BuildingCabalPackage
| Opt_SSE2
-- temporary flags
| Opt_RunCPS
......@@ -1265,6 +1266,9 @@ dynamic_flags = [
, Flag "monly-4-regs" (NoArg (upd (\s -> s{stolen_x86_regs = 4}) ))
Supported
, Flag "msse2" (NoArg (setDynFlag Opt_SSE2))
Supported
------ Warning opts -------------------------------------------------
, Flag "W" (NoArg (mapM_ setDynFlag minusWOpts))
Supported
......
......@@ -171,6 +171,7 @@ pprReg r
RegVirtual (VirtualRegHi u) -> text "%vHi_" <> asmSDoc (pprUnique u)
RegVirtual (VirtualRegF u) -> text "%vF_" <> asmSDoc (pprUnique u)
RegVirtual (VirtualRegD u) -> text "%vD_" <> asmSDoc (pprUnique u)
RegVirtual (VirtualRegSSE u) -> text "%vSSE_" <> asmSDoc (pprUnique u)
where
#if darwin_TARGET_OS
ppr_reg_no :: Int -> Doc
......
......@@ -87,25 +87,15 @@ virtualRegSqueeze cls vr
-> case vr of
VirtualRegI{} -> _ILIT(1)
VirtualRegHi{} -> _ILIT(1)
VirtualRegD{} -> _ILIT(0)
VirtualRegF{} -> _ILIT(0)
-- We don't use floats on this arch, but we can't
-- return error because the return type is unboxed...
RcFloat
-> case vr of
VirtualRegI{} -> _ILIT(0)
VirtualRegHi{} -> _ILIT(0)
VirtualRegD{} -> _ILIT(0)
VirtualRegF{} -> _ILIT(0)
_other -> _ILIT(0)
RcDouble
-> case vr of
VirtualRegI{} -> _ILIT(0)
VirtualRegHi{} -> _ILIT(0)
VirtualRegD{} -> _ILIT(1)
VirtualRegF{} -> _ILIT(0)
_other -> _ILIT(0)
_other -> _ILIT(0)
{-# INLINE realRegSqueeze #-}
realRegSqueeze :: RegClass -> RealReg -> FastInt
......@@ -119,16 +109,6 @@ realRegSqueeze cls rr
RealRegPair{} -> _ILIT(0)
-- We don't use floats on this arch, but we can't
-- return error because the return type is unboxed...
RcFloat
-> case rr of
RealRegSingle regNo
| regNo < 32 -> _ILIT(0)
| otherwise -> _ILIT(0)
RealRegPair{} -> _ILIT(0)
RcDouble
-> case rr of
RealRegSingle regNo
......@@ -137,6 +117,8 @@ realRegSqueeze cls rr
RealRegPair{} -> _ILIT(0)
_other -> _ILIT(0)
mkVirtualReg :: Unique -> Size -> VirtualReg
mkVirtualReg u size
| not (isFloatSize size) = VirtualRegI u
......@@ -152,6 +134,7 @@ regDotColor reg
RcInteger -> Outputable.text "blue"
RcFloat -> Outputable.text "red"
RcDouble -> Outputable.text "green"
RcDoubleSSE -> Outputable.text "yellow"
-- immediates ------------------------------------------------------------------
......
......@@ -55,6 +55,7 @@ data VirtualReg
| VirtualRegHi {-# UNPACK #-} !Unique -- High part of 2-word register
| VirtualRegF {-# UNPACK #-} !Unique
| VirtualRegD {-# UNPACK #-} !Unique
| VirtualRegSSE {-# UNPACK #-} !Unique
deriving (Eq, Show, Ord)
instance Uniquable VirtualReg where
......@@ -64,6 +65,7 @@ instance Uniquable VirtualReg where
VirtualRegHi u -> u
VirtualRegF u -> u
VirtualRegD u -> u
VirtualRegSSE u -> u
instance Outputable VirtualReg where
ppr reg
......@@ -72,6 +74,7 @@ instance Outputable VirtualReg where
VirtualRegHi u -> text "%vHi_" <> pprUnique u
VirtualRegF u -> text "%vF_" <> pprUnique u
VirtualRegD u -> text "%vD_" <> pprUnique u
VirtualRegSSE u -> text "%vSSE_" <> pprUnique u
renameVirtualReg :: Unique -> VirtualReg -> VirtualReg
......@@ -81,6 +84,7 @@ renameVirtualReg u r
VirtualRegHi _ -> VirtualRegHi u
VirtualRegF _ -> VirtualRegF u
VirtualRegD _ -> VirtualRegD u
VirtualRegSSE _ -> VirtualRegSSE u
classOfVirtualReg :: VirtualReg -> RegClass
......@@ -90,6 +94,7 @@ classOfVirtualReg vr
VirtualRegHi{} -> RcInteger
VirtualRegF{} -> RcFloat
VirtualRegD{} -> RcDouble
VirtualRegSSE{} -> RcDoubleSSE
-- Determine the upper-half vreg for a 64-bit quantity on a 32-bit platform
......
......@@ -380,25 +380,13 @@ seqNode node
`seq` (seqVirtualRegList (uniqSetToList (Color.nodeCoalesce node)))
seqVirtualReg :: VirtualReg -> ()
seqVirtualReg reg
= case reg of
VirtualRegI _ -> ()
VirtualRegHi _ -> ()
VirtualRegF _ -> ()
VirtualRegD _ -> ()
seqVirtualReg reg = reg `seq` ()
seqRealReg :: RealReg -> ()
seqRealReg reg
= case reg of
RealRegSingle _ -> ()
RealRegPair _ _ -> ()
seqRealReg reg = reg `seq` ()
seqRegClass :: RegClass -> ()
seqRegClass c
= case c of
RcInteger -> ()
RcFloat -> ()
RcDouble -> ()
seqRegClass c = c `seq` ()
seqMaybeRealReg :: Maybe RealReg -> ()
seqMaybeRealReg mr
......
......@@ -50,24 +50,27 @@ import FastTypes
#define ALLOCATABLE_REGS_INTEGER (_ILIT(3))
#define ALLOCATABLE_REGS_DOUBLE (_ILIT(6))
#define ALLOCATABLE_REGS_FLOAT (_ILIT(0))
#define ALLOCATABLE_REGS_SSE (_ILIT(16))
#elif x86_64_TARGET_ARCH
#define ALLOCATABLE_REGS_INTEGER (_ILIT(5))
#define ALLOCATABLE_REGS_DOUBLE (_ILIT(2))
#define ALLOCATABLE_REGS_DOUBLE (_ILIT(0))
#define ALLOCATABLE_REGS_FLOAT (_ILIT(0))
#define ALLOCATABLE_REGS_SSE (_ILIT(10))
#elif powerpc_TARGET_ARCH
#define ALLOCATABLE_REGS_INTEGER (_ILIT(16))
#define ALLOCATABLE_REGS_DOUBLE (_ILIT(26))
#define ALLOCATABLE_REGS_FLOAT (_ILIT(0))
#define ALLOCATABLE_REGS_SSE (_ILIT(0))
#elif sparc_TARGET_ARCH
#define ALLOCATABLE_REGS_INTEGER (_ILIT(14))
#define ALLOCATABLE_REGS_DOUBLE (_ILIT(11))
#define ALLOCATABLE_REGS_FLOAT (_ILIT(22))
#define ALLOCATABLE_REGS_SSE (_ILIT(0))
#else
......@@ -139,6 +142,17 @@ trivColorable virtualRegSqueeze realRegSqueeze RcDouble conflicts exclusions
= count3 <# ALLOCATABLE_REGS_DOUBLE
trivColorable virtualRegSqueeze realRegSqueeze RcDoubleSSE conflicts exclusions
| count2 <- accSqueeze (_ILIT(0)) ALLOCATABLE_REGS_SSE
(virtualRegSqueeze RcDoubleSSE)
conflicts
, count3 <- accSqueeze count2 ALLOCATABLE_REGS_SSE
(realRegSqueeze RcDoubleSSE)
exclusions
= count3 <# ALLOCATABLE_REGS_SSE
-- Specification Code ----------------------------------------------------------
--
......
......@@ -17,6 +17,7 @@ data RegClass
= RcInteger
| RcFloat
| RcDouble
| RcDoubleSSE -- x86 only: the SSE regs are a separate class
deriving Eq
......@@ -24,8 +25,10 @@ instance Uniquable RegClass where
getUnique RcInteger = mkRegClassUnique 0
getUnique RcFloat = mkRegClassUnique 1
getUnique RcDouble = mkRegClassUnique 2
getUnique RcDoubleSSE = mkRegClassUnique 3
instance Outputable RegClass where
ppr RcInteger = Outputable.text "I"
ppr RcFloat = Outputable.text "F"
ppr RcDouble = Outputable.text "D"
ppr RcDoubleSSE = Outputable.text "S"
......@@ -373,6 +373,7 @@ sparc_mkSpillInstr reg _ slot
RcInteger -> II32
RcFloat -> FF32
RcDouble -> FF64
_ -> panic "sparc_mkSpillInstr"
in ST sz reg (fpRel (negate off_w))
......@@ -391,6 +392,7 @@ sparc_mkLoadInstr reg _ slot
RcInteger -> II32
RcFloat -> FF32
RcDouble -> FF64
_ -> panic "sparc_mkLoadInstr"
in LD sz (fpRel (- off_w)) reg
......@@ -438,6 +440,7 @@ sparc_mkRegRegMoveInstr src dst
RcInteger -> ADD False False src (RIReg g0) dst
RcDouble -> FMOV FF64 src dst
RcFloat -> FMOV FF32 src dst
_ -> panic "sparc_mkRegRegMoveInstr"
| otherwise
= panic "SPARC.Instr.mkRegRegMoveInstr: classes of src and dest not the same"
......
......@@ -156,6 +156,7 @@ pprReg reg
VirtualRegHi u -> text "%vHi_" <> asmSDoc (pprUnique u)
VirtualRegF u -> text "%vF_" <> asmSDoc (pprUnique u)
VirtualRegD u -> text "%vD_" <> asmSDoc (pprUnique u)
VirtualRegSSE u -> text "%vSSE_" <> asmSDoc (pprUnique u)
RegReal rr
-> case rr of
......
......@@ -95,22 +95,21 @@ virtualRegSqueeze cls vr
-> case vr of
VirtualRegI{} -> _ILIT(1)
VirtualRegHi{} -> _ILIT(1)
VirtualRegF{} -> _ILIT(0)
VirtualRegD{} -> _ILIT(0)
_other -> _ILIT(0)
RcFloat
-> case vr of
VirtualRegI{} -> _ILIT(0)
VirtualRegHi{} -> _ILIT(0)
VirtualRegF{} -> _ILIT(1)
VirtualRegD{} -> _ILIT(2)
_other -> _ILIT(0)
RcDouble
-> case vr of
VirtualRegI{} -> _ILIT(0)
VirtualRegHi{} -> _ILIT(0)
VirtualRegF{} -> _ILIT(1)
VirtualRegD{} -> _ILIT(1)
_other -> _ILIT(0)
_other -> _ILIT(0)
{-# INLINE realRegSqueeze #-}
realRegSqueeze :: RegClass -> RealReg -> FastInt
......@@ -141,6 +140,7 @@ realRegSqueeze cls rr
RealRegPair{} -> _ILIT(1)
_other -> _ILIT(0)
-- | All the allocatable registers in the machine,
-- including register pairs.
......@@ -283,7 +283,7 @@ regDotColor reg
= case classOfRealReg reg of
RcInteger -> text "blue"
RcFloat -> text "red"
RcDouble -> text "green"
_other -> text "green"
......
This diff is collapsed.
......@@ -253,10 +253,10 @@ data Instr
-- use MOV for moving (either movss or movsd (movlpd better?))
| CVTSS2SD Reg Reg -- F32 to F64
| CVTSD2SS Reg Reg -- F64 to F32
| CVTTSS2SIQ Operand Reg -- F32 to I32/I64 (with truncation)
| CVTTSD2SIQ Operand Reg -- F64 to I32/I64 (with truncation)
| CVTSI2SS Operand Reg -- I32/I64 to F32
| CVTSI2SD Operand Reg -- I32/I64 to F64
| CVTTSS2SIQ Size Operand Reg -- F32 to I32/I64 (with truncation)
| CVTTSD2SIQ Size Operand Reg -- F64 to I32/I64 (with truncation)
| CVTSI2SS Size Operand Reg -- I32/I64 to F32
| CVTSI2SD Size Operand Reg -- I32/I64 to F64
-- use ADD & SUB for arithmetic. In both cases, operands
-- are Operand Reg.
......@@ -353,7 +353,6 @@ x86_regUsageOfInstr instr
CLTD _ -> mkRU [eax] [edx]
NOP -> mkRU [] []
#if i386_TARGET_ARCH
GMOV src dst -> mkRU [src] [dst]
GLD _ src dst -> mkRU (use_EA src) [dst]
GST _ src dst -> mkRUR (src : use_EA dst)
......@@ -379,17 +378,14 @@ x86_regUsageOfInstr instr
GSIN _ _ _ src dst -> mkRU [src] [dst]
GCOS _ _ _ src dst -> mkRU [src] [dst]
GTAN _ _ _ src dst -> mkRU [src] [dst]
#endif
#if x86_64_TARGET_ARCH
CVTSS2SD src dst -> mkRU [src] [dst]
CVTSD2SS src dst -> mkRU [src] [dst]
CVTTSS2SIQ src dst -> mkRU (use_R src) [dst]
CVTTSD2SIQ src dst -> mkRU (use_R src) [dst]
CVTSI2SS src dst -> mkRU (use_R src) [dst]
CVTSI2SD src dst -> mkRU (use_R src) [dst]
CVTTSS2SIQ _ src dst -> mkRU (use_R src) [dst]
CVTTSD2SIQ _ src dst -> mkRU (use_R src) [dst]
CVTSI2SS _ src dst -> mkRU (use_R src) [dst]
CVTSI2SD _ src dst -> mkRU (use_R src) [dst]
FDIV _ src dst -> usageRM src dst
#endif
FETCHGOT reg -> mkRU [] [reg]
FETCHPC reg -> mkRU [] [reg]
......@@ -483,7 +479,6 @@ x86_patchRegsOfInstr instr env
JMP op -> patch1 JMP op
JMP_TBL op ids -> patch1 JMP_TBL op $ ids
#if i386_TARGET_ARCH
GMOV src dst -> GMOV (env src) (env dst)
GLD sz src dst -> GLD sz (lookupAddr src) (env dst)
GST sz src dst -> GST sz (env src) (lookupAddr dst)
......@@ -509,17 +504,14 @@ x86_patchRegsOfInstr instr env
GSIN sz l1 l2 src dst -> GSIN sz l1 l2 (env src) (env dst)
GCOS sz l1 l2 src dst -> GCOS sz l1 l2 (env src) (env dst)
GTAN sz l1 l2 src dst -> GTAN sz l1 l2 (env src) (env dst)
#endif
#if x86_64_TARGET_ARCH
CVTSS2SD src dst -> CVTSS2SD (env src) (env dst)
CVTSD2SS src dst -> CVTSD2SS (env src) (env dst)
CVTTSS2SIQ src dst -> CVTTSS2SIQ (patchOp src) (env dst)
CVTTSD2SIQ src dst -> CVTTSD2SIQ (patchOp src) (env dst)
CVTSI2SS src dst -> CVTSI2SS (patchOp src) (env dst)
CVTSI2SD src dst -> CVTSI2SD (patchOp src) (env dst)
CVTTSS2SIQ sz src dst -> CVTTSS2SIQ sz (patchOp src) (env dst)
CVTTSD2SIQ sz src dst -> CVTTSD2SIQ sz (patchOp src) (env dst)
CVTSI2SS sz src dst -> CVTSI2SS sz (patchOp src) (env dst)
CVTSI2SD sz src dst -> CVTSI2SD sz (patchOp src) (env dst)
FDIV sz src dst -> FDIV sz (patchOp src) (patchOp dst)
#endif
CALL (Left _) _ -> instr
CALL (Right reg) p -> CALL (Right (env reg)) p
......@@ -602,30 +594,16 @@ x86_mkSpillInstr
-> Int -- spill slot to use
-> Instr
#if i386_TARGET_ARCH
x86_mkSpillInstr reg delta slot
= let off = spillSlotToOffset slot
in
let off_w = (off-delta) `div` 4
in case targetClassOfReg reg of
RcInteger -> MOV II32 (OpReg reg) (OpAddr (spRel off_w))
_ -> GST FF80 reg (spRel off_w) {- RcFloat/RcDouble -}
#elif x86_64_TARGET_ARCH
x86_mkSpillInstr reg delta slot
= let off = spillSlotToOffset slot
in
let off_w = (off-delta) `div` 8
let off_w = (off-delta) `div` IF_ARCH_i386(4,8)
in case targetClassOfReg reg of
RcInteger -> MOV II64 (OpReg reg) (OpAddr (spRel off_w))
RcDouble -> MOV FF64 (OpReg reg) (OpAddr (spRel off_w))
RcInteger -> MOV IF_ARCH_i386(II32,II64)
(OpReg reg) (OpAddr (spRel off_w))
RcDouble -> GST FF80 reg (spRel off_w) {- RcFloat/RcDouble -}
RcDoubleSSE -> MOV FF64 (OpReg reg) (OpAddr (spRel off_w))
_ -> panic "X86.mkSpillInstr: no match"
-- ToDo: will it work to always spill as a double?
-- does that cause a stall if the data was a float?
#else
x86_mkSpillInstr _ _ _
= panic "X86.RegInfo.mkSpillInstr: not defined for this architecture."
#endif
-- | Make a spill reload instruction.
......@@ -635,26 +613,16 @@ x86_mkLoadInstr
-> Int -- spill slot to use
-> Instr
#if i386_TARGET_ARCH
x86_mkLoadInstr reg delta slot
= let off = spillSlotToOffset slot
in
let off_w = (off-delta) `div` 4
in case targetClassOfReg reg of {
RcInteger -> MOV II32 (OpAddr (spRel off_w)) (OpReg reg);
_ -> GLD FF80 (spRel off_w) reg} {- RcFloat/RcDouble -}
#elif x86_64_TARGET_ARCH
x86_mkLoadInstr reg delta slot
= let off = spillSlotToOffset slot
in
let off_w = (off-delta) `div` 8
let off_w = (off-delta) `div` IF_ARCH_i386(4,8)
in case targetClassOfReg reg of
RcInteger -> MOV II64 (OpAddr (spRel off_w)) (OpReg reg)
_ -> MOV FF64 (OpAddr (spRel off_w)) (OpReg reg)
#else
x86_mkLoadInstr _ _ _
= panic "X86.RegInfo.mkLoadInstr: not defined for this architecture."
#endif
RcInteger -> MOV IF_ARCH_i386(II32,II64)
(OpAddr (spRel off_w)) (OpReg reg)
RcDouble -> GLD FF80 (spRel off_w) reg {- RcFloat/RcDouble -}
RcDoubleSSE -> MOV FF64 (OpAddr (spRel off_w)) (OpReg reg)
_ -> panic "X86.x86_mkLoadInstr"
spillSlotSize :: Int
spillSlotSize = IF_ARCH_i386(12, 8)
......@@ -715,14 +683,12 @@ x86_mkRegRegMoveInstr src dst
= case targetClassOfReg src of
#if i386_TARGET_ARCH
RcInteger -> MOV II32 (OpReg src) (OpReg dst)
RcDouble -> GMOV src dst
RcFloat -> panic "X86.RegInfo.mkRegRegMoveInstr: no match"
#else
RcInteger -> MOV II64 (OpReg src) (OpReg dst)
RcDouble -> MOV FF64 (OpReg src) (OpReg dst)
RcFloat -> panic "X86.RegInfo.mkRegRegMoveInstr: no match"
#endif
RcDouble -> GMOV src dst
RcDoubleSSE -> MOV FF64 (OpReg src) (OpReg dst)
_ -> panic "X86.RegInfo.mkRegRegMoveInstr: no match"
-- | Check whether an instruction represents a reg-reg move.
-- The register allocator attempts to eliminate reg->reg moves whenever it can,
......
......@@ -181,6 +181,7 @@ pprReg s r
RegVirtual (VirtualRegHi u) -> text "%vHi_" <> asmSDoc (pprUnique u)
RegVirtual (VirtualRegF u) -> text "%vF_" <> asmSDoc (pprUnique u)
RegVirtual (VirtualRegD u) -> text "%vD_" <> asmSDoc (pprUnique u)
RegVirtual (VirtualRegSSE u) -> text "%vSSE_" <> asmSDoc (pprUnique u)
where
#if i386_TARGET_ARCH
ppr_reg_no :: Size -> Int -> Doc
......@@ -210,10 +211,7 @@ pprReg s r
2 -> sLit "%ecx"; 3 -> sLit "%edx";
4 -> sLit "%esi"; 5 -> sLit "%edi";
6 -> sLit "%ebp"; 7 -> sLit "%esp";
8 -> sLit "%fake0"; 9 -> sLit "%fake1";
10 -> sLit "%fake2"; 11 -> sLit "%fake3";
12 -> sLit "%fake4"; 13 -> sLit "%fake5";
_ -> sLit "very naughty I386 register"
_ -> ppr_reg_float i
})
#elif x86_64_TARGET_ARCH
ppr_reg_no :: Size -> Int -> Doc
......@@ -271,20 +269,26 @@ pprReg s r
10 -> sLit "%r10"; 11 -> sLit "%r11";
12 -> sLit "%r12"; 13 -> sLit "%r13";
14 -> sLit "%r14"; 15 -> sLit "%r15";
16 -> sLit "%xmm0"; 17 -> sLit "%xmm1";
18 -> sLit "%xmm2"; 19 -> sLit "%xmm3";
20 -> sLit "%xmm4"; 21 -> sLit "%xmm5";
22 -> sLit "%xmm6"; 23 -> sLit "%xmm7";
24 -> sLit "%xmm8"; 25 -> sLit "%xmm9";
26 -> sLit "%xmm10"; 27 -> sLit "%xmm11";
28 -> sLit "%xmm12"; 29 -> sLit "%xmm13";
30 -> sLit "%xmm14"; 31 -> sLit "%xmm15";
_ -> sLit "very naughty x86_64 register"
_ -> ppr_reg_float i
})
#else
ppr_reg_no _ = panic "X86.Ppr.ppr_reg_no: no match"
#endif
ppr_reg_float :: Int -> LitString
ppr_reg_float i = case i of
16 -> sLit "%fake0"; 17 -> sLit "%fake1"
18 -> sLit "%fake2"; 19 -> sLit "%fake3"
20 -> sLit "%fake4"; 21 -> sLit "%fake5"
24 -> sLit "%xmm0"; 25 -> sLit "%xmm1"
26 -> sLit "%xmm2"; 27 -> sLit "%xmm3"
28 -> sLit "%xmm4"; 29 -> sLit "%xmm5"
30 -> sLit "%xmm6"; 31 -> sLit "%xmm7"
32 -> sLit "%xmm8"; 33 -> sLit "%xmm9"
34 -> sLit "%xmm10"; 35 -> sLit "%xmm11"
36 -> sLit "%xmm12"; 37 -> sLit "%xmm13"
38 -> sLit "%xmm14"; 39 -> sLit "%xmm15"
_ -> sLit "very naughty x86 register"
pprSize :: Size -> Doc
pprSize x
......@@ -293,19 +297,19 @@ pprSize x
II16 -> sLit "w"
II32 -> sLit "l"
II64 -> sLit "q"
#if i386_TARGET_ARCH
FF32 -> sLit "s"
FF64 -> sLit "l"
FF80 -> sLit "t"
#elif x86_64_TARGET_ARCH
FF32 -> sLit "ss" -- "scalar single-precision float" (SSE2)
FF64 -> sLit "sd" -- "scalar double-precision float" (SSE2)
_ -> panic "X86.Ppr.pprSize: no match"
#else
_ -> panic "X86.Ppr.pprSize: no match"
#endif
FF80 -> sLit "t"
)
pprSize_x87 :: Size -> Doc
pprSize_x87 x
= ptext $ case x of
FF32 -> sLit "s"
FF64 -> sLit "l"
FF80 -> sLit "t"
_ -> panic "X86.Ppr.pprSize_x87"
pprCond :: Cond -> Doc
pprCond c
= ptext (case c of {
......@@ -636,12 +640,12 @@ pprInstr (MUL size op1 op2) = pprSizeOpOp (sLit "mul") size op1 op2
pprInstr (FDIV size op1 op2) = pprSizeOpOp (sLit "div") size op1 op2
pprInstr (CVTSS2SD from to) = pprRegReg (sLit "cvtss2sd") from to
pprInstr (CVTSD2SS from to) = pprRegReg (sLit "cvtsd2ss") from to
pprInstr (CVTTSS2SIQ from to) = pprOpReg (sLit "cvttss2siq") from to
pprInstr (CVTTSD2SIQ from to) = pprOpReg (sLit "cvttsd2siq") from to
pprInstr (CVTSI2SS from to) = pprOpReg (sLit "cvtsi2ssq") from to
pprInstr (CVTSI2SD from to) = pprOpReg (sLit "cvtsi2sdq") from to
pprInstr (CVTSS2SD from to) = pprRegReg (sLit "cvtss2sd") from to
pprInstr (CVTSD2SS from to) = pprRegReg (sLit "cvtsd2ss") from to
pprInstr (CVTTSS2SIQ sz from to) = pprSizeOpReg (sLit "cvttss2si") sz from to
pprInstr (CVTTSD2SIQ sz from to) = pprSizeOpReg (sLit "cvttsd2si") sz from to
pprInstr (CVTSI2SS sz from to) = pprSizeOpReg (sLit "cvtsi2ss") sz from to
pprInstr (CVTSI2SD sz from to) = pprSizeOpReg (sLit "cvtsi2sd") sz from to
-- FETCHGOT for PIC on ELF platforms
pprInstr (FETCHGOT reg)
......@@ -673,20 +677,24 @@ pprInstr g@(GMOV src dst)
| otherwise
= pprG g (hcat [gtab, gpush src 0, gsemi, gpop dst 1])
-- GLD sz addr dst ==> FFREE %st(7) ; FLDsz addr ; FSTP (dst+1)
-- GLD sz addr dst ==> FLDsz addr ; FSTP (dst+1)
pprInstr g@(GLD sz addr dst)
= pprG g (hcat [gtab, text "ffree %st(7) ; fld", pprSize sz, gsp,
= pprG g (hcat [gtab, text "fld", pprSize_x87 sz, gsp,
pprAddr addr, gsemi, gpop dst 1])
-- GST sz src addr ==> FFREE %st(7) ; FLD dst ; FSTPsz addr
-- GST sz src addr ==> FLD dst ; FSTPsz addr
pprInstr g@(GST sz src addr)
| src == fake0 && sz /= FF80 -- fstt instruction doesn't exist
= pprG g (hcat [gtab,
text "fst", pprSize_x87 sz, gsp, pprAddr addr])
| otherwise
= pprG g (hcat [gtab, gpush src 0, gsemi,
text "fstp", pprSize sz, gsp, pprAddr addr])
text "fstp", pprSize_x87 sz, gsp, pprAddr addr])
pprInstr g@(GLDZ dst)
= pprG g (hcat [gtab, text "ffree %st(7) ; fldz ; ", gpop dst 1])
= pprG g (hcat [gtab, text "fldz ; ", gpop dst 1])
pprInstr g@(GLD1 dst)
= pprG g (hcat [gtab, text "ffree %st(7) ; fld1 ; ", gpop dst 1])
= pprG g (hcat [gtab, text "fld1 ; ", gpop dst 1])
pprInstr (GFTOI src dst)
= pprInstr (GDTOI src dst)
......@@ -710,7 +718,7 @@ pprInstr (GITOF src dst)
pprInstr g@(GITOD src dst)
= pprG g (hcat [gtab, text "pushl ", pprReg II32 src,