Commit aff2e3f0 authored by simonmar

[project @ 2005-04-08 09:54:54 by simonmar]

x86_64 hacking:

  - use %rip-relative addressing in a couple of places
  - floating-point comparisons handle NaN properly

I believe the x86_64 NCG is now ready for prime time.  It is
successfully bootstrapping the compiler, and I think this fixes the
last of the test failures.
parent ce1a3417
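
Background for the NaN fix, in plain Haskell rather than NCG code: IEEE-754 treats any comparison involving a NaN as unordered, so every ordered comparison must come out False and only /= comes out True. A minimal check of the semantics the code generator has to preserve:

    -- Plain Haskell sanity check; nothing here is NCG code.
    main :: IO ()
    main = do
      let nan = 0 / 0 :: Double
      print (nan == nan)   -- False
      print (nan /= nan)   -- True: the one comparison that holds for unordered operands
      print (nan <  1.0)   -- False
      print (nan >= 1.0)   -- False
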
@@ -809,8 +809,7 @@ getRegister (CmmLit (CmmFloat f rep)) = do
LDATA ReadOnlyData
[CmmDataLabel lbl,
CmmStaticLit (CmmFloat f rep)],
-        MOV rep (OpAddr (ImmAddr (ImmCLbl lbl) 0)) (OpReg dst)
-            -- ToDo: should use %rip-relative
+        MOV rep (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
]
-- in
return (Any rep code)
@@ -882,7 +881,7 @@ getRegister (CmmMachOp (MO_S_Neg F32) [x]) = do
CmmStaticLit (CmmInt 0 I32),
CmmStaticLit (CmmInt 0 I32)
],
-        XOR F32 (OpAddr (ImmAddr (ImmCLbl lbl) 0)) (OpReg dst)
+        XOR F32 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
         -- xorps, so we need the 128-bit constant
-        -- ToDo: rip-relative
]
@@ -902,9 +901,8 @@ getRegister (CmmMachOp (MO_S_Neg F64) [x]) = do
CmmStaticLit (CmmInt 0 I64)
],
-- gcc puts an unpck here. Wonder if we need it.
-        XOR F64 (OpAddr (ImmAddr (ImmCLbl lbl) 0)) (OpReg dst)
+        XOR F64 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
         -- xorpd, so we need the 128-bit constant
-        -- ToDo: rip-relative
]
--
return (Any F64 code)
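
The ripRel helper used in the hunks above is introduced further down in this commit (MachRegs hunk): it is nothing more than AddrBaseIndex EABaseRip EAIndexNone imm, which the new pprAddr case prints with a %rip base. A standalone sketch with simplified stand-in types, purely for illustration:

    -- Stand-in types; the real ones live in MachRegs.
    data Imm      = ImmInt Int | ImmCLbl String               deriving Show
    data EABase   = EABaseNone | EABaseReg String | EABaseRip deriving Show
    data EAIndex  = EAIndexNone | EAIndex String Int          deriving Show
    data AddrMode = AddrBaseIndex EABase EAIndex Imm          deriving Show

    -- As in the real patch: no base register, no index, just a
    -- displacement resolved relative to the instruction pointer.
    ripRel :: Imm -> AddrMode
    ripRel imm = AddrBaseIndex EABaseRip EAIndexNone imm

    main :: IO ()
    main = print (ripRel (ImmCLbl ".LC0"))

The payoff is assembly like "movss .LC0(%rip), %xmm0" instead of a 32-bit absolute address, which only reaches the low 2GB and is not position-independent.
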
@@ -1155,7 +1153,7 @@ getRegister e@(CmmMachOp mop [x, y]) -- dyadic MachOps
code dst
= x_code `snocOL`
LEA rep
-           (OpAddr (AddrBaseIndex (Just x_reg) Nothing imm))
+           (OpAddr (AddrBaseIndex (EABaseReg x_reg) EAIndexNone imm))
(OpReg dst)
--
return (Any rep code)
@@ -1841,14 +1839,14 @@ getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit lit@(CmmInt i _)])
-- ASSERT(rep == I32)???
= do (x_reg, x_code) <- getSomeReg x
let off = ImmInt (-(fromInteger i))
-       return (Amode (AddrBaseIndex (Just x_reg) Nothing off) x_code)
+       return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)
getAmode (CmmMachOp (MO_Add rep) [x, CmmLit lit@(CmmInt i _)])
| not (is64BitLit lit)
-- ASSERT(rep == I32)???
= do (x_reg, x_code) <- getSomeReg x
let off = ImmInt (fromInteger i)
-       return (Amode (AddrBaseIndex (Just x_reg) Nothing off) x_code)
+       return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)
-- Turn (lit1 << n + lit2) into (lit2 + lit1 << n) so it will be
-- recognised by the next rule.
@@ -1866,7 +1864,7 @@ getAmode (CmmMachOp (MO_Add rep) [x, CmmMachOp (MO_Shl _)
let
code = x_code `appOL` y_code
base = case shift of 0 -> 1; 1 -> 2; 2 -> 4; 3 -> 8
-       return (Amode (AddrBaseIndex (Just x_reg) (Just (y_reg,base)) (ImmInt 0))
+       return (Amode (AddrBaseIndex (EABaseReg x_reg) (EAIndex y_reg base) (ImmInt 0))
code)
getAmode (CmmLit lit) | not (is64BitLit lit)
@@ -1874,7 +1872,7 @@ getAmode (CmmLit lit) | not (is64BitLit lit)
getAmode expr = do
(reg,code) <- getSomeReg expr
-  return (Amode (AddrBaseIndex (Just reg) Nothing (ImmInt 0)) code)
+  return (Amode (AddrBaseIndex (EABaseReg reg) EAIndexNone (ImmInt 0)) code)
#endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
@@ -2008,7 +2006,7 @@ getNonClobberedOperand (CmmLoad mem pk)
if (amodeCouldBeClobbered src)
then do
tmp <- getNewRegNat wordRep
-        return (AddrBaseIndex (Just tmp) Nothing (ImmInt 0),
+        return (AddrBaseIndex (EABaseReg tmp) EAIndexNone (ImmInt 0),
unitOL (LEA I32 (OpAddr src) (OpReg tmp)))
else
return (src, nilOL)
@@ -2238,10 +2236,10 @@ condFltCode cond x y = do
code = x_code `appOL`
y_code `snocOL`
CMP (cmmExprRep x) y_op (OpReg x_reg)
-  -- in
-  return (CondCode False (condToUnsigned cond) code)
-  -- we need to use the unsigned comparison operators on the
+  -- NB(1): we need to use the unsigned comparison operators on the
   -- result of this comparison.
+  -- in
+  return (CondCode True (condToUnsigned cond) code)
#endif
-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
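
Why the result is now CondCode True fed through condToUnsigned: the x86 floating-point compare instructions report their outcome in ZF/PF/CF with the same layout as an unsigned integer compare, and an unordered result sets all three flags. A small executable restatement of that mapping (this is standard x86 behaviour, not code from the patch):

    data Flags = Flags { zf, pf, cf :: Bool } deriving Show

    -- Flag layout produced by ucomiss/ucomisd:
    ucomis :: Double -> Double -> Flags
    ucomis x y
      | isNaN x || isNaN y = Flags True  True  True    -- unordered
      | x < y              = Flags False False True    -- like unsigned "below"
      | x > y              = Flags False False False   -- like unsigned "above"
      | otherwise          = Flags True  False False   -- equal

    main :: IO ()
    main = mapM_ print [ucomis 1 2, ucomis 2 1, ucomis 1 1, ucomis (0/0) 1]
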
@@ -2888,14 +2886,51 @@ genCondJump lbl (StPrim op [x, y])
-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
+#if i386_TARGET_ARCH
genCondJump id bool = do
CondCode _ cond code <- getCondCode bool
return (code `snocOL` JXX cond id)
-#endif /* i386_TARGET_ARCH */
+#endif
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+#if x86_64_TARGET_ARCH
+genCondJump id bool = do
+  CondCode is_float cond cond_code <- getCondCode bool
+  if not is_float
+    then
+      return (cond_code `snocOL` JXX cond id)
+    else do
+      lbl <- getBlockIdNat
+      -- see comment with condFltReg
+      let code = case cond of
+                   NE  -> or_unordered
+                   GU  -> plain_test
+                   GEU -> plain_test
+                   _   -> and_ordered
+          plain_test = unitOL (
+              JXX cond id
+            )
+          or_unordered = toOL [
+              JXX cond id,
+              JXX PARITY id
+            ]
+          and_ordered = toOL [
+              JXX PARITY lbl,
+              JXX cond id,
+              JXX ALWAYS lbl,
+              NEWBLOCK lbl
+            ]
+      return (cond_code `appOL` code)
+#endif
-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
#if sparc_TARGET_ARCH
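
To see why the three strategies above are correct, take the flag layout from the earlier sketch: an unordered compare sets ZF, PF and CF together. NE must also branch when PF is set (or_unordered); GU and GEU test for a clear carry flag, which an unordered result can never produce, so a plain jump suffices; everything else must first skip over the branch when PF is set (and_ordered). An illustrative model, condition names following the Cond constructors:

    data Flags = Flags { zf, pf, cf :: Bool }   -- as set by ucomiss/ucomisd

    branchTaken :: String -> Flags -> Bool
    branchTaken "NE"  f = not (zf f) || pf f        -- or_unordered: jne id; jp id
    branchTaken "GU"  f = not (cf f) && not (zf f)  -- plain_test: ja
    branchTaken "GEU" f = not (cf f)                -- plain_test: jae
    branchTaken "LU"  f = not (pf f) && cf f        -- and_ordered: jp skips, then jb
    branchTaken _     _ = error "sketch covers only these cases"

    main :: IO ()
    main = print (branchTaken "NE" (Flags True True True))   -- True: NaN /= NaN
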
@@ -3762,7 +3797,7 @@ genSwitch expr ids = do
lbl <- getNewLabelNat
let
jumpTable = map jumpTableEntry ids
-        op = OpAddr (AddrBaseIndex Nothing (Just (reg,wORD_SIZE)) (ImmCLbl lbl))
+        op = OpAddr (AddrBaseIndex EABaseNone (EAIndex reg wORD_SIZE) (ImmCLbl lbl))
code = e_code `appOL` toOL [
LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
JMP_TBL op [ id | Just id <- ids ]
@@ -3854,38 +3889,57 @@ condIntReg cond x y = do
let
code dst = cond_code `appOL` toOL [
SETCC cond (OpReg tmp),
-                    MOV I32 (OpReg tmp) (OpReg dst),
-                    AND I32 (OpImm (ImmInt 1)) (OpReg dst)
+                    MOVZxL I8 (OpReg tmp) (OpReg dst)
]
-        -- NB. (1) Tha AND is needed here because the x86 only
-        -- sets the low byte in the SETCC instruction.
-        -- NB. (2) The extra temporary register is a hack to
-        -- work around the fact that the setcc instructions only
-        -- accept byte registers. dst might not be a byte-able reg,
-        -- but currently all free registers are byte-able, so we're
-        -- guaranteed that a new temporary is byte-able.
-- in
return (Any I32 code)
condFltReg cond x y = do
-  lbl1 <- getBlockIdNat
-  lbl2 <- getBlockIdNat
CondCode _ cond cond_code <- condFltCode cond x y
-  let
-    code dst = cond_code `appOL` toOL [
-        JXX cond lbl1,
-        MOV I32 (OpImm (ImmInt 0)) (OpReg dst),
-        JXX ALWAYS lbl2,
-        NEWBLOCK lbl1,
-        MOV I32 (OpImm (ImmInt 1)) (OpReg dst),
-        JXX ALWAYS lbl2,
-        NEWBLOCK lbl2]
-    -- SIGH, have to split up this block somehow...
+  tmp1 <- getNewRegNat wordRep
+  tmp2 <- getNewRegNat wordRep
+  let
+    -- We have to worry about unordered operands (eg. comparisons
+    -- against NaN). If the operands are unordered, the comparison
+    -- sets the parity flag, carry flag and zero flag.
+    -- All comparisons are supposed to return false for unordered
+    -- operands except for !=, which returns true.
+    --
+    -- Optimisation: we don't have to test the parity flag if we
+    -- know the test has already excluded the unordered case: eg >
+    -- and >= test for a zero carry flag, which can only occur for
+    -- ordered operands.
+    --
+    -- ToDo: by reversing comparisons we could avoid testing the
+    -- parity flag in more cases.
+    code dst =
+      cond_code `appOL`
+        (case cond of
+           NE  -> or_unordered dst
+           GU  -> plain_test dst
+           GEU -> plain_test dst
+           _   -> and_ordered dst)
+    plain_test dst = toOL [
+        SETCC cond (OpReg tmp1),
+        MOVZxL I8 (OpReg tmp1) (OpReg dst)
+      ]
+    or_unordered dst = toOL [
+        SETCC cond (OpReg tmp1),
+        SETCC PARITY (OpReg tmp2),
+        OR I8 (OpReg tmp1) (OpReg tmp2),
+        MOVZxL I8 (OpReg tmp2) (OpReg dst)
+      ]
+    and_ordered dst = toOL [
+        SETCC cond (OpReg tmp1),
+        SETCC NOTPARITY (OpReg tmp2),
+        AND I8 (OpReg tmp1) (OpReg tmp2),
+        MOVZxL I8 (OpReg tmp2) (OpReg dst)
+      ]
-- in
return (Any I32 code)
-#endif /* i386_TARGET_ARCH */
+#endif
-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
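
condFltReg applies the same trick to materialise the result in a register: SETcc writes only a byte, so two byte-register results are combined with OR/AND and then widened with MOVZxL. Reusing the flag model from the sketches above, the NE and EQ cases look like this (illustrative only):

    import Data.Bits ((.&.), (.|.))

    data Flags = Flags { zf, pf, cf :: Bool }

    setcc :: Bool -> Int
    setcc b = if b then 1 else 0     -- SETcc stores 0 or 1 in a byte register

    -- or_unordered for NE: setne tmp1; setp tmp2; orb tmp1,tmp2; movzbl tmp2,dst
    neReg :: Flags -> Int
    neReg f = (setcc (not (zf f)) .|. setcc (pf f)) .&. 0xff

    -- and_ordered for EQ: sete tmp1; setnp tmp2; andb tmp1,tmp2; movzbl tmp2,dst
    eqReg :: Flags -> Int
    eqReg f = (setcc (zf f) .&. setcc (not (pf f))) .&. 0xff

    main :: IO ()
    main = print (neReg unord, eqReg unord)   -- (1,0) for a NaN comparison
      where unord = Flags True True True
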
@@ -85,6 +85,8 @@ data Cond
| POS
| CARRY
| OFLO
+       | PARITY
+       | NOTPARITY
#endif
#if sparc_TARGET_ARCH
= ALWAYS -- What's really used? ToDo
@@ -46,11 +46,13 @@ module MachRegs (
gp, pv, ra, sp, t9, t10, t11, t12, v0, f0, zeroh,
#endif
#if i386_TARGET_ARCH
+       EABase(..), EAIndex(..),
eax, ebx, ecx, edx, esi, edi, ebp, esp,
fake0, fake1, fake2, fake3, fake4, fake5,
addrModeRegs,
#endif
#if x86_64_TARGET_ARCH
+       EABase(..), EAIndex(..), ripRel,
rax, rbx, rcx, rdx, rsi, rdi, rbp, rsp,
eax, ebx, ecx, edx, esi, edi, ebp, esp,
r8, r9, r10, r11, r12, r13, r14, r15,
@@ -150,11 +152,11 @@ data AddrMode
#endif
#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-      = AddrBaseIndex Base Index Displacement
+      = AddrBaseIndex EABase EAIndex Displacement
| ImmAddr Imm Int
-type Base    = Maybe Reg
-type Index   = Maybe (Reg, Int)    -- Int is 2, 4 or 8
+data EABase  = EABaseNone | EABaseReg Reg | EABaseRip
+data EAIndex = EAIndexNone | EAIndex Reg Int
type Displacement = Imm
#endif
@@ -172,8 +174,8 @@ type Displacement = Imm
addrModeRegs :: AddrMode -> [Reg]
addrModeRegs (AddrBaseIndex b i _) = b_regs ++ i_regs
where
-  b_regs = case b of { Just r -> [r]; _ -> [] }
-  i_regs = case i of { Just (r,_) -> [r]; _ -> [] }
+  b_regs = case b of { EABaseReg r -> [r]; _ -> [] }
+  i_regs = case i of { EAIndex r _ -> [r]; _ -> [] }
addrModeRegs _ = []
#endif
@@ -289,9 +291,9 @@ spRel :: Int -- desired stack offset in words, positive or negative
spRel n
#if defined(i386_TARGET_ARCH)
-       = AddrBaseIndex (Just esp) Nothing (ImmInt (n * wORD_SIZE))
+       = AddrBaseIndex (EABaseReg esp) EAIndexNone (ImmInt (n * wORD_SIZE))
#elif defined(x86_64_TARGET_ARCH)
-       = AddrBaseIndex (Just rsp) Nothing (ImmInt (n * wORD_SIZE))
+       = AddrBaseIndex (EABaseReg rsp) EAIndexNone (ImmInt (n * wORD_SIZE))
#else
= AddrRegImm sp (ImmInt (n * wORD_SIZE))
#endif
@@ -304,6 +306,9 @@ fpRel n
= AddrRegImm fp (ImmInt (n * wORD_SIZE))
#endif
+#if x86_64_TARGET_ARCH
+ripRel imm = AddrBaseIndex EABaseRip EAIndexNone imm
+#endif
-- -----------------------------------------------------------------------------
-- Global registers
@@ -446,6 +446,7 @@ pprCond c = ptext (case c of {
LEU -> SLIT("be"); NE -> SLIT("ne");
NEG -> SLIT("s"); POS -> SLIT("ns");
CARRY -> SLIT("c"); OFLO -> SLIT("o");
+       PARITY  -> SLIT("p");   NOTPARITY -> SLIT("np");
ALWAYS -> SLIT("mp") -- hack
#endif
#if sparc_TARGET_ARCH
@@ -480,8 +481,8 @@ pprImm (ImmCLbl l) = pprCLabel_asm l
pprImm (ImmIndex l i) = pprCLabel_asm l <> char '+' <> int i
pprImm (ImmLit s) = s
-pprImm (ImmFloat _)  = panic "pprImm:ImmFloat"
-pprImm (ImmDouble _) = panic "pprImm:ImmDouble"
+pprImm (ImmFloat _)  = ptext SLIT("naughty float immediate")
+pprImm (ImmDouble _) = ptext SLIT("naughty double immediate")
pprImm (ImmConstantSum a b) = pprImm a <> char '+' <> pprImm b
pprImm (ImmConstantDiff a b) = pprImm a <> char '-'
@@ -560,10 +561,11 @@ pprAddr (AddrBaseIndex base index displacement)
pp_reg r = pprReg wordRep r
in
case (base,index) of
-    (Nothing, Nothing)    -> pp_disp
-    (Just b,  Nothing)    -> pp_off (pp_reg b)
-    (Nothing, Just (r,i)) -> pp_off (comma <> pp_reg r <> comma <> int i)
-    (Just b,  Just (r,i)) -> pp_off (pp_reg b <> comma <> pp_reg r
+    (EABaseNone,  EAIndexNone) -> pp_disp
+    (EABaseReg b, EAIndexNone) -> pp_off (pp_reg b)
+    (EABaseRip,   EAIndexNone) -> pp_off (ptext SLIT("%rip"))
+    (EABaseNone,  EAIndex r i) -> pp_off (comma <> pp_reg r <> comma <> int i)
+    (EABaseReg b, EAIndex r i) -> pp_off (pp_reg b <> comma <> pp_reg r
<> comma <> int i)
where
ppr_disp (ImmInt 0) = empty
@@ -1202,18 +1204,22 @@ pprInstr (MOVZxL I32 src dst) = pprSizeOpOp SLIT("mov") I32 src dst
-- the reg alloc would tend to throw away a plain reg-to-reg
-- move, and we still want it to do that.
-pprInstr (MOVZxL sizes src dst) = pprSizeOpOpCoerce SLIT("movz") sizes wordRep src dst
+pprInstr (MOVZxL sizes src dst) = pprSizeOpOpCoerce SLIT("movz") sizes I32 src dst
+       -- zero-extension only needs to extend to 32 bits: on x86_64,
+       -- the remaining zero-extension to 64 bits is automatic, and the 32-bit
+       -- instruction is shorter.
pprInstr (MOVSxL sizes src dst) = pprSizeOpOpCoerce SLIT("movs") sizes wordRep src dst
-- here we do some patching, since the physical registers are only set late
-- in the code generation.
-pprInstr (LEA size (OpAddr (AddrBaseIndex src1@(Just reg1) (Just (reg2,1)) (ImmInt 0))) dst@(OpReg reg3))
+pprInstr (LEA size (OpAddr (AddrBaseIndex src1@(EABaseReg reg1) (EAIndex reg2 1) (ImmInt 0))) dst@(OpReg reg3))
| reg1 == reg3
= pprSizeOpOp SLIT("add") size (OpReg reg2) dst
-pprInstr (LEA size (OpAddr (AddrBaseIndex src1@(Just reg1) (Just (reg2,1)) (ImmInt 0))) dst@(OpReg reg3))
+pprInstr (LEA size (OpAddr (AddrBaseIndex src1@(EABaseReg reg1) (EAIndex reg2 1) (ImmInt 0))) dst@(OpReg reg3))
| reg2 == reg3
= pprSizeOpOp SLIT("add") size (OpReg reg1) dst
-pprInstr (LEA size (OpAddr (AddrBaseIndex src1@(Just reg1) Nothing displ)) dst@(OpReg reg3))
+pprInstr (LEA size (OpAddr (AddrBaseIndex src1@(EABaseReg reg1) EAIndexNone displ)) dst@(OpReg reg3))
| reg1 == reg3
= pprInstr (ADD size (OpImm displ) dst)
pprInstr (LEA size src dst) = pprSizeOpOp SLIT("lea") size src dst
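
The three LEA equations above are a small peephole: when the effective address is just base+index with scale 1 and no displacement, and the destination coincides with one of the inputs, the lea is printed as an add instead. A standalone sketch with simplified types (strings stand in for registers; the real code matches on AddrBaseIndex):

    type Reg = String

    data EABase  = EABaseNone | EABaseReg Reg | EABaseRip
    data EAIndex = EAIndexNone | EAIndex Reg Int

    leaPeephole :: EABase -> EAIndex -> Int -> Reg -> String
    leaPeephole (EABaseReg b) (EAIndex i 1) 0 dst
      | b == dst = "add " ++ i ++ "," ++ dst    -- lea (b,i,1),b  =>  add i,b
      | i == dst = "add " ++ b ++ "," ++ dst    -- lea (b,i,1),i  =>  add b,i
    leaPeephole (EABaseReg b) EAIndexNone d dst
      | b == dst = "add $" ++ show d ++ "," ++ dst
    leaPeephole _ _ _ dst = "lea ...," ++ dst   -- general case keeps the lea

    main :: IO ()
    main = putStrLn (leaPeephole (EABaseReg "%rbx") (EAIndex "%rax" 1) 0 "%rbx")
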
@@ -258,11 +258,13 @@ regUsage instr = case instr of
use_R (OpAddr ea) = use_EA ea
-- Registers used to compute an effective address.
-    use_EA (ImmAddr _ _) = []
-    use_EA (AddrBaseIndex Nothing Nothing _) = []
-    use_EA (AddrBaseIndex (Just b) Nothing _) = [b]
-    use_EA (AddrBaseIndex Nothing (Just (i,_)) _) = [i]
-    use_EA (AddrBaseIndex (Just b) (Just (i,_)) _) = [b,i]
+    use_EA (ImmAddr _ _) = []
+    use_EA (AddrBaseIndex base index _) =
+       use_base base $! use_index index
+       where use_base (EABaseReg r) x = r : x
+             use_base _ x = x
+             use_index EAIndexNone = []
+             use_index (EAIndex i _) = [i]
mkRU src dst = RU (filter interesting src)
(filter interesting dst)
@@ -555,19 +557,20 @@ patchRegs instr env = case instr of
patch1 insn op = insn $! patchOp op
patch2 insn src dst = (insn $! patchOp src) $! patchOp dst
-    patchOp (OpReg reg) = OpReg (env reg)
+    patchOp (OpReg reg) = OpReg $! env reg
patchOp (OpImm imm) = OpImm imm
-    patchOp (OpAddr ea) = OpAddr (lookupAddr ea)
+    patchOp (OpAddr ea) = OpAddr $! lookupAddr ea
lookupAddr (ImmAddr imm off) = ImmAddr imm off
lookupAddr (AddrBaseIndex base index disp)
-       = AddrBaseIndex (lookupBase base) (lookupIndex index) disp
+       = ((AddrBaseIndex $! lookupBase base) $! lookupIndex index) disp
where
-         lookupBase Nothing       = Nothing
-         lookupBase (Just r)      = Just (env r)
+         lookupBase EABaseNone    = EABaseNone
+         lookupBase EABaseRip     = EABaseRip
+         lookupBase (EABaseReg r) = EABaseReg (env r)
-         lookupIndex Nothing      = Nothing
-         lookupIndex (Just (r,i)) = Just (env r, i)
+         lookupIndex EAIndexNone  = EAIndexNone
+         lookupIndex (EAIndex r i) = EAIndex (env r) i
#endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH*/
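
A note on the $! applications introduced in patchOp and lookupAddr: these functions run for every instruction each time the register allocator patches code, so forcing the rebuilt operands right away presumably keeps env-lookup thunks from accumulating. ($!) is plain strict application:

    -- Equivalent definition of the Prelude's ($!):
    strictApp :: (a -> b) -> a -> b
    strictApp f x = x `seq` f x     -- force x to WHNF, then apply f
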
-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -