Commit 6c554010 authored by simonmar's avatar simonmar
Browse files

[project @ 2005-04-01 12:14:29 by simonmar]

First cut at the x86_64 native code generator.  Lots of code is shared
with i386, but floating point uses SSE2.

This more or less works, the things I know that don't work are:

  - the floating-point primitives (sin, cos etc.) are missing
  - floating-point comparisons involving NaN are wrong
  - there's no PIC support yet

Also, I have a long list of small things to fix up to improve
performance.

I think the small memory model is assumed, for now.
parent 79c03a2f
......@@ -241,6 +241,7 @@ data Section
| ReadOnlyData
| RelocatableReadOnlyData
| UninitialisedData
| ReadOnlyData16 -- .rodata.cst16 on x86_64, 16-byte aligned
| OtherSection String
data CmmStatic
......
......@@ -552,12 +552,37 @@ cmmMachOpFold op arg@[CmmLit (CmmInt x rep)]
cmmMachOpFold (MO_S_Conv rep1 rep2) [x] | rep1 == rep2 = x
cmmMachOpFold (MO_U_Conv rep1 rep2) [x] | rep1 == rep2 = x
-- ToDo: eliminate multiple conversions. Be careful though: can't remove
-- a narrowing, and can't remove conversions to/from floating point types.
-- ToDo: eliminate nested comparisons:
-- CmmMachOp MO_Lt [CmmMachOp MO_Eq [x,y], CmmLit (CmmInt 0 _)]
-- turns into a simple equality test.
-- Eliminate nested conversions where possible.
--
-- This equation fires when a conversion (conv_outer) is applied directly to
-- the result of another conversion (conv_inner) of x.  Writing the chain as
-- rep1 -> rep2 -> rep3, it is collapsed to at most one conversion when one of
-- the cases below applies; otherwise the expression is rebuilt unchanged.
--
-- Only integer-to-integer conversions take part: isIntConversion rejects any
-- MO_S_Conv/MO_U_Conv whose source or target rep is floating point, so
-- conversions to/from floating point types are never removed or merged
-- (ordering a floating rep against an integer rep with < or > does not tell
-- us whether the conversion widens or narrows).
-- NOTE(review): isFloatingRep is exported by MachOp; confirm it is in scope
-- in this module.
cmmMachOpFold conv_outer args@[CmmMachOp conv_inner [x]]
  | Just (rep1,rep2,signed1) <- isIntConversion conv_inner,
    Just (_,   rep3,signed2) <- isIntConversion conv_outer
  = case () of
      -- widen then narrow to the same size is a nop
      _ | rep1 < rep2 && rep1 == rep3 -> x
        -- Widen then narrow to different size: collapse to single conversion
        -- but remember to use the signedness from the widening, just in case
        -- the final conversion is a widen.
        | rep1 < rep2 && rep2 > rep3 ->
            cmmMachOpFold (intconv signed1 rep1 rep3) [x]
        -- Nested widenings: collapse if the signedness is the same
        | rep1 < rep2 && rep2 < rep3 && signed1 == signed2 ->
            cmmMachOpFold (intconv signed1 rep1 rep3) [x]
        -- Nested narrowings: collapse
        | rep1 > rep2 && rep2 > rep3 ->
            cmmMachOpFold (MO_U_Conv rep1 rep3) [x]
        | otherwise ->
            CmmMachOp conv_outer args
  where
    -- Just (source rep, target rep, is-signed) for a conversion between two
    -- non-floating reps; Nothing for every other MachOp.
    isIntConversion (MO_U_Conv from to)
      | intToInt from to = Just (from,to,False)
    isIntConversion (MO_S_Conv from to)
      | intToInt from to = Just (from,to,True)
    isIntConversion _ = Nothing

    -- Neither end of the conversion is a floating-point rep.
    intToInt from to = not (isFloatingRep from) && not (isFloatingRep to)

    -- The conversion constructor with the requested signedness.
    intconv True  = MO_S_Conv
    intconv False = MO_U_Conv
-- ToDo: a narrow of a load can be collapsed into a narrow load, right?
-- but what if the architecture only supports word-sized loads, should
-- we do the transformation anyway?
cmmMachOpFold mop args@[CmmLit (CmmInt x xrep), CmmLit (CmmInt y _)]
= case mop of
......
This diff is collapsed.
......@@ -14,24 +14,22 @@ module MachInstrs (
-- * Machine instructions
Instr(..),
Cond(..),
#if !powerpc_TARGET_ARCH && !i386_TARGET_ARCH
Cond(..), condUnsigned, condToSigned, condToUnsigned,
#if !powerpc_TARGET_ARCH && !i386_TARGET_ARCH && !x86_64_TARGET_ARCH
Size(..), machRepSize,
#endif
RI(..),
#if i386_TARGET_ARCH
#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
Operand(..),
#endif
#if i386_TARGET_ARCH
i386_insert_ffrees,
#endif
#if sparc_TARGET_ARCH
riZero, fpRelEA, moveSp, fPair,
#endif
#if powerpc_TARGET_ARCH
condUnsigned, condToSigned,
#endif
DestInfo(..), hasDestInfo, pprDests,
) where
#include "HsVersions.h"
......@@ -42,7 +40,6 @@ import MachOp ( MachRep(..) )
import CLabel ( CLabel, pprCLabel )
import Panic ( panic )
import Outputable
import Config ( cLeadingUnderscore )
import FastString
import GLAEXTS
......@@ -72,7 +69,7 @@ data Cond
| ULE -- For CMP only
| ULT -- For CMP only
#endif
#if i386_TARGET_ARCH
#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
= ALWAYS -- What's really used? ToDo
| EQQ
| GE
......@@ -122,6 +119,23 @@ data Cond
#endif
deriving Eq -- to make an assertion work
-- Is this condition code one of the unsigned comparisons?
-- True for exactly GU, LU, GEU and LEU; False for everything else.
condUnsigned cond = case cond of
    GU  -> True
    LU  -> True
    GEU -> True
    LEU -> True
    _   -> False
-- Map an unsigned comparison onto the signed comparison of the same shape
-- (GU->GTT, LU->LTT, GEU->GE, LEU->LE).  Any other condition is returned
-- unchanged.
condToSigned cond = case cond of
    GU  -> GTT
    LU  -> LTT
    GEU -> GE
    LEU -> LE
    _   -> cond
-- Map a signed comparison onto the unsigned comparison of the same shape
-- (GTT->GU, LTT->LU, GE->GEU, LE->LEU).  Any other condition is returned
-- unchanged.
condToUnsigned cond = case cond of
    GTT -> GU
    LTT -> LU
    GE  -> GEU
    LE  -> LEU
    _   -> cond
-- -----------------------------------------------------------------------------
-- Sizes on this architecture
......@@ -129,7 +143,7 @@ data Cond
-- ToDo: it's not clear to me that we need separate signed-vs-unsigned sizes
-- here. I've removed them from the x86 version, we'll see what happens --SDM
#if !powerpc_TARGET_ARCH && !i386_TARGET_ARCH
#if !powerpc_TARGET_ARCH && !i386_TARGET_ARCH && !x86_64_TARGET_ARCH
data Size
#if alpha_TARGET_ARCH
= B -- byte
......@@ -363,7 +377,7 @@ bit or 64 bit precision.
--SDM 1/2003
-}
#if i386_TARGET_ARCH
#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-- data Instr continues...
......@@ -371,6 +385,9 @@ bit or 64 bit precision.
| MOV MachRep Operand Operand
| MOVZxL MachRep Operand Operand -- size is the size of operand 1
| MOVSxL MachRep Operand Operand -- size is the size of operand 1
-- x86_64 note: plain mov into a 32-bit register always zero-extends
-- into the 64-bit reg, in contrast to the 8 and 16-bit movs which
-- don't affect the high bits of the register.
-- Load effective address (also a very useful three-operand add instruction :-)
| LEA MachRep Operand Operand
......@@ -379,9 +396,9 @@ bit or 64 bit precision.
| ADD MachRep Operand Operand
| ADC MachRep Operand Operand
| SUB MachRep Operand Operand
| IMUL MachRep Operand Operand -- signed int mul
| MUL MachRep Operand Operand -- unsigned int mul
| MUL MachRep Operand Operand
| IMUL MachRep Operand Operand -- signed int mul
| IMUL64 Reg Reg
-- operand1:operand2 := (operand1[31:0] *signed operand2[31:0])
......@@ -403,6 +420,7 @@ bit or 64 bit precision.
| BT MachRep Imm Operand
| NOP
#if i386_TARGET_ARCH
-- Float Arithmetic.
-- Note that we cheat by treating G{ABS,MOV,NEG} of doubles
......@@ -442,6 +460,32 @@ bit or 64 bit precision.
| GTAN MachRep Reg Reg -- src, dst
| GFREE -- do ffree on all x86 regs; an ugly hack
#endif
#if x86_64_TARGET_ARCH
-- SSE2 floating point: we use a restricted set of the available SSE2
-- instructions for floating-point.
-- use MOV for moving (either movss or movsd (movlpd better?))
| CVTSS2SD Reg Reg -- F32 to F64
| CVTSD2SS Reg Reg -- F64 to F32
| CVTSS2SI Operand Reg -- F32 to I32/I64 (with rounding)
| CVTSD2SI Operand Reg -- F64 to I32/I64 (with rounding)
| CVTSI2SS Operand Reg -- I32/I64 to F32
| CVTSI2SD Operand Reg -- I32/I64 to F64
-- use ADD & SUB for arithmetic. In both cases, operands
-- are Operand Reg.
-- SSE2 floating-point division:
| FDIV MachRep Operand Operand -- divisor, dividend(dst)
-- use CMP for comparisons. ucomiss and ucomisd instructions
-- compare single/double prec floating point respectively.
| SQRT MachRep Operand Reg -- src, dst
#endif
-- Comparison
| TEST MachRep Operand Operand
......@@ -462,7 +506,7 @@ bit or 64 bit precision.
| CALL (Either Imm Reg)
-- Other things.
| CLTD -- sign extend %eax into %edx:%eax
| CLTD MachRep -- sign extend %eax into %edx:%eax
| FETCHGOT Reg -- pseudo-insn for position-independent code
-- pretty-prints as
......@@ -475,7 +519,9 @@ data Operand
| OpImm Imm -- immediate value
| OpAddr AddrMode -- memory reference
#endif /* i386 or x86_64 */
#if i386_TARGET_ARCH
i386_insert_ffrees :: [Instr] -> [Instr]
i386_insert_ffrees insns
| any is_G_instr insns
......@@ -506,7 +552,6 @@ is_G_instr instr
GSIN _ _ _ -> True; GCOS _ _ _ -> True; GTAN _ _ _ -> True
GFREE -> panic "is_G_instr: GFREE (!)"
other -> False
#endif /* i386_TARGET_ARCH */
......@@ -670,33 +715,4 @@ fPair other = pprPanic "fPair(sparc NCG)" (ppr other)
| FETCHPC Reg -- pseudo-instruction:
-- bcl to next insn, mflr reg
condUnsigned GU = True
condUnsigned LU = True
condUnsigned GEU = True
condUnsigned LEU = True
condUnsigned _ = False
condToSigned GU = GTT
condToSigned LU = LTT
condToSigned GEU = GE
condToSigned LEU = LE
condToSigned x = x
#endif /* powerpc_TARGET_ARCH */
-- -----------------------------------------------------------------------------
-- DestInfo
-- ToDo: might not be needed anymore --SDM
-- used by insnFuture in RegAllocInfo.lhs
data DestInfo
= NoDestInfo -- no supplied dests; infer from context
| DestInfo [CLabel] -- precisely these dests and no others
hasDestInfo NoDestInfo = False
hasDestInfo (DestInfo _) = True
pprDests :: DestInfo -> SDoc
pprDests NoDestInfo = text "NoDestInfo"
pprDests (DestInfo dsts) = brackets (hsep (map pprCLabel dsts))
......@@ -51,6 +51,15 @@ module MachRegs (
fake0, fake1, fake2, fake3, fake4, fake5,
addrModeRegs,
#endif
#if x86_64_TARGET_ARCH
rax, rbx, rcx, rdx, rsi, rdi, rbp, rsp,
eax, ebx, ecx, edx, esi, edi, ebp, esp,
r8, r9, r10, r11, r12, r13, r14, r15,
xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7,
xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15,
xmm, eax, edx,
addrModeRegs, allFPArgRegs,
#endif
#if sparc_TARGET_ARCH
fits13Bits,
fpRel, gReg, iReg, lReg, oReg, largeOffsetError,
......@@ -141,7 +150,7 @@ data AddrMode
| AddrRegImm Reg Imm
#endif
#if i386_TARGET_ARCH
#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
= AddrBaseIndex Base Index Displacement
| ImmAddr Imm Int
......@@ -160,7 +169,7 @@ type Displacement = Imm
| AddrRegImm Reg Imm
#endif
#if i386_TARGET_ARCH
#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
addrModeRegs :: AddrMode -> [Reg]
addrModeRegs (AddrBaseIndex b i _) = b_regs ++ i_regs
where
......@@ -177,7 +186,7 @@ addrOffset addr off
#if alpha_TARGET_ARCH
_ -> panic "MachMisc.addrOffset not defined for Alpha"
#endif
#if i386_TARGET_ARCH
#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
ImmAddr i off0 -> Just (ImmAddr i (off0 + off))
AddrBaseIndex r i (ImmInt n) -> Just (AddrBaseIndex r i (ImmInt (n + off)))
......@@ -280,8 +289,10 @@ spRel :: Int -- desired stack offset in words, positive or negative
-> AddrMode
spRel n
#if i386_TARGET_ARCH
#if defined(i386_TARGET_ARCH)
= AddrBaseIndex (Just esp) Nothing (ImmInt (n * wORD_SIZE))
#elif defined(x86_64_TARGET_ARCH)
= AddrBaseIndex (Just rsp) Nothing (ImmInt (n * wORD_SIZE))
#else
= AddrRegImm sp (ImmInt (n * wORD_SIZE))
#endif
......@@ -496,6 +507,88 @@ showReg n
#endif
{-
AMD x86_64 architecture:
- Registers 0-15 have 32-bit counterparts (eax, ebx etc.)
- Registers 0-15 have 16-bit counterparts (ax, bx etc.)
- Registers 0-15 have 8-bit (low byte) counterparts (al, bl etc.);
  only registers 0-3 also have high-byte forms (ah, bh etc.)
-}
#if x86_64_TARGET_ARCH
-- Machine register numbering used for x86_64:
--    0-7   the eight registers that also exist on x86 (numbered in the
--          order rax, rbx, rcx, rdx, rsi, rdi, rbp, rsp)
--    8-15  the additional integer registers r8-r15
--   16-31  the SSE registers xmm0-xmm15
-- The same numbers are repeated as CPP #defines of the same names further
-- down this file; the two tables must be kept in step.
rax, rbx, rcx, rdx, rsp, rbp, rsi, rdi,
r8, r9, r10, r11, r12, r13, r14, r15,
xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7,
xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15 :: Reg
-- integer registers shared with x86
rax = RealReg 0
rbx = RealReg 1
rcx = RealReg 2
rdx = RealReg 3
rsi = RealReg 4
rdi = RealReg 5
rbp = RealReg 6
rsp = RealReg 7
-- integer registers new in x86_64
r8 = RealReg 8
r9 = RealReg 9
r10 = RealReg 10
r11 = RealReg 11
r12 = RealReg 12
r13 = RealReg 13
r14 = RealReg 14
r15 = RealReg 15
-- SSE registers: xmmN is register number 16+N
xmm0 = RealReg 16
xmm1 = RealReg 17
xmm2 = RealReg 18
xmm3 = RealReg 19
xmm4 = RealReg 20
xmm5 = RealReg 21
xmm6 = RealReg 22
xmm7 = RealReg 23
xmm8 = RealReg 24
xmm9 = RealReg 25
xmm10 = RealReg 26
xmm11 = RealReg 27
xmm12 = RealReg 28
xmm13 = RealReg 29
xmm14 = RealReg 30
xmm15 = RealReg 31
-- so we can re-use some x86 code:
-- each 32-bit x86 register name is simply another name for the Reg of the
-- corresponding 64-bit register (the operand width is not part of a Reg).
eax = rax
ebx = rbx
ecx = rcx
edx = rdx
esi = rsi
edi = rdi
ebp = rbp
esp = rsp
-- The n-th SSE register as a Reg, i.e. register number 16+n.
-- No range check is performed on n.
xmm n = RealReg (16+n)
-- On x86, we might want to have an 8-bit RegClass, which would
-- contain just regs 1-4 (the others don't have 8-bit versions).
-- However, we can get away without this at the moment because the
-- only allocatable integer regs are also 8-bit compatible (1, 3, 4).
-- Register class of a register on x86_64.  Real registers numbered below 16
-- are integer registers, all others are RcDouble.  Single-precision virtual
-- registers (VirtualRegF) are not accepted here and cause a panic.
regClass (RealReg i)
  | i < 16    = RcInteger
  | otherwise = RcDouble
regClass (VirtualRegI  _) = RcInteger
regClass (VirtualRegHi _) = RcInteger
regClass (VirtualRegD  _) = RcDouble
regClass r@(VirtualRegF _)
  = pprPanic "regClass(x86_64):VirtualRegF" (ppr r)
-- Assembler names of integer registers 0-7, indexed by register number.
regNames
   = ["%rax", "%rbx", "%rcx", "%rdx", "%rsi", "%rdi", "%rbp", "%rsp" ]

-- Assembler name (64-bit form) of a machine register number:
--    0-7   looked up in regNames
--    8-15  "%r8" .. "%r15"
--   16-31  "%xmm0" .. "%xmm15"
-- SSE register xmmN has register number 16+N, so the offset of 16 has to be
-- removed again before printing.  n is expected to be non-negative.
showReg :: RegNo -> String
showReg n
        | n >= 16   = "%xmm" ++ show (n-16)
        | n >= 8    = "%r" ++ show n
        | otherwise = regNames !! n
#endif
{-
The SPARC has 64 registers of interest; 32 integer registers and 32
floating point registers. The mapping of STG registers to SPARC
......@@ -647,6 +740,42 @@ names in the header files. Gag me with a spoon, eh?
#define fake4 12
#define fake5 13
#endif
#if x86_64_TARGET_ARCH
-- CPP names for the x86_64 machine register numbers: 0-15 are the integer
-- registers, 16-31 are the SSE registers.  From this point on in the file
-- these names are replaced textually by their numbers, so they can be used
-- wherever a plain register number is wanted.  The numbers must agree with
-- the Reg constants of the same names defined earlier in this file.
#define rax 0
#define rbx 1
#define rcx 2
#define rdx 3
#define rsi 4
#define rdi 5
#define rbp 6
#define rsp 7
#define r8 8
#define r9 9
#define r10 10
#define r11 11
#define r12 12
#define r13 13
#define r14 14
#define r15 15
#define xmm0 16
#define xmm1 17
#define xmm2 18
#define xmm3 19
#define xmm4 20
#define xmm5 21
#define xmm6 22
#define xmm7 23
#define xmm8 24
#define xmm9 25
#define xmm10 26
#define xmm11 27
#define xmm12 28
#define xmm13 29
#define xmm14 30
#define xmm15 31
#endif
#if sparc_TARGET_ARCH
#define g0 0
#define g1 1
......@@ -824,11 +953,12 @@ allMachRegNos :: [RegNo]
allMachRegNos
= IF_ARCH_alpha( [0..63],
IF_ARCH_i386( [0..13],
IF_ARCH_x86_64( [0..31],
IF_ARCH_sparc( ([0..31]
++ [f0,f2 .. nCG_FirstFloatReg-1]
++ [nCG_FirstFloatReg .. f31]),
IF_ARCH_powerpc([0..63],
))))
)))))
-- allocatableRegs is allMachRegNos with the fixed-use regs removed.
-- i.e., these are the regs for which we are prepared to allow the
......@@ -854,6 +984,11 @@ callClobberedRegs
-- caller-saves registers
map RealReg [eax,ecx,edx,fake0,fake1,fake2,fake3,fake4,fake5]
#endif /* i386_TARGET_ARCH */
#if x86_64_TARGET_ARCH
-- caller-saves registers
map RealReg ([rax,rcx,rdx,rsi,rdi,r8,r9,r10,r11] ++ [16..31])
-- all xmm regs are caller-saves
#endif /* x86_64_TARGET_ARCH */
#if sparc_TARGET_ARCH
map RealReg
( oReg 7 :
......@@ -880,6 +1015,10 @@ argRegs :: RegNo -> [Reg]
argRegs _ = panic "MachRegs.argRegs(x86): should not be used!"
#endif
#if x86_64_TARGET_ARCH
-- argRegs is not meant to be called on x86_64: whatever the argument, the
-- result is a panic.
argRegs _ = panic "MachRegs.argRegs(x86_64): should not be used!"
#endif
#if alpha_TARGET_ARCH
argRegs 0 = []
argRegs 1 = freeMappedRegs [16, fReg 16]
......@@ -932,6 +1071,13 @@ allArgRegs :: [Reg]
allArgRegs = panic "MachRegs.allArgRegs(x86): should not be used!"
#endif
#if x86_64_TARGET_ARCH
-- The six integer registers listed as argument registers, in this order.
-- NOTE(review): presumably the order in which the C calling convention
-- assigns integer arguments -- confirm against the ABI.
allArgRegs :: [Reg]
allArgRegs = [ RealReg regNo | regNo <- [rdi,rsi,rdx,rcx,r8,r9] ]
-- The first eight SSE registers, in ascending order.
-- NOTE(review): presumably the floating-point argument registers of the C
-- calling convention -- confirm against the ABI.
allFPArgRegs :: [Reg]
allFPArgRegs = [ RealReg regNo | regNo <- [xmm0 .. xmm7] ]
#endif
#if powerpc_TARGET_ARCH
allArgRegs :: [Reg]
allArgRegs = map RealReg [3..10]
......@@ -960,6 +1106,10 @@ freeReg 63 = fastBool False -- always zero (f31)
freeReg esp = fastBool False -- %esp is the C stack pointer
#endif
#if x86_64_TARGET_ARCH
freeReg rsp = fastBool False -- %rsp is the C stack pointer
#endif
#if sparc_TARGET_ARCH
freeReg g0 = fastBool False -- %g0 is always 0.
freeReg g5 = fastBool False -- %g5 is reserved (ABI).
......
......@@ -26,6 +26,12 @@
# define IF_ARCH_i386(x,y) y
#endif
-- - - - - - - - - - - - - - - - - - - - - -
-- Two-argument selector macro: yields its first argument when the target
-- architecture is x86_64 and its second argument otherwise.
#if x86_64_TARGET_ARCH
# define IF_ARCH_x86_64(x,y) x
#else
# define IF_ARCH_x86_64(x,y) y
#endif
-- - - - - - - - - - - - - - - - - - - - - -
#if freebsd_TARGET_OS
# define IF_OS_freebsd(x,y) x
#else
......
......@@ -2,8 +2,8 @@
--
-- Pretty-printing assembly language
--
-- (c) The University of Glasgow 1993-2004
--
-- (c) The University of Glasgow 1993-2005
--
-----------------------------------------------------------------------------
-- We start with the @pprXXX@s with some cross-platform commonality
......@@ -21,7 +21,7 @@ module PprMach (
#include "HsVersions.h"
import Cmm
import MachOp ( MachRep(..) )
import MachOp ( MachRep(..), wordRep, isFloatingRep )
import MachRegs -- may differ per-platform
import MachInstrs
......@@ -115,13 +115,13 @@ pprBasicBlock (BasicBlock (BlockId id) instrs) =
-- on which bit of it we care about. Yurgh.
pprUserReg :: Reg -> Doc
pprUserReg = pprReg IF_ARCH_i386(I32,)
pprUserReg = pprReg IF_ARCH_i386(I32,) IF_ARCH_x86_64(I64,)
pprReg :: IF_ARCH_i386(MachRep ->,) Reg -> Doc
pprReg :: IF_ARCH_i386(MachRep ->, IF_ARCH_x86_64(MachRep ->,)) Reg -> Doc
pprReg IF_ARCH_i386(s,) r
pprReg IF_ARCH_i386(s, IF_ARCH_x86_64(s,)) r
= case r of
RealReg i -> ppr_reg_no IF_ARCH_i386(s,) i
RealReg i -> ppr_reg_no IF_ARCH_i386(s, IF_ARCH_x86_64(s,)) i
VirtualRegI u -> text "%vI_" <> asmSDoc (pprUnique u)
VirtualRegHi u -> text "%vHi_" <> asmSDoc (pprUnique u)
VirtualRegF u -> text "%vF_" <> asmSDoc (pprUnique u)
......@@ -200,6 +200,74 @@ pprReg IF_ARCH_i386(s,) r
_ -> SLIT("very naughty I386 register")
})
#endif
#if x86_64_TARGET_ARCH
-- Choose the register-name printer matching the operand size: byte, word
-- and long names for I8, I16 and I32; every other rep uses the quad table
-- (which is also the one that knows the SSE registers).
ppr_reg_no :: MachRep -> Int -> Doc
ppr_reg_no rep = case rep of
    I8  -> ppr_reg_byte
    I16 -> ppr_reg_word
    I32 -> ppr_reg_long
    _   -> ppr_reg_quad
-- 8-bit name of integer register i (0-15).  Any other number prints a
-- "very naughty" placeholder instead of failing.
ppr_reg_byte i = ptext name
  where
    name = case i of
      0  -> SLIT("%al")
      1  -> SLIT("%bl")
      2  -> SLIT("%cl")
      3  -> SLIT("%dl")
      -- new 8-bit regs!
      4  -> SLIT("%sil")
      5  -> SLIT("%dil")
      6  -> SLIT("%bpl")
      7  -> SLIT("%spl")
      8  -> SLIT("%r8b")
      9  -> SLIT("%r9b")
      10 -> SLIT("%r10b")
      11 -> SLIT("%r11b")
      12 -> SLIT("%r12b")
      13 -> SLIT("%r13b")
      14 -> SLIT("%r14b")
      15 -> SLIT("%r15b")
      _  -> SLIT("very naughty x86_64 byte register")
-- 16-bit name of integer register i (0-15).  Any other number prints a
-- "very naughty" placeholder instead of failing.
ppr_reg_word i = ptext name
  where
    name = case i of
      0  -> SLIT("%ax")
      1  -> SLIT("%bx")
      2  -> SLIT("%cx")
      3  -> SLIT("%dx")
      4  -> SLIT("%si")
      5  -> SLIT("%di")
      6  -> SLIT("%bp")
      7  -> SLIT("%sp")
      8  -> SLIT("%r8w")
      9  -> SLIT("%r9w")
      10 -> SLIT("%r10w")
      11 -> SLIT("%r11w")
      12 -> SLIT("%r12w")
      13 -> SLIT("%r13w")
      14 -> SLIT("%r14w")
      15 -> SLIT("%r15w")
      _  -> SLIT("very naughty x86_64 word register")
-- 32-bit name of integer register i (0-15).  Any other number prints a
-- "very naughty" placeholder instead of failing.
ppr_reg_long i = ptext name
  where
    name = case i of
      0  -> SLIT("%eax")
      1  -> SLIT("%ebx")
      2  -> SLIT("%ecx")
      3  -> SLIT("%edx")
      4  -> SLIT("%esi")
      5  -> SLIT("%edi")
      6  -> SLIT("%ebp")
      7  -> SLIT("%esp")
      8  -> SLIT("%r8d")
      9  -> SLIT("%r9d")
      10 -> SLIT("%r10d")
      11 -> SLIT("%r11d")
      12 -> SLIT("%r12d")
      13 -> SLIT("%r13d")
      14 -> SLIT("%r14d")
      15 -> SLIT("%r15d")
      _  -> SLIT("very naughty x86_64 register")
-- 64-bit name of integer register i (0-15), or the name of SSE register
-- i-16 for i in 16-31.  This is the table used for every operand size other
-- than I8/I16/I32, which is why the SSE registers live here.  As in the
-- byte/word/long tables, an unknown register number prints a "very naughty"
-- placeholder rather than failing with an incomplete pattern match.
ppr_reg_quad i = ptext
     (case i of {
         0 -> SLIT("%rax");     1 -> SLIT("%rbx");
         2 -> SLIT("%rcx");     3 -> SLIT("%rdx");
         4 -> SLIT("%rsi");     5 -> SLIT("%rdi");
         6 -> SLIT("%rbp");     7 -> SLIT("%rsp");
         8 -> SLIT("%r8");      9 -> SLIT("%r9");
        10 -> SLIT("%r10");    11 -> SLIT("%r11");
        12 -> SLIT("%r12");    13 -> SLIT("%r13");
        14 -> SLIT("%r14");    15 -> SLIT("%r15");
        16 -> SLIT("%xmm0");   17 -> SLIT("%xmm1");
        18 -> SLIT("%xmm2");   19 -> SLIT("%xmm3");
        20 -> SLIT("%xmm4");   21 -> SLIT("%xmm5");
        22 -> SLIT("%xmm6");   23 -> SLIT("%xmm7");
        24 -> SLIT("%xmm8");   25 -> SLIT("%xmm9");
        26 -> SLIT("%xmm10");  27 -> SLIT("%xmm11");
        28 -> SLIT("%xmm12");  29 -> SLIT("%xmm13");
        30 -> SLIT("%xmm14");  31 -> SLIT("%xmm15");
        _  -> SLIT("very naughty x86_64 register")
      })
#endif
#if sparc_TARGET_ARCH
ppr_reg_no :: Int -> Doc