-----------------------------------------------------------------------------
--
-- Generating machine code (instruction selection)
--
-- (c) The University of Glasgow 1996-2004
--
-----------------------------------------------------------------------------

-- This is a big module, but, if you pay attention to
-- (a) the sectioning, and (b) the type signatures, the
-- structure should not be too overwhelming.

module X86.CodeGen (
        cmmTopCodeGen,
        generateJumpTableForInstr,
        InstrBlock
)

where

#include "HsVersions.h"
#include "nativeGen/NCG.h"
#include "../includes/MachDeps.h"

-- NCG stuff:
import X86.Instr
import X86.Cond
import X86.Regs
import X86.RegInfo
import CPrim
import Instruction
import PIC
import NCGMonad
import Size
import Reg
import Platform

-- Our intermediate code:
import BasicTypes
import BlockId
import Module           ( primPackageId )
import PprCmm           ()
import OldCmm
import OldPprCmm        ()
import CLabel

-- The rest:
import StaticFlags      ( opt_PIC )
import ForeignCall      ( CCallConv(..) )
import OrdList
import Outputable
import Unique
import FastString
import FastBool         ( isFastTrue )
import Constants        ( wORD_SIZE )
import DynFlags

import Control.Monad
import Data.Bits
import Data.Int
import Data.Maybe
import Data.Word

Ian Lynagh's avatar
Ian Lynagh committed
64 65
-- | Are we generating code for a 32-bit target platform?
is32BitPlatform :: NatM Bool
is32BitPlatform = do
    dflags <- getDynFlags
    return $ target32Bit (targetPlatform dflags)

69 70
-- | Is SSE2 code generation enabled for the current target?
-- On x86_64 the answer is always yes; on x86 it depends on the
-- @-msse2@ / @-msse4.2@ flags.  Panics on non-x86 architectures.
sse2Enabled :: NatM Bool
sse2Enabled = do
  dflags <- getDynFlags
  case platformArch (targetPlatform dflags) of
      ArchX86_64 -> -- SSE2 is fixed on for x86_64.  It would be
                    -- possible to make it optional, but we'd need to
                    -- fix at least the foreign call code where the
                    -- calling convention specifies the use of xmm regs,
                    -- and possibly other places.
                    return True
      ArchX86    -> return (dopt Opt_SSE2 dflags || dopt Opt_SSE4_2 dflags)
      _          -> panic "sse2Enabled: Not an X86* arch"
81

tibbe's avatar
tibbe committed
82 83
-- | Is SSE4.2 code generation enabled (the @-msse4.2@ flag)?
sse4_2Enabled :: NatM Bool
sse4_2Enabled = do
  dflags <- getDynFlags
  return (dopt Opt_SSE4_2 dflags)

87 88 89 90
-- | Choose between two code generators depending on whether SSE2
-- is enabled: the first argument is used when it is, the second
-- (the x87 fallback) when it is not.
if_sse2 :: NatM a -> NatM a -> NatM a
if_sse2 sse2 x87 = do
  enabled <- sse2Enabled
  if enabled then sse2 else x87
91

92
-- | Generate native code for one top-level Cmm declaration.
-- For a proc, each basic block is code-generated and any statics the
-- selector emits are floated out as extra top-level declarations; on
-- platforms that need it, PIC base initialisation is prepended.
-- Data declarations pass through untranslated (with alignment 1).
cmmTopCodeGen
        :: RawCmmDecl
        -> NatM [NatCmmDecl (Alignment, CmmStatics) Instr]

cmmTopCodeGen (CmmProc info lab (ListGraph blocks)) = do
  (nat_blocks,statics) <- mapAndUnzipM basicBlockCodeGen blocks
  picBaseMb <- getPicBaseMaybeNat
  dflags <- getDynFlags
  let proc = CmmProc info lab (ListGraph $ concat nat_blocks)
      tops = proc : concat statics
      os   = platformOS $ targetPlatform dflags

  case picBaseMb of
      Just picBase -> initializePicBase_x86 ArchX86 os picBase tops
      Nothing -> return tops

cmmTopCodeGen (CmmData sec dat) = do
  return [CmmData sec (1, dat)]  -- no translation, we just use CmmStatic
110 111


112 113 114
-- | Generate native code for a single Cmm basic block, re-splitting
-- the resulting instruction stream on NEWBLOCK markers and floating
-- out any LDATA pseudo-instructions as top-level statics.
basicBlockCodeGen
        :: CmmBasicBlock
        -> NatM ( [NatBasicBlock Instr]
                , [NatCmmDecl (Alignment, CmmStatics) Instr])

basicBlockCodeGen (BasicBlock id stmts) = do
  instrs <- stmtsToInstrs stmts
  -- code generation may introduce new basic block boundaries, which
  -- are indicated by the NEWBLOCK instruction.  We must split up the
  -- instruction stream into basic blocks again.  Also, we extract
  -- LDATAs here too.
  let
        (top,other_blocks,statics) = foldrOL mkBlocks ([],[],[]) instrs

        mkBlocks (NEWBLOCK id) (instrs,blocks,statics)
          = ([], BasicBlock id instrs : blocks, statics)
        mkBlocks (LDATA sec dat) (instrs,blocks,statics)
          = (instrs, blocks, CmmData sec dat:statics)
        mkBlocks instr (instrs,blocks,statics)
          = (instr:instrs, blocks, statics)
  -- in
  return (BasicBlock id top : other_blocks, statics)


-- | Select instructions for a list of statements and concatenate the
-- per-statement instruction streams in order.
stmtsToInstrs :: [CmmStmt] -> NatM InstrBlock
stmtsToInstrs stmts = do
    streams <- mapM stmtToInstrs stmts
    return (concatOL streams)


-- | Select instructions for a single Cmm statement, dispatching on
-- the statement form.  On 32-bit targets, 64-bit assignments and
-- stores go through the dedicated I64 code paths.
stmtToInstrs :: CmmStmt -> NatM InstrBlock
stmtToInstrs stmt = do
  is32Bit <- is32BitPlatform
  case stmt of
    CmmNop         -> return nilOL
    CmmComment s   -> return (unitOL (COMMENT s))

    CmmAssign reg src
      | isFloatType ty         -> assignReg_FltCode size reg src
      | is32Bit && isWord64 ty -> assignReg_I64Code      reg src
      | otherwise              -> assignReg_IntCode size reg src
        where ty = cmmRegType reg
              size = cmmTypeSize ty

    CmmStore addr src
      | isFloatType ty         -> assignMem_FltCode size addr src
      | is32Bit && isWord64 ty -> assignMem_I64Code      addr src
      | otherwise              -> assignMem_IntCode size addr src
        where ty = cmmExprType src
              size = cmmTypeSize ty

    CmmCall target result_regs args _
       -> genCCall is32Bit target result_regs args

    CmmBranch id          -> genBranch id
    CmmCondBranch arg id  -> genCondJump id arg
    CmmSwitch arg ids     -> genSwitch arg ids
    CmmJump arg _         -> genJump arg
    CmmReturn             ->
      panic "stmtToInstrs: return statement should have been cps'd away"


--------------------------------------------------------------------------------
-- | 'InstrBlock's are the insn sequences generated by the insn selectors.
--      They are really trees of insns to facilitate fast appending, where a
--      left-to-right traversal yields the insns in the correct order.
--
type InstrBlock
        = OrdList Instr
181 182 183 184


-- | Condition codes passed up the tree.
--      The Bool records whether the comparison was a floating-point
--      one; the 'InstrBlock' computes the condition.
data CondCode
        = CondCode Bool Cond InstrBlock
187 188 189


-- | a.k.a "Register64"
--      Reg is the lower 32-bit temporary which contains the result.
--      Use getHiVRegFromLo to find the other VRegUnique.
--
--      Rules of this simplified insn selection game are therefore that
--      the returned Reg may be modified
--
data ChildCode64
   = ChildCode64
        InstrBlock
        Reg
200 201 202


-- | Register's passed up the tree.  If the stix code forces the register
--      to live in a pre-decided machine register, it comes out as @Fixed@;
--      otherwise, it comes out as @Any@, and the parent can decide which
--      register to put it in.
--
data Register
        = Fixed Size Reg InstrBlock
        | Any   Size (Reg -> InstrBlock)
210 211 212 213 214 215 216 217


-- | Replace the 'Size' carried by a 'Register', leaving the register
-- (or register-producing code) untouched.
swizzleRegisterRep :: Register -> Size -> Register
swizzleRegisterRep (Fixed _ reg code) size = Fixed size reg code
swizzleRegisterRep (Any _ codefn)     size = Any   size codefn


-- | Grab the Reg for a CmmReg.
--      The Bool says whether SSE2 is in use: without SSE2, float
--      locals live in x87 (FF80) virtual registers.
getRegisterReg :: Bool -> CmmReg -> Reg

getRegisterReg use_sse2 (CmmLocal (LocalReg u pk))
  = let sz = cmmTypeSize pk in
    if isFloatSize sz && not use_sse2
       then RegVirtual (mkVirtualReg u FF80)
       else RegVirtual (mkVirtualReg u sz)

getRegisterReg _ (CmmGlobal mid)
  = case globalRegMaybe mid of
        Just reg -> RegReal $ reg
        Nothing  -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid)
        -- By this stage, the only MagicIds remaining should be the
        -- ones which map to a real machine register on this
        -- platform.  Hence ...
233 234 235


-- | Memory addressing modes passed up the tree.
--      The 'InstrBlock' computes the values the 'AddrMode' refers to.
data Amode
        = Amode AddrMode InstrBlock
238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258

{-
Now, given a tree (the argument to an CmmLoad) that references memory,
produce a suitable addressing mode.

A Rule of the Game (tm) for Amodes: use of the addr bit must
immediately follow use of the code part, since the code part puts
values in registers which the addr then refers to.  So you can't put
anything in between, lest it overwrite some of those registers.  If
you need to do some other computation between the code part and use of
the addr bit, first store the effective address from the amode in a
temporary, then do the other computation, and then use the temporary:

    code
    LEA amode, tmp
    ... other computation ...
    ... (tmp) ...
-}


-- | Check whether an integer will fit in 32 bits.
--      A CmmInt is intended to be truncated to the appropriate
--      number of bits, so here we truncate it to Int64.  This is
--      important because e.g. -1 as a CmmInt might be either
--      -1 or 18446744073709551615.
--
is32BitInteger :: Integer -> Bool
is32BitInteger i = i64 <= 0x7fffffff && i64 >= -0x80000000
  where i64 = fromIntegral i :: Int64


-- | Convert a BlockId to some CmmStatic data.
--      Absent entries become a zero word; present ones become the
--      label of the corresponding assembler temp.
jumpTableEntry :: Maybe BlockId -> CmmStatic
jumpTableEntry Nothing = CmmStaticLit (CmmInt 0 wordWidth)
jumpTableEntry (Just blockid) = CmmStaticLit (CmmLabel blockLabel)
    where blockLabel = mkAsmTempLabel (getUnique blockid)
274 275 276 277 278 279 280


-- -----------------------------------------------------------------------------
-- General things for putting together code sequences

-- Expand CmmRegOff.  ToDo: should we do it this way around, or convert
-- CmmExprs into CmmRegOff?
mangleIndexTree :: CmmReg -> Int -> CmmExpr
mangleIndexTree reg off
  = CmmMachOp (MO_Add width) [CmmReg reg, CmmLit (CmmInt (fromIntegral off) width)]
  where width = typeWidth (cmmRegType reg)

-- | The dual to getAnyReg: compute an expression into a register, but
--      we don't mind which one it is.
getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
getSomeReg expr = do
  r <- getRegister expr
  case r of
    Any rep code -> do
        tmp <- getNewRegNat rep
        return (tmp, code tmp)
    Fixed _ reg code ->
        return (reg, code)
297 298 299 300 301 302


-- | Store a 64-bit value to memory on a 32-bit target, as two
-- 32-bit moves (low word first — little-endian layout).
assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock
assignMem_I64Code addrTree valueTree = do
  Amode addr addr_code <- getAmode addrTree
  ChildCode64 vcode rlo <- iselExpr64 valueTree
  let
        rhi = getHiVRegFromLo rlo

        -- Little-endian store
        mov_lo = MOV II32 (OpReg rlo) (OpAddr addr)
        mov_hi = MOV II32 (OpReg rhi) (OpAddr (fromJust (addrOffset addr 4)))
  -- in
  return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)


-- | Assign a 64-bit value to a local register on a 32-bit target,
-- moving the low and high 32-bit halves separately.  Only local
-- registers are valid lvalues here; anything else is a panic.
assignReg_I64Code :: CmmReg  -> CmmExpr -> NatM InstrBlock
assignReg_I64Code (CmmLocal (LocalReg u_dst _)) valueTree = do
   ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
   let
         r_dst_lo = RegVirtual $ mkVirtualReg u_dst II32
         r_dst_hi = getHiVRegFromLo r_dst_lo
         r_src_hi = getHiVRegFromLo r_src_lo
         mov_lo = MOV II32 (OpReg r_src_lo) (OpReg r_dst_lo)
         mov_hi = MOV II32 (OpReg r_src_hi) (OpReg r_dst_hi)
   -- in
   return (
        vcode `snocOL` mov_lo `snocOL` mov_hi
     )

assignReg_I64Code _ _
   = panic "assignReg_I64Code(i386): invalid lvalue"


-- | Instruction selection for 64-bit expressions on 32-bit targets.
-- The result is a 'ChildCode64' whose Reg is the low 32-bit half;
-- the high half is reached via getHiVRegFromLo.
iselExpr64        :: CmmExpr -> NatM ChildCode64
iselExpr64 (CmmLit (CmmInt i _)) = do
  (rlo,rhi) <- getNewRegPairNat II32
  let
        -- split the literal into its low and high 32-bit words
        r = fromIntegral (fromIntegral i :: Word32)
        q = fromIntegral (fromIntegral (i `shiftR` 32) :: Word32)
        code = toOL [
                MOV II32 (OpImm (ImmInteger r)) (OpReg rlo),
                MOV II32 (OpImm (ImmInteger q)) (OpReg rhi)
                ]
  -- in
  return (ChildCode64 code rlo)

iselExpr64 (CmmLoad addrTree ty) | isWord64 ty = do
   Amode addr addr_code <- getAmode addrTree
   (rlo,rhi) <- getNewRegPairNat II32
   let
        mov_lo = MOV II32 (OpAddr addr) (OpReg rlo)
        mov_hi = MOV II32 (OpAddr (fromJust (addrOffset addr 4))) (OpReg rhi)
   -- in
   return (
            ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
                        rlo
     )

iselExpr64 (CmmReg (CmmLocal (LocalReg vu ty))) | isWord64 ty
   = return (ChildCode64 nilOL (RegVirtual $ mkVirtualReg vu II32))

-- we handle addition, but rather badly
iselExpr64 (CmmMachOp (MO_Add _) [e1, CmmLit (CmmInt i _)]) = do
   ChildCode64 code1 r1lo <- iselExpr64 e1
   (rlo,rhi) <- getNewRegPairNat II32
   let
        r = fromIntegral (fromIntegral i :: Word32)
        q = fromIntegral (fromIntegral (i `shiftR` 32) :: Word32)
        r1hi = getHiVRegFromLo r1lo
        -- add-with-carry propagates the low-word carry into the high word
        code =  code1 `appOL`
                toOL [ MOV II32 (OpReg r1lo) (OpReg rlo),
                       ADD II32 (OpImm (ImmInteger r)) (OpReg rlo),
                       MOV II32 (OpReg r1hi) (OpReg rhi),
                       ADC II32 (OpImm (ImmInteger q)) (OpReg rhi) ]
   -- in
   return (ChildCode64 code rlo)

iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
   ChildCode64 code1 r1lo <- iselExpr64 e1
   ChildCode64 code2 r2lo <- iselExpr64 e2
   (rlo,rhi) <- getNewRegPairNat II32
   let
        r1hi = getHiVRegFromLo r1lo
        r2hi = getHiVRegFromLo r2lo
        code =  code1 `appOL`
                code2 `appOL`
                toOL [ MOV II32 (OpReg r1lo) (OpReg rlo),
                       ADD II32 (OpReg r2lo) (OpReg rlo),
                       MOV II32 (OpReg r1hi) (OpReg rhi),
                       ADC II32 (OpReg r2hi) (OpReg rhi) ]
   -- in
   return (ChildCode64 code rlo)

iselExpr64 (CmmMachOp (MO_UU_Conv _ W64) [expr]) = do
     fn <- getAnyReg expr
     r_dst_lo <-  getNewRegNat II32
     let r_dst_hi = getHiVRegFromLo r_dst_lo
         code = fn r_dst_lo
     -- zero-extension: the high word is simply zeroed
     return (
             ChildCode64 (code `snocOL`
                          MOV II32 (OpImm (ImmInt 0)) (OpReg r_dst_hi))
                          r_dst_lo
            )

iselExpr64 expr
   = do dflags <- getDynFlags
        pprPanic "iselExpr64(i386)" (pprPlatform (targetPlatform dflags) expr)
405 406 407 408


--------------------------------------------------------------------------------
-- | Produce a 'Register' for a Cmm expression, dispatching to
-- 'getRegister'' with the target word size.
getRegister :: CmmExpr -> NatM Register
getRegister e = do is32Bit <- is32BitPlatform
                   getRegister' is32Bit e
411 412 413 414 415 416 417 418 419 420

getRegister' :: Bool -> CmmExpr -> NatM Register

getRegister' is32Bit (CmmReg reg)
  = case reg of
        CmmGlobal PicBaseReg
         | is32Bit ->
            -- on x86_64, we have %rip for PicBaseReg, but it's not
            -- a full-featured register, it can only be used for
            -- rip-relative addressing.
Ian Lynagh's avatar
Ian Lynagh committed
421 422
            do reg' <- getPicBaseNat (archWordSize is32Bit)
               return (Fixed (archWordSize is32Bit) reg' nilOL)
423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441
        _ ->
            do use_sse2 <- sse2Enabled
               let
                 sz = cmmTypeSize (cmmRegType reg)
                 size | not use_sse2 && isFloatSize sz = FF80
                      | otherwise                      = sz
               --
               return (Fixed size (getRegisterReg use_sse2 reg) nilOL)


getRegister' is32Bit (CmmRegOff r n)
  = getRegister' is32Bit $ mangleIndexTree r n

-- for 32-bit architectuers, support some 64 -> 32 bit conversions:
-- TO_W_(x), TO_W_(x >> 32)

getRegister' is32Bit (CmmMachOp (MO_UU_Conv W64 W32)
                     [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]])
 | is32Bit = do
442 443 444
  ChildCode64 code rlo <- iselExpr64 x
  return $ Fixed II32 (getHiVRegFromLo rlo) code

445 446 447
getRegister' is32Bit (CmmMachOp (MO_SS_Conv W64 W32)
                     [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]])
 | is32Bit = do
448 449 450
  ChildCode64 code rlo <- iselExpr64 x
  return $ Fixed II32 (getHiVRegFromLo rlo) code

451 452
getRegister' is32Bit (CmmMachOp (MO_UU_Conv W64 W32) [x])
 | is32Bit = do
453 454 455
  ChildCode64 code rlo <- iselExpr64 x
  return $ Fixed II32 rlo code

456 457
getRegister' is32Bit (CmmMachOp (MO_SS_Conv W64 W32) [x])
 | is32Bit = do
458
  ChildCode64 code rlo <- iselExpr64 x
459
  return $ Fixed II32 rlo code
460

461
getRegister' _ (CmmLit lit@(CmmFloat f w)) =
462 463 464 465 466 467 468
  if_sse2 float_const_sse2 float_const_x87
 where
  float_const_sse2
    | f == 0.0 = do
      let
          size = floatSize w
          code dst = unitOL  (XOR size (OpReg dst) (OpReg dst))
469 470
        -- I don't know why there are xorpd, xorps, and pxor instructions.
        -- They all appear to do the same thing --SDM
471 472 473 474 475 476 477 478 479 480 481
      return (Any size code)

   | otherwise = do
      Amode addr code <- memConstant (widthInBytes w) lit
      loadFloatAmode True w addr code

  float_const_x87 = case w of
    W64
      | f == 0.0 ->
        let code dst = unitOL (GLDZ dst)
        in  return (Any FF80 code)
482

483 484 485
      | f == 1.0 ->
        let code dst = unitOL (GLD1 dst)
        in  return (Any FF80 code)
486

487 488 489
    _otherwise -> do
      Amode addr code <- memConstant (widthInBytes w) lit
      loadFloatAmode False w addr code
490 491

-- catch simple cases of zero- or sign-extended load
492
getRegister' _ (CmmMachOp (MO_UU_Conv W8 W32) [CmmLoad addr _]) = do
493 494 495
  code <- intLoadCode (MOVZxL II8) addr
  return (Any II32 code)

496
getRegister' _ (CmmMachOp (MO_SS_Conv W8 W32) [CmmLoad addr _]) = do
497 498 499
  code <- intLoadCode (MOVSxL II8) addr
  return (Any II32 code)

500
getRegister' _ (CmmMachOp (MO_UU_Conv W16 W32) [CmmLoad addr _]) = do
501 502 503
  code <- intLoadCode (MOVZxL II16) addr
  return (Any II32 code)

504
getRegister' _ (CmmMachOp (MO_SS_Conv W16 W32) [CmmLoad addr _]) = do
505 506 507 508
  code <- intLoadCode (MOVSxL II16) addr
  return (Any II32 code)

-- catch simple cases of zero- or sign-extended load
509 510
getRegister' is32Bit (CmmMachOp (MO_UU_Conv W8 W64) [CmmLoad addr _])
 | not is32Bit = do
511 512 513
  code <- intLoadCode (MOVZxL II8) addr
  return (Any II64 code)

514 515
getRegister' is32Bit (CmmMachOp (MO_SS_Conv W8 W64) [CmmLoad addr _])
 | not is32Bit = do
516 517 518
  code <- intLoadCode (MOVSxL II8) addr
  return (Any II64 code)

519 520
getRegister' is32Bit (CmmMachOp (MO_UU_Conv W16 W64) [CmmLoad addr _])
 | not is32Bit = do
521 522 523
  code <- intLoadCode (MOVZxL II16) addr
  return (Any II64 code)

524 525
getRegister' is32Bit (CmmMachOp (MO_SS_Conv W16 W64) [CmmLoad addr _])
 | not is32Bit = do
526 527 528
  code <- intLoadCode (MOVSxL II16) addr
  return (Any II64 code)

529 530
getRegister' is32Bit (CmmMachOp (MO_UU_Conv W32 W64) [CmmLoad addr _])
 | not is32Bit = do
531 532 533
  code <- intLoadCode (MOV II32) addr -- 32-bit loads zero-extend
  return (Any II64 code)

534 535
getRegister' is32Bit (CmmMachOp (MO_SS_Conv W32 W64) [CmmLoad addr _])
 | not is32Bit = do
536 537 538
  code <- intLoadCode (MOVSxL II32) addr
  return (Any II64 code)

539
getRegister' is32Bit (CmmMachOp (MO_Add W64) [CmmReg (CmmGlobal PicBaseReg),
540
                                     CmmLit displacement])
541 542
 | not is32Bit = do
      return $ Any II64 (\dst -> unitOL $
543 544
        LEA II64 (OpAddr (ripRel (litToImm displacement))) (OpReg dst))

545
getRegister' is32Bit (CmmMachOp mop [x]) = do -- unary MachOps
546 547 548 549 550
    sse2 <- sse2Enabled
    case mop of
      MO_F_Neg w
         | sse2      -> sse2NegCode w x
         | otherwise -> trivialUFCode FF80 (GNEG FF80) x
551 552 553 554 555 556 557 558 559 560 561 562

      MO_S_Neg w -> triv_ucode NEGI (intSize w)
      MO_Not w   -> triv_ucode NOT  (intSize w)

      -- Nop conversions
      MO_UU_Conv W32 W8  -> toI8Reg  W32 x
      MO_SS_Conv W32 W8  -> toI8Reg  W32 x
      MO_UU_Conv W16 W8  -> toI8Reg  W16 x
      MO_SS_Conv W16 W8  -> toI8Reg  W16 x
      MO_UU_Conv W32 W16 -> toI16Reg W32 x
      MO_SS_Conv W32 W16 -> toI16Reg W32 x

563 564 565 566 567 568
      MO_UU_Conv W64 W32 | not is32Bit -> conversionNop II64 x
      MO_SS_Conv W64 W32 | not is32Bit -> conversionNop II64 x
      MO_UU_Conv W64 W16 | not is32Bit -> toI16Reg W64 x
      MO_SS_Conv W64 W16 | not is32Bit -> toI16Reg W64 x
      MO_UU_Conv W64 W8  | not is32Bit -> toI8Reg  W64 x
      MO_SS_Conv W64 W8  | not is32Bit -> toI8Reg  W64 x
569 570 571 572 573 574 575 576 577 578 579 580 581

      MO_UU_Conv rep1 rep2 | rep1 == rep2 -> conversionNop (intSize rep1) x
      MO_SS_Conv rep1 rep2 | rep1 == rep2 -> conversionNop (intSize rep1) x

      -- widenings
      MO_UU_Conv W8  W32 -> integerExtend W8  W32 MOVZxL x
      MO_UU_Conv W16 W32 -> integerExtend W16 W32 MOVZxL x
      MO_UU_Conv W8  W16 -> integerExtend W8  W16 MOVZxL x

      MO_SS_Conv W8  W32 -> integerExtend W8  W32 MOVSxL x
      MO_SS_Conv W16 W32 -> integerExtend W16 W32 MOVSxL x
      MO_SS_Conv W8  W16 -> integerExtend W8  W16 MOVSxL x

582 583 584 585 586 587
      MO_UU_Conv W8  W64 | not is32Bit -> integerExtend W8  W64 MOVZxL x
      MO_UU_Conv W16 W64 | not is32Bit -> integerExtend W16 W64 MOVZxL x
      MO_UU_Conv W32 W64 | not is32Bit -> integerExtend W32 W64 MOVZxL x
      MO_SS_Conv W8  W64 | not is32Bit -> integerExtend W8  W64 MOVSxL x
      MO_SS_Conv W16 W64 | not is32Bit -> integerExtend W16 W64 MOVSxL x
      MO_SS_Conv W32 W64 | not is32Bit -> integerExtend W32 W64 MOVSxL x
588 589 590 591
        -- for 32-to-64 bit zero extension, amd64 uses an ordinary movl.
        -- However, we don't want the register allocator to throw it
        -- away as an unnecessary reg-to-reg move, so we keep it in
        -- the form of a movzl and print it as a movl later.
592

593 594
      MO_FF_Conv W32 W64
        | sse2      -> coerceFP2FP W64 x
595
        | otherwise -> conversionNop FF80 x
596

597
      MO_FF_Conv W64 W32 -> coerceFP2FP W32 x
598 599 600 601

      MO_FS_Conv from to -> coerceFP2Int from to x
      MO_SF_Conv from to -> coerceInt2FP from to x

dterei's avatar
dterei committed
602
      _other -> pprPanic "getRegister" (pprMachOp mop)
603
   where
604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621
        triv_ucode :: (Size -> Operand -> Instr) -> Size -> NatM Register
        triv_ucode instr size = trivialUCode size (instr size) x

        -- signed or unsigned extension.
        integerExtend :: Width -> Width
                      -> (Size -> Operand -> Operand -> Instr)
                      -> CmmExpr -> NatM Register
        integerExtend from to instr expr = do
            (reg,e_code) <- if from == W8 then getByteReg expr
                                          else getSomeReg expr
            let
                code dst =
                  e_code `snocOL`
                  instr (intSize from) (OpReg reg) (OpReg dst)
            return (Any (intSize to) code)

        toI8Reg :: Width -> CmmExpr -> NatM Register
        toI8Reg new_rep expr
622
            = do codefn <- getAnyReg expr
623 624 625 626 627 628 629
                 return (Any (intSize new_rep) codefn)
                -- HACK: use getAnyReg to get a byte-addressable register.
                -- If the source was a Fixed register, this will add the
                -- mov instruction to put it into the desired destination.
                -- We're assuming that the destination won't be a fixed
                -- non-byte-addressable register; it won't be, because all
                -- fixed registers are word-sized.
630

631
        toI16Reg = toI8Reg -- for now
632

633
        conversionNop :: Size -> CmmExpr -> NatM Register
634
        conversionNop new_size expr
635
            = do e_code <- getRegister' is32Bit expr
636 637 638
                 return (swizzleRegisterRep e_code new_size)


Ian Lynagh's avatar
Ian Lynagh committed
639
getRegister' is32Bit (CmmMachOp mop [x, y]) = do -- dyadic MachOps
640 641
  sse2 <- sse2Enabled
  case mop of
Ian Lynagh's avatar
Ian Lynagh committed
642 643 644 645 646 647
      MO_F_Eq _ -> condFltReg is32Bit EQQ x y
      MO_F_Ne _ -> condFltReg is32Bit NE  x y
      MO_F_Gt _ -> condFltReg is32Bit GTT x y
      MO_F_Ge _ -> condFltReg is32Bit GE  x y
      MO_F_Lt _ -> condFltReg is32Bit LTT x y
      MO_F_Le _ -> condFltReg is32Bit LE  x y
dterei's avatar
dterei committed
648 649 650 651 652 653 654 655 656 657 658 659 660

      MO_Eq _   -> condIntReg EQQ x y
      MO_Ne _   -> condIntReg NE  x y

      MO_S_Gt _ -> condIntReg GTT x y
      MO_S_Ge _ -> condIntReg GE  x y
      MO_S_Lt _ -> condIntReg LTT x y
      MO_S_Le _ -> condIntReg LE  x y

      MO_U_Gt _ -> condIntReg GU  x y
      MO_U_Ge _ -> condIntReg GEU x y
      MO_U_Lt _ -> condIntReg LU  x y
      MO_U_Le _ -> condIntReg LEU x y
661

662
      MO_F_Add w  | sse2      -> trivialFCode_sse2 w ADD  x y
dterei's avatar
dterei committed
663
                  | otherwise -> trivialFCode_x87    GADD x y
664
      MO_F_Sub w  | sse2      -> trivialFCode_sse2 w SUB  x y
dterei's avatar
dterei committed
665
                  | otherwise -> trivialFCode_x87    GSUB x y
666
      MO_F_Quot w | sse2      -> trivialFCode_sse2 w FDIV x y
dterei's avatar
dterei committed
667
                  | otherwise -> trivialFCode_x87    GDIV x y
668
      MO_F_Mul w  | sse2      -> trivialFCode_sse2 w MUL x y
dterei's avatar
dterei committed
669
                  | otherwise -> trivialFCode_x87    GMUL x y
670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685

      MO_Add rep -> add_code rep x y
      MO_Sub rep -> sub_code rep x y

      MO_S_Quot rep -> div_code rep True  True  x y
      MO_S_Rem  rep -> div_code rep True  False x y
      MO_U_Quot rep -> div_code rep False True  x y
      MO_U_Rem  rep -> div_code rep False False x y

      MO_S_MulMayOflo rep -> imulMayOflo rep x y

      MO_Mul rep -> triv_op rep IMUL
      MO_And rep -> triv_op rep AND
      MO_Or  rep -> triv_op rep OR
      MO_Xor rep -> triv_op rep XOR

686 687 688 689
        {- Shift ops on x86s have constraints on their source, it
           either has to be Imm, CL or 1
            => trivialCode is not restrictive enough (sigh.)
        -}
690 691 692 693
      MO_Shl rep   -> shift_code rep SHL x y {-False-}
      MO_U_Shr rep -> shift_code rep SHR x y {-False-}
      MO_S_Shr rep -> shift_code rep SAR x y {-False-}

dterei's avatar
dterei committed
694
      _other -> pprPanic "getRegister(x86) - binary CmmMachOp (1)" (pprMachOp mop)
695 696 697
  where
    --------------------
    triv_op width instr = trivialCode width op (Just op) x y
698
                        where op   = instr (intSize width)
699 700 701 702 703

    imulMayOflo :: Width -> CmmExpr -> CmmExpr -> NatM Register
    imulMayOflo rep a b = do
         (a_reg, a_code) <- getNonClobberedReg a
         b_code <- getAnyReg b
704 705 706 707 708
         let
             shift_amt  = case rep of
                           W32 -> 31
                           W64 -> 63
                           _ -> panic "shift_amt"
709

710
             size = intSize rep
711 712
             code = a_code `appOL` b_code eax `appOL`
                        toOL [
713
                           IMUL2 size (OpReg a_reg),   -- result in %edx:%eax
714
                           SAR size (OpImm (ImmInt shift_amt)) (OpReg eax),
715
                                -- sign extend lower part
716
                           SUB size (OpReg edx) (OpReg eax)
717
                                -- compare against upper
718 719 720
                           -- eax==0 if high part == sign extended low part
                        ]
         -- in
721
         return (Fixed size eax code)
722 723 724

    --------------------
    shift_code :: Width
725 726 727 728
               -> (Size -> Operand -> Operand -> Instr)
               -> CmmExpr
               -> CmmExpr
               -> NatM Register
729 730

    {- Case1: shift length as immediate -}
dterei's avatar
dterei committed
731
    shift_code width instr x (CmmLit lit) = do
732 733 734 735 736 737 738 739 740
          x_code <- getAnyReg x
          let
               size = intSize width
               code dst
                  = x_code dst `snocOL`
                    instr size (OpImm (litToImm lit)) (OpReg dst)
          -- in
          return (Any size code)

741 742 743 744
    {- Case2: shift length is complex (non-immediate)
      * y must go in %ecx.
      * we cannot do y first *and* put its result in %ecx, because
        %ecx might be clobbered by x.
745
      * if we do y second, then x cannot be
746 747 748 749 750 751 752 753 754 755 756
        in a clobbered reg.  Also, we cannot clobber x's reg
        with the instruction itself.
      * so we can either:
        - do y first, put its result in a fresh tmp, then copy it to %ecx later
        - do y second and put its result into %ecx.  x gets placed in a fresh
          tmp.  This is likely to be better, becuase the reg alloc can
          eliminate this reg->reg move here (it won't eliminate the other one,
          because the move is into the fixed %ecx).
    -}
    shift_code width instr x y{-amount-} = do
        x_code <- getAnyReg x
757 758
        let size = intSize width
        tmp <- getNewRegNat size
759
        y_code <- getAnyReg y
760 761 762 763
        let
           code = x_code tmp `appOL`
                  y_code ecx `snocOL`
                  instr size (OpReg ecx) (OpReg tmp)
764 765 766 767 768 769
        -- in
        return (Fixed size tmp code)

    --------------------
    add_code :: Width -> CmmExpr -> CmmExpr -> NatM Register
    add_code rep x (CmmLit (CmmInt y _))
770
        | is32BitInteger y = add_int rep x y
771 772 773 774 775 776
    add_code rep x y = trivialCode rep (ADD size) (Just (ADD size)) x y
      where size = intSize rep

    --------------------
    sub_code :: Width -> CmmExpr -> CmmExpr -> NatM Register
    sub_code rep x (CmmLit (CmmInt y _))
777
        | is32BitInteger (-y) = add_int rep x (-y)
778 779 780 781
    sub_code rep x y = trivialCode rep (SUB (intSize rep)) Nothing x y

    -- our three-operand add instruction:
    add_int width x y = do
782 783 784 785 786
        (x_reg, x_code) <- getSomeReg x
        let
            size = intSize width
            imm = ImmInt (fromInteger y)
            code dst
787
               = x_code `snocOL`
788 789
                 LEA size
                        (OpAddr (AddrBaseIndex (EABaseReg x_reg) EAIndexNone imm))
790
                        (OpReg dst)
791 792
        --
        return (Any size code)
793 794 795

    ----------------------
    div_code width signed quotient x y = do
796 797 798 799 800 801
           (y_op, y_code) <- getRegOrMem y -- cannot be clobbered
           x_code <- getAnyReg x
           let
             size = intSize width
             widen | signed    = CLTD size
                   | otherwise = XOR size (OpReg edx) (OpReg edx)
802

803 804
             instr | signed    = IDIV
                   | otherwise = DIV
805

806 807 808
             code = y_code `appOL`
                    x_code eax `appOL`
                    toOL [widen, instr size y_op]
809

810 811
             result | quotient  = eax
                    | otherwise = edx
812

813
           -- in
814 815 816
           return (Fixed size result code)


817
-- Floating-point loads: work out the addressing mode for the memory
-- operand, then defer to loadFloatAmode, which emits the appropriate
-- (x87 or SSE2) load depending on 'use_sse2'.
getRegister' _ (CmmLoad mem pk)
  | isFloatType pk
  = do
    Amode addr mem_code <- getAmode mem
    use_sse2 <- sse2Enabled
    let width = typeWidth pk
    loadFloatAmode use_sse2 width addr mem_code
823

824 825
-- Integer loads on 32-bit platforms (64-bit loads are handled
-- elsewhere, via the register pairing machinery).
getRegister' is32Bit (CmmLoad mem pk)
  | is32Bit && not (isWord64 pk)
  = do
    code <- intLoadCode instr mem
    return (Any size code)
  where
    width = typeWidth pk
    size  = intSize width
    -- We always zero-extend 8-bit loads, if we can't think of anything
    -- better.  This is because we can't guarantee access to an 8-bit
    -- variant of every register (esi and edi don't have 8-bit
    -- variants), so to make things simpler we do our 8-bit arithmetic
    -- with full 32-bit registers.
    instr | width == W8 = MOVZxL II8
          | otherwise   = MOV size
840 841

-- Simpler memory load code on x86_64: every register has an 8-bit
-- variant there, so a plain MOV of the right size always works.
getRegister' is32Bit (CmmLoad mem pk)
  | not is32Bit
  = do
    let size = intSize (typeWidth pk)
    code <- intLoadCode (MOV size) mem
    return (Any size code)

849
-- A literal zero is best materialised with xor reg,reg.
getRegister' is32Bit (CmmLit (CmmInt 0 width))
  = return (Any size code)
  where
    size = intSize width
    -- x86_64: a 32-bit xor is one byte shorter, and the hardware
    -- zero-extends the result to the full 64 bits anyway.
    xor_size | not is32Bit && size == II64 = II32
             | otherwise                   = size
    code dst = unitOL (XOR xor_size (OpReg dst) (OpReg dst))
862 863 864 865

  -- Optimisation for loading small literals on x86_64: take advantage
  -- of the automatic zero-extension from 32 to 64 bits, because the
  -- 32-bit instruction forms are shorter.
getRegister' is32Bit (CmmLit lit)
  | not is32Bit, isWord64 (cmmLitType lit), not (isBigLit lit)
  = return (Any II64 code)
  where
    imm      = litToImm lit
    code dst = unitOL (MOV II32 (OpImm imm) (OpReg dst))
    -- note1: not the same as (not . is32BitLit), because that checks
    -- for signed literals that fit in 32 bits, but we want unsigned
    -- literals here.
    -- note2: all labels are small, because we're assuming the small
    -- memory model (see gcc docs, -mcmodel=small).
    isBigLit (CmmInt i _) = i < 0 || i > 0xffffffff
    isBigLit _            = False
881

882
-- Any other literal: load its immediate form with a straight MOV of
-- the literal's natural size.
getRegister' _ (CmmLit lit)
  = return (Any size code)
  where
    size     = cmmTypeSize (cmmLitType lit)
    imm      = litToImm lit
    code dst = unitOL (MOV size (OpImm imm) (OpReg dst))
889

890
-- Anything we haven't matched by now is a bug in the code generator.
getRegister' _ other
  = do dflags <- getDynFlags
       pprPanic "getRegister(x86)" (pprPlatform (targetPlatform dflags) other)
892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915


-- | Build code that loads from memory into a caller-chosen register,
-- using the supplied move/extend instruction.
intLoadCode :: (Operand -> Operand -> Instr) -> CmmExpr
   -> NatM (Reg -> InstrBlock)
intLoadCode instr mem = do
  Amode src mem_code <- getAmode mem
  let load dst = mem_code `snocOL` instr (OpAddr src) (OpReg dst)
  return load

-- Compute an expression into *any* register, adding the appropriate
-- move instruction if necessary.
getAnyReg :: CmmExpr -> NatM (Reg -> InstrBlock)
getAnyReg expr = getRegister expr >>= anyReg

-- | Turn a 'Register' into a function that deposits the value into a
-- caller-chosen destination register.  An 'Any' register already
-- parameterises over the destination; for a 'Fixed' register we append
-- a reg->reg move out of the fixed register.
anyReg :: Register -> NatM (Reg -> InstrBlock)
anyReg (Any _ code)          = return code
anyReg (Fixed rep reg fcode) = return (\dst -> fcode `snocOL` reg2reg rep reg dst)

-- A bit like getSomeReg, but we want a reg that can be byte-addressed.
-- Fixed registers might not be byte-addressable, so we make sure we've
-- got a temporary, inserting an extra reg copy if necessary.
getByteReg :: CmmExpr -> NatM (Reg, InstrBlock)
getByteReg expr = do
  is32Bit <- is32BitPlatform
  if not is32Bit
    then getSomeReg expr -- all regs are byte-addressable on x86_64
    else do
      r <- getRegister expr
      case r of
        Any rep code -> do
          tmp <- getNewRegNat rep
          return (tmp, code tmp)
        Fixed rep reg code
          | isVirtualReg reg -> return (reg, code)
          | otherwise -> do
              -- ToDo: could optimise slightly by checking for
              -- byte-addressable real registers, but that will
              -- happen very rarely if at all.
              tmp <- getNewRegNat rep
              return (tmp, code `snocOL` reg2reg rep reg tmp)
932 933 934 935 936 937 938 939

-- Another variant: this time we want the result in a register that cannot
-- be modified by code to evaluate an arbitrary expression.
getNonClobberedReg :: CmmExpr -> NatM (Reg, InstrBlock)
getNonClobberedReg expr = do
  r <- getRegister expr
  case r of
    Any rep code -> do
      tmp <- getNewRegNat rep
      return (tmp, code tmp)
    Fixed rep reg code
      | clobberable reg -> do
          tmp <- getNewRegNat rep
          return (tmp, code `snocOL` reg2reg rep reg tmp)
      | otherwise ->
          return (reg, code)
  where
    -- only free real regs can be clobbered; anything else is safe to
    -- hand back as-is
    clobberable (RegReal (RealRegSingle rr)) = isFastTrue (freeReg rr)
    clobberable _                            = False
951 952

-- | Register-to-register move: 80-bit floats go through the x87 GMOV;
-- everything else is an ordinary MOV of the given size.
reg2reg :: Size -> Reg -> Reg -> Instr
reg2reg FF80 src dst = GMOV src dst
reg2reg size src dst = MOV size (OpReg src) (OpReg dst)
956 957 958 959


--------------------------------------------------------------------------------
-- | Compute an addressing mode for an expression, dispatching on the
-- target platform's word size.
getAmode :: CmmExpr -> NatM Amode
getAmode e = do
  is32Bit <- is32BitPlatform
  getAmode' is32Bit e
962

963 964
getAmode' :: Bool -> CmmExpr -> NatM Amode
getAmode' _ (CmmRegOff r n) = getAmode $ mangleIndexTree r n
965

966 967 968
getAmode' is32Bit (CmmMachOp (MO_Add W64) [CmmReg (CmmGlobal PicBaseReg),
                                                  CmmLit displacement])
 | not is32Bit
969