-----------------------------------------------------------------------------
--
-- Generating machine code (instruction selection)
--
-- (c) The University of Glasgow 1996-2004
--
-----------------------------------------------------------------------------

-- This is a big module, but, if you pay attention to
-- (a) the sectioning, and (b) the type signatures, the
-- structure should not be too overwhelming.

module X86.CodeGen (
        cmmTopCodeGen,
        generateJumpTableForInstr,
        InstrBlock
)

where

#include "HsVersions.h"
#include "nativeGen/NCG.h"
#include "../includes/MachDeps.h"

-- NCG stuff:
import X86.Instr
import X86.Cond
import X86.Regs
import X86.RegInfo
import CodeGen.Platform
import CPrim
import Instruction
import PIC
import NCGMonad
import Size
import Reg
import Platform

-- Our intermediate code:
import BasicTypes
import BlockId
import Module           ( primPackageId )
import PprCmm           ()
import OldCmm
import OldPprCmm        ()
import CLabel

-- The rest:
import ForeignCall      ( CCallConv(..) )
import OrdList
import Outputable
import Unique
import FastString
import FastBool         ( isFastTrue )
import DynFlags
import Util

import Control.Monad
import Data.Bits
import Data.Int
import Data.Maybe
import Data.Word

is32BitPlatform :: NatM Bool
is32BitPlatform = do
    dflags <- getDynFlags
    return $ target32Bit (targetPlatform dflags)

sse2Enabled :: NatM Bool
sse2Enabled = do
  dflags <- getDynFlags
  case platformArch (targetPlatform dflags) of
      ArchX86_64 -> -- SSE2 is fixed on for x86_64.  It would be
                    -- possible to make it optional, but we'd need to
                    -- fix at least the foreign call code where the
                    -- calling convention specifies the use of xmm regs,
                    -- and possibly other places.
                    return True
      ArchX86    -> return (gopt Opt_SSE2 dflags || gopt Opt_SSE4_2 dflags)
      _          -> panic "sse2Enabled: Not an X86* arch"

sse4_2Enabled :: NatM Bool
sse4_2Enabled = do
  dflags <- getDynFlags
  return (gopt Opt_SSE4_2 dflags)

if_sse2 :: NatM a -> NatM a -> NatM a
if_sse2 sse2 x87 = do
  b <- sse2Enabled
  if b then sse2 else x87

cmmTopCodeGen
        :: RawCmmDecl
        -> NatM [NatCmmDecl (Alignment, CmmStatics) Instr]

cmmTopCodeGen (CmmProc info lab live (ListGraph blocks)) = do
  (nat_blocks,statics) <- mapAndUnzipM basicBlockCodeGen blocks
  picBaseMb <- getPicBaseMaybeNat
  dflags <- getDynFlags
  let proc = CmmProc info lab live (ListGraph $ concat nat_blocks)
      tops = proc : concat statics
      os   = platformOS $ targetPlatform dflags

  case picBaseMb of
      Just picBase -> initializePicBase_x86 ArchX86 os picBase tops
      Nothing -> return tops

cmmTopCodeGen (CmmData sec dat) = do
  return [CmmData sec (1, dat)]  -- no translation, we just use CmmStatic


basicBlockCodeGen
        :: CmmBasicBlock
        -> NatM ( [NatBasicBlock Instr]
                , [NatCmmDecl (Alignment, CmmStatics) Instr])

basicBlockCodeGen (BasicBlock id stmts) = do
  instrs <- stmtsToInstrs stmts
  -- code generation may introduce new basic block boundaries, which
  -- are indicated by the NEWBLOCK instruction.  We must split up the
  -- instruction stream into basic blocks again.  Also, we extract
  -- LDATAs here too.
  let
        (top,other_blocks,statics) = foldrOL mkBlocks ([],[],[]) instrs

        mkBlocks (NEWBLOCK id) (instrs,blocks,statics)
          = ([], BasicBlock id instrs : blocks, statics)
        mkBlocks (LDATA sec dat) (instrs,blocks,statics)
          = (instrs, blocks, CmmData sec dat:statics)
        mkBlocks instr (instrs,blocks,statics)
          = (instr:instrs, blocks, statics)
  return (BasicBlock id top : other_blocks, statics)


stmtsToInstrs :: [CmmStmt] -> NatM InstrBlock
stmtsToInstrs stmts
   = do instrss <- mapM stmtToInstrs stmts
        return (concatOL instrss)


stmtToInstrs :: CmmStmt -> NatM InstrBlock
stmtToInstrs stmt = do
  dflags <- getDynFlags
  is32Bit <- is32BitPlatform
  case stmt of
    CmmNop         -> return nilOL
    CmmComment s   -> return (unitOL (COMMENT s))

    CmmAssign reg src
      | isFloatType ty         -> assignReg_FltCode size reg src
      | is32Bit && isWord64 ty -> assignReg_I64Code      reg src
      | otherwise              -> assignReg_IntCode size reg src
        where ty = cmmRegType dflags reg
              size = cmmTypeSize ty

    CmmStore addr src
      | isFloatType ty         -> assignMem_FltCode size addr src
      | is32Bit && isWord64 ty -> assignMem_I64Code      addr src
      | otherwise              -> assignMem_IntCode size addr src
        where ty = cmmExprType dflags src
              size = cmmTypeSize ty

    CmmCall target result_regs args _
       -> genCCall is32Bit target result_regs args

    CmmBranch id          -> genBranch id
    CmmCondBranch arg id  -> genCondJump id arg
    CmmSwitch arg ids     -> do dflags <- getDynFlags
                                genSwitch dflags arg ids
    CmmJump arg gregs     -> do dflags <- getDynFlags
                                genJump arg (jumpRegs dflags gregs)
    CmmReturn             ->
      panic "stmtToInstrs: return statement should have been cps'd away"


jumpRegs :: DynFlags -> [GlobalReg] -> [Reg]
jumpRegs dflags gregs = [ RegReal r | Just r <- map (globalRegMaybe platform) gregs ]
    where platform = targetPlatform dflags

--------------------------------------------------------------------------------
-- | 'InstrBlock's are the insn sequences generated by the insn selectors.
--      They are really trees of insns to facilitate fast appending, where a
--      left-to-right traversal yields the insns in the correct order.
--
type InstrBlock
        = OrdList Instr
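-- For illustration (a sketch, not code from this module): an InstrBlock
-- built as @(unitOL i1 `appOL` unitOL i2) `snocOL` i3@ appends without
-- copying, and a left-to-right traversal yields [i1, i2, i3].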


-- | Condition codes passed up the tree.
--
data CondCode
        = CondCode Bool Cond InstrBlock


-- | a.k.a "Register64"
--      Reg is the lower 32-bit temporary which contains the result.
--      Use getHiVRegFromLo to find the other VRegUnique.
--
--      Rules of this simplified insn selection game are therefore that
--      the returned Reg may be modified
--
data ChildCode64
   = ChildCode64
        InstrBlock
        Reg


-- | Registers passed up the tree.  If the stix code forces the register
--      to live in a pre-decided machine register, it comes out as @Fixed@;
--      otherwise, it comes out as @Any@, and the parent can decide which
--      register to put it in.
--
data Register
        = Fixed Size Reg InstrBlock
        | Any   Size (Reg -> InstrBlock)
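-- For example (an illustrative sketch only): a selector returning
--      Any II32 (\dst -> unitOL (MOV II32 (OpImm (ImmInt 42)) (OpReg dst)))
-- lets the parent pick the destination register, whereas a
--      Fixed II32 eax code
-- result insists the value is in %eax once @code@ has run.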


swizzleRegisterRep :: Register -> Size -> Register
swizzleRegisterRep (Fixed _ reg code) size = Fixed size reg code
swizzleRegisterRep (Any _ codefn)     size = Any   size codefn


-- | Grab the Reg for a CmmReg
getRegisterReg :: Platform -> Bool -> CmmReg -> Reg

getRegisterReg _ use_sse2 (CmmLocal (LocalReg u pk))
  = let sz = cmmTypeSize pk in
    if isFloatSize sz && not use_sse2
       then RegVirtual (mkVirtualReg u FF80)
       else RegVirtual (mkVirtualReg u sz)

getRegisterReg platform _ (CmmGlobal mid)
  = case globalRegMaybe platform mid of
        Just reg -> RegReal $ reg
        Nothing  -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid)
        -- By this stage, the only MagicIds remaining should be the
        -- ones which map to a real machine register on this
        -- platform.  Hence ...


-- | Memory addressing modes passed up the tree.
data Amode
        = Amode AddrMode InstrBlock

{-
Now, given a tree (the argument to a CmmLoad) that references memory,
produce a suitable addressing mode.

A Rule of the Game (tm) for Amodes: use of the addr bit must
immediately follow use of the code part, since the code part puts
values in registers which the addr then refers to.  So you can't put
anything in between, lest it overwrite some of those registers.  If
you need to do some other computation between the code part and use of
the addr bit, first store the effective address from the amode in a
temporary, then do the other computation, and then use the temporary:

    code
    LEA amode, tmp
    ... other computation ...
    ... (tmp) ...
-}


-- | Check whether an integer will fit in 32 bits.
--      A CmmInt is intended to be truncated to the appropriate
--      number of bits, so here we truncate it to Int64.  This is
--      important because e.g. -1 as a CmmInt might be either
--      -1 or 18446744073709551615.
--
is32BitInteger :: Integer -> Bool
is32BitInteger i = i64 <= 0x7fffffff && i64 >= -0x80000000
  where i64 = fromIntegral i :: Int64
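-- For example: is32BitInteger (-1) and is32BitInteger 18446744073709551615
-- are both True (the latter truncates to -1 :: Int64), while
-- is32BitInteger 0x80000000 is False, since 2^31 does not fit in a
-- signed 32-bit immediate.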


-- | Convert a BlockId to some CmmStatic data
jumpTableEntry :: DynFlags -> Maybe BlockId -> CmmStatic
jumpTableEntry dflags Nothing = CmmStaticLit (CmmInt 0 (wordWidth dflags))
jumpTableEntry _ (Just blockid) = CmmStaticLit (CmmLabel blockLabel)
    where blockLabel = mkAsmTempLabel (getUnique blockid)


-- -----------------------------------------------------------------------------
-- General things for putting together code sequences

-- Expand CmmRegOff.  ToDo: should we do it this way around, or convert
-- CmmExprs into CmmRegOff?
mangleIndexTree :: DynFlags -> CmmReg -> Int -> CmmExpr
mangleIndexTree dflags reg off
  = CmmMachOp (MO_Add width) [CmmReg reg, CmmLit (CmmInt (fromIntegral off) width)]
  where width = typeWidth (cmmRegType dflags reg)
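-- e.g. (CmmRegOff reg 8) comes out as
--      CmmMachOp (MO_Add w) [CmmReg reg, CmmLit (CmmInt 8 w)]
-- where w is the width of reg's type.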

-- | The dual to getAnyReg: compute an expression into a register, but
--      we don't mind which one it is.
getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
getSomeReg expr = do
  r <- getRegister expr
  case r of
    Any rep code -> do
        tmp <- getNewRegNat rep
        return (tmp, code tmp)
    Fixed _ reg code ->
        return (reg, code)


assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock
assignMem_I64Code addrTree valueTree = do
  Amode addr addr_code <- getAmode addrTree
  ChildCode64 vcode rlo <- iselExpr64 valueTree
  let
        rhi = getHiVRegFromLo rlo

        -- Little-endian store
        mov_lo = MOV II32 (OpReg rlo) (OpAddr addr)
        mov_hi = MOV II32 (OpReg rhi) (OpAddr (fromJust (addrOffset addr 4)))
  return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)


assignReg_I64Code :: CmmReg  -> CmmExpr -> NatM InstrBlock
assignReg_I64Code (CmmLocal (LocalReg u_dst _)) valueTree = do
   ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
   let
         r_dst_lo = RegVirtual $ mkVirtualReg u_dst II32
         r_dst_hi = getHiVRegFromLo r_dst_lo
         r_src_hi = getHiVRegFromLo r_src_lo
         mov_lo = MOV II32 (OpReg r_src_lo) (OpReg r_dst_lo)
         mov_hi = MOV II32 (OpReg r_src_hi) (OpReg r_dst_hi)
   return (
        vcode `snocOL` mov_lo `snocOL` mov_hi
     )

assignReg_I64Code _ _
   = panic "assignReg_I64Code(i386): invalid lvalue"


iselExpr64        :: CmmExpr -> NatM ChildCode64
iselExpr64 (CmmLit (CmmInt i _)) = do
  (rlo,rhi) <- getNewRegPairNat II32
  let
        r = fromIntegral (fromIntegral i :: Word32)
        q = fromIntegral (fromIntegral (i `shiftR` 32) :: Word32)
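        -- r is the low 32-bit half, q the high one; e.g. the literal
        -- 0x0000000100000002 gives r = 2 and q = 1.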
        code = toOL [
                MOV II32 (OpImm (ImmInteger r)) (OpReg rlo),
                MOV II32 (OpImm (ImmInteger q)) (OpReg rhi)
                ]
  return (ChildCode64 code rlo)

iselExpr64 (CmmLoad addrTree ty) | isWord64 ty = do
   Amode addr addr_code <- getAmode addrTree
   (rlo,rhi) <- getNewRegPairNat II32
   let
        mov_lo = MOV II32 (OpAddr addr) (OpReg rlo)
        mov_hi = MOV II32 (OpAddr (fromJust (addrOffset addr 4))) (OpReg rhi)
   return (
            ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
                        rlo
     )

iselExpr64 (CmmReg (CmmLocal (LocalReg vu ty))) | isWord64 ty
   = return (ChildCode64 nilOL (RegVirtual $ mkVirtualReg vu II32))

-- we handle addition, but rather badly
iselExpr64 (CmmMachOp (MO_Add _) [e1, CmmLit (CmmInt i _)]) = do
   ChildCode64 code1 r1lo <- iselExpr64 e1
   (rlo,rhi) <- getNewRegPairNat II32
   let
        r = fromIntegral (fromIntegral i :: Word32)
        q = fromIntegral (fromIntegral (i `shiftR` 32) :: Word32)
        r1hi = getHiVRegFromLo r1lo
        code =  code1 `appOL`
                toOL [ MOV II32 (OpReg r1lo) (OpReg rlo),
                       ADD II32 (OpImm (ImmInteger r)) (OpReg rlo),
                       MOV II32 (OpReg r1hi) (OpReg rhi),
                       ADC II32 (OpImm (ImmInteger q)) (OpReg rhi) ]
   return (ChildCode64 code rlo)

iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
   ChildCode64 code1 r1lo <- iselExpr64 e1
   ChildCode64 code2 r2lo <- iselExpr64 e2
   (rlo,rhi) <- getNewRegPairNat II32
   let
381 382 383 384 385 386 387 388
        r1hi = getHiVRegFromLo r1lo
        r2hi = getHiVRegFromLo r2lo
        code =  code1 `appOL`
                code2 `appOL`
                toOL [ MOV II32 (OpReg r1lo) (OpReg rlo),
                       ADD II32 (OpReg r2lo) (OpReg rlo),
                       MOV II32 (OpReg r1hi) (OpReg rhi),
                       ADC II32 (OpReg r2hi) (OpReg rhi) ]
   return (ChildCode64 code rlo)

iselExpr64 (CmmMachOp (MO_UU_Conv _ W64) [expr]) = do
     fn <- getAnyReg expr
     r_dst_lo <-  getNewRegNat II32
     let r_dst_hi = getHiVRegFromLo r_dst_lo
         code = fn r_dst_lo
     return (
             ChildCode64 (code `snocOL`
                          MOV II32 (OpImm (ImmInt 0)) (OpReg r_dst_hi))
                          r_dst_lo
            )

iselExpr64 expr
   = pprPanic "iselExpr64(i386)" (ppr expr)


--------------------------------------------------------------------------------
getRegister :: CmmExpr -> NatM Register
getRegister e = do dflags <- getDynFlags
                   is32Bit <- is32BitPlatform
                   getRegister' dflags is32Bit e

getRegister' :: DynFlags -> Bool -> CmmExpr -> NatM Register

getRegister' dflags is32Bit (CmmReg reg)
  = case reg of
        CmmGlobal PicBaseReg
         | is32Bit ->
            -- on x86_64, we have %rip for PicBaseReg, but it's not
            -- a full-featured register, it can only be used for
            -- rip-relative addressing.
            do reg' <- getPicBaseNat (archWordSize is32Bit)
               return (Fixed (archWordSize is32Bit) reg' nilOL)
        _ ->
            do use_sse2 <- sse2Enabled
               let
                 sz = cmmTypeSize (cmmRegType dflags reg)
                 size | not use_sse2 && isFloatSize sz = FF80
                      | otherwise                      = sz
               --
               let platform = targetPlatform dflags
               return (Fixed size (getRegisterReg platform use_sse2 reg) nilOL)


getRegister' dflags is32Bit (CmmRegOff r n)
  = getRegister' dflags is32Bit $ mangleIndexTree dflags r n

-- for 32-bit architectures, support some 64 -> 32 bit conversions:
-- TO_W_(x), TO_W_(x >> 32)

getRegister' _ is32Bit (CmmMachOp (MO_UU_Conv W64 W32)
                     [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]])
 | is32Bit = do
  ChildCode64 code rlo <- iselExpr64 x
  return $ Fixed II32 (getHiVRegFromLo rlo) code

getRegister' _ is32Bit (CmmMachOp (MO_SS_Conv W64 W32)
                     [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]])
 | is32Bit = do
  ChildCode64 code rlo <- iselExpr64 x
  return $ Fixed II32 (getHiVRegFromLo rlo) code

getRegister' _ is32Bit (CmmMachOp (MO_UU_Conv W64 W32) [x])
 | is32Bit = do
  ChildCode64 code rlo <- iselExpr64 x
  return $ Fixed II32 rlo code

getRegister' _ is32Bit (CmmMachOp (MO_SS_Conv W64 W32) [x])
 | is32Bit = do
  ChildCode64 code rlo <- iselExpr64 x
  return $ Fixed II32 rlo code

getRegister' _ _ (CmmLit lit@(CmmFloat f w)) =
  if_sse2 float_const_sse2 float_const_x87
 where
  float_const_sse2
    | f == 0.0 = do
      let
          size = floatSize w
          code dst = unitOL  (XOR size (OpReg dst) (OpReg dst))
        -- I don't know why there are xorpd, xorps, and pxor instructions.
        -- They all appear to do the same thing --SDM
      return (Any size code)

   | otherwise = do
      Amode addr code <- memConstant (widthInBytes w) lit
      loadFloatAmode True w addr code

  float_const_x87 = case w of
    W64
      | f == 0.0 ->
        let code dst = unitOL (GLDZ dst)
        in  return (Any FF80 code)

      | f == 1.0 ->
        let code dst = unitOL (GLD1 dst)
        in  return (Any FF80 code)

    _otherwise -> do
      Amode addr code <- memConstant (widthInBytes w) lit
      loadFloatAmode False w addr code

-- catch simple cases of zero- or sign-extended load
getRegister' _ _ (CmmMachOp (MO_UU_Conv W8 W32) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVZxL II8) addr
  return (Any II32 code)

getRegister' _ _ (CmmMachOp (MO_SS_Conv W8 W32) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVSxL II8) addr
  return (Any II32 code)

getRegister' _ _ (CmmMachOp (MO_UU_Conv W16 W32) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVZxL II16) addr
  return (Any II32 code)

getRegister' _ _ (CmmMachOp (MO_SS_Conv W16 W32) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVSxL II16) addr
  return (Any II32 code)

-- catch simple cases of zero- or sign-extended load
getRegister' _ is32Bit (CmmMachOp (MO_UU_Conv W8 W64) [CmmLoad addr _])
 | not is32Bit = do
  code <- intLoadCode (MOVZxL II8) addr
  return (Any II64 code)

getRegister' _ is32Bit (CmmMachOp (MO_SS_Conv W8 W64) [CmmLoad addr _])
 | not is32Bit = do
  code <- intLoadCode (MOVSxL II8) addr
  return (Any II64 code)

getRegister' _ is32Bit (CmmMachOp (MO_UU_Conv W16 W64) [CmmLoad addr _])
 | not is32Bit = do
  code <- intLoadCode (MOVZxL II16) addr
  return (Any II64 code)

getRegister' _ is32Bit (CmmMachOp (MO_SS_Conv W16 W64) [CmmLoad addr _])
 | not is32Bit = do
  code <- intLoadCode (MOVSxL II16) addr
  return (Any II64 code)

getRegister' _ is32Bit (CmmMachOp (MO_UU_Conv W32 W64) [CmmLoad addr _])
 | not is32Bit = do
  code <- intLoadCode (MOV II32) addr -- 32-bit loads zero-extend
  return (Any II64 code)

getRegister' _ is32Bit (CmmMachOp (MO_SS_Conv W32 W64) [CmmLoad addr _])
 | not is32Bit = do
  code <- intLoadCode (MOVSxL II32) addr
  return (Any II64 code)

getRegister' _ is32Bit (CmmMachOp (MO_Add W64) [CmmReg (CmmGlobal PicBaseReg),
                                     CmmLit displacement])
 | not is32Bit = do
      return $ Any II64 (\dst -> unitOL $
        LEA II64 (OpAddr (ripRel (litToImm displacement))) (OpReg dst))

getRegister' dflags is32Bit (CmmMachOp mop [x]) = do -- unary MachOps
    sse2 <- sse2Enabled
    case mop of
      MO_F_Neg w
         | sse2      -> sse2NegCode w x
         | otherwise -> trivialUFCode FF80 (GNEG FF80) x

      MO_S_Neg w -> triv_ucode NEGI (intSize w)
      MO_Not w   -> triv_ucode NOT  (intSize w)

      -- Nop conversions
      MO_UU_Conv W32 W8  -> toI8Reg  W32 x
      MO_SS_Conv W32 W8  -> toI8Reg  W32 x
      MO_UU_Conv W16 W8  -> toI8Reg  W16 x
      MO_SS_Conv W16 W8  -> toI8Reg  W16 x
      MO_UU_Conv W32 W16 -> toI16Reg W32 x
      MO_SS_Conv W32 W16 -> toI16Reg W32 x

      MO_UU_Conv W64 W32 | not is32Bit -> conversionNop II64 x
      MO_SS_Conv W64 W32 | not is32Bit -> conversionNop II64 x
      MO_UU_Conv W64 W16 | not is32Bit -> toI16Reg W64 x
      MO_SS_Conv W64 W16 | not is32Bit -> toI16Reg W64 x
      MO_UU_Conv W64 W8  | not is32Bit -> toI8Reg  W64 x
      MO_SS_Conv W64 W8  | not is32Bit -> toI8Reg  W64 x

      MO_UU_Conv rep1 rep2 | rep1 == rep2 -> conversionNop (intSize rep1) x
      MO_SS_Conv rep1 rep2 | rep1 == rep2 -> conversionNop (intSize rep1) x

      -- widenings
      MO_UU_Conv W8  W32 -> integerExtend W8  W32 MOVZxL x
      MO_UU_Conv W16 W32 -> integerExtend W16 W32 MOVZxL x
      MO_UU_Conv W8  W16 -> integerExtend W8  W16 MOVZxL x

      MO_SS_Conv W8  W32 -> integerExtend W8  W32 MOVSxL x
      MO_SS_Conv W16 W32 -> integerExtend W16 W32 MOVSxL x
      MO_SS_Conv W8  W16 -> integerExtend W8  W16 MOVSxL x

      MO_UU_Conv W8  W64 | not is32Bit -> integerExtend W8  W64 MOVZxL x
      MO_UU_Conv W16 W64 | not is32Bit -> integerExtend W16 W64 MOVZxL x
      MO_UU_Conv W32 W64 | not is32Bit -> integerExtend W32 W64 MOVZxL x
      MO_SS_Conv W8  W64 | not is32Bit -> integerExtend W8  W64 MOVSxL x
      MO_SS_Conv W16 W64 | not is32Bit -> integerExtend W16 W64 MOVSxL x
      MO_SS_Conv W32 W64 | not is32Bit -> integerExtend W32 W64 MOVSxL x
        -- for 32-to-64 bit zero extension, amd64 uses an ordinary movl.
        -- However, we don't want the register allocator to throw it
        -- away as an unnecessary reg-to-reg move, so we keep it in
        -- the form of a movzl and print it as a movl later.

      MO_FF_Conv W32 W64
        | sse2      -> coerceFP2FP W64 x
        | otherwise -> conversionNop FF80 x

      MO_FF_Conv W64 W32 -> coerceFP2FP W32 x

      MO_FS_Conv from to -> coerceFP2Int from to x
      MO_SF_Conv from to -> coerceInt2FP from to x

      _other -> pprPanic "getRegister" (pprMachOp mop)
   where
        triv_ucode :: (Size -> Operand -> Instr) -> Size -> NatM Register
        triv_ucode instr size = trivialUCode size (instr size) x

        -- signed or unsigned extension.
        integerExtend :: Width -> Width
                      -> (Size -> Operand -> Operand -> Instr)
                      -> CmmExpr -> NatM Register
        integerExtend from to instr expr = do
            (reg,e_code) <- if from == W8 then getByteReg expr
                                          else getSomeReg expr
            let
                code dst =
                  e_code `snocOL`
                  instr (intSize from) (OpReg reg) (OpReg dst)
            return (Any (intSize to) code)

        toI8Reg :: Width -> CmmExpr -> NatM Register
        toI8Reg new_rep expr
            = do codefn <- getAnyReg expr
                 return (Any (intSize new_rep) codefn)
                -- HACK: use getAnyReg to get a byte-addressable register.
                -- If the source was a Fixed register, this will add the
                -- mov instruction to put it into the desired destination.
                -- We're assuming that the destination won't be a fixed
                -- non-byte-addressable register; it won't be, because all
                -- fixed registers are word-sized.

        toI16Reg = toI8Reg -- for now

        conversionNop :: Size -> CmmExpr -> NatM Register
        conversionNop new_size expr
            = do e_code <- getRegister' dflags is32Bit expr
                 return (swizzleRegisterRep e_code new_size)


getRegister' _ is32Bit (CmmMachOp mop [x, y]) = do -- dyadic MachOps
  sse2 <- sse2Enabled
  case mop of
      MO_F_Eq _ -> condFltReg is32Bit EQQ x y
      MO_F_Ne _ -> condFltReg is32Bit NE  x y
      MO_F_Gt _ -> condFltReg is32Bit GTT x y
      MO_F_Ge _ -> condFltReg is32Bit GE  x y
      MO_F_Lt _ -> condFltReg is32Bit LTT x y
      MO_F_Le _ -> condFltReg is32Bit LE  x y

      MO_Eq _   -> condIntReg EQQ x y
      MO_Ne _   -> condIntReg NE  x y

      MO_S_Gt _ -> condIntReg GTT x y
      MO_S_Ge _ -> condIntReg GE  x y
      MO_S_Lt _ -> condIntReg LTT x y
      MO_S_Le _ -> condIntReg LE  x y

      MO_U_Gt _ -> condIntReg GU  x y
      MO_U_Ge _ -> condIntReg GEU x y
      MO_U_Lt _ -> condIntReg LU  x y
      MO_U_Le _ -> condIntReg LEU x y

      MO_F_Add w  | sse2      -> trivialFCode_sse2 w ADD  x y
                  | otherwise -> trivialFCode_x87    GADD x y
      MO_F_Sub w  | sse2      -> trivialFCode_sse2 w SUB  x y
                  | otherwise -> trivialFCode_x87    GSUB x y
      MO_F_Quot w | sse2      -> trivialFCode_sse2 w FDIV x y
                  | otherwise -> trivialFCode_x87    GDIV x y
      MO_F_Mul w  | sse2      -> trivialFCode_sse2 w MUL x y
                  | otherwise -> trivialFCode_x87    GMUL x y

      MO_Add rep -> add_code rep x y
      MO_Sub rep -> sub_code rep x y

      MO_S_Quot rep -> div_code rep True  True  x y
      MO_S_Rem  rep -> div_code rep True  False x y
      MO_U_Quot rep -> div_code rep False True  x y
      MO_U_Rem  rep -> div_code rep False False x y

      MO_S_MulMayOflo rep -> imulMayOflo rep x y

      MO_Mul rep -> triv_op rep IMUL
      MO_And rep -> triv_op rep AND
      MO_Or  rep -> triv_op rep OR
      MO_Xor rep -> triv_op rep XOR

        {- Shift ops on x86s have constraints on their source: it
           has to be an Imm, CL or 1
            => trivialCode is not restrictive enough (sigh.)
        -}
      MO_Shl rep   -> shift_code rep SHL x y {-False-}
      MO_U_Shr rep -> shift_code rep SHR x y {-False-}
      MO_S_Shr rep -> shift_code rep SAR x y {-False-}

      _other -> pprPanic "getRegister(x86) - binary CmmMachOp (1)" (pprMachOp mop)
  where
    --------------------
    triv_op width instr = trivialCode width op (Just op) x y
                        where op   = instr (intSize width)

    imulMayOflo :: Width -> CmmExpr -> CmmExpr -> NatM Register
    imulMayOflo rep a b = do
         (a_reg, a_code) <- getNonClobberedReg a
         b_code <- getAnyReg b
         let
             shift_amt  = case rep of
                           W32 -> 31
                           W64 -> 63
                           _ -> panic "shift_amt"

             size = intSize rep
             code = a_code `appOL` b_code eax `appOL`
                        toOL [
                           IMUL2 size (OpReg a_reg),   -- result in %edx:%eax
                           SAR size (OpImm (ImmInt shift_amt)) (OpReg eax),
                                -- sign extend lower part
                           SUB size (OpReg edx) (OpReg eax)
                                -- compare against upper
                           -- eax==0 if high part == sign extended low part
                        ]
         return (Fixed size eax code)

    --------------------
    shift_code :: Width
               -> (Size -> Operand -> Operand -> Instr)
               -> CmmExpr
               -> CmmExpr
               -> NatM Register

    {- Case1: shift length as immediate -}
    shift_code width instr x (CmmLit lit) = do
          x_code <- getAnyReg x
          let
               size = intSize width
               code dst
                  = x_code dst `snocOL`
                    instr size (OpImm (litToImm lit)) (OpReg dst)
          return (Any size code)

    {- Case2: shift length is complex (non-immediate)
      * y must go in %ecx.
      * we cannot do y first *and* put its result in %ecx, because
        %ecx might be clobbered by x.
      * if we do y second, then x cannot be
        in a clobbered reg.  Also, we cannot clobber x's reg
        with the instruction itself.
      * so we can either:
        - do y first, put its result in a fresh tmp, then copy it to %ecx later
        - do y second and put its result into %ecx.  x gets placed in a fresh
          tmp.  This is likely to be better, because the reg alloc can
          eliminate this reg->reg move here (it won't eliminate the other one,
          because the move is into the fixed %ecx).
    -}
    shift_code width instr x y{-amount-} = do
        x_code <- getAnyReg x
        let size = intSize width
        tmp <- getNewRegNat size
        y_code <- getAnyReg y
        let
           code = x_code tmp `appOL`
                  y_code ecx `snocOL`
                  instr size (OpReg ecx) (OpReg tmp)
        return (Fixed size tmp code)

    --------------------
    add_code :: Width -> CmmExpr -> CmmExpr -> NatM Register
    add_code rep x (CmmLit (CmmInt y _))
        | is32BitInteger y = add_int rep x y
    add_code rep x y = trivialCode rep (ADD size) (Just (ADD size)) x y
      where size = intSize rep

    --------------------
    sub_code :: Width -> CmmExpr -> CmmExpr -> NatM Register
    sub_code rep x (CmmLit (CmmInt y _))
        | is32BitInteger (-y) = add_int rep x (-y)
    sub_code rep x y = trivialCode rep (SUB (intSize rep)) Nothing x y

    -- our three-operand add instruction:
    add_int width x y = do
        (x_reg, x_code) <- getSomeReg x
        let
            size = intSize width
            imm = ImmInt (fromInteger y)
            code dst
               = x_code `snocOL`
                 LEA size
                        (OpAddr (AddrBaseIndex (EABaseReg x_reg) EAIndexNone imm))
                        (OpReg dst)
        --
        return (Any size code)

    ----------------------
    div_code width signed quotient x y = do
           (y_op, y_code) <- getRegOrMem y -- cannot be clobbered
           x_code <- getAnyReg x
           let
             size = intSize width
             widen | signed    = CLTD size
                   | otherwise = XOR size (OpReg edx) (OpReg edx)

             instr | signed    = IDIV
                   | otherwise = DIV

             code = y_code `appOL`
                    x_code eax `appOL`
                    toOL [widen, instr size y_op]

             result | quotient  = eax
                    | otherwise = edx

           return (Fixed size result code)


getRegister' _ _ (CmmLoad mem pk)
  | isFloatType pk
  = do
    Amode addr mem_code <- getAmode mem
    use_sse2 <- sse2Enabled
    loadFloatAmode use_sse2 (typeWidth pk) addr mem_code

getRegister' _ is32Bit (CmmLoad mem pk)
  | is32Bit && not (isWord64 pk)
  = do
    code <- intLoadCode instr mem
    return (Any size code)
  where
    width = typeWidth pk
    size = intSize width
    instr = case width of
                W8     -> MOVZxL II8
                _other -> MOV size
        -- We always zero-extend 8-bit loads, if we
        -- can't think of anything better.  This is because
        -- we can't guarantee access to an 8-bit variant of every register
        -- (esi and edi don't have 8-bit variants), so to make things
        -- simpler we do our 8-bit arithmetic with full 32-bit registers.

-- Simpler memory load code on x86_64
getRegister' _ is32Bit (CmmLoad mem pk)
 | not is32Bit
  = do
    code <- intLoadCode (MOV size) mem
    return (Any size code)
  where size = intSize $ typeWidth pk

getRegister' _ is32Bit (CmmLit (CmmInt 0 width))
  = let
        size = intSize width

        -- x86_64: 32-bit xor is one byte shorter, and zero-extends to 64 bits
        size1 = if is32Bit then size
                           else case size of
                                II64 -> II32
                                _ -> size
        code dst
           = unitOL (XOR size1 (OpReg dst) (OpReg dst))
    in
        return (Any size code)

  -- optimisation for loading small literals on x86_64: take advantage
  -- of the automatic zero-extension from 32 to 64 bits, because the 32-bit
  -- instruction forms are shorter.
getRegister' dflags is32Bit (CmmLit lit)
  | not is32Bit, isWord64 (cmmLitType dflags lit), not (isBigLit lit)
  = let
        imm = litToImm lit
        code dst = unitOL (MOV II32 (OpImm imm) (OpReg dst))
    in
        return (Any II64 code)
  where
   isBigLit (CmmInt i _) = i < 0 || i > 0xffffffff
   isBigLit _ = False
        -- note1: not the same as (not.is32BitLit), because that checks for
        -- signed literals that fit in 32 bits, but we want unsigned
        -- literals here.
        -- note2: all labels are small, because we're assuming the
        -- small memory model (see gcc docs, -mcmodel=small).

getRegister' dflags _ (CmmLit lit)
  = do let size = cmmTypeSize (cmmLitType dflags lit)
           imm = litToImm lit
           code dst = unitOL (MOV size (OpImm imm) (OpReg dst))
       return (Any size code)

getRegister' _ _ other = pprPanic "getRegister(x86)" (ppr other)


intLoadCode :: (Operand -> Operand -> Instr) -> CmmExpr
   -> NatM (Reg -> InstrBlock)
intLoadCode instr mem = do
  Amode src mem_code <- getAmode mem
  return (\dst -> mem_code `snocOL` instr (OpAddr src) (OpReg dst))

-- Compute an expression into *any* register, adding the appropriate
-- move instruction if necessary.
getAnyReg :: CmmExpr -> NatM (Reg -> InstrBlock)
getAnyReg expr = do
  r <- getRegister expr
  anyReg r

anyReg :: Register -> NatM (Reg -> InstrBlock)
anyReg (Any _ code)          = return code
anyReg (Fixed rep reg fcode) = return (\dst -> fcode `snocOL` reg2reg rep reg dst)

-- A bit like getSomeReg, but we want a reg that can be byte-addressed.
-- Fixed registers might not be byte-addressable, so we make sure we've
-- got a temporary, inserting an extra reg copy if necessary.
getByteReg :: CmmExpr -> NatM (Reg, InstrBlock)
getByteReg expr = do
  is32Bit <- is32BitPlatform
  if is32Bit
      then do r <- getRegister expr
              case r of
                Any rep code -> do
                    tmp <- getNewRegNat rep
                    return (tmp, code tmp)
                Fixed rep reg code
                    | isVirtualReg reg -> return (reg,code)
                    | otherwise -> do
                        tmp <- getNewRegNat rep
                        return (tmp, code `snocOL` reg2reg rep reg tmp)
                    -- ToDo: could optimise slightly by checking for
                    -- byte-addressable real registers, but that will
                    -- happen very rarely if at all.
      else getSomeReg expr -- all regs are byte-addressable on x86_64

-- Another variant: this time we want the result in a register that cannot
-- be modified by code to evaluate an arbitrary expression.
getNonClobberedReg :: CmmExpr -> NatM (Reg, InstrBlock)
getNonClobberedReg expr = do
  dflags <- getDynFlags
  r <- getRegister expr
  case r of
    Any rep code -> do
        tmp <- getNewRegNat rep
        return (tmp, code tmp)
    Fixed rep reg code
        -- only certain regs can be clobbered
        | reg `elem` instrClobberedRegs (targetPlatform dflags)
        -> do
                tmp <- getNewRegNat rep
                return (tmp, code `snocOL` reg2reg rep reg tmp)
        | otherwise ->
                return (reg, code)

reg2reg :: Size -> Reg -> Reg -> Instr
reg2reg size src dst
  | size == FF80 = GMOV src dst
  | otherwise    = MOV size (OpReg src) (OpReg dst)


--------------------------------------------------------------------------------
getAmode :: CmmExpr -> NatM Amode
getAmode e = do is32Bit <- is32BitPlatform
                getAmode' is32Bit e

getAmode' :: Bool -> CmmExpr -> NatM Amode
getAmode' _ (CmmRegOff r n) = do dflags <- getDynFlags
                                 getAmode $ mangleIndexTree dflags r n

getAmode' is32Bit (CmmMachOp (MO_Add W64) [CmmReg (CmmGlobal PicBaseReg),
                                                  CmmLit displacement])
 | not is32Bit
    = return $ Amode (ripRel (litToImm displacement)) nilOL


-- This is all just ridiculous, since it carefully undoes
-- what mangleIndexTree has just done.
getAmode' is32Bit (CmmMachOp (MO_Sub _rep) [x, CmmLit lit@(CmmInt i _)])
  | is32BitLit is32Bit lit
  -- ASSERT(rep == II32)???
  = do (x_reg, x_code) <- getSomeReg x
       let off = ImmInt (-(fromInteger i))
       return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)

getAmode' is32Bit (CmmMachOp (MO_Add _rep) [x, CmmLit lit])
  | is32BitLit is32Bit lit