CodeGen.hs 114 KB
Newer Older
1 2
{-# LANGUAGE CPP, GADTs, NondecreasingIndentation #-}

3 4 5 6 7 8 9 10 11
-----------------------------------------------------------------------------
--
-- Generating machine code (instruction selection)
--
-- (c) The University of Glasgow 1996-2004
--
-----------------------------------------------------------------------------

-- This is a big module, but, if you pay attention to
Ian Lynagh's avatar
Ian Lynagh committed
12
-- (a) the sectioning, and (b) the type signatures, the
13 14
-- structure should not be too overwhelming.

15 16 17 18 19
module X86.CodeGen (
        cmmTopCodeGen,
        generateJumpTableForInstr,
        InstrBlock
)
20 21 22 23 24

where

#include "HsVersions.h"
#include "nativeGen/NCG.h"
Simon Marlow's avatar
Simon Marlow committed
25
#include "../includes/MachDeps.h"
26 27 28 29 30 31

-- NCG stuff:
import X86.Instr
import X86.Cond
import X86.Regs
import X86.RegInfo
32
import CodeGen.Platform
tibbe's avatar
tibbe committed
33
import CPrim
34
import Debug            ( DebugBlock(..) )
35 36 37 38 39 40 41 42 43 44
import Instruction
import PIC
import NCGMonad
import Size
import Reg
import Platform

-- Our intermediate code:
import BasicTypes
import BlockId
45
import Module           ( primPackageKey )
46
import PprCmm           ()
47 48 49
import CmmUtils
import Cmm
import Hoopl
50
import CLabel
51 52
import CoreSyn          ( Tickish(..) )
import SrcLoc           ( srcSpanFile, srcSpanStartLine, srcSpanStartCol )
53 54

-- The rest:
55
import ForeignCall      ( CCallConv(..) )
56 57
import OrdList
import Outputable
58
import Unique
59
import FastString
60
import FastBool         ( isFastTrue )
61
import DynFlags
62
import Util
63

64
import Control.Monad
tibbe's avatar
tibbe committed
65
import Data.Bits
66
import Data.Int
67
import Data.Maybe
dterei's avatar
dterei committed
68 69
import Data.Word

Ian Lynagh's avatar
Ian Lynagh committed
70 71
-- | Are we generating code for a 32-bit target?  Read from the DynFlags.
is32BitPlatform :: NatM Bool
is32BitPlatform = fmap (target32Bit . targetPlatform) getDynFlags

75 76
-- | Whether SSE2 code generation is enabled in the current DynFlags.
sse2Enabled :: NatM Bool
sse2Enabled = fmap isSse2Enabled getDynFlags
79

tibbe's avatar
tibbe committed
80 81
-- | Whether SSE4.2 code generation is enabled in the current DynFlags.
sse4_2Enabled :: NatM Bool
sse4_2Enabled = fmap isSse4_2Enabled getDynFlags
tibbe's avatar
tibbe committed
84

85 86 87 88
-- | Run the first generator when SSE2 is enabled, otherwise the
--      second (x87) one.
if_sse2 :: NatM a -> NatM a -> NatM a
if_sse2 sse2 x87 = sse2Enabled >>= \use_sse2 ->
    if use_sse2 then sse2 else x87
89

90
-- | Code-generate one top-level Cmm declaration.
--      Procedures are split into basic blocks, each block is selected
--      into native instructions, and (on platforms that need it) the
--      PIC base register is initialised.  Data sections pass through
--      untranslated, picking up a default alignment of 1.
cmmTopCodeGen
        :: RawCmmDecl
        -> NatM [NatCmmDecl (Alignment, CmmStatics) Instr]

cmmTopCodeGen (CmmProc info lab live graph) = do
  -- keep the entry block first; codegen of the blocks may emit extra statics
  (nat_blocks, statics) <- mapAndUnzipM basicBlockCodeGen
                                        (toBlockListEntryFirst graph)
  picBaseMb <- getPicBaseMaybeNat
  dflags    <- getDynFlags
  let tops = CmmProc info lab live (ListGraph (concat nat_blocks))
               : concat statics
      os   = platformOS (targetPlatform dflags)
  -- when a PIC base register was demanded, prepend its initialisation
  maybe (return tops)
        (\picBase -> initializePicBase_x86 ArchX86 os picBase tops)
        picBaseMb

cmmTopCodeGen (CmmData sec dat) =
  return [CmmData sec (1, dat)]  -- no translation, we just use CmmStatic
109 110


111
-- | Code-generate a single Cmm basic block.
--      Returns the selected block(s) — instruction selection can introduce
--      new block boundaries via NEWBLOCK — plus any static data (LDATA)
--      that was emitted along the way.
basicBlockCodeGen
        :: CmmBlock
        -> NatM ( [NatBasicBlock Instr]
                , [NatCmmDecl (Alignment, CmmStatics) Instr])

basicBlockCodeGen block = do
  -- NB: 'tail' and 'id' here deliberately shadow the Prelude names.
  let (_, nodes, tail)  = blockSplit block
      id = entryLabel block
      stmts = blockToList nodes
  -- Generate location directive, if this block carries a source note
  dbg <- getDebugBlock (entryLabel block)
  loc_instrs <- case dblSourceTick =<< dbg of
    Just (SourceNote span name)
      -> do fileId <- getFileId (srcSpanFile span)
            let line = srcSpanStartLine span; col = srcSpanStartCol span
            return $ unitOL $ LOCATION fileId line col name
    _ -> return nilOL
  mid_instrs <- stmtsToInstrs stmts
  tail_instrs <- stmtToInstrs tail
  let instrs = loc_instrs `appOL` mid_instrs `appOL` tail_instrs
  -- code generation may introduce new basic block boundaries, which
  -- are indicated by the NEWBLOCK instruction.  We must split up the
  -- instruction stream into basic blocks again.  Also, we extract
  -- LDATAs here too.
  let
        -- right fold so that instructions accumulate onto the block
        -- currently being built, in order
        (top,other_blocks,statics) = foldrOL mkBlocks ([],[],[]) instrs

        mkBlocks (NEWBLOCK id) (instrs,blocks,statics)
          = ([], BasicBlock id instrs : blocks, statics)
        mkBlocks (LDATA sec dat) (instrs,blocks,statics)
          = (instrs, blocks, CmmData sec dat:statics)
        mkBlocks instr (instrs,blocks,statics)
          = (instr:instrs, blocks, statics)
  return (BasicBlock id top : other_blocks, statics)


147
-- | Code-generate a list of Cmm nodes, concatenating the results in order.
stmtsToInstrs :: [CmmNode e x] -> NatM InstrBlock
stmtsToInstrs = liftM concatOL . mapM stmtToInstrs


153
-- | Code-generate one Cmm node, dispatching on its constructor.
--      Ticks and unwind pseudo-nodes generate no code here; any
--      statement form not listed should have been eliminated by the
--      CPS conversion, hence the final panic.
stmtToInstrs :: CmmNode e x -> NatM InstrBlock
stmtToInstrs stmt = do
  dflags <- getDynFlags
  is32Bit <- is32BitPlatform
  case stmt of
    CmmComment s   -> return (unitOL (COMMENT s))
    CmmTick {}     -> return nilOL
    CmmUnwind {}   -> return nilOL

    -- register assignment: float, 64-bit-on-32-bit, and plain integer
    -- each take a different path
    CmmAssign reg src
      | isFloatType ty         -> assignReg_FltCode size reg src
      | is32Bit && isWord64 ty -> assignReg_I64Code      reg src
      | otherwise              -> assignReg_IntCode size reg src
        where ty = cmmRegType dflags reg
              size = cmmTypeSize ty

    -- memory store: same three-way split as CmmAssign
    CmmStore addr src
      | isFloatType ty         -> assignMem_FltCode size addr src
      | is32Bit && isWord64 ty -> assignMem_I64Code      addr src
      | otherwise              -> assignMem_IntCode size addr src
        where ty = cmmExprType dflags src
              size = cmmTypeSize ty

    CmmUnsafeForeignCall target result_regs args
       -> genCCall dflags is32Bit target result_regs args

    CmmBranch id          -> genBranch id
    CmmCondBranch arg true false -> do b1 <- genCondJump true arg
                                       b2 <- genBranch false
                                       return (b1 `appOL` b2)
    -- the inner 'dflags' bindings below re-fetch the same value
    CmmSwitch arg ids     -> do dflags <- getDynFlags
                                genSwitch dflags arg ids
    CmmCall { cml_target = arg
            , cml_args_regs = gregs } -> do
                                dflags <- getDynFlags
                                genJump arg (jumpRegs dflags gregs)
    _ ->
      panic "stmtToInstrs: statement should have been cps'd away"
191 192


193 194
-- | The real registers (if any) that the given global registers map to
--      on this platform; globals with no machine register are dropped.
jumpRegs :: DynFlags -> [GlobalReg] -> [Reg]
jumpRegs dflags gregs = map RegReal (mapMaybe (globalRegMaybe platform) gregs)
    where platform = targetPlatform dflags
196

197 198
--------------------------------------------------------------------------------
-- | 'InstrBlock's are the insn sequences generated by the insn selectors.
--      They are really trees of insns to facilitate fast appending, where a
--      left-to-right traversal yields the insns in the correct order.
--
type InstrBlock
        = OrdList Instr
204 205 206 207


-- | Condition codes passed up the tree.
--      NOTE(review): the Bool appears to distinguish floating-point
--      comparisons from integer ones — confirm at the use sites.
--
data CondCode
        = CondCode Bool Cond InstrBlock
210 211 212


-- | a.k.a "Register64"
--      Reg is the lower 32-bit temporary which contains the result.
--      Use getHiVRegFromLo to find the other VRegUnique holding the
--      upper 32 bits.
--
--      Rules of this simplified insn selection game are therefore that
--      the returned Reg may be modified
--
data ChildCode64
   = ChildCode64
        InstrBlock      -- code to compute the 64-bit value
        Reg             -- low half; high half via getHiVRegFromLo
223 224 225


-- | Registers passed up the tree.  If the stix code forces the register
--      to live in a pre-decided machine register, it comes out as @Fixed@;
--      otherwise, it comes out as @Any@, and the parent can decide which
--      register to put it in.
--
data Register
        = Fixed Size Reg InstrBlock     -- value lives in this exact reg
        | Any   Size (Reg -> InstrBlock) -- caller picks the destination reg
233 234 235 236 237 238 239 240


-- | Change the 'Size' a 'Register' claims to have, leaving its code
--      and register untouched.
swizzleRegisterRep :: Register -> Size -> Register
swizzleRegisterRep r size = case r of
    Fixed _ reg code -> Fixed size reg code
    Any   _ codefn   -> Any   size codefn


-- | Grab the Reg for a CmmReg.
--      Local float regs become FF80 virtuals when SSE2 is off (x87 path);
--      globals must map to a real machine register at this stage.
getRegisterReg :: Platform -> Bool -> CmmReg -> Reg

getRegisterReg _ use_sse2 (CmmLocal (LocalReg u pk))
  | isFloatSize sz && not use_sse2 = RegVirtual (mkVirtualReg u FF80)
  | otherwise                      = RegVirtual (mkVirtualReg u sz)
  where
    sz = cmmTypeSize pk

getRegisterReg platform _ (CmmGlobal mid)
  = maybe err RegReal (globalRegMaybe platform mid)
  where
    -- By this stage, the only MagicIds remaining should be the
    -- ones which map to a real machine register on this
    -- platform.  Hence the panic for anything else.
    err = pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid)
256 257 258


-- | Memory addressing modes passed up the tree.
--      The InstrBlock computes any registers the AddrMode refers to;
--      see the usage rule in the comment below.
data Amode
        = Amode AddrMode InstrBlock
261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281

{-
Now, given a tree (the argument to an CmmLoad) that references memory,
produce a suitable addressing mode.

A Rule of the Game (tm) for Amodes: use of the addr bit must
immediately follow use of the code part, since the code part puts
values in registers which the addr then refers to.  So you can't put
anything in between, lest it overwrite some of those registers.  If
you need to do some other computation between the code part and use of
the addr bit, first store the effective address from the amode in a
temporary, then do the other computation, and then use the temporary:

    code
    LEA amode, tmp
    ... other computation ...
    ... (tmp) ...
-}


-- | Check whether an integer will fit in 32 bits.
--      A CmmInt is intended to be truncated to the appropriate
--      number of bits, so here we truncate it to Int64 first.  This is
--      important because e.g. -1 as a CmmInt might be either
--      -1 or 18446744073709551615.
--
is32BitInteger :: Integer -> Bool
is32BitInteger i = lo <= truncated && truncated <= hi
  where
    truncated = fromIntegral i :: Int64
    lo = -0x80000000
    hi =  0x7fffffff


-- | Convert a BlockId to some CmmStatic data.
--      A missing entry becomes a word-sized zero literal.
jumpTableEntry :: DynFlags -> Maybe BlockId -> CmmStatic
jumpTableEntry dflags mb_block =
    case mb_block of
      Nothing      -> CmmStaticLit (CmmInt 0 (wordWidth dflags))
      Just blockid -> CmmStaticLit (CmmLabel (mkAsmTempLabel (getUnique blockid)))
297 298 299 300 301 302 303


-- -----------------------------------------------------------------------------
-- General things for putting together code sequences

-- Expand CmmRegOff into an explicit addition.  ToDo: should we do it
-- this way around, or convert CmmExprs into CmmRegOff?
mangleIndexTree :: DynFlags -> CmmReg -> Int -> CmmExpr
mangleIndexTree dflags reg off = CmmMachOp (MO_Add width) [CmmReg reg, lit]
  where
    width = typeWidth (cmmRegType dflags reg)
    lit   = CmmLit (CmmInt (fromIntegral off) width)
308 309

-- | The dual to getAnyReg: compute an expression into a register, but
--      we don't mind which one it is.
getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
getSomeReg expr = do
  result <- getRegister expr
  case result of
    Fixed _ reg code -> return (reg, code)
    Any rep code -> do
        -- allocate a fresh register of the required class
        tmp <- getNewRegNat rep
        return (tmp, code tmp)
320 321 322 323 324 325


-- | Store a 64-bit value to memory on a 32-bit target, as two
--      32-bit stores (little-endian: low word first, high at addr+4).
assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock
assignMem_I64Code addrTree valueTree = do
  Amode addr addr_code  <- getAmode addrTree
  ChildCode64 vcode rlo <- iselExpr64 valueTree
  let rhi      = getHiVRegFromLo rlo
      store_lo = MOV II32 (OpReg rlo) (OpAddr addr)
      store_hi = MOV II32 (OpReg rhi) (OpAddr (fromJust (addrOffset addr 4)))
  -- value code first, then address code, per the Amode usage rule
  return (vcode `appOL` addr_code `snocOL` store_lo `snocOL` store_hi)


-- | Assign a 64-bit value to a local register pair on a 32-bit target;
--      any other lvalue is a panic.
assignReg_I64Code :: CmmReg  -> CmmExpr -> NatM InstrBlock
assignReg_I64Code (CmmLocal (LocalReg u_dst _)) valueTree = do
   ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
   let dst_lo = RegVirtual $ mkVirtualReg u_dst II32
       dst_hi = getHiVRegFromLo dst_lo
       src_hi = getHiVRegFromLo r_src_lo
   return (vcode `snocOL` MOV II32 (OpReg r_src_lo) (OpReg dst_lo)
                 `snocOL` MOV II32 (OpReg src_hi)   (OpReg dst_hi))

assignReg_I64Code _ _
   = panic "assignReg_I64Code(i386): invalid lvalue"


-- | Select instructions for a 64-bit expression on a 32-bit target.
--      The result is threaded as low/high 32-bit register pairs; the
--      high register is always recovered via getHiVRegFromLo.
iselExpr64        :: CmmExpr -> NatM ChildCode64

-- literal: split into low and high 32-bit immediates
iselExpr64 (CmmLit (CmmInt i _)) = do
  (rlo,rhi) <- getNewRegPairNat II32
  let
        r = fromIntegral (fromIntegral i :: Word32)
        q = fromIntegral (fromIntegral (i `shiftR` 32) :: Word32)
        code = toOL [
                MOV II32 (OpImm (ImmInteger r)) (OpReg rlo),
                MOV II32 (OpImm (ImmInteger q)) (OpReg rhi)
                ]
  return (ChildCode64 code rlo)

-- load: two loads, low word at addr, high word at addr+4 (little-endian)
iselExpr64 (CmmLoad addrTree ty) | isWord64 ty = do
   Amode addr addr_code <- getAmode addrTree
   (rlo,rhi) <- getNewRegPairNat II32
   let
        mov_lo = MOV II32 (OpAddr addr) (OpReg rlo)
        mov_hi = MOV II32 (OpAddr (fromJust (addrOffset addr 4))) (OpReg rhi)
   return (
            ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
                        rlo
     )

-- a local 64-bit register: no code needed, just name its low half
iselExpr64 (CmmReg (CmmLocal (LocalReg vu ty))) | isWord64 ty
   = return (ChildCode64 nilOL (RegVirtual $ mkVirtualReg vu II32))

-- we handle addition, but rather badly
-- 64-bit add of a constant: ADD on the low words, ADC (with carry)
-- on the high words
iselExpr64 (CmmMachOp (MO_Add _) [e1, CmmLit (CmmInt i _)]) = do
   ChildCode64 code1 r1lo <- iselExpr64 e1
   (rlo,rhi) <- getNewRegPairNat II32
   let
        r = fromIntegral (fromIntegral i :: Word32)
        q = fromIntegral (fromIntegral (i `shiftR` 32) :: Word32)
        r1hi = getHiVRegFromLo r1lo
        code =  code1 `appOL`
                toOL [ MOV II32 (OpReg r1lo) (OpReg rlo),
                       ADD II32 (OpImm (ImmInteger r)) (OpReg rlo),
                       MOV II32 (OpReg r1hi) (OpReg rhi),
                       ADC II32 (OpImm (ImmInteger q)) (OpReg rhi) ]
   return (ChildCode64 code rlo)

-- general 64-bit add: same ADD/ADC pattern with both operands in regs
iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
   ChildCode64 code1 r1lo <- iselExpr64 e1
   ChildCode64 code2 r2lo <- iselExpr64 e2
   (rlo,rhi) <- getNewRegPairNat II32
   let
        r1hi = getHiVRegFromLo r1lo
        r2hi = getHiVRegFromLo r2lo
        code =  code1 `appOL`
                code2 `appOL`
                toOL [ MOV II32 (OpReg r1lo) (OpReg rlo),
                       ADD II32 (OpReg r2lo) (OpReg rlo),
                       MOV II32 (OpReg r1hi) (OpReg rhi),
                       ADC II32 (OpReg r2hi) (OpReg rhi) ]
   return (ChildCode64 code rlo)

-- 64-bit subtract: SUB on the low words, SBB (with borrow) on the high
iselExpr64 (CmmMachOp (MO_Sub _) [e1,e2]) = do
   ChildCode64 code1 r1lo <- iselExpr64 e1
   ChildCode64 code2 r2lo <- iselExpr64 e2
   (rlo,rhi) <- getNewRegPairNat II32
   let
        r1hi = getHiVRegFromLo r1lo
        r2hi = getHiVRegFromLo r2lo
        code =  code1 `appOL`
                code2 `appOL`
                toOL [ MOV II32 (OpReg r1lo) (OpReg rlo),
                       SUB II32 (OpReg r2lo) (OpReg rlo),
                       MOV II32 (OpReg r1hi) (OpReg rhi),
                       SBB II32 (OpReg r2hi) (OpReg rhi) ]
   return (ChildCode64 code rlo)

-- zero-extension to 64 bits: compute the value into the low register
-- and zero the high one
iselExpr64 (CmmMachOp (MO_UU_Conv _ W64) [expr]) = do
     fn <- getAnyReg expr
     r_dst_lo <-  getNewRegNat II32
     let r_dst_hi = getHiVRegFromLo r_dst_lo
         code = fn r_dst_lo
     return (
             ChildCode64 (code `snocOL`
                          MOV II32 (OpImm (ImmInt 0)) (OpReg r_dst_hi))
                          r_dst_lo
            )

iselExpr64 expr
   = pprPanic "iselExpr64(i386)" (ppr expr)
436 437 438 439


--------------------------------------------------------------------------------
-- | Entry point for expression selection: fetch the DynFlags and
--      word-size flag once, then dispatch to 'getRegister''.
getRegister :: CmmExpr -> NatM Register
getRegister e = do
  dflags  <- getDynFlags
  is32Bit <- is32BitPlatform
  getRegister' dflags is32Bit e
443

444
-- | The workhorse of instruction selection for expressions.
--      These first equations handle plain registers, register+offset,
--      and the 64->32 bit conversions that are special on 32-bit targets.
getRegister' :: DynFlags -> Bool -> CmmExpr -> NatM Register

getRegister' dflags is32Bit (CmmReg reg)
  = case reg of
        CmmGlobal PicBaseReg
         | is32Bit ->
            -- on x86_64, we have %rip for PicBaseReg, but it's not
            -- a full-featured register, it can only be used for
            -- rip-relative addressing.
            do reg' <- getPicBaseNat (archWordSize is32Bit)
               return (Fixed (archWordSize is32Bit) reg' nilOL)
        _ ->
            do use_sse2 <- sse2Enabled
               let
                 sz = cmmTypeSize (cmmRegType dflags reg)
                 -- without SSE2, floats live in x87 80-bit registers
                 size | not use_sse2 && isFloatSize sz = FF80
                      | otherwise                      = sz
               --
               let platform = targetPlatform dflags
               return (Fixed size (getRegisterReg platform use_sse2 reg) nilOL)


getRegister' dflags is32Bit (CmmRegOff r n)
  = getRegister' dflags is32Bit $ mangleIndexTree dflags r n

-- for 32-bit architectures, support some 64 -> 32 bit conversions:
-- TO_W_(x), TO_W_(x >> 32)

-- high word of a 64-bit value: just the high register of the pair
getRegister' _ is32Bit (CmmMachOp (MO_UU_Conv W64 W32)
                     [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]])
 | is32Bit = do
  ChildCode64 code rlo <- iselExpr64 x
  return $ Fixed II32 (getHiVRegFromLo rlo) code

getRegister' _ is32Bit (CmmMachOp (MO_SS_Conv W64 W32)
                     [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]])
 | is32Bit = do
  ChildCode64 code rlo <- iselExpr64 x
  return $ Fixed II32 (getHiVRegFromLo rlo) code

-- low word of a 64-bit value: the low register of the pair
getRegister' _ is32Bit (CmmMachOp (MO_UU_Conv W64 W32) [x])
 | is32Bit = do
  ChildCode64 code rlo <- iselExpr64 x
  return $ Fixed II32 rlo code

getRegister' _ is32Bit (CmmMachOp (MO_SS_Conv W64 W32) [x])
 | is32Bit = do
  ChildCode64 code rlo <- iselExpr64 x
  return $ Fixed II32 rlo code
493

494
-- Floating-point literals.  0.0 (and for x87 also 1.0) get dedicated
-- instruction sequences; anything else is loaded from a memory constant.
getRegister' _ _ (CmmLit lit@(CmmFloat f w)) =
  if_sse2 float_const_sse2 float_const_x87
 where
  float_const_sse2
    | f == 0.0 = do
      let
          size = floatSize w
          code dst = unitOL  (XOR size (OpReg dst) (OpReg dst))
        -- I don't know why there are xorpd, xorps, and pxor instructions.
        -- They all appear to do the same thing --SDM
      return (Any size code)

   | otherwise = do
      Amode addr code <- memConstant (widthInBytes w) lit
      loadFloatAmode True w addr code

  float_const_x87 = case w of
    W64
      | f == 0.0 ->
        let code dst = unitOL (GLDZ dst)
        in  return (Any FF80 code)

      | f == 1.0 ->
        let code dst = unitOL (GLD1 dst)
        in  return (Any FF80 code)

    _otherwise -> do
      Amode addr code <- memConstant (widthInBytes w) lit
      loadFloatAmode False w addr code
523 524

-- catch simple cases of zero- or sign-extended load, 32-bit results
getRegister' _ _ (CmmMachOp (MO_UU_Conv W8 W32) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVZxL II8) addr
  return (Any II32 code)

getRegister' _ _ (CmmMachOp (MO_SS_Conv W8 W32) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVSxL II8) addr
  return (Any II32 code)

getRegister' _ _ (CmmMachOp (MO_UU_Conv W16 W32) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVZxL II16) addr
  return (Any II32 code)

getRegister' _ _ (CmmMachOp (MO_SS_Conv W16 W32) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVSxL II16) addr
  return (Any II32 code)

-- catch simple cases of zero- or sign-extended load, 64-bit results
-- (64-bit targets only)
getRegister' _ is32Bit (CmmMachOp (MO_UU_Conv W8 W64) [CmmLoad addr _])
 | not is32Bit = do
  code <- intLoadCode (MOVZxL II8) addr
  return (Any II64 code)

getRegister' _ is32Bit (CmmMachOp (MO_SS_Conv W8 W64) [CmmLoad addr _])
 | not is32Bit = do
  code <- intLoadCode (MOVSxL II8) addr
  return (Any II64 code)

getRegister' _ is32Bit (CmmMachOp (MO_UU_Conv W16 W64) [CmmLoad addr _])
 | not is32Bit = do
  code <- intLoadCode (MOVZxL II16) addr
  return (Any II64 code)

getRegister' _ is32Bit (CmmMachOp (MO_SS_Conv W16 W64) [CmmLoad addr _])
 | not is32Bit = do
  code <- intLoadCode (MOVSxL II16) addr
  return (Any II64 code)

getRegister' _ is32Bit (CmmMachOp (MO_UU_Conv W32 W64) [CmmLoad addr _])
 | not is32Bit = do
  code <- intLoadCode (MOV II32) addr -- 32-bit loads zero-extend
  return (Any II64 code)

getRegister' _ is32Bit (CmmMachOp (MO_SS_Conv W32 W64) [CmmLoad addr _])
 | not is32Bit = do
  code <- intLoadCode (MOVSxL II32) addr
  return (Any II64 code)

-- PIC base plus a constant displacement: a single rip-relative LEA
getRegister' _ is32Bit (CmmMachOp (MO_Add W64) [CmmReg (CmmGlobal PicBaseReg),
                                     CmmLit displacement])
 | not is32Bit = do
      return $ Any II64 (\dst -> unitOL $
        LEA II64 (OpAddr (ripRel (litToImm displacement))) (OpReg dst))

578
-- Unary MachOps: negation, complement, and the various width and
-- float/int conversions.  Vector ops are only supported via LLVM.
getRegister' dflags is32Bit (CmmMachOp mop [x]) = do -- unary MachOps
    sse2 <- sse2Enabled
    case mop of
      MO_F_Neg w
         | sse2      -> sse2NegCode w x
         | otherwise -> trivialUFCode FF80 (GNEG FF80) x

      MO_S_Neg w -> triv_ucode NEGI (intSize w)
      MO_Not w   -> triv_ucode NOT  (intSize w)

      -- Nop conversions
      MO_UU_Conv W32 W8  -> toI8Reg  W32 x
      MO_SS_Conv W32 W8  -> toI8Reg  W32 x
      MO_UU_Conv W16 W8  -> toI8Reg  W16 x
      MO_SS_Conv W16 W8  -> toI8Reg  W16 x
      MO_UU_Conv W32 W16 -> toI16Reg W32 x
      MO_SS_Conv W32 W16 -> toI16Reg W32 x

      MO_UU_Conv W64 W32 | not is32Bit -> conversionNop II64 x
      MO_SS_Conv W64 W32 | not is32Bit -> conversionNop II64 x
      MO_UU_Conv W64 W16 | not is32Bit -> toI16Reg W64 x
      MO_SS_Conv W64 W16 | not is32Bit -> toI16Reg W64 x
      MO_UU_Conv W64 W8  | not is32Bit -> toI8Reg  W64 x
      MO_SS_Conv W64 W8  | not is32Bit -> toI8Reg  W64 x

      MO_UU_Conv rep1 rep2 | rep1 == rep2 -> conversionNop (intSize rep1) x
      MO_SS_Conv rep1 rep2 | rep1 == rep2 -> conversionNop (intSize rep1) x

      -- widenings
      MO_UU_Conv W8  W32 -> integerExtend W8  W32 MOVZxL x
      MO_UU_Conv W16 W32 -> integerExtend W16 W32 MOVZxL x
      MO_UU_Conv W8  W16 -> integerExtend W8  W16 MOVZxL x

      MO_SS_Conv W8  W32 -> integerExtend W8  W32 MOVSxL x
      MO_SS_Conv W16 W32 -> integerExtend W16 W32 MOVSxL x
      MO_SS_Conv W8  W16 -> integerExtend W8  W16 MOVSxL x

      MO_UU_Conv W8  W64 | not is32Bit -> integerExtend W8  W64 MOVZxL x
      MO_UU_Conv W16 W64 | not is32Bit -> integerExtend W16 W64 MOVZxL x
      MO_UU_Conv W32 W64 | not is32Bit -> integerExtend W32 W64 MOVZxL x
      MO_SS_Conv W8  W64 | not is32Bit -> integerExtend W8  W64 MOVSxL x
      MO_SS_Conv W16 W64 | not is32Bit -> integerExtend W16 W64 MOVSxL x
      MO_SS_Conv W32 W64 | not is32Bit -> integerExtend W32 W64 MOVSxL x
        -- for 32-to-64 bit zero extension, amd64 uses an ordinary movl.
        -- However, we don't want the register allocator to throw it
        -- away as an unnecessary reg-to-reg move, so we keep it in
        -- the form of a movzl and print it as a movl later.

      MO_FF_Conv W32 W64
        | sse2      -> coerceFP2FP W64 x
        | otherwise -> conversionNop FF80 x

      MO_FF_Conv W64 W32 -> coerceFP2FP W32 x

      MO_FS_Conv from to -> coerceFP2Int from to x
      MO_SF_Conv from to -> coerceInt2FP from to x

      MO_V_Insert {}   -> needLlvm
      MO_V_Extract {}  -> needLlvm
      MO_V_Add {}      -> needLlvm
      MO_V_Sub {}      -> needLlvm
      MO_V_Mul {}      -> needLlvm
      MO_VS_Quot {}    -> needLlvm
      MO_VS_Rem {}     -> needLlvm
      MO_VS_Neg {}     -> needLlvm
      MO_VU_Quot {}    -> needLlvm
      MO_VU_Rem {}     -> needLlvm
      MO_VF_Insert {}  -> needLlvm
      MO_VF_Extract {} -> needLlvm
      MO_VF_Add {}     -> needLlvm
      MO_VF_Sub {}     -> needLlvm
      MO_VF_Mul {}     -> needLlvm
      MO_VF_Quot {}    -> needLlvm
      MO_VF_Neg {}     -> needLlvm

      _other -> pprPanic "getRegister" (pprMachOp mop)
   where
        -- a one-instruction unary op at the given size
        triv_ucode :: (Size -> Operand -> Instr) -> Size -> NatM Register
        triv_ucode instr size = trivialUCode size (instr size) x

        -- signed or unsigned extension.
        integerExtend :: Width -> Width
                      -> (Size -> Operand -> Operand -> Instr)
                      -> CmmExpr -> NatM Register
        integerExtend from to instr expr = do
            (reg,e_code) <- if from == W8 then getByteReg expr
                                          else getSomeReg expr
            let
                code dst =
                  e_code `snocOL`
                  instr (intSize from) (OpReg reg) (OpReg dst)
            return (Any (intSize to) code)

        toI8Reg :: Width -> CmmExpr -> NatM Register
        toI8Reg new_rep expr
            = do codefn <- getAnyReg expr
                 return (Any (intSize new_rep) codefn)
                -- HACK: use getAnyReg to get a byte-addressable register.
                -- If the source was a Fixed register, this will add the
                -- mov instruction to put it into the desired destination.
                -- We're assuming that the destination won't be a fixed
                -- non-byte-addressable register; it won't be, because all
                -- fixed registers are word-sized.

        toI16Reg = toI8Reg -- for now

        -- reuse the operand's code, merely relabelling its size
        conversionNop :: Size -> CmmExpr -> NatM Register
        conversionNop new_size expr
            = do e_code <- getRegister' dflags is32Bit expr
                 return (swizzleRegisterRep e_code new_size)


690
getRegister' _ is32Bit (CmmMachOp mop [x, y]) = do -- dyadic MachOps
691 692
  sse2 <- sse2Enabled
  case mop of
Ian Lynagh's avatar
Ian Lynagh committed
693 694 695 696 697 698
      MO_F_Eq _ -> condFltReg is32Bit EQQ x y
      MO_F_Ne _ -> condFltReg is32Bit NE  x y
      MO_F_Gt _ -> condFltReg is32Bit GTT x y
      MO_F_Ge _ -> condFltReg is32Bit GE  x y
      MO_F_Lt _ -> condFltReg is32Bit LTT x y
      MO_F_Le _ -> condFltReg is32Bit LE  x y
dterei's avatar
dterei committed
699 700 701 702 703 704 705 706 707 708 709 710 711

      MO_Eq _   -> condIntReg EQQ x y
      MO_Ne _   -> condIntReg NE  x y

      MO_S_Gt _ -> condIntReg GTT x y
      MO_S_Ge _ -> condIntReg GE  x y
      MO_S_Lt _ -> condIntReg LTT x y
      MO_S_Le _ -> condIntReg LE  x y

      MO_U_Gt _ -> condIntReg GU  x y
      MO_U_Ge _ -> condIntReg GEU x y
      MO_U_Lt _ -> condIntReg LU  x y
      MO_U_Le _ -> condIntReg LEU x y
712

713
      MO_F_Add w  | sse2      -> trivialFCode_sse2 w ADD  x y
dterei's avatar
dterei committed
714
                  | otherwise -> trivialFCode_x87    GADD x y
715
      MO_F_Sub w  | sse2      -> trivialFCode_sse2 w SUB  x y
dterei's avatar
dterei committed
716
                  | otherwise -> trivialFCode_x87    GSUB x y
717
      MO_F_Quot w | sse2      -> trivialFCode_sse2 w FDIV x y
dterei's avatar
dterei committed
718
                  | otherwise -> trivialFCode_x87    GDIV x y
719
      MO_F_Mul w  | sse2      -> trivialFCode_sse2 w MUL x y
dterei's avatar
dterei committed
720
                  | otherwise -> trivialFCode_x87    GMUL x y
721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736

      MO_Add rep -> add_code rep x y
      MO_Sub rep -> sub_code rep x y

      MO_S_Quot rep -> div_code rep True  True  x y
      MO_S_Rem  rep -> div_code rep True  False x y
      MO_U_Quot rep -> div_code rep False True  x y
      MO_U_Rem  rep -> div_code rep False False x y

      MO_S_MulMayOflo rep -> imulMayOflo rep x y

      MO_Mul rep -> triv_op rep IMUL
      MO_And rep -> triv_op rep AND
      MO_Or  rep -> triv_op rep OR
      MO_Xor rep -> triv_op rep XOR

737 738 739 740
        {- Shift ops on x86s have constraints on their source, it
           either has to be Imm, CL or 1
            => trivialCode is not restrictive enough (sigh.)
        -}
741 742 743 744
      MO_Shl rep   -> shift_code rep SHL x y {-False-}
      MO_U_Shr rep -> shift_code rep SHR x y {-False-}
      MO_S_Shr rep -> shift_code rep SAR x y {-False-}

745 746 747 748 749 750 751 752 753 754 755 756 757 758 759
      MO_V_Insert {}   -> needLlvm
      MO_V_Extract {}  -> needLlvm
      MO_V_Add {}      -> needLlvm
      MO_V_Sub {}      -> needLlvm
      MO_V_Mul {}      -> needLlvm
      MO_VS_Quot {}    -> needLlvm
      MO_VS_Rem {}     -> needLlvm
      MO_VS_Neg {}     -> needLlvm
      MO_VF_Insert {}  -> needLlvm
      MO_VF_Extract {} -> needLlvm
      MO_VF_Add {}     -> needLlvm
      MO_VF_Sub {}     -> needLlvm
      MO_VF_Mul {}     -> needLlvm
      MO_VF_Quot {}    -> needLlvm
      MO_VF_Neg {}     -> needLlvm
760

dterei's avatar
dterei committed
761
      _other -> pprPanic "getRegister(x86) - binary CmmMachOp (1)" (pprMachOp mop)
762 763 764
  where
    --------------------
    triv_op width instr = trivialCode width op (Just op) x y
765
                        where op   = instr (intSize width)
766 767 768 769 770

    -- Overflow check for signed multiply: widening IMUL2 leaves the
    -- product in %edx:%eax; we then compare the high word (%edx) against
    -- the sign-extension of the low word.  Per the inline note below,
    -- %eax ends up 0 exactly when the product fits in 'rep'.
    imulMayOflo :: Width -> CmmExpr -> CmmExpr -> NatM Register
    imulMayOflo rep a b = do
         (a_reg, a_code) <- getNonClobberedReg a
         b_code <- getAnyReg b
         let
             shift_amt  = case rep of
                           W32 -> 31
                           W64 -> 63
                           _ -> panic "shift_amt"

             size = intSize rep
             code = a_code `appOL` b_code eax `appOL`
                        toOL [
                           IMUL2 size (OpReg a_reg),   -- result in %edx:%eax
                           SAR size (OpImm (ImmInt shift_amt)) (OpReg eax),
                                -- sign extend lower part
                           SUB size (OpReg edx) (OpReg eax)
                                -- compare against upper
                           -- eax==0 if high part == sign extended low part
                        ]
         return (Fixed size eax code)

    --------------------
    -- Shifts.  'instr' is the shift-instruction constructor (e.g. SHR/SAR
    -- as passed in above).  A variable shift amount must live in %ecx,
    -- which drives the register choreography in Case2 below.
    shift_code :: Width
               -> (Size -> Operand -> Operand -> Instr)
               -> CmmExpr
               -> CmmExpr
               -> NatM Register

    {- Case1: shift length as immediate -}
    shift_code width instr x (CmmLit lit) = do
          x_code <- getAnyReg x
          let
               size = intSize width
               code dst
                  = x_code dst `snocOL`
                    instr size (OpImm (litToImm lit)) (OpReg dst)
          return (Any size code)

    {- Case2: shift length is complex (non-immediate)
      * y must go in %ecx.
      * we cannot do y first *and* put its result in %ecx, because
        %ecx might be clobbered by x.
      * if we do y second, then x cannot be
        in a clobbered reg.  Also, we cannot clobber x's reg
        with the instruction itself.
      * so we can either:
        - do y first, put its result in a fresh tmp, then copy it to %ecx later
        - do y second and put its result into %ecx.  x gets placed in a fresh
          tmp.  This is likely to be better, because the reg alloc can
          eliminate this reg->reg move here (it won't eliminate the other one,
          because the move is into the fixed %ecx).
    -}
    shift_code width instr x y{-amount-} = do
        x_code <- getAnyReg x
        let size = intSize width
        tmp <- getNewRegNat size
        y_code <- getAnyReg y
        let
           code = x_code tmp `appOL`
                  y_code ecx `snocOL`
                  instr size (OpReg ecx) (OpReg tmp)
        return (Fixed size tmp code)

    --------------------
    -- Addition.  A literal addend representable in 32 bits goes through
    -- the three-operand LEA form ('add_int'); otherwise fall back to the
    -- generic two-operand ADD.
    add_code :: Width -> CmmExpr -> CmmExpr -> NatM Register
    add_code rep x (CmmLit (CmmInt y _))
        | is32BitInteger y = add_int rep x y
    add_code rep x y = trivialCode rep (ADD size) (Just (ADD size)) x y
      where size = intSize rep
    -- TODO: There are other interesting patterns we want to replace
    --     with a LEA, e.g. `(x + offset) + (y << shift)`.

    --------------------
    -- Subtraction.  Subtracting a literal becomes an LEA-based add of the
    -- negated constant; note the representability check is on (-y), not y.
    sub_code :: Width -> CmmExpr -> CmmExpr -> NatM Register
    sub_code rep x (CmmLit (CmmInt y _))
        | is32BitInteger (-y) = add_int rep x (-y)
    sub_code rep x y = trivialCode rep (SUB (intSize rep)) Nothing x y

    -- our three-operand add instruction: LEA computes x_reg + imm into a
    -- fresh destination without disturbing the source register.
    add_int width x y = do
        (x_reg, x_code) <- getSomeReg x
        let
            size = intSize width
            imm = ImmInt (fromInteger y)
            code dst
               = x_code `snocOL`
                 LEA size
                        (OpAddr (AddrBaseIndex (EABaseReg x_reg) EAIndexNone imm))
                        (OpReg dst)
        --
        return (Any size code)

    ----------------------
    -- Division/remainder via the hardware DIV/IDIV family, which divides
    -- the %edx:%eax pair by the operand.  'signed' selects IDIV (with
    -- CLTD to sign-extend %eax into %edx) versus DIV (with %edx zeroed);
    -- 'quotient' selects %eax (quotient) or %edx (remainder) as result.
    div_code width signed quotient x y = do
           (y_op, y_code) <- getRegOrMem y -- cannot be clobbered
           x_code <- getAnyReg x
           let
             size = intSize width
             widen | signed    = CLTD size
                   | otherwise = XOR size (OpReg edx) (OpReg edx)

             instr | signed    = IDIV
                   | otherwise = DIV

             code = y_code `appOL`
                    x_code eax `appOL`
                    toOL [widen, instr size y_op]

             result | quotient  = eax
                    | otherwise = edx

           return (Fixed size result code)

-- Floating-point loads: compute the address, then load through the
-- float path (SSE2-aware when enabled).
getRegister' _ _ (CmmLoad mem pk)
  | isFloatType pk
  = do
    Amode addr mem_code <- getAmode mem
    use_sse2 <- sse2Enabled
    loadFloatAmode use_sse2 (typeWidth pk) addr mem_code

-- Integer loads on a 32-bit target (anything except a 64-bit word,
-- which is handled elsewhere).
getRegister' _ is32Bit (CmmLoad mem pk)
  | is32Bit && not (isWord64 pk)
  = do
    code <- intLoadCode instr mem
    return (Any size code)
  where
    width = typeWidth pk
    size = intSize width
    instr = case width of
                W8     -> MOVZxL II8
                _other -> MOV size
        -- We always zero-extend 8-bit loads, if we
        -- can't think of anything better.  This is because
        -- we can't guarantee access to an 8-bit variant of every register
        -- (esi and edi don't have 8-bit variants), so to make things
        -- simpler we do our 8-bit arithmetic with full 32-bit registers.

-- Simpler memory load code on x86_64: a plain MOV at the value's width
-- suffices for every non-float load.
getRegister' _ is32Bit (CmmLoad mem pk)
 | not is32Bit
  = do
    code <- intLoadCode (MOV size) mem
    return (Any size code)
  where size = intSize $ typeWidth pk

getRegister' _ is32Bit (CmmLit (CmmInt 0 width))