CodeGen.hs 115 KB
Newer Older
1 2
{-# LANGUAGE CPP, GADTs, NondecreasingIndentation #-}

3 4 5 6 7 8
-- The default iteration limit is a bit too low for the definitions
-- in this module.
#if __GLASGOW_HASKELL__ >= 800
{-# OPTIONS_GHC -fmax-pmcheck-iterations=10000000 #-}
#endif

9 10 11 12 13 14 15 16 17
-----------------------------------------------------------------------------
--
-- Generating machine code (instruction selection)
--
-- (c) The University of Glasgow 1996-2004
--
-----------------------------------------------------------------------------

-- This is a big module, but, if you pay attention to
Ian Lynagh's avatar
Ian Lynagh committed
18
-- (a) the sectioning, and (b) the type signatures, the
19 20
-- structure should not be too overwhelming.

21 22 23 24 25
module X86.CodeGen (
        cmmTopCodeGen,
        generateJumpTableForInstr,
        InstrBlock
)
26 27 28 29 30

where

#include "HsVersions.h"
#include "nativeGen/NCG.h"
Simon Marlow's avatar
Simon Marlow committed
31
#include "../includes/MachDeps.h"
32 33 34 35 36 37

-- NCG stuff:
import X86.Instr
import X86.Cond
import X86.Regs
import X86.RegInfo
38
import CodeGen.Platform
tibbe's avatar
tibbe committed
39
import CPrim
40
import Debug            ( DebugBlock(..) )
41 42 43
import Instruction
import PIC
import NCGMonad
44
import Format
45 46 47 48 49 50
import Reg
import Platform

-- Our intermediate code:
import BasicTypes
import BlockId
51
import Module           ( primUnitId )
52
import PprCmm           ()
53
import CmmUtils
54
import CmmSwitch
55 56
import Cmm
import Hoopl
57
import CLabel
58 59
import CoreSyn          ( Tickish(..) )
import SrcLoc           ( srcSpanFile, srcSpanStartLine, srcSpanStartCol )
60 61

-- The rest:
62
import ForeignCall      ( CCallConv(..) )
63 64
import OrdList
import Outputable
65
import Unique
66 67
import FastString
import DynFlags
68
import Util
69

70
import Control.Monad
tibbe's avatar
tibbe committed
71
import Data.Bits
72
import Data.Int
73
import Data.Maybe
dterei's avatar
dterei committed
74 75
import Data.Word

Ian Lynagh's avatar
Ian Lynagh committed
76 77
is32BitPlatform :: NatM Bool
is32BitPlatform = do
78
    dflags <- getDynFlags
Ian Lynagh's avatar
Ian Lynagh committed
79 80
    return $ target32Bit (targetPlatform dflags)

81 82
sse2Enabled :: NatM Bool
sse2Enabled = do
83
  dflags <- getDynFlags
84
  return (isSse2Enabled dflags)
85

tibbe's avatar
tibbe committed
86 87
sse4_2Enabled :: NatM Bool
sse4_2Enabled = do
88
  dflags <- getDynFlags
89
  return (isSse4_2Enabled dflags)
tibbe's avatar
tibbe committed
90

91 92 93 94
if_sse2 :: NatM a -> NatM a -> NatM a
if_sse2 sse2 x87 = do
  b <- sse2Enabled
  if b then sse2 else x87
95

96
cmmTopCodeGen
Simon Peyton Jones's avatar
Simon Peyton Jones committed
97 98
        :: RawCmmDecl
        -> NatM [NatCmmDecl (Alignment, CmmStatics) Instr]
99

100 101
cmmTopCodeGen (CmmProc info lab live graph) = do
  let blocks = toBlockListEntryFirst graph
102 103
  (nat_blocks,statics) <- mapAndUnzipM basicBlockCodeGen blocks
  picBaseMb <- getPicBaseMaybeNat
104
  dflags <- getDynFlags
105
  let proc = CmmProc info lab live (ListGraph $ concat nat_blocks)
106
      tops = proc : concat statics
107
      os   = platformOS $ targetPlatform dflags
108 109 110 111

  case picBaseMb of
      Just picBase -> initializePicBase_x86 ArchX86 os picBase tops
      Nothing -> return tops
112

Ian Lynagh's avatar
Ian Lynagh committed
113
cmmTopCodeGen (CmmData sec dat) = do
114
  return [CmmData sec (1, dat)]  -- no translation, we just use CmmStatic
115 116


117
basicBlockCodeGen
118
        :: CmmBlock
119
        -> NatM ( [NatBasicBlock Instr]
Simon Peyton Jones's avatar
Simon Peyton Jones committed
120
                , [NatCmmDecl (Alignment, CmmStatics) Instr])
121

122
basicBlockCodeGen block = do
Peter Wortmann's avatar
Peter Wortmann committed
123 124
  let (_, nodes, tail)  = blockSplit block
      id = entryLabel block
125
      stmts = blockToList nodes
126 127 128 129 130 131 132 133
  -- Generate location directive
  dbg <- getDebugBlock (entryLabel block)
  loc_instrs <- case dblSourceTick =<< dbg of
    Just (SourceNote span name)
      -> do fileId <- getFileId (srcSpanFile span)
            let line = srcSpanStartLine span; col = srcSpanStartCol span
            return $ unitOL $ LOCATION fileId line col name
    _ -> return nilOL
134 135
  mid_instrs <- stmtsToInstrs stmts
  tail_instrs <- stmtToInstrs tail
136
  let instrs = loc_instrs `appOL` mid_instrs `appOL` tail_instrs
137 138 139 140 141
  -- code generation may introduce new basic block boundaries, which
  -- are indicated by the NEWBLOCK instruction.  We must split up the
  -- instruction stream into basic blocks again.  Also, we extract
  -- LDATAs here too.
  let
142 143 144 145 146 147 148 149
        (top,other_blocks,statics) = foldrOL mkBlocks ([],[],[]) instrs

        mkBlocks (NEWBLOCK id) (instrs,blocks,statics)
          = ([], BasicBlock id instrs : blocks, statics)
        mkBlocks (LDATA sec dat) (instrs,blocks,statics)
          = (instrs, blocks, CmmData sec dat:statics)
        mkBlocks instr (instrs,blocks,statics)
          = (instr:instrs, blocks, statics)
150 151 152
  return (BasicBlock id top : other_blocks, statics)


153
stmtsToInstrs :: [CmmNode e x] -> NatM InstrBlock
154 155 156 157 158
stmtsToInstrs stmts
   = do instrss <- mapM stmtToInstrs stmts
        return (concatOL instrss)


159
stmtToInstrs :: CmmNode e x -> NatM InstrBlock
160
stmtToInstrs stmt = do
161
  dflags <- getDynFlags
Ian Lynagh's avatar
Ian Lynagh committed
162
  is32Bit <- is32BitPlatform
163
  case stmt of
164
    CmmComment s   -> return (unitOL (COMMENT s))
Peter Wortmann's avatar
Peter Wortmann committed
165
    CmmTick {}     -> return nilOL
Peter Wortmann's avatar
Peter Wortmann committed
166
    CmmUnwind {}   -> return nilOL
167 168

    CmmAssign reg src
169
      | isFloatType ty         -> assignReg_FltCode format reg src
170
      | is32Bit && isWord64 ty -> assignReg_I64Code      reg src
171
      | otherwise              -> assignReg_IntCode format reg src
172
        where ty = cmmRegType dflags reg
173
              format = cmmTypeFormat ty
174 175

    CmmStore addr src
176
      | isFloatType ty         -> assignMem_FltCode format addr src
177
      | is32Bit && isWord64 ty -> assignMem_I64Code      addr src
178
      | otherwise              -> assignMem_IntCode format addr src
179
        where ty = cmmExprType dflags src
180
              format = cmmTypeFormat ty
181

182
    CmmUnsafeForeignCall target result_regs args
183
       -> genCCall dflags is32Bit target result_regs args
184

185
    CmmBranch id          -> genBranch id
186 187 188 189
    CmmCondBranch arg true false _ -> do
      b1 <- genCondJump true arg
      b2 <- genBranch false
      return (b1 `appOL` b2)
190 191
    CmmSwitch arg ids -> do dflags <- getDynFlags
                            genSwitch dflags arg ids
192 193 194
    CmmCall { cml_target = arg
            , cml_args_regs = gregs } -> do
                                dflags <- getDynFlags
195
                                genJump arg (jumpRegs dflags gregs)
196 197
    _ ->
      panic "stmtToInstrs: statement should have been cps'd away"
198 199


200 201
jumpRegs :: DynFlags -> [GlobalReg] -> [Reg]
jumpRegs dflags gregs = [ RegReal r | Just r <- map (globalRegMaybe platform) gregs ]
202
    where platform = targetPlatform dflags
203

204 205
--------------------------------------------------------------------------------
-- | 'InstrBlock's are the insn sequences generated by the insn selectors.
206 207
--      They are really trees of insns to facilitate fast appending, where a
--      left-to-right traversal yields the insns in the correct order.
208
--
209 210
type InstrBlock
        = OrdList Instr
211 212 213 214


-- | Condition codes passed up the tree.
--
215 216
data CondCode
        = CondCode Bool Cond InstrBlock
217 218 219


-- | a.k.a "Register64"
220 221
--      Reg is the lower 32-bit temporary which contains the result.
--      Use getHiVRegFromLo to find the other VRegUnique.
222
--
223 224
--      Rules of this simplified insn selection game are therefore that
--      the returned Reg may be modified
225
--
226 227
data ChildCode64
   = ChildCode64
228
        InstrBlock
229
        Reg
230 231 232


-- | Register's passed up the tree.  If the stix code forces the register
233 234 235
--      to live in a pre-decided machine register, it comes out as @Fixed@;
--      otherwise, it comes out as @Any@, and the parent can decide which
--      register to put it in.
236 237
--
data Register
238 239
        = Fixed Format Reg InstrBlock
        | Any   Format (Reg -> InstrBlock)
240 241


242 243 244
swizzleRegisterRep :: Register -> Format -> Register
swizzleRegisterRep (Fixed _ reg code) format = Fixed format reg code
swizzleRegisterRep (Any _ codefn)     format = Any   format codefn
245 246 247


-- | Grab the Reg for a CmmReg
248
getRegisterReg :: Platform -> Bool -> CmmReg -> Reg
249

250
getRegisterReg _ use_sse2 (CmmLocal (LocalReg u pk))
251 252
  = let fmt = cmmTypeFormat pk in
    if isFloatFormat fmt && not use_sse2
253
       then RegVirtual (mkVirtualReg u FF80)
254
       else RegVirtual (mkVirtualReg u fmt)
255

256 257
getRegisterReg platform _ (CmmGlobal mid)
  = case globalRegMaybe platform mid of
258 259 260 261 262
        Just reg -> RegReal $ reg
        Nothing  -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid)
        -- By this stage, the only MagicIds remaining should be the
        -- ones which map to a real machine register on this
        -- platform.  Hence ...
263 264 265


-- | Memory addressing modes passed up the tree.
266 267
data Amode
        = Amode AddrMode InstrBlock
268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288

{-
Now, given a tree (the argument to an CmmLoad) that references memory,
produce a suitable addressing mode.

A Rule of the Game (tm) for Amodes: use of the addr bit must
immediately follow use of the code part, since the code part puts
values in registers which the addr then refers to.  So you can't put
anything in between, lest it overwrite some of those registers.  If
you need to do some other computation between the code part and use of
the addr bit, first store the effective address from the amode in a
temporary, then do the other computation, and then use the temporary:

    code
    LEA amode, tmp
    ... other computation ...
    ... (tmp) ...
-}


-- | Check whether an integer will fit in 32 bits.
289 290 291 292
--      A CmmInt is intended to be truncated to the appropriate
--      number of bits, so here we truncate it to Int64.  This is
--      important because e.g. -1 as a CmmInt might be either
--      -1 or 18446744073709551615.
293 294 295 296 297 298 299
--
is32BitInteger :: Integer -> Bool
is32BitInteger i = i64 <= 0x7fffffff && i64 >= -0x80000000
  where i64 = fromIntegral i :: Int64


-- | Convert a BlockId to some CmmStatic data
300 301 302
jumpTableEntry :: DynFlags -> Maybe BlockId -> CmmStatic
jumpTableEntry dflags Nothing = CmmStaticLit (CmmInt 0 (wordWidth dflags))
jumpTableEntry _ (Just blockid) = CmmStaticLit (CmmLabel blockLabel)
303
    where blockLabel = mkAsmTempLabel (getUnique blockid)
304 305 306 307 308 309 310


-- -----------------------------------------------------------------------------
-- General things for putting together code sequences

-- Expand CmmRegOff.  ToDo: should we do it this way around, or convert
-- CmmExprs into CmmRegOff?
311 312
mangleIndexTree :: DynFlags -> CmmReg -> Int -> CmmExpr
mangleIndexTree dflags reg off
313
  = CmmMachOp (MO_Add width) [CmmReg reg, CmmLit (CmmInt (fromIntegral off) width)]
314
  where width = typeWidth (cmmRegType dflags reg)
315 316

-- | The dual to getAnyReg: compute an expression into a register, but
317
--      we don't mind which one it is.
318 319 320 321 322
getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
getSomeReg expr = do
  r <- getRegister expr
  case r of
    Any rep code -> do
323 324 325 326
        tmp <- getNewRegNat rep
        return (tmp, code tmp)
    Fixed _ reg code ->
        return (reg, code)
327 328 329 330 331 332


assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock
assignMem_I64Code addrTree valueTree = do
  Amode addr addr_code <- getAmode addrTree
  ChildCode64 vcode rlo <- iselExpr64 valueTree
333
  let
334 335 336 337 338 339 340 341 342
        rhi = getHiVRegFromLo rlo

        -- Little-endian store
        mov_lo = MOV II32 (OpReg rlo) (OpAddr addr)
        mov_hi = MOV II32 (OpReg rhi) (OpAddr (fromJust (addrOffset addr 4)))
  return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)


assignReg_I64Code :: CmmReg  -> CmmExpr -> NatM InstrBlock
dterei's avatar
dterei committed
343
assignReg_I64Code (CmmLocal (LocalReg u_dst _)) valueTree = do
344
   ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
345
   let
346
         r_dst_lo = RegVirtual $ mkVirtualReg u_dst II32
347 348 349 350 351 352 353 354
         r_dst_hi = getHiVRegFromLo r_dst_lo
         r_src_hi = getHiVRegFromLo r_src_lo
         mov_lo = MOV II32 (OpReg r_src_lo) (OpReg r_dst_lo)
         mov_hi = MOV II32 (OpReg r_src_hi) (OpReg r_dst_hi)
   return (
        vcode `snocOL` mov_lo `snocOL` mov_hi
     )

dterei's avatar
dterei committed
355
assignReg_I64Code _ _
356 357 358 359 360 361 362
   = panic "assignReg_I64Code(i386): invalid lvalue"


iselExpr64        :: CmmExpr -> NatM ChildCode64
iselExpr64 (CmmLit (CmmInt i _)) = do
  (rlo,rhi) <- getNewRegPairNat II32
  let
363
        r = fromIntegral (fromIntegral i :: Word32)
364
        q = fromIntegral (fromIntegral (i `shiftR` 32) :: Word32)
365 366 367 368
        code = toOL [
                MOV II32 (OpImm (ImmInteger r)) (OpReg rlo),
                MOV II32 (OpImm (ImmInteger q)) (OpReg rhi)
                ]
369 370 371 372 373
  return (ChildCode64 code rlo)

iselExpr64 (CmmLoad addrTree ty) | isWord64 ty = do
   Amode addr addr_code <- getAmode addrTree
   (rlo,rhi) <- getNewRegPairNat II32
374
   let
375 376 377
        mov_lo = MOV II32 (OpAddr addr) (OpReg rlo)
        mov_hi = MOV II32 (OpAddr (fromJust (addrOffset addr 4))) (OpReg rhi)
   return (
378
            ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
379 380 381 382
                        rlo
     )

iselExpr64 (CmmReg (CmmLocal (LocalReg vu ty))) | isWord64 ty
383
   = return (ChildCode64 nilOL (RegVirtual $ mkVirtualReg vu II32))
384

385 386 387 388 389
-- we handle addition, but rather badly
iselExpr64 (CmmMachOp (MO_Add _) [e1, CmmLit (CmmInt i _)]) = do
   ChildCode64 code1 r1lo <- iselExpr64 e1
   (rlo,rhi) <- getNewRegPairNat II32
   let
390
        r = fromIntegral (fromIntegral i :: Word32)
391
        q = fromIntegral (fromIntegral (i `shiftR` 32) :: Word32)
392 393 394 395 396 397
        r1hi = getHiVRegFromLo r1lo
        code =  code1 `appOL`
                toOL [ MOV II32 (OpReg r1lo) (OpReg rlo),
                       ADD II32 (OpImm (ImmInteger r)) (OpReg rlo),
                       MOV II32 (OpReg r1hi) (OpReg rhi),
                       ADC II32 (OpImm (ImmInteger q)) (OpReg rhi) ]
398 399 400 401 402 403 404
   return (ChildCode64 code rlo)

iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
   ChildCode64 code1 r1lo <- iselExpr64 e1
   ChildCode64 code2 r2lo <- iselExpr64 e2
   (rlo,rhi) <- getNewRegPairNat II32
   let
405 406 407 408 409 410 411 412
        r1hi = getHiVRegFromLo r1lo
        r2hi = getHiVRegFromLo r2lo
        code =  code1 `appOL`
                code2 `appOL`
                toOL [ MOV II32 (OpReg r1lo) (OpReg rlo),
                       ADD II32 (OpReg r2lo) (OpReg rlo),
                       MOV II32 (OpReg r1hi) (OpReg rhi),
                       ADC II32 (OpReg r2hi) (OpReg rhi) ]
413 414
   return (ChildCode64 code rlo)

415 416 417 418 419 420 421 422 423 424 425 426 427 428 429
iselExpr64 (CmmMachOp (MO_Sub _) [e1,e2]) = do
   ChildCode64 code1 r1lo <- iselExpr64 e1
   ChildCode64 code2 r2lo <- iselExpr64 e2
   (rlo,rhi) <- getNewRegPairNat II32
   let
        r1hi = getHiVRegFromLo r1lo
        r2hi = getHiVRegFromLo r2lo
        code =  code1 `appOL`
                code2 `appOL`
                toOL [ MOV II32 (OpReg r1lo) (OpReg rlo),
                       SUB II32 (OpReg r2lo) (OpReg rlo),
                       MOV II32 (OpReg r1hi) (OpReg rhi),
                       SBB II32 (OpReg r2hi) (OpReg rhi) ]
   return (ChildCode64 code rlo)

430 431 432 433 434 435
iselExpr64 (CmmMachOp (MO_UU_Conv _ W64) [expr]) = do
     fn <- getAnyReg expr
     r_dst_lo <-  getNewRegNat II32
     let r_dst_hi = getHiVRegFromLo r_dst_lo
         code = fn r_dst_lo
     return (
436
             ChildCode64 (code `snocOL`
437 438 439 440 441
                          MOV II32 (OpImm (ImmInt 0)) (OpReg r_dst_hi))
                          r_dst_lo
            )

iselExpr64 expr
Ian Lynagh's avatar
Ian Lynagh committed
442
   = pprPanic "iselExpr64(i386)" (ppr expr)
443 444 445 446


--------------------------------------------------------------------------------
getRegister :: CmmExpr -> NatM Register
447 448 449
getRegister e = do dflags <- getDynFlags
                   is32Bit <- is32BitPlatform
                   getRegister' dflags is32Bit e
450

451
getRegister' :: DynFlags -> Bool -> CmmExpr -> NatM Register
452

453
getRegister' dflags is32Bit (CmmReg reg)
454 455 456 457 458 459
  = case reg of
        CmmGlobal PicBaseReg
         | is32Bit ->
            -- on x86_64, we have %rip for PicBaseReg, but it's not
            -- a full-featured register, it can only be used for
            -- rip-relative addressing.
460 461
            do reg' <- getPicBaseNat (archWordFormat is32Bit)
               return (Fixed (archWordFormat is32Bit) reg' nilOL)
462 463 464
        _ ->
            do use_sse2 <- sse2Enabled
               let
465 466 467
                 fmt = cmmTypeFormat (cmmRegType dflags reg)
                 format | not use_sse2 && isFloatFormat fmt = FF80
                        | otherwise                         = fmt
468
               --
469
               let platform = targetPlatform dflags
470 471 472
               return (Fixed format
                             (getRegisterReg platform use_sse2 reg)
                             nilOL)
473 474


475 476
getRegister' dflags is32Bit (CmmRegOff r n)
  = getRegister' dflags is32Bit $ mangleIndexTree dflags r n
477 478 479 480

-- for 32-bit architectuers, support some 64 -> 32 bit conversions:
-- TO_W_(x), TO_W_(x >> 32)

481
getRegister' _ is32Bit (CmmMachOp (MO_UU_Conv W64 W32)
482 483
                     [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]])
 | is32Bit = do
484 485 486
  ChildCode64 code rlo <- iselExpr64 x
  return $ Fixed II32 (getHiVRegFromLo rlo) code

487
getRegister' _ is32Bit (CmmMachOp (MO_SS_Conv W64 W32)
488 489
                     [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]])
 | is32Bit = do
490 491 492
  ChildCode64 code rlo <- iselExpr64 x
  return $ Fixed II32 (getHiVRegFromLo rlo) code

493
getRegister' _ is32Bit (CmmMachOp (MO_UU_Conv W64 W32) [x])
494
 | is32Bit = do
495 496 497
  ChildCode64 code rlo <- iselExpr64 x
  return $ Fixed II32 rlo code

498
getRegister' _ is32Bit (CmmMachOp (MO_SS_Conv W64 W32) [x])
499
 | is32Bit = do
500
  ChildCode64 code rlo <- iselExpr64 x
501
  return $ Fixed II32 rlo code
502

503
getRegister' _ _ (CmmLit lit@(CmmFloat f w)) =
504 505 506 507 508
  if_sse2 float_const_sse2 float_const_x87
 where
  float_const_sse2
    | f == 0.0 = do
      let
509 510
          format = floatFormat w
          code dst = unitOL  (XOR format (OpReg dst) (OpReg dst))
511 512
        -- I don't know why there are xorpd, xorps, and pxor instructions.
        -- They all appear to do the same thing --SDM
513
      return (Any format code)
514 515 516 517 518 519 520 521 522 523

   | otherwise = do
      Amode addr code <- memConstant (widthInBytes w) lit
      loadFloatAmode True w addr code

  float_const_x87 = case w of
    W64
      | f == 0.0 ->
        let code dst = unitOL (GLDZ dst)
        in  return (Any FF80 code)
524

525 526 527
      | f == 1.0 ->
        let code dst = unitOL (GLD1 dst)
        in  return (Any FF80 code)
528

529 530 531
    _otherwise -> do
      Amode addr code <- memConstant (widthInBytes w) lit
      loadFloatAmode False w addr code
532 533

-- catch simple cases of zero- or sign-extended load
534
getRegister' _ _ (CmmMachOp (MO_UU_Conv W8 W32) [CmmLoad addr _]) = do
535 536 537
  code <- intLoadCode (MOVZxL II8) addr
  return (Any II32 code)

538
getRegister' _ _ (CmmMachOp (MO_SS_Conv W8 W32) [CmmLoad addr _]) = do
539 540 541
  code <- intLoadCode (MOVSxL II8) addr
  return (Any II32 code)

542
getRegister' _ _ (CmmMachOp (MO_UU_Conv W16 W32) [CmmLoad addr _]) = do
543 544 545
  code <- intLoadCode (MOVZxL II16) addr
  return (Any II32 code)

546
getRegister' _ _ (CmmMachOp (MO_SS_Conv W16 W32) [CmmLoad addr _]) = do
547 548 549 550
  code <- intLoadCode (MOVSxL II16) addr
  return (Any II32 code)

-- catch simple cases of zero- or sign-extended load
551
getRegister' _ is32Bit (CmmMachOp (MO_UU_Conv W8 W64) [CmmLoad addr _])
552
 | not is32Bit = do
553 554 555
  code <- intLoadCode (MOVZxL II8) addr
  return (Any II64 code)

556
getRegister' _ is32Bit (CmmMachOp (MO_SS_Conv W8 W64) [CmmLoad addr _])
557
 | not is32Bit = do
558 559 560
  code <- intLoadCode (MOVSxL II8) addr
  return (Any II64 code)

561
getRegister' _ is32Bit (CmmMachOp (MO_UU_Conv W16 W64) [CmmLoad addr _])
562
 | not is32Bit = do
563 564 565
  code <- intLoadCode (MOVZxL II16) addr
  return (Any II64 code)

566
getRegister' _ is32Bit (CmmMachOp (MO_SS_Conv W16 W64) [CmmLoad addr _])
567
 | not is32Bit = do
568 569 570
  code <- intLoadCode (MOVSxL II16) addr
  return (Any II64 code)

571
getRegister' _ is32Bit (CmmMachOp (MO_UU_Conv W32 W64) [CmmLoad addr _])
572
 | not is32Bit = do
573 574 575
  code <- intLoadCode (MOV II32) addr -- 32-bit loads zero-extend
  return (Any II64 code)

576
getRegister' _ is32Bit (CmmMachOp (MO_SS_Conv W32 W64) [CmmLoad addr _])
577
 | not is32Bit = do
578 579 580
  code <- intLoadCode (MOVSxL II32) addr
  return (Any II64 code)

581
getRegister' _ is32Bit (CmmMachOp (MO_Add W64) [CmmReg (CmmGlobal PicBaseReg),
582
                                     CmmLit displacement])
583 584
 | not is32Bit = do
      return $ Any II64 (\dst -> unitOL $
585 586
        LEA II64 (OpAddr (ripRel (litToImm displacement))) (OpReg dst))

587
getRegister' dflags is32Bit (CmmMachOp mop [x]) = do -- unary MachOps
588 589 590 591 592
    sse2 <- sse2Enabled
    case mop of
      MO_F_Neg w
         | sse2      -> sse2NegCode w x
         | otherwise -> trivialUFCode FF80 (GNEG FF80) x
593

594 595
      MO_S_Neg w -> triv_ucode NEGI (intFormat w)
      MO_Not w   -> triv_ucode NOT  (intFormat w)
596 597 598 599 600 601 602 603 604

      -- Nop conversions
      MO_UU_Conv W32 W8  -> toI8Reg  W32 x
      MO_SS_Conv W32 W8  -> toI8Reg  W32 x
      MO_UU_Conv W16 W8  -> toI8Reg  W16 x
      MO_SS_Conv W16 W8  -> toI8Reg  W16 x
      MO_UU_Conv W32 W16 -> toI16Reg W32 x
      MO_SS_Conv W32 W16 -> toI16Reg W32 x

605 606 607 608 609 610
      MO_UU_Conv W64 W32 | not is32Bit -> conversionNop II64 x
      MO_SS_Conv W64 W32 | not is32Bit -> conversionNop II64 x
      MO_UU_Conv W64 W16 | not is32Bit -> toI16Reg W64 x
      MO_SS_Conv W64 W16 | not is32Bit -> toI16Reg W64 x
      MO_UU_Conv W64 W8  | not is32Bit -> toI8Reg  W64 x
      MO_SS_Conv W64 W8  | not is32Bit -> toI8Reg  W64 x
611

612 613
      MO_UU_Conv rep1 rep2 | rep1 == rep2 -> conversionNop (intFormat rep1) x
      MO_SS_Conv rep1 rep2 | rep1 == rep2 -> conversionNop (intFormat rep1) x
614 615 616 617 618 619 620 621 622 623

      -- widenings
      MO_UU_Conv W8  W32 -> integerExtend W8  W32 MOVZxL x
      MO_UU_Conv W16 W32 -> integerExtend W16 W32 MOVZxL x
      MO_UU_Conv W8  W16 -> integerExtend W8  W16 MOVZxL x

      MO_SS_Conv W8  W32 -> integerExtend W8  W32 MOVSxL x
      MO_SS_Conv W16 W32 -> integerExtend W16 W32 MOVSxL x
      MO_SS_Conv W8  W16 -> integerExtend W8  W16 MOVSxL x

624 625 626 627 628 629
      MO_UU_Conv W8  W64 | not is32Bit -> integerExtend W8  W64 MOVZxL x
      MO_UU_Conv W16 W64 | not is32Bit -> integerExtend W16 W64 MOVZxL x
      MO_UU_Conv W32 W64 | not is32Bit -> integerExtend W32 W64 MOVZxL x
      MO_SS_Conv W8  W64 | not is32Bit -> integerExtend W8  W64 MOVSxL x
      MO_SS_Conv W16 W64 | not is32Bit -> integerExtend W16 W64 MOVSxL x
      MO_SS_Conv W32 W64 | not is32Bit -> integerExtend W32 W64 MOVSxL x
630 631 632 633
        -- for 32-to-64 bit zero extension, amd64 uses an ordinary movl.
        -- However, we don't want the register allocator to throw it
        -- away as an unnecessary reg-to-reg move, so we keep it in
        -- the form of a movzl and print it as a movl later.
634

635 636
      MO_FF_Conv W32 W64
        | sse2      -> coerceFP2FP W64 x
637
        | otherwise -> conversionNop FF80 x
638

639
      MO_FF_Conv W64 W32 -> coerceFP2FP W32 x
640 641 642 643

      MO_FS_Conv from to -> coerceFP2Int from to x
      MO_SF_Conv from to -> coerceInt2FP from to x

644 645 646 647 648 649 650 651
      MO_V_Insert {}   -> needLlvm
      MO_V_Extract {}  -> needLlvm
      MO_V_Add {}      -> needLlvm
      MO_V_Sub {}      -> needLlvm
      MO_V_Mul {}      -> needLlvm
      MO_VS_Quot {}    -> needLlvm
      MO_VS_Rem {}     -> needLlvm
      MO_VS_Neg {}     -> needLlvm
652 653
      MO_VU_Quot {}    -> needLlvm
      MO_VU_Rem {}     -> needLlvm
654 655 656 657 658 659 660
      MO_VF_Insert {}  -> needLlvm
      MO_VF_Extract {} -> needLlvm
      MO_VF_Add {}     -> needLlvm
      MO_VF_Sub {}     -> needLlvm
      MO_VF_Mul {}     -> needLlvm
      MO_VF_Quot {}    -> needLlvm
      MO_VF_Neg {}     -> needLlvm
661

dterei's avatar
dterei committed
662
      _other -> pprPanic "getRegister" (pprMachOp mop)
663
   where
664 665
        triv_ucode :: (Format -> Operand -> Instr) -> Format -> NatM Register
        triv_ucode instr format = trivialUCode format (instr format) x
666 667 668

        -- signed or unsigned extension.
        integerExtend :: Width -> Width
669
                      -> (Format -> Operand -> Operand -> Instr)
670 671 672 673 674 675 676
                      -> CmmExpr -> NatM Register
        integerExtend from to instr expr = do
            (reg,e_code) <- if from == W8 then getByteReg expr
                                          else getSomeReg expr
            let
                code dst =
                  e_code `snocOL`
677 678
                  instr (intFormat from) (OpReg reg) (OpReg dst)
            return (Any (intFormat to) code)
679 680 681

        toI8Reg :: Width -> CmmExpr -> NatM Register
        toI8Reg new_rep expr
682
            = do codefn <- getAnyReg expr
683
                 return (Any (intFormat new_rep) codefn)
684 685 686 687 688 689
                -- HACK: use getAnyReg to get a byte-addressable register.
                -- If the source was a Fixed register, this will add the
                -- mov instruction to put it into the desired destination.
                -- We're assuming that the destination won't be a fixed
                -- non-byte-addressable register; it won't be, because all
                -- fixed registers are word-sized.
690

691
        toI16Reg = toI8Reg -- for now
692

693 694
        conversionNop :: Format -> CmmExpr -> NatM Register
        conversionNop new_format expr
695
            = do e_code <- getRegister' dflags is32Bit expr
696
                 return (swizzleRegisterRep e_code new_format)
697 698


699
getRegister' _ is32Bit (CmmMachOp mop [x, y]) = do -- dyadic MachOps
700 701
  sse2 <- sse2Enabled
  case mop of
Ian Lynagh's avatar
Ian Lynagh committed
702 703 704 705 706 707
      MO_F_Eq _ -> condFltReg is32Bit EQQ x y
      MO_F_Ne _ -> condFltReg is32Bit NE  x y
      MO_F_Gt _ -> condFltReg is32Bit GTT x y
      MO_F_Ge _ -> condFltReg is32Bit GE  x y
      MO_F_Lt _ -> condFltReg is32Bit LTT x y
      MO_F_Le _ -> condFltReg is32Bit LE  x y
dterei's avatar
dterei committed
708 709 710 711 712 713 714 715 716 717 718 719 720

      MO_Eq _   -> condIntReg EQQ x y
      MO_Ne _   -> condIntReg NE  x y

      MO_S_Gt _ -> condIntReg GTT x y
      MO_S_Ge _ -> condIntReg GE  x y
      MO_S_Lt _ -> condIntReg LTT x y
      MO_S_Le _ -> condIntReg LE  x y

      MO_U_Gt _ -> condIntReg GU  x y
      MO_U_Ge _ -> condIntReg GEU x y
      MO_U_Lt _ -> condIntReg LU  x y
      MO_U_Le _ -> condIntReg LEU x y
721

722
      MO_F_Add w  | sse2      -> trivialFCode_sse2 w ADD  x y
dterei's avatar
dterei committed
723
                  | otherwise -> trivialFCode_x87    GADD x y
724
      MO_F_Sub w  | sse2      -> trivialFCode_sse2 w SUB  x y
dterei's avatar
dterei committed
725
                  | otherwise -> trivialFCode_x87    GSUB x y
726
      MO_F_Quot w | sse2      -> trivialFCode_sse2 w FDIV x y
dterei's avatar
dterei committed
727
                  | otherwise -> trivialFCode_x87    GDIV x y
728
      MO_F_Mul w  | sse2      -> trivialFCode_sse2 w MUL x y
dterei's avatar
dterei committed
729
                  | otherwise -> trivialFCode_x87    GMUL x y
730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745

      MO_Add rep -> add_code rep x y
      MO_Sub rep -> sub_code rep x y

      MO_S_Quot rep -> div_code rep True  True  x y
      MO_S_Rem  rep -> div_code rep True  False x y
      MO_U_Quot rep -> div_code rep False True  x y
      MO_U_Rem  rep -> div_code rep False False x y

      MO_S_MulMayOflo rep -> imulMayOflo rep x y

      MO_Mul rep -> triv_op rep IMUL
      MO_And rep -> triv_op rep AND
      MO_Or  rep -> triv_op rep OR
      MO_Xor rep -> triv_op rep XOR

746 747 748 749
        {- Shift ops on x86s have constraints on their source, it
           either has to be Imm, CL or 1
            => trivialCode is not restrictive enough (sigh.)
        -}
750 751 752 753
      MO_Shl rep   -> shift_code rep SHL x y {-False-}
      MO_U_Shr rep -> shift_code rep SHR x y {-False-}
      MO_S_Shr rep -> shift_code rep SAR x y {-False-}

754 755 756 757 758 759 760 761 762 763 764 765 766 767 768
      MO_V_Insert {}   -> needLlvm
      MO_V_Extract {}  -> needLlvm
      MO_V_Add {}      -> needLlvm
      MO_V_Sub {}      -> needLlvm
      MO_V_Mul {}      -> needLlvm
      MO_VS_Quot {}    -> needLlvm
      MO_VS_Rem {}     -> needLlvm
      MO_VS_Neg {}     -> needLlvm
      MO_VF_Insert {}  -> needLlvm
      MO_VF_Extract {} -> needLlvm
      MO_VF_Add {}     -> needLlvm
      MO_VF_Sub {}     -> needLlvm
      MO_VF_Mul {}     -> needLlvm
      MO_VF_Quot {}    -> needLlvm
      MO_VF_Neg {}     -> needLlvm
769

dterei's avatar
dterei committed
770
      _other -> pprPanic "getRegister(x86) - binary CmmMachOp (1)" (pprMachOp mop)
771 772 773
  where
    --------------------
    triv_op width instr = trivialCode width op (Just op) x y
774
                        where op   = instr (intFormat width)
775 776 777 778 779

    imulMayOflo :: Width -> CmmExpr -> CmmExpr -> NatM Register
    imulMayOflo rep a b = do
         (a_reg, a_code) <- getNonClobberedReg a
         b_code <- getAnyReg b
780 781 782 783 784
         let
             shift_amt  = case rep of
                           W32 -> 31
                           W64 -> 63
                           _ -> panic "shift_amt"
785

786
             format = intFormat rep
787 788
             code = a_code `appOL` b_code eax `appOL`
                        toOL [
789 790
                           IMUL2 format (OpReg a_reg),   -- result in %edx:%eax
                           SAR format (OpImm (ImmInt shift_amt)) (OpReg eax),
791
                                -- sign extend lower part
792
                           SUB format (OpReg edx) (OpReg eax)
793
                                -- compare against upper
794 795
                           -- eax==0 if high part == sign extended low part
                        ]
796
         return (Fixed format eax code)
797 798 799

    --------------------
    shift_code :: Width
800
               -> (Format -> Operand -> Operand -> Instr)
801 802 803
               -> CmmExpr
               -> CmmExpr
               -> NatM Register
804 805

    {- Case1: shift length as immediate -}
dterei's avatar
dterei committed
806
    shift_code width instr x (CmmLit lit) = do
807 808
          x_code <- getAnyReg x
          let
809
               format = intFormat width
810 811
               code dst
                  = x_code dst `snocOL`
812 813
                    instr format (OpImm (litToImm lit)) (OpReg dst)
          return (Any format code)
814

815 816 817 818
    {- Case2: shift length is complex (non-immediate)
      * y must go in %ecx.
      * we cannot do y first *and* put its result in %ecx, because
        %ecx might be clobbered by x.
819
      * if we do y second, then x cannot be
820 821 822 823 824
        in a clobbered reg.  Also, we cannot clobber x's reg
        with the instruction itself.
      * so we can either:
        - do y first, put its result in a fresh tmp, then copy it to %ecx later
        - do y second and put its result into %ecx.  x gets placed in a fresh
Gabor Greif's avatar
typos  
Gabor Greif committed
825
          tmp.  This is likely to be better, because the reg alloc can
826 827 828 829 830
          eliminate this reg->reg move here (it won't eliminate the other one,
          because the move is into the fixed %ecx).
    -}
    shift_code width instr x y{-amount-} = do
        x_code <- getAnyReg x
831 832
        let format = intFormat width
        tmp <- getNewRegNat format
833
        y_code <- getAnyReg y
834 835 836
        let
           code = x_code tmp `appOL`
                  y_code ecx `snocOL`
837 838
                  instr format (OpReg ecx) (OpReg tmp)
        return (Fixed format tmp code)
839 840 841 842

    --------------------
    add_code :: Width -> CmmExpr -> CmmExpr -> NatM Register
    add_code rep x (CmmLit (CmmInt y _))
843
        | is32BitInteger y = add_int rep x y
844 845
    add_code rep x y = trivialCode rep (ADD format) (Just (ADD format)) x y
      where format = intFormat rep
846 847
    -- TODO: There are other interesting patterns we want to replace
    --     with a LEA, e.g. `(x + offset) + (y << shift)`.
848 849 850 851

    --------------------
    sub_code :: Width -> CmmExpr -> CmmExpr -> NatM Register
    sub_code rep x (CmmLit (CmmInt y _))
852
        | is32BitInteger (-y) = add_int rep x (-y)
853
    sub_code rep x y = trivialCode rep (SUB (intFormat rep)) Nothing x y
854 855 856

    -- our three-operand add instruction:
    add_int width x y = do
857 858
        (x_reg, x_code) <- getSomeReg x
        let
859
            format = intFormat width
860 861
            imm = ImmInt (fromInteger y)
            code dst
862
               = x_code `snocOL`
863
                 LEA format
864
                        (OpAddr (AddrBaseIndex (EABaseReg x_reg) EAIndexNone imm))
865
                        (OpReg dst)
866
        --
867
        return (Any format code)
868 869 870

    ----------------------
    div_code width signed quotient x y = do
871 872 873
           (y_op, y_code) <- getRegOrMem y -- cannot be clobbered
           x_code <- getAnyReg x
           let
874 875 876
             format = intFormat width
             widen | signed    = CLTD format
                   | otherwise = XOR format (OpReg edx) (OpReg edx)<