CodeGen.hs 101 KB
Newer Older
1 2 3 4 5 6 7 8 9
-----------------------------------------------------------------------------
--
-- Generating machine code (instruction selection)
--
-- (c) The University of Glasgow 1996-2004
--
-----------------------------------------------------------------------------

-- This is a big module, but, if you pay attention to
-- (a) the sectioning, and (b) the type signatures, the
-- structure should not be too overwhelming.

13
{-# LANGUAGE GADTs #-}
14 15 16 17 18
module X86.CodeGen (
        cmmTopCodeGen,
        generateJumpTableForInstr,
        InstrBlock
)
19 20 21 22 23

where

#include "HsVersions.h"
#include "nativeGen/NCG.h"
Simon Marlow's avatar
Simon Marlow committed
24
#include "../includes/MachDeps.h"
25 26 27 28 29 30

-- NCG stuff:
import X86.Instr
import X86.Cond
import X86.Regs
import X86.RegInfo
31
import CodeGen.Platform
tibbe's avatar
tibbe committed
32
import CPrim
33 34 35 36 37 38 39 40 41 42
import Instruction
import PIC
import NCGMonad
import Size
import Reg
import Platform

-- Our intermediate code:
import BasicTypes
import BlockId
43
import Module           ( primPackageId )
44
import PprCmm           ()
45 46 47
import CmmUtils
import Cmm
import Hoopl
48 49 50
import CLabel

-- The rest:
51
import ForeignCall      ( CCallConv(..) )
52 53
import OrdList
import Outputable
54
import Unique
55
import FastString
56
import FastBool         ( isFastTrue )
57
import DynFlags
58
import Util
59

60
import Control.Monad
tibbe's avatar
tibbe committed
61
import Data.Bits
62
import Data.Int
63
import Data.Maybe
dterei's avatar
dterei committed
64 65
import Data.Word

-- | Is the current target a 32-bit platform?  Read from the 'DynFlags'
-- carried by the 'NatM' monad.
is32BitPlatform :: NatM Bool
is32BitPlatform = do
    dflags <- getDynFlags
    return $ target32Bit (targetPlatform dflags)

-- | Are SSE2 instructions available on this target?
sse2Enabled :: NatM Bool
sse2Enabled = do
  dflags <- getDynFlags
  return (isSse2Enabled dflags)

-- | Are SSE4.2 instructions available on this target?
sse4_2Enabled :: NatM Bool
sse4_2Enabled = do
  dflags <- getDynFlags
  return (isSse4_2Enabled dflags)

-- | Choose between an SSE2 code path and an x87 code path depending on
-- whether SSE2 is enabled for this compilation.
if_sse2 :: NatM a -> NatM a -> NatM a
if_sse2 sse2 x87 = do
  b <- sse2Enabled
  if b then sse2 else x87

-- | Generate native code for one top-level Cmm declaration.
--
-- A procedure is translated block by block; the resulting blocks are
-- reassembled into a 'ListGraph', and on position-independent targets the
-- PIC base register is initialised.  Data sections pass through unchanged
-- (with a default alignment of 1).
cmmTopCodeGen
        :: RawCmmDecl
        -> NatM [NatCmmDecl (Alignment, CmmStatics) Instr]

cmmTopCodeGen (CmmProc info lab live graph) = do
  let blocks = toBlockListEntryFirst graph
  (nat_blocks,statics) <- mapAndUnzipM basicBlockCodeGen blocks
  picBaseMb <- getPicBaseMaybeNat
  dflags <- getDynFlags
  let proc = CmmProc info lab live (ListGraph $ concat nat_blocks)
      tops = proc : concat statics
      os   = platformOS $ targetPlatform dflags

  case picBaseMb of
      Just picBase -> initializePicBase_x86 ArchX86 os picBase tops
      Nothing -> return tops

cmmTopCodeGen (CmmData sec dat) = do
  return [CmmData sec (1, dat)]  -- no translation, we just use CmmStatic
105 106


-- | Generate native code for a single Cmm basic block.
--
-- Instruction selection may introduce new basic-block boundaries (marked
-- with the pseudo-instruction NEWBLOCK) and out-of-line data (LDATA), so
-- the flat instruction stream is re-split into proper basic blocks and
-- the data is collected separately.
basicBlockCodeGen
        :: CmmBlock
        -> NatM ( [NatBasicBlock Instr]
                , [NatCmmDecl (Alignment, CmmStatics) Instr])

basicBlockCodeGen block = do
  let (CmmEntry id, nodes, tail)  = blockSplit block
      stmts = blockToList nodes
  mid_instrs <- stmtsToInstrs stmts
  tail_instrs <- stmtToInstrs tail
  let instrs = mid_instrs `appOL` tail_instrs
  -- code generation may introduce new basic block boundaries, which
  -- are indicated by the NEWBLOCK instruction.  We must split up the
  -- instruction stream into basic blocks again.  Also, we extract
  -- LDATAs here too.
  let
        (top,other_blocks,statics) = foldrOL mkBlocks ([],[],[]) instrs

        -- A right fold over the instruction stream: NEWBLOCK closes the
        -- current accumulated block, LDATA is hoisted out as static data,
        -- and everything else is prepended to the current block.
        mkBlocks (NEWBLOCK id) (instrs,blocks,statics)
          = ([], BasicBlock id instrs : blocks, statics)
        mkBlocks (LDATA sec dat) (instrs,blocks,statics)
          = (instrs, blocks, CmmData sec dat:statics)
        mkBlocks instr (instrs,blocks,statics)
          = (instr:instrs, blocks, statics)
  return (BasicBlock id top : other_blocks, statics)


-- | Translate a list of Cmm statements, concatenating the generated
-- instruction sequences in order.
stmtsToInstrs :: [CmmNode e x] -> NatM InstrBlock
stmtsToInstrs stmts
   = do instrss <- mapM stmtToInstrs stmts
        return (concatOL instrss)


-- | Translate a single Cmm statement into native instructions.
--
-- Assignments and stores are dispatched on the type of the value:
-- floats, 64-bit integers on 32-bit targets, and ordinary integers each
-- have their own code path.  Anything not listed here should have been
-- removed by the CPS conversion, hence the final panic.
stmtToInstrs :: CmmNode e x -> NatM InstrBlock
stmtToInstrs stmt = do
  dflags <- getDynFlags
  is32Bit <- is32BitPlatform
  case stmt of
    CmmComment s   -> return (unitOL (COMMENT s))

    CmmAssign reg src
      | isFloatType ty         -> assignReg_FltCode size reg src
      | is32Bit && isWord64 ty -> assignReg_I64Code      reg src
      | otherwise              -> assignReg_IntCode size reg src
        where ty = cmmRegType dflags reg
              size = cmmTypeSize ty

    CmmStore addr src
      | isFloatType ty         -> assignMem_FltCode size addr src
      | is32Bit && isWord64 ty -> assignMem_I64Code      addr src
      | otherwise              -> assignMem_IntCode size addr src
        where ty = cmmExprType dflags src
              size = cmmTypeSize ty

    CmmUnsafeForeignCall target result_regs args
       -> genCCall is32Bit target result_regs args

    CmmBranch id          -> genBranch id
    CmmCondBranch arg true false -> do b1 <- genCondJump true arg
                                       b2 <- genBranch false
                                       return (b1 `appOL` b2)
    -- dflags is already in scope from the top of this function; no need
    -- to fetch it again in the branches below.
    CmmSwitch arg ids     -> genSwitch dflags arg ids
    CmmCall { cml_target = arg
            , cml_args_regs = gregs } -> genJump arg (jumpRegs dflags gregs)
    _ ->
      panic "stmtToInstrs: statement should have been cps'd away"
176 177


-- | The real registers, out of the given global registers, that are live
-- across a jump on this platform.  Globals with no machine-register
-- mapping are silently dropped.
jumpRegs :: DynFlags -> [GlobalReg] -> [Reg]
jumpRegs dflags gregs = [ RegReal r | Just r <- map (globalRegMaybe platform) gregs ]
    where platform = targetPlatform dflags

--------------------------------------------------------------------------------
-- | 'InstrBlock's are the insn sequences generated by the insn selectors.
--      They are really trees of insns to facilitate fast appending, where a
--      left-to-right traversal yields the insns in the correct order.
--
type InstrBlock
        = OrdList Instr


-- | Condition codes passed up the tree.
--
data CondCode
        = CondCode Bool Cond InstrBlock


-- | a.k.a "Register64"
--      Reg is the lower 32-bit temporary which contains the result.
--      Use getHiVRegFromLo to find the other VRegUnique.
--
--      Rules of this simplified insn selection game are therefore that
--      the returned Reg may be modified
--
data ChildCode64
   = ChildCode64
        InstrBlock
        Reg


-- | Register's passed up the tree.  If the stix code forces the register
--      to live in a pre-decided machine register, it comes out as @Fixed@;
--      otherwise, it comes out as @Any@, and the parent can decide which
--      register to put it in.
--
data Register
        = Fixed Size Reg InstrBlock
        | Any   Size (Reg -> InstrBlock)


-- | Change the width a 'Register' claims to have, without touching the
-- code or the register itself.
swizzleRegisterRep :: Register -> Size -> Register
swizzleRegisterRep (Fixed _ reg code) size = Fixed size reg code
swizzleRegisterRep (Any _ codefn)     size = Any   size codefn


-- | Grab the Reg for a CmmReg.
--
-- Float-typed locals become FF80 virtual registers when SSE2 is not in
-- use (everything goes through the x87 80-bit stack in that case).
getRegisterReg :: Platform -> Bool -> CmmReg -> Reg

getRegisterReg _ use_sse2 (CmmLocal (LocalReg u pk))
  = let sz = cmmTypeSize pk in
    if isFloatSize sz && not use_sse2
       then RegVirtual (mkVirtualReg u FF80)
       else RegVirtual (mkVirtualReg u sz)

getRegisterReg platform _ (CmmGlobal mid)
  = case globalRegMaybe platform mid of
        Just reg -> RegReal $ reg
        Nothing  -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid)
        -- By this stage, the only MagicIds remaining should be the
        -- ones which map to a real machine register on this
        -- platform.  Hence ...


-- | Memory addressing modes passed up the tree.
data Amode
        = Amode AddrMode InstrBlock

{-
Now, given a tree (the argument to an CmmLoad) that references memory,
produce a suitable addressing mode.

A Rule of the Game (tm) for Amodes: use of the addr bit must
immediately follow use of the code part, since the code part puts
values in registers which the addr then refers to.  So you can't put
anything in between, lest it overwrite some of those registers.  If
you need to do some other computation between the code part and use of
the addr bit, first store the effective address from the amode in a
temporary, then do the other computation, and then use the temporary:

    code
    LEA amode, tmp
    ... other computation ...
    ... (tmp) ...
-}


-- | Check whether an integer will fit in 32 bits.
--      A CmmInt is intended to be truncated to the appropriate
--      number of bits, so here we truncate it to Int64.  This is
--      important because e.g. -1 as a CmmInt might be either
--      -1 or 18446744073709551615.
--
is32BitInteger :: Integer -> Bool
is32BitInteger i = i64 <= 0x7fffffff && i64 >= -0x80000000
  where i64 = fromIntegral i :: Int64


-- | Convert a BlockId to some CmmStatic data.
--
-- An absent target ('Nothing') becomes a zero word; a present target
-- becomes a reference to that block's assembler-temporary label.
jumpTableEntry :: DynFlags -> Maybe BlockId -> CmmStatic
jumpTableEntry dflags Nothing = CmmStaticLit (CmmInt 0 (wordWidth dflags))
jumpTableEntry _ (Just blockid) = CmmStaticLit (CmmLabel blockLabel)
    where blockLabel = mkAsmTempLabel (getUnique blockid)


-- -----------------------------------------------------------------------------
-- General things for putting together code sequences

-- Expand CmmRegOff.  ToDo: should we do it this way around, or convert
-- CmmExprs into CmmRegOff?
mangleIndexTree :: DynFlags -> CmmReg -> Int -> CmmExpr
mangleIndexTree dflags reg off
  = CmmMachOp (MO_Add width) [CmmReg reg, CmmLit (CmmInt (fromIntegral off) width)]
  where width = typeWidth (cmmRegType dflags reg)

-- | The dual to getAnyReg: compute an expression into a register, but
--      we don't mind which one it is.
getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
getSomeReg expr = do
  r <- getRegister expr
  case r of
    Any rep code -> do
        -- the selector didn't care; allocate a fresh register for it
        tmp <- getNewRegNat rep
        return (tmp, code tmp)
    Fixed _ reg code ->
        return (reg, code)


-- | Store a 64-bit value to memory on a 32-bit target, as two 32-bit
-- stores: the low word at the address, the high word at offset 4
-- (little-endian layout).
assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock
assignMem_I64Code addrTree valueTree = do
  Amode addr addr_code <- getAmode addrTree
  ChildCode64 vcode rlo <- iselExpr64 valueTree
  let
        rhi = getHiVRegFromLo rlo

        -- Little-endian store
        mov_lo = MOV II32 (OpReg rlo) (OpAddr addr)
        mov_hi = MOV II32 (OpReg rhi) (OpAddr (fromJust (addrOffset addr 4)))
  return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)


-- | Assign a 64-bit value to a local register (pair) on a 32-bit target.
-- The destination is modelled as a low 32-bit virtual register plus its
-- implicit high partner (see 'getHiVRegFromLo').  Only local registers
-- are valid lvalues here.
assignReg_I64Code :: CmmReg  -> CmmExpr -> NatM InstrBlock
assignReg_I64Code (CmmLocal (LocalReg u_dst _)) valueTree = do
   ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
   let
         r_dst_lo = RegVirtual $ mkVirtualReg u_dst II32
         r_dst_hi = getHiVRegFromLo r_dst_lo
         r_src_hi = getHiVRegFromLo r_src_lo
         mov_lo = MOV II32 (OpReg r_src_lo) (OpReg r_dst_lo)
         mov_hi = MOV II32 (OpReg r_src_hi) (OpReg r_dst_hi)
   return (
        vcode `snocOL` mov_lo `snocOL` mov_hi
     )

assignReg_I64Code _ _
   = panic "assignReg_I64Code(i386): invalid lvalue"


-- | Instruction selection for 64-bit expressions on a 32-bit target.
-- The result is returned as a 'ChildCode64': code plus the low 32-bit
-- register (the high half is found via 'getHiVRegFromLo').
iselExpr64        :: CmmExpr -> NatM ChildCode64
iselExpr64 (CmmLit (CmmInt i _)) = do
  (rlo,rhi) <- getNewRegPairNat II32
  let
        -- split the literal into low and high 32-bit halves
        r = fromIntegral (fromIntegral i :: Word32)
        q = fromIntegral (fromIntegral (i `shiftR` 32) :: Word32)
        code = toOL [
                MOV II32 (OpImm (ImmInteger r)) (OpReg rlo),
                MOV II32 (OpImm (ImmInteger q)) (OpReg rhi)
                ]
  return (ChildCode64 code rlo)

iselExpr64 (CmmLoad addrTree ty) | isWord64 ty = do
   Amode addr addr_code <- getAmode addrTree
   (rlo,rhi) <- getNewRegPairNat II32
   let
        -- little-endian: low word at addr, high word at addr+4
        mov_lo = MOV II32 (OpAddr addr) (OpReg rlo)
        mov_hi = MOV II32 (OpAddr (fromJust (addrOffset addr 4))) (OpReg rhi)
   return (
            ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
                        rlo
     )

iselExpr64 (CmmReg (CmmLocal (LocalReg vu ty))) | isWord64 ty
   = return (ChildCode64 nilOL (RegVirtual $ mkVirtualReg vu II32))

-- we handle addition, but rather badly
iselExpr64 (CmmMachOp (MO_Add _) [e1, CmmLit (CmmInt i _)]) = do
   ChildCode64 code1 r1lo <- iselExpr64 e1
   (rlo,rhi) <- getNewRegPairNat II32
   let
        r = fromIntegral (fromIntegral i :: Word32)
        q = fromIntegral (fromIntegral (i `shiftR` 32) :: Word32)
        r1hi = getHiVRegFromLo r1lo
        -- add with carry from the low half into the high half
        code =  code1 `appOL`
                toOL [ MOV II32 (OpReg r1lo) (OpReg rlo),
                       ADD II32 (OpImm (ImmInteger r)) (OpReg rlo),
                       MOV II32 (OpReg r1hi) (OpReg rhi),
                       ADC II32 (OpImm (ImmInteger q)) (OpReg rhi) ]
   return (ChildCode64 code rlo)

iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
   ChildCode64 code1 r1lo <- iselExpr64 e1
   ChildCode64 code2 r2lo <- iselExpr64 e2
   (rlo,rhi) <- getNewRegPairNat II32
   let
        r1hi = getHiVRegFromLo r1lo
        r2hi = getHiVRegFromLo r2lo
        code =  code1 `appOL`
                code2 `appOL`
                toOL [ MOV II32 (OpReg r1lo) (OpReg rlo),
                       ADD II32 (OpReg r2lo) (OpReg rlo),
                       MOV II32 (OpReg r1hi) (OpReg rhi),
                       ADC II32 (OpReg r2hi) (OpReg rhi) ]
   return (ChildCode64 code rlo)

iselExpr64 (CmmMachOp (MO_UU_Conv _ W64) [expr]) = do
     -- zero-extend: compute the source into the low half and clear the
     -- high half
     fn <- getAnyReg expr
     r_dst_lo <-  getNewRegNat II32
     let r_dst_hi = getHiVRegFromLo r_dst_lo
         code = fn r_dst_lo
     return (
             ChildCode64 (code `snocOL`
                          MOV II32 (OpImm (ImmInt 0)) (OpReg r_dst_hi))
                          r_dst_lo
            )

iselExpr64 expr
   = pprPanic "iselExpr64(i386)" (ppr expr)


--------------------------------------------------------------------------------
-- | Compute an expression into a 'Register'.  Thin wrapper that fetches
-- the 'DynFlags' and platform word size before dispatching to
-- 'getRegister''.
getRegister :: CmmExpr -> NatM Register
getRegister e = do dflags <- getDynFlags
                   is32Bit <- is32BitPlatform
                   getRegister' dflags is32Bit e

414
getRegister' :: DynFlags -> Bool -> CmmExpr -> NatM Register

getRegister' dflags is32Bit (CmmReg reg)
  = case reg of
        CmmGlobal PicBaseReg
         | is32Bit ->
            -- on x86_64, we have %rip for PicBaseReg, but it's not
            -- a full-featured register, it can only be used for
            -- rip-relative addressing.
            do reg' <- getPicBaseNat (archWordSize is32Bit)
               return (Fixed (archWordSize is32Bit) reg' nilOL)
        _ ->
            do use_sse2 <- sse2Enabled
               let
                 sz = cmmTypeSize (cmmRegType dflags reg)
                 -- without SSE2, floats live in the x87 80-bit registers
                 size | not use_sse2 && isFloatSize sz = FF80
                      | otherwise                      = sz
               --
               let platform = targetPlatform dflags
               return (Fixed size (getRegisterReg platform use_sse2 reg) nilOL)


getRegister' dflags is32Bit (CmmRegOff r n)
  = getRegister' dflags is32Bit $ mangleIndexTree dflags r n
438 439 440 441

-- for 32-bit architectures, support some 64 -> 32 bit conversions:
-- TO_W_(x), TO_W_(x >> 32)

getRegister' _ is32Bit (CmmMachOp (MO_UU_Conv W64 W32)
                     [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]])
 | is32Bit = do
  -- taking the high word of a 64-bit value: just use the high register
  ChildCode64 code rlo <- iselExpr64 x
  return $ Fixed II32 (getHiVRegFromLo rlo) code

getRegister' _ is32Bit (CmmMachOp (MO_SS_Conv W64 W32)
                     [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]])
 | is32Bit = do
  ChildCode64 code rlo <- iselExpr64 x
  return $ Fixed II32 (getHiVRegFromLo rlo) code

getRegister' _ is32Bit (CmmMachOp (MO_UU_Conv W64 W32) [x])
 | is32Bit = do
  -- truncation to 32 bits: just use the low register
  ChildCode64 code rlo <- iselExpr64 x
  return $ Fixed II32 rlo code

getRegister' _ is32Bit (CmmMachOp (MO_SS_Conv W64 W32) [x])
 | is32Bit = do
  ChildCode64 code rlo <- iselExpr64 x
  return $ Fixed II32 rlo code

-- Floating-point literals: 0.0 (and 1.0 on x87) get special-cased
-- register-only code; everything else is loaded from a memory constant.
getRegister' _ _ (CmmLit lit@(CmmFloat f w)) =
  if_sse2 float_const_sse2 float_const_x87
 where
  float_const_sse2
    | f == 0.0 = do
      let
          size = floatSize w
          code dst = unitOL  (XOR size (OpReg dst) (OpReg dst))
        -- I don't know why there are xorpd, xorps, and pxor instructions.
        -- They all appear to do the same thing --SDM
      return (Any size code)

    | otherwise = do
      Amode addr code <- memConstant (widthInBytes w) lit
      loadFloatAmode True w addr code

  float_const_x87 = case w of
    W64
      | f == 0.0 ->
        let code dst = unitOL (GLDZ dst)
        in  return (Any FF80 code)

      | f == 1.0 ->
        let code dst = unitOL (GLD1 dst)
        in  return (Any FF80 code)

    _otherwise -> do
      Amode addr code <- memConstant (widthInBytes w) lit
      loadFloatAmode False w addr code
493 494

-- catch simple cases of zero- or sign-extended load
getRegister' _ _ (CmmMachOp (MO_UU_Conv W8 W32) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVZxL II8) addr
  return (Any II32 code)

getRegister' _ _ (CmmMachOp (MO_SS_Conv W8 W32) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVSxL II8) addr
  return (Any II32 code)

getRegister' _ _ (CmmMachOp (MO_UU_Conv W16 W32) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVZxL II16) addr
  return (Any II32 code)

getRegister' _ _ (CmmMachOp (MO_SS_Conv W16 W32) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVSxL II16) addr
  return (Any II32 code)

-- catch simple cases of zero- or sign-extended load
getRegister' _ is32Bit (CmmMachOp (MO_UU_Conv W8 W64) [CmmLoad addr _])
 | not is32Bit = do
  code <- intLoadCode (MOVZxL II8) addr
  return (Any II64 code)

getRegister' _ is32Bit (CmmMachOp (MO_SS_Conv W8 W64) [CmmLoad addr _])
 | not is32Bit = do
  code <- intLoadCode (MOVSxL II8) addr
  return (Any II64 code)

getRegister' _ is32Bit (CmmMachOp (MO_UU_Conv W16 W64) [CmmLoad addr _])
 | not is32Bit = do
  code <- intLoadCode (MOVZxL II16) addr
  return (Any II64 code)

getRegister' _ is32Bit (CmmMachOp (MO_SS_Conv W16 W64) [CmmLoad addr _])
 | not is32Bit = do
  code <- intLoadCode (MOVSxL II16) addr
  return (Any II64 code)

getRegister' _ is32Bit (CmmMachOp (MO_UU_Conv W32 W64) [CmmLoad addr _])
 | not is32Bit = do
  code <- intLoadCode (MOV II32) addr -- 32-bit loads zero-extend
  return (Any II64 code)

getRegister' _ is32Bit (CmmMachOp (MO_SS_Conv W32 W64) [CmmLoad addr _])
 | not is32Bit = do
  code <- intLoadCode (MOVSxL II32) addr
  return (Any II64 code)

-- PIC base plus a literal displacement on x86_64: a single rip-relative
-- LEA does the whole job.
getRegister' _ is32Bit (CmmMachOp (MO_Add W64) [CmmReg (CmmGlobal PicBaseReg),
                                     CmmLit displacement])
 | not is32Bit = do
      return $ Any II64 (\dst -> unitOL $
        LEA II64 (OpAddr (ripRel (litToImm displacement))) (OpReg dst))

getRegister' dflags is32Bit (CmmMachOp mop [x]) = do -- unary MachOps
    sse2 <- sse2Enabled
    case mop of
      MO_F_Neg w
         | sse2      -> sse2NegCode w x
         | otherwise -> trivialUFCode FF80 (GNEG FF80) x

      MO_S_Neg w -> triv_ucode NEGI (intSize w)
      MO_Not w   -> triv_ucode NOT  (intSize w)

      -- Nop conversions
      MO_UU_Conv W32 W8  -> toI8Reg  W32 x
      MO_SS_Conv W32 W8  -> toI8Reg  W32 x
      MO_UU_Conv W16 W8  -> toI8Reg  W16 x
      MO_SS_Conv W16 W8  -> toI8Reg  W16 x
      MO_UU_Conv W32 W16 -> toI16Reg W32 x
      MO_SS_Conv W32 W16 -> toI16Reg W32 x

      MO_UU_Conv W64 W32 | not is32Bit -> conversionNop II64 x
      MO_SS_Conv W64 W32 | not is32Bit -> conversionNop II64 x
      MO_UU_Conv W64 W16 | not is32Bit -> toI16Reg W64 x
      MO_SS_Conv W64 W16 | not is32Bit -> toI16Reg W64 x
      MO_UU_Conv W64 W8  | not is32Bit -> toI8Reg  W64 x
      MO_SS_Conv W64 W8  | not is32Bit -> toI8Reg  W64 x

      MO_UU_Conv rep1 rep2 | rep1 == rep2 -> conversionNop (intSize rep1) x
      MO_SS_Conv rep1 rep2 | rep1 == rep2 -> conversionNop (intSize rep1) x

      -- widenings
      MO_UU_Conv W8  W32 -> integerExtend W8  W32 MOVZxL x
      MO_UU_Conv W16 W32 -> integerExtend W16 W32 MOVZxL x
      MO_UU_Conv W8  W16 -> integerExtend W8  W16 MOVZxL x

      MO_SS_Conv W8  W32 -> integerExtend W8  W32 MOVSxL x
      MO_SS_Conv W16 W32 -> integerExtend W16 W32 MOVSxL x
      MO_SS_Conv W8  W16 -> integerExtend W8  W16 MOVSxL x

      MO_UU_Conv W8  W64 | not is32Bit -> integerExtend W8  W64 MOVZxL x
      MO_UU_Conv W16 W64 | not is32Bit -> integerExtend W16 W64 MOVZxL x
      MO_UU_Conv W32 W64 | not is32Bit -> integerExtend W32 W64 MOVZxL x
      MO_SS_Conv W8  W64 | not is32Bit -> integerExtend W8  W64 MOVSxL x
      MO_SS_Conv W16 W64 | not is32Bit -> integerExtend W16 W64 MOVSxL x
      MO_SS_Conv W32 W64 | not is32Bit -> integerExtend W32 W64 MOVSxL x
        -- for 32-to-64 bit zero extension, amd64 uses an ordinary movl.
        -- However, we don't want the register allocator to throw it
        -- away as an unnecessary reg-to-reg move, so we keep it in
        -- the form of a movzl and print it as a movl later.

      MO_FF_Conv W32 W64
        | sse2      -> coerceFP2FP W64 x
        | otherwise -> conversionNop FF80 x

      MO_FF_Conv W64 W32 -> coerceFP2FP W32 x

      MO_FS_Conv from to -> coerceFP2Int from to x
      MO_SF_Conv from to -> coerceInt2FP from to x

      _other -> pprPanic "getRegister" (pprMachOp mop)
   where
        triv_ucode :: (Size -> Operand -> Instr) -> Size -> NatM Register
        triv_ucode instr size = trivialUCode size (instr size) x

        -- signed or unsigned extension.
        integerExtend :: Width -> Width
                      -> (Size -> Operand -> Operand -> Instr)
                      -> CmmExpr -> NatM Register
        integerExtend from to instr expr = do
            (reg,e_code) <- if from == W8 then getByteReg expr
                                          else getSomeReg expr
            let
                code dst =
                  e_code `snocOL`
                  instr (intSize from) (OpReg reg) (OpReg dst)
            return (Any (intSize to) code)

        toI8Reg :: Width -> CmmExpr -> NatM Register
        toI8Reg new_rep expr
            = do codefn <- getAnyReg expr
                 return (Any (intSize new_rep) codefn)
                -- HACK: use getAnyReg to get a byte-addressable register.
                -- If the source was a Fixed register, this will add the
                -- mov instruction to put it into the desired destination.
                -- We're assuming that the destination won't be a fixed
                -- non-byte-addressable register; it won't be, because all
                -- fixed registers are word-sized.

        toI16Reg = toI8Reg -- for now

        conversionNop :: Size -> CmmExpr -> NatM Register
        conversionNop new_size expr
            = do e_code <- getRegister' dflags is32Bit expr
                 return (swizzleRegisterRep e_code new_size)


getRegister' _ is32Bit (CmmMachOp mop [x, y]) = do -- dyadic MachOps
  sse2 <- sse2Enabled
  case mop of
      MO_F_Eq _ -> condFltReg is32Bit EQQ x y
      MO_F_Ne _ -> condFltReg is32Bit NE  x y
      MO_F_Gt _ -> condFltReg is32Bit GTT x y
      MO_F_Ge _ -> condFltReg is32Bit GE  x y
      MO_F_Lt _ -> condFltReg is32Bit LTT x y
      MO_F_Le _ -> condFltReg is32Bit LE  x y

      MO_Eq _   -> condIntReg EQQ x y
      MO_Ne _   -> condIntReg NE  x y

      MO_S_Gt _ -> condIntReg GTT x y
      MO_S_Ge _ -> condIntReg GE  x y
      MO_S_Lt _ -> condIntReg LTT x y
      MO_S_Le _ -> condIntReg LE  x y

      MO_U_Gt _ -> condIntReg GU  x y
      MO_U_Ge _ -> condIntReg GEU x y
      MO_U_Lt _ -> condIntReg LU  x y
      MO_U_Le _ -> condIntReg LEU x y

      MO_F_Add w  | sse2      -> trivialFCode_sse2 w ADD  x y
                  | otherwise -> trivialFCode_x87    GADD x y
      MO_F_Sub w  | sse2      -> trivialFCode_sse2 w SUB  x y
                  | otherwise -> trivialFCode_x87    GSUB x y
      MO_F_Quot w | sse2      -> trivialFCode_sse2 w FDIV x y
                  | otherwise -> trivialFCode_x87    GDIV x y
      MO_F_Mul w  | sse2      -> trivialFCode_sse2 w MUL x y
                  | otherwise -> trivialFCode_x87    GMUL x y

      MO_Add rep -> add_code rep x y
      MO_Sub rep -> sub_code rep x y

      MO_S_Quot rep -> div_code rep True  True  x y
      MO_S_Rem  rep -> div_code rep True  False x y
      MO_U_Quot rep -> div_code rep False True  x y
      MO_U_Rem  rep -> div_code rep False False x y

      MO_S_MulMayOflo rep -> imulMayOflo rep x y

      MO_Mul rep -> triv_op rep IMUL
      MO_And rep -> triv_op rep AND
      MO_Or  rep -> triv_op rep OR
      MO_Xor rep -> triv_op rep XOR

        {- Shift ops on x86s have constraints on their source, it
           either has to be Imm, CL or 1
            => trivialCode is not restrictive enough (sigh.)
        -}
      MO_Shl rep   -> shift_code rep SHL x y {-False-}
      MO_U_Shr rep -> shift_code rep SHR x y {-False-}
      MO_S_Shr rep -> shift_code rep SAR x y {-False-}

      _other -> pprPanic "getRegister(x86) - binary CmmMachOp (1)" (pprMachOp mop)
  where
    --------------------
    triv_op width instr = trivialCode width op (Just op) x y
                        where op   = instr (intSize width)

    -- Signed multiply, leaving eax zero iff the full result fits in one
    -- word: multiply into %edx:%eax, sign-extend the low word, and
    -- compare it against the high word.
    imulMayOflo :: Width -> CmmExpr -> CmmExpr -> NatM Register
    imulMayOflo rep a b = do
         (a_reg, a_code) <- getNonClobberedReg a
         b_code <- getAnyReg b
         let
             shift_amt  = case rep of
                           W32 -> 31
                           W64 -> 63
                           _ -> panic "shift_amt"

             size = intSize rep
             code = a_code `appOL` b_code eax `appOL`
                        toOL [
                           IMUL2 size (OpReg a_reg),   -- result in %edx:%eax
                           SAR size (OpImm (ImmInt shift_amt)) (OpReg eax),
                                -- sign extend lower part
                           SUB size (OpReg edx) (OpReg eax)
                                -- compare against upper
                           -- eax==0 if high part == sign extended low part
                        ]
         return (Fixed size eax code)

    --------------------
    shift_code :: Width
               -> (Size -> Operand -> Operand -> Instr)
               -> CmmExpr
               -> CmmExpr
               -> NatM Register

    {- Case1: shift length as immediate -}
    shift_code width instr x (CmmLit lit) = do
          x_code <- getAnyReg x
          let
               size = intSize width
               code dst
                  = x_code dst `snocOL`
                    instr size (OpImm (litToImm lit)) (OpReg dst)
          return (Any size code)

    {- Case2: shift length is complex (non-immediate)
      * y must go in %ecx.
      * we cannot do y first *and* put its result in %ecx, because
        %ecx might be clobbered by x.
      * if we do y second, then x cannot be
        in a clobbered reg.  Also, we cannot clobber x's reg
        with the instruction itself.
      * so we can either:
        - do y first, put its result in a fresh tmp, then copy it to %ecx later
        - do y second and put its result into %ecx.  x gets placed in a fresh
          tmp.  This is likely to be better, because the reg alloc can
          eliminate this reg->reg move here (it won't eliminate the other one,
          because the move is into the fixed %ecx).
    -}
    shift_code width instr x y{-amount-} = do
        x_code <- getAnyReg x
        let size = intSize width
        tmp <- getNewRegNat size
        y_code <- getAnyReg y
        let
           code = x_code tmp `appOL`
                  y_code ecx `snocOL`
                  instr size (OpReg ecx) (OpReg tmp)
        return (Fixed size tmp code)

    --------------------
    add_code :: Width -> CmmExpr -> CmmExpr -> NatM Register
    add_code rep x (CmmLit (CmmInt y _))
        | is32BitInteger y = add_int rep x y
    add_code rep x y = trivialCode rep (ADD size) (Just (ADD size)) x y
      where size = intSize rep

    --------------------
    sub_code :: Width -> CmmExpr -> CmmExpr -> NatM Register
    sub_code rep x (CmmLit (CmmInt y _))
        | is32BitInteger (-y) = add_int rep x (-y)
    sub_code rep x y = trivialCode rep (SUB (intSize rep)) Nothing x y

    -- our three-operand add instruction:
    add_int width x y = do
        (x_reg, x_code) <- getSomeReg x
        let
            size = intSize width
            imm = ImmInt (fromInteger y)
            code dst
               = x_code `snocOL`
                 LEA size
                        (OpAddr (AddrBaseIndex (EABaseReg x_reg) EAIndexNone imm))
                        (OpReg dst)
        --
        return (Any size code)

    ----------------------
    -- Division/remainder via IDIV/DIV: dividend in %edx:%eax (widened by
    -- CLTD for signed, or by zeroing %edx for unsigned); quotient lands
    -- in %eax, remainder in %edx.
    div_code width signed quotient x y = do
           (y_op, y_code) <- getRegOrMem y -- cannot be clobbered
           x_code <- getAnyReg x
           let
             size = intSize width
             widen | signed    = CLTD size
                   | otherwise = XOR size (OpReg edx) (OpReg edx)

             instr | signed    = IDIV
                   | otherwise = DIV

             code = y_code `appOL`
                    x_code eax `appOL`
                    toOL [widen, instr size y_op]

             result | quotient  = eax
                    | otherwise = edx

           return (Fixed size result code)


816
getRegister' _ _ (CmmLoad mem pk)
817 818
  | isFloatType pk
  = do
819 820 821
    Amode addr mem_code <- getAmode mem
    use_sse2 <- sse2Enabled
    loadFloatAmode use_sse2 (typeWidth pk) addr mem_code
822

823
getRegister' _ is32Bit (CmmLoad mem pk)
824
  | is32Bit && not (isWord64 pk)
825
  = do
826 827 828 829 830 831
    code <- intLoadCode instr mem
    return (Any size code)
  where
    width = typeWidth pk
    size = intSize width
    instr = case width of
832 833 834 835 836 837 838
                W8     -> MOVZxL II8
                _other -> MOV size
        -- We always zero-extend 8-bit loads, if we
        -- can't think of anything better.  This is because
        -- we can't guarantee access to an 8-bit variant of every register
        -- (esi and edi don't have 8-bit variants), so to make things
        -- simpler we do our 8-bit arithmetic with full 32-bit registers.
839 840

-- Simpler memory load code on x86_64
841
getRegister' _ is32Bit (CmmLoad mem pk)
842
 | not is32Bit
843
  = do
844 845 846 847
    code <- intLoadCode (MOV size) mem
    return (Any size code)
  where size = intSize $ typeWidth pk

848
getRegister' _ is32Bit (CmmLit (CmmInt 0 width))
849
  = let
850
        size = intSize width
851

852
        -- x86_64: 32-bit xor is one byte shorter, and zero-extends to 64 bits
853 854 855 856
        size1 = if is32Bit then size
                           else case size of
                                II64 -> II32
                                _ -> size
857
        code dst
858 859
           = unitOL (XOR size1 (OpReg dst) (OpReg dst))
    in
860
        return (Any size code)
861 862 863 864

  -- optimisation for loading small literals on x86_64: take advantage
  -- of the automatic zero-extension from 32 to 64 bits, because the 32-bit
  -- instruction forms are shorter.
865 866
getRegister' dflags is32Bit (CmmLit lit)
  | not is32Bit, isWord64 (cmmLitType dflags lit), not (isBigLit lit)
867 868 869
  = let
        imm = litToImm lit
        code dst = unitOL (MOV II32 (OpImm imm) (OpReg dst))
870
    in
871
        return (Any II64 code)
872 873 874
  where
   isBigLit (CmmInt i _) = i < 0 || i > 0xffffffff
   isBigLit _ = False
875 876 877 878 879
        -- note1: not the same as (not.is32BitLit), because that checks for
        -- signed literals that fit in 32 bits, but we want unsigned
        -- literals here.
        -- note2: all labels are small, because we're assuming the
        -- small memory model (see gcc docs, -mcmodel=small).
880

881 882 883 884 885
getRegister' dflags _ (CmmLit lit)
  = do let size = cmmTypeSize (cmmLitType dflags lit)
           imm = litToImm lit
           code dst = unitOL (MOV size (OpImm imm) (OpReg dst))
       return (Any size code)
886

887
getRegister' _ _ other = pprPanic "getRegister(x86)" (ppr other)
888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911


intLoadCode :: (Operand -> Operand -> Instr) -> CmmExpr
   -> NatM (Reg -> InstrBlock)
intLoadCode instr mem = do
  Amode src mem_code <- getAmode mem
  return (\dst -> mem_code `snocOL` instr (OpAddr src) (OpReg dst))

-- Compute an expression into *any* register, adding the appropriate
-- move instruction if necessary.
getAnyReg :: CmmExpr -> NatM (Reg -> InstrBlock)
getAnyReg expr = do
  r <- getRegister expr
  anyReg r

anyReg :: Register -> NatM (Reg -> InstrBlock)
anyReg (Any _ code)          = return code
anyReg (Fixed rep reg fcode) = return (\dst -> fcode `snocOL` reg2reg rep reg dst)

-- A bit like getSomeReg, but we want a reg that can be byte-addressed.
-- Fixed registers might not be byte-addressable, so we make sure we've
-- got a temporary, inserting an extra reg copy if necessary.
getByteReg :: CmmExpr -> NatM (Reg, InstrBlock)
getByteReg expr = do
Ian Lynagh's avatar
Ian Lynagh committed
912 913
  is32Bit <- is32BitPlatform
  if is32Bit
914 915 916 917