CodeGen.hs 138 KB
Newer Older
1 2
{-# LANGUAGE CPP, GADTs, NondecreasingIndentation #-}

3 4 5 6
-- The default iteration limit is a bit too low for the definitions
-- in this module.
{-# OPTIONS_GHC -fmax-pmcheck-iterations=10000000 #-}

7 8 9 10 11 12 13 14 15
-----------------------------------------------------------------------------
--
-- Generating machine code (instruction selection)
--
-- (c) The University of Glasgow 1996-2004
--
-----------------------------------------------------------------------------

-- This is a big module, but, if you pay attention to
Ian Lynagh's avatar
Ian Lynagh committed
16
-- (a) the sectioning, and (b) the type signatures, the
17 18
-- structure should not be too overwhelming.

19 20 21
module X86.CodeGen (
        cmmTopCodeGen,
        generateJumpTableForInstr,
22
        extractUnwindPoints,
23
        invertCondBranches,
24 25
        InstrBlock
)
26 27 28 29 30

where

#include "HsVersions.h"
#include "nativeGen/NCG.h"
Simon Marlow's avatar
Simon Marlow committed
31
#include "../includes/MachDeps.h"
32 33

-- NCG stuff:
34 35
import GhcPrelude

36 37 38 39
import X86.Instr
import X86.Cond
import X86.Regs
import X86.RegInfo
40 41 42 43

--TODO: Remove - Just for development/debugging
import X86.Ppr()

44
import CodeGen.Platform
tibbe's avatar
tibbe committed
45
import CPrim
46 47
import Debug            ( DebugBlock(..), UnwindPoint(..), UnwindTable
                        , UnwindExpr(UwReg), toUnwindExpr )
48 49
import Instruction
import PIC
50 51 52 53 54
import NCGMonad   ( NatM, getNewRegNat, getNewLabelNat, setDeltaNat
                  , getDeltaNat, getBlockIdNat, getPicBaseNat, getNewRegPairNat
                  , getPicBaseMaybeNat, getDebugBlock, getFileId
                  , addImmediateSuccessorNat, updateCfgNat)
import CFG
55
import Format
56 57 58 59 60 61
import Reg
import Platform

-- Our intermediate code:
import BasicTypes
import BlockId
62
import Module           ( primUnitId )
63
import PprCmm           ()
64
import CmmUtils
65
import CmmSwitch
66
import Cmm
67
import Hoopl.Block
68
import Hoopl.Collections
69
import Hoopl.Graph
70
import Hoopl.Label
71
import CLabel
72 73
import CoreSyn          ( Tickish(..) )
import SrcLoc           ( srcSpanFile, srcSpanStartLine, srcSpanStartCol )
74 75

-- The rest:
76
import ForeignCall      ( CCallConv(..) )
77 78 79 80
import OrdList
import Outputable
import FastString
import DynFlags
81
import Util
82
import UniqSupply       ( getUniqueM )
83

84
import Control.Monad
tibbe's avatar
tibbe committed
85
import Data.Bits
86
import Data.Foldable (fold)
87
import Data.Int
88
import Data.Maybe
dterei's avatar
dterei committed
89 90
import Data.Word

91 92
import qualified Data.Map as M

Ian Lynagh's avatar
Ian Lynagh committed
93 94
is32BitPlatform :: NatM Bool
is32BitPlatform = do
95
    dflags <- getDynFlags
Ian Lynagh's avatar
Ian Lynagh committed
96 97
    return $ target32Bit (targetPlatform dflags)

98 99
sse2Enabled :: NatM Bool
sse2Enabled = do
100
  dflags <- getDynFlags
101
  return (isSse2Enabled dflags)
102

tibbe's avatar
tibbe committed
103 104
sse4_2Enabled :: NatM Bool
sse4_2Enabled = do
105
  dflags <- getDynFlags
106
  return (isSse4_2Enabled dflags)
tibbe's avatar
tibbe committed
107

108 109 110 111
if_sse2 :: NatM a -> NatM a -> NatM a
if_sse2 sse2 x87 = do
  b <- sse2Enabled
  if b then sse2 else x87
112

113
cmmTopCodeGen
Simon Peyton Jones's avatar
Simon Peyton Jones committed
114 115
        :: RawCmmDecl
        -> NatM [NatCmmDecl (Alignment, CmmStatics) Instr]
116

117 118
cmmTopCodeGen (CmmProc info lab live graph) = do
  let blocks = toBlockListEntryFirst graph
119 120
  (nat_blocks,statics) <- mapAndUnzipM basicBlockCodeGen blocks
  picBaseMb <- getPicBaseMaybeNat
121
  dflags <- getDynFlags
122
  let proc = CmmProc info lab live (ListGraph $ concat nat_blocks)
123
      tops = proc : concat statics
124
      os   = platformOS $ targetPlatform dflags
125 126 127 128

  case picBaseMb of
      Just picBase -> initializePicBase_x86 ArchX86 os picBase tops
      Nothing -> return tops
129

Ian Lynagh's avatar
Ian Lynagh committed
130
cmmTopCodeGen (CmmData sec dat) = do
131
  return [CmmData sec (mkAlignment 1, dat)]  -- no translation, we just use CmmStatic
132 133


134
basicBlockCodeGen
135
        :: CmmBlock
136
        -> NatM ( [NatBasicBlock Instr]
Simon Peyton Jones's avatar
Simon Peyton Jones committed
137
                , [NatCmmDecl (Alignment, CmmStatics) Instr])
138

139
basicBlockCodeGen block = do
Peter Wortmann's avatar
Peter Wortmann committed
140 141
  let (_, nodes, tail)  = blockSplit block
      id = entryLabel block
142
      stmts = blockToList nodes
143 144 145 146 147 148 149 150
  -- Generate location directive
  dbg <- getDebugBlock (entryLabel block)
  loc_instrs <- case dblSourceTick =<< dbg of
    Just (SourceNote span name)
      -> do fileId <- getFileId (srcSpanFile span)
            let line = srcSpanStartLine span; col = srcSpanStartCol span
            return $ unitOL $ LOCATION fileId line col name
    _ -> return nilOL
151 152
  mid_instrs <- stmtsToInstrs id stmts
  tail_instrs <- stmtToInstrs id tail
153
  let instrs = loc_instrs `appOL` mid_instrs `appOL` tail_instrs
154
  instrs' <- fold <$> traverse addSpUnwindings instrs
155 156 157 158 159
  -- code generation may introduce new basic block boundaries, which
  -- are indicated by the NEWBLOCK instruction.  We must split up the
  -- instruction stream into basic blocks again.  Also, we extract
  -- LDATAs here too.
  let
160
        (top,other_blocks,statics) = foldrOL mkBlocks ([],[],[]) instrs'
161 162 163 164 165 166 167

        mkBlocks (NEWBLOCK id) (instrs,blocks,statics)
          = ([], BasicBlock id instrs : blocks, statics)
        mkBlocks (LDATA sec dat) (instrs,blocks,statics)
          = (instrs, blocks, CmmData sec dat:statics)
        mkBlocks instr (instrs,blocks,statics)
          = (instr:instrs, blocks, statics)
168 169
  return (BasicBlock id top : other_blocks, statics)

170 171 172 173 174 175 176
-- | Convert 'DELTA' instructions into 'UNWIND' instructions to capture changes
-- in the @sp@ register. See Note [What is this unwinding business?] in Debug
-- for details.
addSpUnwindings :: Instr -> NatM (OrdList Instr)
addSpUnwindings instr@(DELTA d) = do
    dflags <- getDynFlags
    if debugLevel dflags >= 1
177
        then do lbl <- mkAsmTempLabel <$> getUniqueM
178
                let unwind = M.singleton MachSp (Just $ UwReg MachSp $ negate d)
179 180 181
                return $ toOL [ instr, UNWIND lbl unwind ]
        else return (unitOL instr)
addSpUnwindings instr = return $ unitOL instr
182

183 184 185
stmtsToInstrs :: BlockId -> [CmmNode e x] -> NatM InstrBlock
stmtsToInstrs bid stmts
   = do instrss <- mapM (stmtToInstrs bid) stmts
186 187
        return (concatOL instrss)

188 189 190 191
-- | `bid` refers to the current block and is used to update the CFG
--   if new blocks are inserted in the control flow.
stmtToInstrs :: BlockId -> CmmNode e x -> NatM InstrBlock
stmtToInstrs bid stmt = do
192
  dflags <- getDynFlags
Ian Lynagh's avatar
Ian Lynagh committed
193
  is32Bit <- is32BitPlatform
194
  case stmt of
195
    CmmComment s   -> return (unitOL (COMMENT s))
Peter Wortmann's avatar
Peter Wortmann committed
196
    CmmTick {}     -> return nilOL
197 198

    CmmUnwind regs -> do
199 200
      let to_unwind_entry :: (GlobalReg, Maybe CmmExpr) -> UnwindTable
          to_unwind_entry (reg, expr) = M.singleton reg (fmap toUnwindExpr expr)
201 202 203
      case foldMap to_unwind_entry regs of
        tbl | M.null tbl -> return nilOL
            | otherwise  -> do
204
                lbl <- mkAsmTempLabel <$> getUniqueM
205
                return $ unitOL $ UNWIND lbl tbl
206 207

    CmmAssign reg src
208
      | isFloatType ty         -> assignReg_FltCode format reg src
209
      | is32Bit && isWord64 ty -> assignReg_I64Code      reg src
210
      | otherwise              -> assignReg_IntCode format reg src
211
        where ty = cmmRegType dflags reg
212
              format = cmmTypeFormat ty
213 214

    CmmStore addr src
215
      | isFloatType ty         -> assignMem_FltCode format addr src
216
      | is32Bit && isWord64 ty -> assignMem_I64Code      addr src
217
      | otherwise              -> assignMem_IntCode format addr src
218
        where ty = cmmExprType dflags src
219
              format = cmmTypeFormat ty
220

221
    CmmUnsafeForeignCall target result_regs args
222
       -> genCCall dflags is32Bit target result_regs args bid
223

224
    CmmBranch id          -> return $ genBranch id
225 226 227

    --We try to arrange blocks such that the likely branch is the fallthrough
    --in CmmContFlowOpt. So we can assume the condition is likely false here.
228
    CmmCondBranch arg true false _ -> genCondBranch bid true false arg
229 230
    CmmSwitch arg ids -> do dflags <- getDynFlags
                            genSwitch dflags arg ids
231 232 233
    CmmCall { cml_target = arg
            , cml_args_regs = gregs } -> do
                                dflags <- getDynFlags
234
                                genJump arg (jumpRegs dflags gregs)
235 236
    _ ->
      panic "stmtToInstrs: statement should have been cps'd away"
237 238


239 240
jumpRegs :: DynFlags -> [GlobalReg] -> [Reg]
jumpRegs dflags gregs = [ RegReal r | Just r <- map (globalRegMaybe platform) gregs ]
241
    where platform = targetPlatform dflags
242

243 244
--------------------------------------------------------------------------------
-- | 'InstrBlock's are the insn sequences generated by the insn selectors.
245 246
--      They are really trees of insns to facilitate fast appending, where a
--      left-to-right traversal yields the insns in the correct order.
247
--
248 249
type InstrBlock
        = OrdList Instr
250 251 252 253


-- | Condition codes passed up the tree.
--
254 255
data CondCode
        = CondCode Bool Cond InstrBlock
256 257 258


-- | a.k.a "Register64"
259 260
--      Reg is the lower 32-bit temporary which contains the result.
--      Use getHiVRegFromLo to find the other VRegUnique.
261
--
262 263
--      Rules of this simplified insn selection game are therefore that
--      the returned Reg may be modified
264
--
265 266
data ChildCode64
   = ChildCode64
267
        InstrBlock
268
        Reg
269 270 271


-- | Register's passed up the tree.  If the stix code forces the register
272 273 274
--      to live in a pre-decided machine register, it comes out as @Fixed@;
--      otherwise, it comes out as @Any@, and the parent can decide which
--      register to put it in.
275 276
--
data Register
277 278
        = Fixed Format Reg InstrBlock
        | Any   Format (Reg -> InstrBlock)
279 280


281 282 283
swizzleRegisterRep :: Register -> Format -> Register
swizzleRegisterRep (Fixed _ reg code) format = Fixed format reg code
swizzleRegisterRep (Any _ codefn)     format = Any   format codefn
284 285 286


-- | Grab the Reg for a CmmReg
287
getRegisterReg :: Platform -> Bool -> CmmReg -> Reg
288

289
getRegisterReg _ use_sse2 (CmmLocal (LocalReg u pk))
290 291
  = let fmt = cmmTypeFormat pk in
    if isFloatFormat fmt && not use_sse2
292
       then RegVirtual (mkVirtualReg u FF80)
293
       else RegVirtual (mkVirtualReg u fmt)
294

295 296
getRegisterReg platform _ (CmmGlobal mid)
  = case globalRegMaybe platform mid of
297 298 299 300 301
        Just reg -> RegReal $ reg
        Nothing  -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid)
        -- By this stage, the only MagicIds remaining should be the
        -- ones which map to a real machine register on this
        -- platform.  Hence ...
302 303 304


-- | Memory addressing modes passed up the tree.
305 306
data Amode
        = Amode AddrMode InstrBlock
307 308

{-
Gabor Greif's avatar
Gabor Greif committed
309
Now, given a tree (the argument to a CmmLoad) that references memory,
310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327
produce a suitable addressing mode.

A Rule of the Game (tm) for Amodes: use of the addr bit must
immediately follow use of the code part, since the code part puts
values in registers which the addr then refers to.  So you can't put
anything in between, lest it overwrite some of those registers.  If
you need to do some other computation between the code part and use of
the addr bit, first store the effective address from the amode in a
temporary, then do the other computation, and then use the temporary:

    code
    LEA amode, tmp
    ... other computation ...
    ... (tmp) ...
-}


-- | Check whether an integer will fit in 32 bits.
328 329 330 331
--      A CmmInt is intended to be truncated to the appropriate
--      number of bits, so here we truncate it to Int64.  This is
--      important because e.g. -1 as a CmmInt might be either
--      -1 or 18446744073709551615.
332 333 334 335 336 337 338
--
is32BitInteger :: Integer -> Bool
is32BitInteger i = i64 <= 0x7fffffff && i64 >= -0x80000000
  where i64 = fromIntegral i :: Int64


-- | Convert a BlockId to some CmmStatic data
339 340 341
jumpTableEntry :: DynFlags -> Maybe BlockId -> CmmStatic
jumpTableEntry dflags Nothing = CmmStaticLit (CmmInt 0 (wordWidth dflags))
jumpTableEntry _ (Just blockid) = CmmStaticLit (CmmLabel blockLabel)
342
    where blockLabel = blockLbl blockid
343 344 345 346 347 348 349


-- -----------------------------------------------------------------------------
-- General things for putting together code sequences

-- Expand CmmRegOff.  ToDo: should we do it this way around, or convert
-- CmmExprs into CmmRegOff?
350 351
mangleIndexTree :: DynFlags -> CmmReg -> Int -> CmmExpr
mangleIndexTree dflags reg off
352
  = CmmMachOp (MO_Add width) [CmmReg reg, CmmLit (CmmInt (fromIntegral off) width)]
353
  where width = typeWidth (cmmRegType dflags reg)
354 355

-- | The dual to getAnyReg: compute an expression into a register, but
356
--      we don't mind which one it is.
357 358 359 360 361
getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
getSomeReg expr = do
  r <- getRegister expr
  case r of
    Any rep code -> do
362 363 364 365
        tmp <- getNewRegNat rep
        return (tmp, code tmp)
    Fixed _ reg code ->
        return (reg, code)
366 367 368 369 370 371


assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock
assignMem_I64Code addrTree valueTree = do
  Amode addr addr_code <- getAmode addrTree
  ChildCode64 vcode rlo <- iselExpr64 valueTree
372
  let
373 374 375 376 377 378 379 380 381
        rhi = getHiVRegFromLo rlo

        -- Little-endian store
        mov_lo = MOV II32 (OpReg rlo) (OpAddr addr)
        mov_hi = MOV II32 (OpReg rhi) (OpAddr (fromJust (addrOffset addr 4)))
  return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)


assignReg_I64Code :: CmmReg  -> CmmExpr -> NatM InstrBlock
dterei's avatar
dterei committed
382
assignReg_I64Code (CmmLocal (LocalReg u_dst _)) valueTree = do
383
   ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
384
   let
385
         r_dst_lo = RegVirtual $ mkVirtualReg u_dst II32
386 387 388 389 390 391 392 393
         r_dst_hi = getHiVRegFromLo r_dst_lo
         r_src_hi = getHiVRegFromLo r_src_lo
         mov_lo = MOV II32 (OpReg r_src_lo) (OpReg r_dst_lo)
         mov_hi = MOV II32 (OpReg r_src_hi) (OpReg r_dst_hi)
   return (
        vcode `snocOL` mov_lo `snocOL` mov_hi
     )

dterei's avatar
dterei committed
394
assignReg_I64Code _ _
395 396 397 398 399 400 401
   = panic "assignReg_I64Code(i386): invalid lvalue"


iselExpr64        :: CmmExpr -> NatM ChildCode64
iselExpr64 (CmmLit (CmmInt i _)) = do
  (rlo,rhi) <- getNewRegPairNat II32
  let
402
        r = fromIntegral (fromIntegral i :: Word32)
403
        q = fromIntegral (fromIntegral (i `shiftR` 32) :: Word32)
404 405 406 407
        code = toOL [
                MOV II32 (OpImm (ImmInteger r)) (OpReg rlo),
                MOV II32 (OpImm (ImmInteger q)) (OpReg rhi)
                ]
408 409 410 411 412
  return (ChildCode64 code rlo)

iselExpr64 (CmmLoad addrTree ty) | isWord64 ty = do
   Amode addr addr_code <- getAmode addrTree
   (rlo,rhi) <- getNewRegPairNat II32
413
   let
414 415 416
        mov_lo = MOV II32 (OpAddr addr) (OpReg rlo)
        mov_hi = MOV II32 (OpAddr (fromJust (addrOffset addr 4))) (OpReg rhi)
   return (
417
            ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
418 419 420 421
                        rlo
     )

iselExpr64 (CmmReg (CmmLocal (LocalReg vu ty))) | isWord64 ty
422
   = return (ChildCode64 nilOL (RegVirtual $ mkVirtualReg vu II32))
423

424 425 426 427 428
-- we handle addition, but rather badly
iselExpr64 (CmmMachOp (MO_Add _) [e1, CmmLit (CmmInt i _)]) = do
   ChildCode64 code1 r1lo <- iselExpr64 e1
   (rlo,rhi) <- getNewRegPairNat II32
   let
429
        r = fromIntegral (fromIntegral i :: Word32)
430
        q = fromIntegral (fromIntegral (i `shiftR` 32) :: Word32)
431 432 433 434 435 436
        r1hi = getHiVRegFromLo r1lo
        code =  code1 `appOL`
                toOL [ MOV II32 (OpReg r1lo) (OpReg rlo),
                       ADD II32 (OpImm (ImmInteger r)) (OpReg rlo),
                       MOV II32 (OpReg r1hi) (OpReg rhi),
                       ADC II32 (OpImm (ImmInteger q)) (OpReg rhi) ]
437 438 439 440 441 442 443
   return (ChildCode64 code rlo)

iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
   ChildCode64 code1 r1lo <- iselExpr64 e1
   ChildCode64 code2 r2lo <- iselExpr64 e2
   (rlo,rhi) <- getNewRegPairNat II32
   let
444 445 446 447 448 449 450 451
        r1hi = getHiVRegFromLo r1lo
        r2hi = getHiVRegFromLo r2lo
        code =  code1 `appOL`
                code2 `appOL`
                toOL [ MOV II32 (OpReg r1lo) (OpReg rlo),
                       ADD II32 (OpReg r2lo) (OpReg rlo),
                       MOV II32 (OpReg r1hi) (OpReg rhi),
                       ADC II32 (OpReg r2hi) (OpReg rhi) ]
452 453
   return (ChildCode64 code rlo)

454 455 456 457 458 459 460 461 462 463 464 465 466 467 468
iselExpr64 (CmmMachOp (MO_Sub _) [e1,e2]) = do
   ChildCode64 code1 r1lo <- iselExpr64 e1
   ChildCode64 code2 r2lo <- iselExpr64 e2
   (rlo,rhi) <- getNewRegPairNat II32
   let
        r1hi = getHiVRegFromLo r1lo
        r2hi = getHiVRegFromLo r2lo
        code =  code1 `appOL`
                code2 `appOL`
                toOL [ MOV II32 (OpReg r1lo) (OpReg rlo),
                       SUB II32 (OpReg r2lo) (OpReg rlo),
                       MOV II32 (OpReg r1hi) (OpReg rhi),
                       SBB II32 (OpReg r2hi) (OpReg rhi) ]
   return (ChildCode64 code rlo)

469 470 471 472 473 474
iselExpr64 (CmmMachOp (MO_UU_Conv _ W64) [expr]) = do
     fn <- getAnyReg expr
     r_dst_lo <-  getNewRegNat II32
     let r_dst_hi = getHiVRegFromLo r_dst_lo
         code = fn r_dst_lo
     return (
475
             ChildCode64 (code `snocOL`
476 477 478 479
                          MOV II32 (OpImm (ImmInt 0)) (OpReg r_dst_hi))
                          r_dst_lo
            )

480 481 482 483 484 485 486 487 488 489 490 491 492 493
iselExpr64 (CmmMachOp (MO_SS_Conv W32 W64) [expr]) = do
     fn <- getAnyReg expr
     r_dst_lo <-  getNewRegNat II32
     let r_dst_hi = getHiVRegFromLo r_dst_lo
         code = fn r_dst_lo
     return (
             ChildCode64 (code `snocOL`
                          MOV II32 (OpReg r_dst_lo) (OpReg eax) `snocOL`
                          CLTD II32 `snocOL`
                          MOV II32 (OpReg eax) (OpReg r_dst_lo) `snocOL`
                          MOV II32 (OpReg edx) (OpReg r_dst_hi))
                          r_dst_lo
            )

494
iselExpr64 expr
Ian Lynagh's avatar
Ian Lynagh committed
495
   = pprPanic "iselExpr64(i386)" (ppr expr)
496 497 498 499


--------------------------------------------------------------------------------
getRegister :: CmmExpr -> NatM Register
500 501 502
getRegister e = do dflags <- getDynFlags
                   is32Bit <- is32BitPlatform
                   getRegister' dflags is32Bit e
503

504
getRegister' :: DynFlags -> Bool -> CmmExpr -> NatM Register
505

506
getRegister' dflags is32Bit (CmmReg reg)
507 508 509 510 511 512
  = case reg of
        CmmGlobal PicBaseReg
         | is32Bit ->
            -- on x86_64, we have %rip for PicBaseReg, but it's not
            -- a full-featured register, it can only be used for
            -- rip-relative addressing.
513 514
            do reg' <- getPicBaseNat (archWordFormat is32Bit)
               return (Fixed (archWordFormat is32Bit) reg' nilOL)
515 516 517
        _ ->
            do use_sse2 <- sse2Enabled
               let
518 519 520
                 fmt = cmmTypeFormat (cmmRegType dflags reg)
                 format | not use_sse2 && isFloatFormat fmt = FF80
                        | otherwise                         = fmt
521
               --
522
               let platform = targetPlatform dflags
523 524 525
               return (Fixed format
                             (getRegisterReg platform use_sse2 reg)
                             nilOL)
526 527


528 529
getRegister' dflags is32Bit (CmmRegOff r n)
  = getRegister' dflags is32Bit $ mangleIndexTree dflags r n
530

Ben Gamari's avatar
Ben Gamari committed
531 532 533
getRegister' dflags is32Bit (CmmMachOp (MO_AlignmentCheck align _) [e])
  = addAlignmentCheck align <$> getRegister' dflags is32Bit e

534
-- for 32-bit architectures, support some 64 -> 32 bit conversions:
535 536
-- TO_W_(x), TO_W_(x >> 32)

537
getRegister' _ is32Bit (CmmMachOp (MO_UU_Conv W64 W32)
538 539
                     [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]])
 | is32Bit = do
540 541 542
  ChildCode64 code rlo <- iselExpr64 x
  return $ Fixed II32 (getHiVRegFromLo rlo) code

543
getRegister' _ is32Bit (CmmMachOp (MO_SS_Conv W64 W32)
544 545
                     [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]])
 | is32Bit = do
546 547 548
  ChildCode64 code rlo <- iselExpr64 x
  return $ Fixed II32 (getHiVRegFromLo rlo) code

549
getRegister' _ is32Bit (CmmMachOp (MO_UU_Conv W64 W32) [x])
550
 | is32Bit = do
551 552 553
  ChildCode64 code rlo <- iselExpr64 x
  return $ Fixed II32 rlo code

554
getRegister' _ is32Bit (CmmMachOp (MO_SS_Conv W64 W32) [x])
555
 | is32Bit = do
556
  ChildCode64 code rlo <- iselExpr64 x
557
  return $ Fixed II32 rlo code
558

559
getRegister' _ _ (CmmLit lit@(CmmFloat f w)) =
560 561 562 563 564
  if_sse2 float_const_sse2 float_const_x87
 where
  float_const_sse2
    | f == 0.0 = do
      let
565 566
          format = floatFormat w
          code dst = unitOL  (XOR format (OpReg dst) (OpReg dst))
567 568
        -- I don't know why there are xorpd, xorps, and pxor instructions.
        -- They all appear to do the same thing --SDM
569
      return (Any format code)
570 571

   | otherwise = do
572
      Amode addr code <- memConstant (mkAlignment $ widthInBytes w) lit
573 574 575 576 577 578 579
      loadFloatAmode True w addr code

  float_const_x87 = case w of
    W64
      | f == 0.0 ->
        let code dst = unitOL (GLDZ dst)
        in  return (Any FF80 code)
580

581 582 583
      | f == 1.0 ->
        let code dst = unitOL (GLD1 dst)
        in  return (Any FF80 code)
584

585
    _otherwise -> do
586
      Amode addr code <- memConstant (mkAlignment $ widthInBytes w) lit
587
      loadFloatAmode False w addr code
588 589

-- catch simple cases of zero- or sign-extended load
590
getRegister' _ _ (CmmMachOp (MO_UU_Conv W8 W32) [CmmLoad addr _]) = do
591 592 593
  code <- intLoadCode (MOVZxL II8) addr
  return (Any II32 code)

594
getRegister' _ _ (CmmMachOp (MO_SS_Conv W8 W32) [CmmLoad addr _]) = do
595 596 597
  code <- intLoadCode (MOVSxL II8) addr
  return (Any II32 code)

598
getRegister' _ _ (CmmMachOp (MO_UU_Conv W16 W32) [CmmLoad addr _]) = do
599 600 601
  code <- intLoadCode (MOVZxL II16) addr
  return (Any II32 code)

602
getRegister' _ _ (CmmMachOp (MO_SS_Conv W16 W32) [CmmLoad addr _]) = do
603 604 605 606
  code <- intLoadCode (MOVSxL II16) addr
  return (Any II32 code)

-- catch simple cases of zero- or sign-extended load
607
getRegister' _ is32Bit (CmmMachOp (MO_UU_Conv W8 W64) [CmmLoad addr _])
608
 | not is32Bit = do
609 610 611
  code <- intLoadCode (MOVZxL II8) addr
  return (Any II64 code)

612
getRegister' _ is32Bit (CmmMachOp (MO_SS_Conv W8 W64) [CmmLoad addr _])
613
 | not is32Bit = do
614 615 616
  code <- intLoadCode (MOVSxL II8) addr
  return (Any II64 code)

617
getRegister' _ is32Bit (CmmMachOp (MO_UU_Conv W16 W64) [CmmLoad addr _])
618
 | not is32Bit = do
619 620 621
  code <- intLoadCode (MOVZxL II16) addr
  return (Any II64 code)

622
getRegister' _ is32Bit (CmmMachOp (MO_SS_Conv W16 W64) [CmmLoad addr _])
623
 | not is32Bit = do
624 625 626
  code <- intLoadCode (MOVSxL II16) addr
  return (Any II64 code)

627
getRegister' _ is32Bit (CmmMachOp (MO_UU_Conv W32 W64) [CmmLoad addr _])
628
 | not is32Bit = do
629 630 631
  code <- intLoadCode (MOV II32) addr -- 32-bit loads zero-extend
  return (Any II64 code)

632
getRegister' _ is32Bit (CmmMachOp (MO_SS_Conv W32 W64) [CmmLoad addr _])
633
 | not is32Bit = do
634 635 636
  code <- intLoadCode (MOVSxL II32) addr
  return (Any II64 code)

637
getRegister' _ is32Bit (CmmMachOp (MO_Add W64) [CmmReg (CmmGlobal PicBaseReg),
638
                                     CmmLit displacement])
639 640
 | not is32Bit = do
      return $ Any II64 (\dst -> unitOL $
641 642
        LEA II64 (OpAddr (ripRel (litToImm displacement))) (OpReg dst))

643
getRegister' dflags is32Bit (CmmMachOp mop [x]) = do -- unary MachOps
644 645 646 647 648
    sse2 <- sse2Enabled
    case mop of
      MO_F_Neg w
         | sse2      -> sse2NegCode w x
         | otherwise -> trivialUFCode FF80 (GNEG FF80) x
649

650 651
      MO_S_Neg w -> triv_ucode NEGI (intFormat w)
      MO_Not w   -> triv_ucode NOT  (intFormat w)
652 653 654 655

      -- Nop conversions
      MO_UU_Conv W32 W8  -> toI8Reg  W32 x
      MO_SS_Conv W32 W8  -> toI8Reg  W32 x
Michal Terepeta's avatar
Michal Terepeta committed
656
      MO_XX_Conv W32 W8  -> toI8Reg  W32 x
657 658
      MO_UU_Conv W16 W8  -> toI8Reg  W16 x
      MO_SS_Conv W16 W8  -> toI8Reg  W16 x
Michal Terepeta's avatar
Michal Terepeta committed
659
      MO_XX_Conv W16 W8  -> toI8Reg  W16 x
660 661
      MO_UU_Conv W32 W16 -> toI16Reg W32 x
      MO_SS_Conv W32 W16 -> toI16Reg W32 x
Michal Terepeta's avatar
Michal Terepeta committed
662
      MO_XX_Conv W32 W16 -> toI16Reg W32 x
663

664 665
      MO_UU_Conv W64 W32 | not is32Bit -> conversionNop II64 x
      MO_SS_Conv W64 W32 | not is32Bit -> conversionNop II64 x
Michal Terepeta's avatar
Michal Terepeta committed
666
      MO_XX_Conv W64 W32 | not is32Bit -> conversionNop II64 x
667 668
      MO_UU_Conv W64 W16 | not is32Bit -> toI16Reg W64 x
      MO_SS_Conv W64 W16 | not is32Bit -> toI16Reg W64 x
Michal Terepeta's avatar
Michal Terepeta committed
669
      MO_XX_Conv W64 W16 | not is32Bit -> toI16Reg W64 x
670 671
      MO_UU_Conv W64 W8  | not is32Bit -> toI8Reg  W64 x
      MO_SS_Conv W64 W8  | not is32Bit -> toI8Reg  W64 x
Michal Terepeta's avatar
Michal Terepeta committed
672
      MO_XX_Conv W64 W8  | not is32Bit -> toI8Reg  W64 x
673

674 675
      MO_UU_Conv rep1 rep2 | rep1 == rep2 -> conversionNop (intFormat rep1) x
      MO_SS_Conv rep1 rep2 | rep1 == rep2 -> conversionNop (intFormat rep1) x
Michal Terepeta's avatar
Michal Terepeta committed
676
      MO_XX_Conv rep1 rep2 | rep1 == rep2 -> conversionNop (intFormat rep1) x
677 678 679 680 681 682 683 684 685 686

      -- widenings
      MO_UU_Conv W8  W32 -> integerExtend W8  W32 MOVZxL x
      MO_UU_Conv W16 W32 -> integerExtend W16 W32 MOVZxL x
      MO_UU_Conv W8  W16 -> integerExtend W8  W16 MOVZxL x

      MO_SS_Conv W8  W32 -> integerExtend W8  W32 MOVSxL x
      MO_SS_Conv W16 W32 -> integerExtend W16 W32 MOVSxL x
      MO_SS_Conv W8  W16 -> integerExtend W8  W16 MOVSxL x

Michal Terepeta's avatar
Michal Terepeta committed
687 688 689 690 691 692 693 694 695 696 697
      -- We don't care about the upper bits for MO_XX_Conv, so MOV is enough. However, on 32-bit we
      -- have 8-bit registers only for a few registers (as opposed to x86-64 where every register
      -- has 8-bit version). So for 32-bit code, we'll just zero-extend.
      MO_XX_Conv W8  W32
          | is32Bit   -> integerExtend W8 W32 MOVZxL x
          | otherwise -> integerExtend W8 W32 MOV x
      MO_XX_Conv W8  W16
          | is32Bit   -> integerExtend W8 W16 MOVZxL x
          | otherwise -> integerExtend W8 W16 MOV x
      MO_XX_Conv W16 W32 -> integerExtend W16 W32 MOV x

698 699 700 701 702 703
      MO_UU_Conv W8  W64 | not is32Bit -> integerExtend W8  W64 MOVZxL x
      MO_UU_Conv W16 W64 | not is32Bit -> integerExtend W16 W64 MOVZxL x
      MO_UU_Conv W32 W64 | not is32Bit -> integerExtend W32 W64 MOVZxL x
      MO_SS_Conv W8  W64 | not is32Bit -> integerExtend W8  W64 MOVSxL x
      MO_SS_Conv W16 W64 | not is32Bit -> integerExtend W16 W64 MOVSxL x
      MO_SS_Conv W32 W64 | not is32Bit -> integerExtend W32 W64 MOVSxL x
Michal Terepeta's avatar
Michal Terepeta committed
704 705 706 707 708 709 710 711 712
      -- For 32-to-64 bit zero extension, amd64 uses an ordinary movl.
      -- However, we don't want the register allocator to throw it
      -- away as an unnecessary reg-to-reg move, so we keep it in
      -- the form of a movzl and print it as a movl later.
      -- This doesn't apply to MO_XX_Conv since in this case we don't care about
      -- the upper bits. So we can just use MOV.
      MO_XX_Conv W8  W64 | not is32Bit -> integerExtend W8  W64 MOV x
      MO_XX_Conv W16 W64 | not is32Bit -> integerExtend W16 W64 MOV x
      MO_XX_Conv W32 W64 | not is32Bit -> integerExtend W32 W64 MOV x
713

714 715
      MO_FF_Conv W32 W64
        | sse2      -> coerceFP2FP W64 x
716
        | otherwise -> conversionNop FF80 x
717

718
      MO_FF_Conv W64 W32 -> coerceFP2FP W32 x
719 720 721 722

      MO_FS_Conv from to -> coerceFP2Int from to x
      MO_SF_Conv from to -> coerceInt2FP from to x

723 724 725 726 727 728 729 730
      MO_V_Insert {}   -> needLlvm
      MO_V_Extract {}  -> needLlvm
      MO_V_Add {}      -> needLlvm
      MO_V_Sub {}      -> needLlvm
      MO_V_Mul {}      -> needLlvm
      MO_VS_Quot {}    -> needLlvm
      MO_VS_Rem {}     -> needLlvm
      MO_VS_Neg {}     -> needLlvm
731 732
      MO_VU_Quot {}    -> needLlvm
      MO_VU_Rem {}     -> needLlvm
733 734 735 736 737 738 739
      MO_VF_Insert {}  -> needLlvm
      MO_VF_Extract {} -> needLlvm
      MO_VF_Add {}     -> needLlvm
      MO_VF_Sub {}     -> needLlvm
      MO_VF_Mul {}     -> needLlvm
      MO_VF_Quot {}    -> needLlvm
      MO_VF_Neg {}     -> needLlvm
740

dterei's avatar
dterei committed
741
      _other -> pprPanic "getRegister" (pprMachOp mop)
742
   where
743 744
        triv_ucode :: (Format -> Operand -> Instr) -> Format -> NatM Register
        triv_ucode instr format = trivialUCode format (instr format) x
745 746 747

        -- signed or unsigned extension.
        integerExtend :: Width -> Width
748
                      -> (Format -> Operand -> Operand -> Instr)
749 750 751 752 753 754 755
                      -> CmmExpr -> NatM Register
        integerExtend from to instr expr = do
            (reg,e_code) <- if from == W8 then getByteReg expr
                                          else getSomeReg expr
            let
                code dst =
                  e_code `snocOL`
756 757
                  instr (intFormat from) (OpReg reg) (OpReg dst)
            return (Any (intFormat to) code)
758 759 760

        toI8Reg :: Width -> CmmExpr -> NatM Register
        toI8Reg new_rep expr
761
            = do codefn <- getAnyReg expr
762
                 return (Any (intFormat new_rep) codefn)
763 764 765 766 767 768
                -- HACK: use getAnyReg to get a byte-addressable register.
                -- If the source was a Fixed register, this will add the
                -- mov instruction to put it into the desired destination.
                -- We're assuming that the destination won't be a fixed
                -- non-byte-addressable register; it won't be, because all
                -- fixed registers are word-sized.
769

770
        toI16Reg = toI8Reg -- for now
771

772 773
        conversionNop :: Format -> CmmExpr -> NatM Register
        conversionNop new_format expr
774
            = do e_code <- getRegister' dflags is32Bit expr
775
                 return (swizzleRegisterRep e_code new_format)
776 777


778
getRegister' _ is32Bit (CmmMachOp mop [x, y]) = do -- dyadic MachOps
779 780
  sse2 <- sse2Enabled
  case mop of
Ian Lynagh's avatar
Ian Lynagh committed
781 782 783 784
      MO_F_Eq _ -> condFltReg is32Bit EQQ x y
      MO_F_Ne _ -> condFltReg is32Bit NE  x y
      MO_F_Gt _ -> condFltReg is32Bit GTT x y
      MO_F_Ge _ -> condFltReg is32Bit GE  x y
785 786 787 788
      -- Invert comparison condition and swap operands
      -- See Note [SSE Parity Checks]
      MO_F_Lt _ -> condFltReg is32Bit GTT  y x
      MO_F_Le _ -> condFltReg is32Bit GE   y x
dterei's avatar
dterei committed
789 790 791 792 793 794 795 796 797 798 799 800 801

      MO_Eq _   -> condIntReg EQQ x y
      MO_Ne _   -> condIntReg NE  x y

      MO_S_Gt _ -> condIntReg GTT x y
      MO_S_Ge _ -> condIntReg GE  x y
      MO_S_Lt _ -> condIntReg LTT x y
      MO_S_Le _ -> condIntReg LE  x y

      MO_U_Gt _ -> condIntReg GU  x y
      MO_U_Ge _ -> condIntReg GEU x y
      MO_U_Lt _ -> condIntReg LU  x y
      MO_U_Le _ -> condIntReg LEU x y
802

803
      MO_F_Add w  | sse2      -> trivialFCode_sse2 w ADD  x y
dterei's avatar
dterei committed
804
                  | otherwise -> trivialFCode_x87    GADD x y
805
      MO_F_Sub w  | sse2      -> trivialFCode_sse2 w SUB  x y
dterei's avatar
dterei committed
806
                  | otherwise -> trivialFCode_x87    GSUB x y
807
      MO_F_Quot w | sse2      -> trivialFCode_sse2 w FDIV x y
dterei's avatar
dterei committed
808
                  | otherwise -> trivialFCode_x87    GDIV x y
809
      MO_F_Mul w  | sse2      -> trivialFCode_sse2 w MUL x y
dterei's avatar
dterei committed
810
                  | otherwise -> trivialFCode_x87    GMUL x y
811 812 813 814 815 816 817 818 819 820 821

      MO_Add rep -> add_code rep x y
      MO_Sub rep -> sub_code rep x y

      MO_S_Quot rep -> div_code rep True  True  x y
      MO_S_Rem  rep -> div_code rep True  False x y
      MO_U_Quot rep -> div_code rep False True  x y
      MO_U_Rem  rep -> div_code rep False False x y

      MO_S_MulMayOflo rep -> imulMayOflo rep x y

Michal Terepeta's avatar
Michal Terepeta committed
822
      MO_Mul W8  -> imulW8 x y
823 824 825 826 827
      MO_Mul rep -> triv_op rep IMUL
      MO_And rep -> triv_op rep AND
      MO_Or  rep -> triv_op rep OR
      MO_Xor rep -> triv_op rep XOR

828 829 830 831
        {- Shift ops on x86s have constraints on their source, it
           either has to be Imm, CL or 1
            => trivialCode is not restrictive enough (sigh.)
        -}
832 833 834 835
      MO_Shl rep   -> shift_code rep SHL x y {-False-}
      MO_U_Shr rep -> shift_code rep SHR x y {-False-}
      MO_S_Shr rep -> shift_code rep SAR x y {-False-}

836 837 838 839 840 841 842 843 844 845 846 847 848 849 850
      MO_V_Insert {}   -> needLlvm
      MO_V_Extract {}  -> needLlvm
      MO_V_Add {}      -> needLlvm
      MO_V_Sub {}      -> needLlvm
      MO_V_Mul {}      -> needLlvm
      MO_VS_Quot {}    -> needLlvm
      MO_VS_Rem {}     -> needLlvm
      MO_VS_Neg {}     -> needLlvm
      MO_VF_Insert {}  -> needLlvm
      MO_VF_Extract {} -> needLlvm
      MO_VF_Add {}     -> needLlvm
      MO_VF_Sub {}     -> needLlvm
      MO_VF_Mul {}     -> needLlvm
      MO_VF_Quot {}    -> needLlvm
      MO_VF_Neg {}     -> needLlvm
851