{-# LANGUAGE CPP, GADTs, NondecreasingIndentation #-}
{-# LANGUAGE TupleSections #-}
{-# LANGUAGE BangPatterns #-}

#if __GLASGOW_HASKELL__ <= 808
-- GHC 8.10 deprecates this flag, but GHC 8.8 needs it
-- The default iteration limit is a bit too low for the definitions
-- in this module.
{-# OPTIONS_GHC -fmax-pmcheck-iterations=10000000 #-}
#endif

{-# OPTIONS_GHC -Wno-incomplete-uni-patterns #-}

-----------------------------------------------------------------------------
--
-- Generating machine code (instruction selection)
--
-- (c) The University of Glasgow 1996-2004
--
-----------------------------------------------------------------------------

-- This is a big module, but, if you pay attention to
-- (a) the sectioning, and (b) the type signatures, the
-- structure should not be too overwhelming.

module X86.CodeGen (
        cmmTopCodeGen,
        generateJumpTableForInstr,
        extractUnwindPoints,
        invertCondBranches,
        InstrBlock
)

where

#include "HsVersions.h"

-- NCG stuff:
import GhcPrelude

import X86.Instr
import X86.Cond
import X86.Regs
import X86.Ppr ()
import X86.RegInfo

import GHC.Platform.Regs
import CPrim
import GHC.Cmm.DebugBlock            ( DebugBlock(..), UnwindPoint(..), UnwindTable
                        , UnwindExpr(UwReg), toUnwindExpr )
import Instruction
import PIC
import NCGMonad   ( NatM, getNewRegNat, getNewLabelNat, setDeltaNat
                  , getDeltaNat, getBlockIdNat, getPicBaseNat, getNewRegPairNat
                  , getPicBaseMaybeNat, getDebugBlock, getFileId
                  , addImmediateSuccessorNat, updateCfgNat)
import CFG
import Format
import Reg
import GHC.Platform

-- Our intermediate code:
import BasicTypes
import GHC.Cmm.BlockId
import Module           ( primUnitId )
import GHC.Cmm.Utils
import GHC.Cmm.Switch
import GHC.Cmm
import GHC.Cmm.Dataflow.Block
import GHC.Cmm.Dataflow.Collections
import GHC.Cmm.Dataflow.Graph
import GHC.Cmm.Dataflow.Label
import GHC.Cmm.CLabel
import CoreSyn          ( Tickish(..) )
import SrcLoc           ( srcSpanFile, srcSpanStartLine, srcSpanStartCol )

-- The rest:
import ForeignCall      ( CCallConv(..) )
import OrdList
import Outputable
import FastString
import DynFlags
import Util
import UniqSupply       ( getUniqueM )

import Control.Monad
import Data.Bits
import Data.Foldable (fold)
import Data.Int
import Data.Maybe
import Data.Word

import qualified Data.Map as M

is32BitPlatform :: NatM Bool
is32BitPlatform = do
    dflags <- getDynFlags
    return $ target32Bit (targetPlatform dflags)

sse2Enabled :: NatM Bool
sse2Enabled = do
  dflags <- getDynFlags
  case platformArch (targetPlatform dflags) of
  -- We assume SSE1 and SSE2 operations are available on both
  -- x86 and x86_64. Historically we didn't default to SSE2 and
  -- SSE1 on x86, which resulted in de facto nondeterminism in how
  -- rounding behaves in the associated x87 floating-point instructions:
  -- variations in the spill/FPU-stack placement of arguments for
  -- operations would change the precision, and hence the final result, of
  -- what would otherwise be the same expressions with respect to single-
  -- or double-precision IEEE floating-point computations.
    ArchX86_64 -> return True
    ArchX86    -> return True
    _          -> panic "trying to generate x86/x86_64 on the wrong platform"


sse4_2Enabled :: NatM Bool
sse4_2Enabled = do
  dflags <- getDynFlags
  return (isSse4_2Enabled dflags)


cmmTopCodeGen
        :: RawCmmDecl
        -> NatM [NatCmmDecl (Alignment, CmmStatics) Instr]

cmmTopCodeGen (CmmProc info lab live graph) = do
  let blocks = toBlockListEntryFirst graph
  (nat_blocks,statics) <- mapAndUnzipM basicBlockCodeGen blocks
  picBaseMb <- getPicBaseMaybeNat
  dflags <- getDynFlags
  let proc = CmmProc info lab live (ListGraph $ concat nat_blocks)
      tops = proc : concat statics
      os   = platformOS $ targetPlatform dflags

  case picBaseMb of
      Just picBase -> initializePicBase_x86 ArchX86 os picBase tops
      Nothing -> return tops

cmmTopCodeGen (CmmData sec dat) = do
  return [CmmData sec (mkAlignment 1, dat)]  -- no translation, we just use CmmStatic

{- Note [Verifying basic blocks]
   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

   We want to guarantee a few things about the results
   of instruction selection.

   Namely that each basic block consists of:
    * A (potentially empty) sequence of straight line instructions
  followed by
    * A (potentially empty) sequence of jump like instructions.

    We can verify this by going through the instructions and
    making sure that any non-jumpish instruction can't appear
    after a jumpish instruction.

    There are gotchas however:
    * CALLs are strictly speaking control flow, but we do not
      care about them here. Hence we treat them as regular instructions.

      It's safe for them to appear inside a basic block
      as (ignoring side effects inside the call) they will result in
      straight line code.

    * NEWBLOCK marks the start of a new basic block so can
      be followed by any instructions.
-}

-- Verifying basic blocks is cheap, but not cheap enough to enable it unconditionally.
verifyBasicBlock :: [Instr] -> ()
verifyBasicBlock instrs
  | debugIsOn     = go False instrs
  | otherwise     = ()
  where
    go _     [] = ()
    go atEnd (i:instr)
        = case i of
            -- Start a new basic block
            NEWBLOCK {} -> go False instr
            -- Calls are not viable block terminators
            CALL {}     | atEnd -> faultyBlockWith i
                        | not atEnd -> go atEnd instr
            -- All instructions ok, check if we reached the end and continue.
            _ | not atEnd -> go (isJumpishInstr i) instr
              -- Only jumps allowed at the end of basic blocks.
              | otherwise -> if isJumpishInstr i
                                then go True instr
                                else faultyBlockWith i
    faultyBlockWith i
        = pprPanic "Non control flow instructions after end of basic block."
                   (ppr i <+> text "in:" $$ vcat (map ppr instrs))
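
-- For illustration (schematic, not from the original source): the checker
-- accepts a stream like
--
--   [NEWBLOCK lbl, MOV .., CALL .., ADD .., JXX .., JMP ..]
--
-- but panics if a straight-line instruction (or a CALL) appears after the
-- first jumpish instruction of a block.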

basicBlockCodeGen
        :: CmmBlock
        -> NatM ( [NatBasicBlock Instr]
                , [NatCmmDecl (Alignment, CmmStatics) Instr])

basicBlockCodeGen block = do
  let (_, nodes, tail)  = blockSplit block
      id = entryLabel block
      stmts = blockToList nodes
  -- Generate location directive
  dbg <- getDebugBlock (entryLabel block)
  loc_instrs <- case dblSourceTick =<< dbg of
    Just (SourceNote span name)
      -> do fileId <- getFileId (srcSpanFile span)
            let line = srcSpanStartLine span; col = srcSpanStartCol span
            return $ unitOL $ LOCATION fileId line col name
    _ -> return nilOL
  (mid_instrs,mid_bid) <- stmtsToInstrs id stmts
  (!tail_instrs,_) <- stmtToInstrs mid_bid tail
  let instrs = loc_instrs `appOL` mid_instrs `appOL` tail_instrs
  return $! verifyBasicBlock (fromOL instrs)
  instrs' <- fold <$> traverse addSpUnwindings instrs
  -- code generation may introduce new basic block boundaries, which
  -- are indicated by the NEWBLOCK instruction.  We must split up the
  -- instruction stream into basic blocks again.  Also, we extract
  -- LDATAs here too.
  let
        (top,other_blocks,statics) = foldrOL mkBlocks ([],[],[]) instrs'

        mkBlocks (NEWBLOCK id) (instrs,blocks,statics)
          = ([], BasicBlock id instrs : blocks, statics)
        mkBlocks (LDATA sec dat) (instrs,blocks,statics)
          = (instrs, blocks, CmmData sec dat:statics)
        mkBlocks instr (instrs,blocks,statics)
          = (instr:instrs, blocks, statics)
  return (BasicBlock id top : other_blocks, statics)
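
-- For illustration (schematic): an instruction stream
--
--   [i1, i2, NEWBLOCK l1, i3, LDATA sec dat, i4]
--
-- splits into BasicBlock id [i1,i2] followed by BasicBlock l1 [i3,i4],
-- with the LDATA collected into the statics as CmmData sec dat.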

-- | Convert 'DELTA' instructions into 'UNWIND' instructions to capture changes
-- in the @sp@ register. See Note [What is this unwinding business?] in Debug
-- for details.
addSpUnwindings :: Instr -> NatM (OrdList Instr)
addSpUnwindings instr@(DELTA d) = do
    dflags <- getDynFlags
    if debugLevel dflags >= 1
        then do lbl <- mkAsmTempLabel <$> getUniqueM
                let unwind = M.singleton MachSp (Just $ UwReg MachSp $ negate d)
                return $ toOL [ instr, UNWIND lbl unwind ]
        else return (unitOL instr)
addSpUnwindings instr = return $ unitOL instr
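
-- For example (illustrative only): at debug level 1 and above, a stack
-- adjustment such as
--
--   DELTA (-16)
--
-- is followed by an UNWIND pseudo-instruction at a fresh temporary label
-- whose table maps MachSp to UwReg MachSp 16, i.e. the pre-adjustment Sp
-- is 16 bytes above the current one.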

{- Note [Keeping track of the current block]
   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

When generating instructions for Cmm we sometimes require
the current block for things like retry loops.

We also sometimes change the current block, if a MachOp
results in branching control flow.

Issues arise if we have two statements in the same block,
which both depend on the current block id *and* change the
basic block after them. This happens for atomic primops
in the X86 backend where we want to update the CFG data structure
when introducing new basic blocks.

For example in #17334 we got this Cmm code:

        c3Bf: // global
            (_s3t1::I64) = call MO_AtomicRMW W64 AMO_And(_s3sQ::P64 + 88, 18);
            (_s3t4::I64) = call MO_AtomicRMW W64 AMO_Or(_s3sQ::P64 + 88, 0);
            _s3sT::I64 = _s3sV::I64;
            goto c3B1;

This resulted in two new basic blocks being inserted:

        c3Bf:
                movl $18,%vI_n3Bo
                movq 88(%vI_s3sQ),%rax
                jmp _n3Bp
        n3Bp:
                ...
                cmpxchgq %vI_n3Bq,88(%vI_s3sQ)
                jne _n3Bp
                ...
                jmp _n3Bs
        n3Bs:
                ...
                cmpxchgq %vI_n3Bt,88(%vI_s3sQ)
                jne _n3Bs
                ...
                jmp _c3B1
        ...

When stmtToInstrs was called on this Cmm, both atomic operations were
translated under the assumption that they would be placed into their Cmm
basic block `c3Bf`.
However, for the retry loop we introduce new labels, so this is not the
case for the second statement.
This resulted in a desync between the explicit control flow graph
we construct as a separate data type and the actual control flow graph in the code.

Instead we now return the new basic block if a statement causes a change
in the current block and use the block for all following statements.

For this reason genCCall is also split into two parts.
One for calls which *won't* change the basic blocks in
which successive instructions will be placed.
A different one for calls which *are* known to change the
basic block.

-}

-- See Note [Keeping track of the current block] for why
-- we pass the BlockId.
stmtsToInstrs :: BlockId -- ^ Basic block these statements will start to be placed in.
              -> [CmmNode O O] -- ^ Cmm statements
              -> NatM (InstrBlock, BlockId) -- ^ Resulting instructions and final block id
stmtsToInstrs bid stmts =
    go bid stmts nilOL
  where
    go bid  []        instrs = return (instrs,bid)
    go bid (s:stmts)  instrs = do
      (instrs',bid') <- stmtToInstrs bid s
      -- If the statement introduced a new block, we use that one
      let !newBid = fromMaybe bid bid'
      go newBid stmts (instrs `appOL` instrs')

-- | `bid` refers to the current block and is used to update the CFG
--   if new blocks are inserted in the control flow.
-- See Note [Keeping track of the current block] for more details.
stmtToInstrs :: BlockId -- ^ Basic block this statement will start to be placed in.
             -> CmmNode e x
             -> NatM (InstrBlock, Maybe BlockId)
             -- ^ Instructions, and bid of new block if successive
             -- statements are placed in a different basic block.
stmtToInstrs bid stmt = do
  dflags <- getDynFlags
  is32Bit <- is32BitPlatform
  case stmt of
    CmmUnsafeForeignCall target result_regs args
       -> genCCall dflags is32Bit target result_regs args bid

    _ -> (,Nothing) <$> case stmt of
      CmmComment s   -> return (unitOL (COMMENT s))
      CmmTick {}     -> return nilOL

      CmmUnwind regs -> do
        let to_unwind_entry :: (GlobalReg, Maybe CmmExpr) -> UnwindTable
            to_unwind_entry (reg, expr) = M.singleton reg (fmap toUnwindExpr expr)
        case foldMap to_unwind_entry regs of
          tbl | M.null tbl -> return nilOL
              | otherwise  -> do
                  lbl <- mkAsmTempLabel <$> getUniqueM
                  return $ unitOL $ UNWIND lbl tbl

      CmmAssign reg src
        | isFloatType ty         -> assignReg_FltCode format reg src
        | is32Bit && isWord64 ty -> assignReg_I64Code      reg src
        | otherwise              -> assignReg_IntCode format reg src
          where ty = cmmRegType dflags reg
                format = cmmTypeFormat ty

      CmmStore addr src
        | isFloatType ty         -> assignMem_FltCode format addr src
        | is32Bit && isWord64 ty -> assignMem_I64Code      addr src
        | otherwise              -> assignMem_IntCode format addr src
          where ty = cmmExprType dflags src
                format = cmmTypeFormat ty

      CmmBranch id          -> return $ genBranch id

      -- We try to arrange blocks such that the likely branch is the fallthrough
      -- in GHC.Cmm.ContFlowOpt. So we can assume the condition is likely false here.
      CmmCondBranch arg true false _ -> genCondBranch bid true false arg
      CmmSwitch arg ids -> do dflags <- getDynFlags
                              genSwitch dflags arg ids
      CmmCall { cml_target = arg
              , cml_args_regs = gregs } -> do
                                  dflags <- getDynFlags
                                  genJump arg (jumpRegs dflags gregs)
      _ ->
        panic "stmtToInstrs: statement should have been cps'd away"


jumpRegs :: DynFlags -> [GlobalReg] -> [Reg]
jumpRegs dflags gregs = [ RegReal r | Just r <- map (globalRegMaybe platform) gregs ]
    where platform = targetPlatform dflags

--------------------------------------------------------------------------------
-- | 'InstrBlock's are the insn sequences generated by the insn selectors.
--      They are really trees of insns to facilitate fast appending, where a
--      left-to-right traversal yields the insns in the correct order.
--
type InstrBlock
        = OrdList Instr


-- | Condition codes passed up the tree.
--
data CondCode
        = CondCode Bool Cond InstrBlock


-- | a.k.a "Register64"
--      Reg is the lower 32-bit temporary which contains the result.
--      Use getHiVRegFromLo to find the other VRegUnique.
--
--      Rules of this simplified insn selection game are therefore that
--      the returned Reg may be modified
--
data ChildCode64
   = ChildCode64
        InstrBlock
        Reg


-- | Registers passed up the tree.  If the stix code forces the register
--      to live in a pre-decided machine register, it comes out as @Fixed@;
--      otherwise, it comes out as @Any@, and the parent can decide which
--      register to put it in.
--
data Register
        = Fixed Format Reg InstrBlock
        | Any   Format (Reg -> InstrBlock)


swizzleRegisterRep :: Register -> Format -> Register
swizzleRegisterRep (Fixed _ reg code) format = Fixed format reg code
swizzleRegisterRep (Any _ codefn)     format = Any   format codefn


-- | Grab the Reg for a CmmReg
getRegisterReg :: Platform  -> CmmReg -> Reg

getRegisterReg _   (CmmLocal (LocalReg u pk))
  = -- Assuming SSE2, Int, Word, Float and Double can all be register allocated.
   let fmt = cmmTypeFormat pk in
        RegVirtual (mkVirtualReg u fmt)

getRegisterReg platform  (CmmGlobal mid)
  = case globalRegMaybe platform mid of
        Just reg -> RegReal reg
        Nothing  -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid)
        -- By this stage, the only MagicIds remaining should be the
        -- ones which map to a real machine register on this
        -- platform.  Hence ...


-- | Memory addressing modes passed up the tree.
data Amode
        = Amode AddrMode InstrBlock

{-
Now, given a tree (the argument to a CmmLoad) that references memory,
produce a suitable addressing mode.

A Rule of the Game (tm) for Amodes: use of the addr bit must
immediately follow use of the code part, since the code part puts
values in registers which the addr then refers to.  So you can't put
anything in between, lest it overwrite some of those registers.  If
you need to do some other computation between the code part and use of
the addr bit, first store the effective address from the amode in a
temporary, then do the other computation, and then use the temporary:

    code
    LEA amode, tmp
    ... other computation ...
    ... (tmp) ...
-}


-- | Check whether an integer will fit in 32 bits.
--      A CmmInt is intended to be truncated to the appropriate
--      number of bits, so here we truncate it to Int64.  This is
--      important because e.g. -1 as a CmmInt might be either
--      -1 or 18446744073709551615.
--
is32BitInteger :: Integer -> Bool
is32BitInteger i = i64 <= 0x7fffffff && i64 >= -0x80000000
  where i64 = fromIntegral i :: Int64
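
-- For illustration (GHCi-style, relying on the truncation above):
--
--   is32BitInteger (-1)                  == True    -- i64 = -1
--   is32BitInteger 0xffffffff            == False   -- i64 = 4294967295
--   is32BitInteger 18446744073709551615  == True    -- same bits as -1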


-- | Convert a BlockId to some CmmStatic data
jumpTableEntry :: DynFlags -> Maybe BlockId -> CmmStatic
jumpTableEntry dflags Nothing = CmmStaticLit (CmmInt 0 (wordWidth dflags))
jumpTableEntry _ (Just blockid) = CmmStaticLit (CmmLabel blockLabel)
    where blockLabel = blockLbl blockid


-- -----------------------------------------------------------------------------
-- General things for putting together code sequences

-- Expand CmmRegOff.  ToDo: should we do it this way around, or convert
-- CmmExprs into CmmRegOff?
mangleIndexTree :: DynFlags -> CmmReg -> Int -> CmmExpr
mangleIndexTree dflags reg off
  = CmmMachOp (MO_Add width) [CmmReg reg, CmmLit (CmmInt (fromIntegral off) width)]
  where width = typeWidth (cmmRegType dflags reg)
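
-- E.g. (sketch): @CmmRegOff r 8@ at width W32 expands to
--
--   CmmMachOp (MO_Add W32) [CmmReg r, CmmLit (CmmInt 8 W32)]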

-- | The dual to getAnyReg: compute an expression into a register, but
--      we don't mind which one it is.
getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
getSomeReg expr = do
  r <- getRegister expr
  case r of
    Any rep code -> do
        tmp <- getNewRegNat rep
        return (tmp, code tmp)
    Fixed _ reg code ->
        return (reg, code)


assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock
assignMem_I64Code addrTree valueTree = do
  Amode addr addr_code <- getAmode addrTree
  ChildCode64 vcode rlo <- iselExpr64 valueTree
  let
        rhi = getHiVRegFromLo rlo

        -- Little-endian store
        mov_lo = MOV II32 (OpReg rlo) (OpAddr addr)
        mov_hi = MOV II32 (OpReg rhi) (OpAddr (fromJust (addrOffset addr 4)))
  return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
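
-- E.g. (sketch, schematic operands): a 64-bit store on i386 becomes two
-- 32-bit moves,
--
--   movl %lo, (addr)
--   movl %hi, 4(addr)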


assignReg_I64Code :: CmmReg  -> CmmExpr -> NatM InstrBlock
assignReg_I64Code (CmmLocal (LocalReg u_dst _)) valueTree = do
   ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
   let
         r_dst_lo = RegVirtual $ mkVirtualReg u_dst II32
         r_dst_hi = getHiVRegFromLo r_dst_lo
         r_src_hi = getHiVRegFromLo r_src_lo
         mov_lo = MOV II32 (OpReg r_src_lo) (OpReg r_dst_lo)
         mov_hi = MOV II32 (OpReg r_src_hi) (OpReg r_dst_hi)
   return (
        vcode `snocOL` mov_lo `snocOL` mov_hi
     )

assignReg_I64Code _ _
   = panic "assignReg_I64Code(i386): invalid lvalue"


iselExpr64        :: CmmExpr -> NatM ChildCode64
iselExpr64 (CmmLit (CmmInt i _)) = do
  (rlo,rhi) <- getNewRegPairNat II32
  let
        r = fromIntegral (fromIntegral i :: Word32)
        q = fromIntegral (fromIntegral (i `shiftR` 32) :: Word32)
        code = toOL [
                MOV II32 (OpImm (ImmInteger r)) (OpReg rlo),
                MOV II32 (OpImm (ImmInteger q)) (OpReg rhi)
                ]
  return (ChildCode64 code rlo)
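
-- E.g. (sketch): the 64-bit literal 0x100000002 is split into
--
--   movl $2, rlo   -- low  word
--   movl $1, rhi   -- high word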

iselExpr64 (CmmLoad addrTree ty) | isWord64 ty = do
   Amode addr addr_code <- getAmode addrTree
   (rlo,rhi) <- getNewRegPairNat II32
   let
        mov_lo = MOV II32 (OpAddr addr) (OpReg rlo)
        mov_hi = MOV II32 (OpAddr (fromJust (addrOffset addr 4))) (OpReg rhi)
   return (
            ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
                        rlo
     )

iselExpr64 (CmmReg (CmmLocal (LocalReg vu ty))) | isWord64 ty
   = return (ChildCode64 nilOL (RegVirtual $ mkVirtualReg vu II32))

-- we handle addition, but rather badly
iselExpr64 (CmmMachOp (MO_Add _) [e1, CmmLit (CmmInt i _)]) = do
   ChildCode64 code1 r1lo <- iselExpr64 e1
   (rlo,rhi) <- getNewRegPairNat II32
   let
        r = fromIntegral (fromIntegral i :: Word32)
        q = fromIntegral (fromIntegral (i `shiftR` 32) :: Word32)
        r1hi = getHiVRegFromLo r1lo
        code =  code1 `appOL`
                toOL [ MOV II32 (OpReg r1lo) (OpReg rlo),
                       ADD II32 (OpImm (ImmInteger r)) (OpReg rlo),
                       MOV II32 (OpReg r1hi) (OpReg rhi),
                       ADC II32 (OpImm (ImmInteger q)) (OpReg rhi) ]
   return (ChildCode64 code rlo)

iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
   ChildCode64 code1 r1lo <- iselExpr64 e1
   ChildCode64 code2 r2lo <- iselExpr64 e2
   (rlo,rhi) <- getNewRegPairNat II32
   let
        r1hi = getHiVRegFromLo r1lo
        r2hi = getHiVRegFromLo r2lo
        code =  code1 `appOL`
                code2 `appOL`
                toOL [ MOV II32 (OpReg r1lo) (OpReg rlo),
                       ADD II32 (OpReg r2lo) (OpReg rlo),
                       MOV II32 (OpReg r1hi) (OpReg rhi),
                       ADC II32 (OpReg r2hi) (OpReg rhi) ]
   return (ChildCode64 code rlo)
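
-- Sketch of the code generated for the general 64-bit add (schematic
-- operand names):
--
--   movl r1lo, rlo
--   addl r2lo, rlo        -- low word; sets the carry flag
--   movl r1hi, rhi
--   adcl r2hi, rhi        -- high word; consumes the carry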

iselExpr64 (CmmMachOp (MO_Sub _) [e1,e2]) = do
   ChildCode64 code1 r1lo <- iselExpr64 e1
   ChildCode64 code2 r2lo <- iselExpr64 e2
   (rlo,rhi) <- getNewRegPairNat II32
   let
        r1hi = getHiVRegFromLo r1lo
        r2hi = getHiVRegFromLo r2lo
        code =  code1 `appOL`
                code2 `appOL`
                toOL [ MOV II32 (OpReg r1lo) (OpReg rlo),
                       SUB II32 (OpReg r2lo) (OpReg rlo),
                       MOV II32 (OpReg r1hi) (OpReg rhi),
                       SBB II32 (OpReg r2hi) (OpReg rhi) ]
   return (ChildCode64 code rlo)

iselExpr64 (CmmMachOp (MO_UU_Conv _ W64) [expr]) = do
     fn <- getAnyReg expr
     r_dst_lo <-  getNewRegNat II32
     let r_dst_hi = getHiVRegFromLo r_dst_lo
         code = fn r_dst_lo
     return (
             ChildCode64 (code `snocOL`
                          MOV II32 (OpImm (ImmInt 0)) (OpReg r_dst_hi))
                          r_dst_lo
            )

iselExpr64 (CmmMachOp (MO_SS_Conv W32 W64) [expr]) = do
     fn <- getAnyReg expr
     r_dst_lo <-  getNewRegNat II32
     let r_dst_hi = getHiVRegFromLo r_dst_lo
         code = fn r_dst_lo
     return (
             ChildCode64 (code `snocOL`
                          MOV II32 (OpReg r_dst_lo) (OpReg eax) `snocOL`
                          CLTD II32 `snocOL`
                          MOV II32 (OpReg eax) (OpReg r_dst_lo) `snocOL`
                          MOV II32 (OpReg edx) (OpReg r_dst_hi))
                          r_dst_lo
            )
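
-- The W32->W64 sign extension above round-trips through %eax so that CLTD
-- can sign-extend into %edx; schematically:
--
--   movl r_dst_lo, %eax
--   cltd                  -- %edx := sign of %eax
--   movl %eax, r_dst_lo
--   movl %edx, r_dst_hi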

iselExpr64 expr
   = pprPanic "iselExpr64(i386)" (ppr expr)


--------------------------------------------------------------------------------
getRegister :: CmmExpr -> NatM Register
getRegister e = do dflags <- getDynFlags
                   is32Bit <- is32BitPlatform
                   getRegister' dflags is32Bit e

getRegister' :: DynFlags -> Bool -> CmmExpr -> NatM Register

getRegister' dflags is32Bit (CmmReg reg)
  = case reg of
        CmmGlobal PicBaseReg
         | is32Bit ->
            -- on x86_64, we have %rip for PicBaseReg, but it's not
            -- a full-featured register, it can only be used for
            -- rip-relative addressing.
            do reg' <- getPicBaseNat (archWordFormat is32Bit)
               return (Fixed (archWordFormat is32Bit) reg' nilOL)
        _ ->
            do
               let format   = cmmTypeFormat (cmmRegType dflags reg)
                   platform = targetPlatform dflags
               return (Fixed format
                             (getRegisterReg platform reg)
                             nilOL)


getRegister' dflags is32Bit (CmmRegOff r n)
  = getRegister' dflags is32Bit $ mangleIndexTree dflags r n

getRegister' dflags is32Bit (CmmMachOp (MO_AlignmentCheck align _) [e])
  = addAlignmentCheck align <$> getRegister' dflags is32Bit e

-- for 32-bit architectures, support some 64 -> 32 bit conversions:
-- TO_W_(x), TO_W_(x >> 32)

getRegister' _ is32Bit (CmmMachOp (MO_UU_Conv W64 W32)
                     [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]])
 | is32Bit = do
  ChildCode64 code rlo <- iselExpr64 x
  return $ Fixed II32 (getHiVRegFromLo rlo) code

getRegister' _ is32Bit (CmmMachOp (MO_SS_Conv W64 W32)
                     [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]])
 | is32Bit = do
  ChildCode64 code rlo <- iselExpr64 x
  return $ Fixed II32 (getHiVRegFromLo rlo) code

getRegister' _ is32Bit (CmmMachOp (MO_UU_Conv W64 W32) [x])
 | is32Bit = do
  ChildCode64 code rlo <- iselExpr64 x
  return $ Fixed II32 rlo code

getRegister' _ is32Bit (CmmMachOp (MO_SS_Conv W64 W32) [x])
 | is32Bit = do
  ChildCode64 code rlo <- iselExpr64 x
  return $ Fixed II32 rlo code

getRegister' _ _ (CmmLit lit@(CmmFloat f w)) =
  float_const_sse2  where
  float_const_sse2
    | f == 0.0 = do
      let
          format = floatFormat w
          code dst = unitOL  (XOR format (OpReg dst) (OpReg dst))
        -- I don't know why there are xorpd, xorps, and pxor instructions.
        -- They all appear to do the same thing --SDM
      return (Any format code)

    | otherwise = do
      Amode addr code <- memConstant (mkAlignment $ widthInBytes w) lit
      loadFloatAmode w addr code

-- catch simple cases of zero- or sign-extended load
getRegister' _ _ (CmmMachOp (MO_UU_Conv W8 W32) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVZxL II8) addr
  return (Any II32 code)

getRegister' _ _ (CmmMachOp (MO_SS_Conv W8 W32) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVSxL II8) addr
  return (Any II32 code)

getRegister' _ _ (CmmMachOp (MO_UU_Conv W16 W32) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVZxL II16) addr
  return (Any II32 code)

getRegister' _ _ (CmmMachOp (MO_SS_Conv W16 W32) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVSxL II16) addr
  return (Any II32 code)

-- catch simple cases of zero- or sign-extended load
getRegister' _ is32Bit (CmmMachOp (MO_UU_Conv W8 W64) [CmmLoad addr _])
 | not is32Bit = do
  code <- intLoadCode (MOVZxL II8) addr
  return (Any II64 code)

getRegister' _ is32Bit (CmmMachOp (MO_SS_Conv W8 W64) [CmmLoad addr _])
 | not is32Bit = do
  code <- intLoadCode (MOVSxL II8) addr
  return (Any II64 code)

getRegister' _ is32Bit (CmmMachOp (MO_UU_Conv W16 W64) [CmmLoad addr _])
 | not is32Bit = do
  code <- intLoadCode (MOVZxL II16) addr
  return (Any II64 code)

getRegister' _ is32Bit (CmmMachOp (MO_SS_Conv W16 W64) [CmmLoad addr _])
 | not is32Bit = do
  code <- intLoadCode (MOVSxL II16) addr
  return (Any II64 code)

getRegister' _ is32Bit (CmmMachOp (MO_UU_Conv W32 W64) [CmmLoad addr _])
 | not is32Bit = do
  code <- intLoadCode (MOV II32) addr -- 32-bit loads zero-extend
  return (Any II64 code)

getRegister' _ is32Bit (CmmMachOp (MO_SS_Conv W32 W64) [CmmLoad addr _])
 | not is32Bit = do
  code <- intLoadCode (MOVSxL II32) addr
  return (Any II64 code)

getRegister' _ is32Bit (CmmMachOp (MO_Add W64) [CmmReg (CmmGlobal PicBaseReg),
                                     CmmLit displacement])
 | not is32Bit = do
      return $ Any II64 (\dst -> unitOL $
        LEA II64 (OpAddr (ripRel (litToImm displacement))) (OpReg dst))

getRegister' dflags is32Bit (CmmMachOp mop [x]) = do -- unary MachOps
    case mop of
      MO_F_Neg w  -> sse2NegCode w x


      MO_S_Neg w -> triv_ucode NEGI (intFormat w)
      MO_Not w   -> triv_ucode NOT  (intFormat w)

      -- Nop conversions
      MO_UU_Conv W32 W8  -> toI8Reg  W32 x
      MO_SS_Conv W32 W8  -> toI8Reg  W32 x
      MO_XX_Conv W32 W8  -> toI8Reg  W32 x
      MO_UU_Conv W16 W8  -> toI8Reg  W16 x
      MO_SS_Conv W16 W8  -> toI8Reg  W16 x
      MO_XX_Conv W16 W8  -> toI8Reg  W16 x
      MO_UU_Conv W32 W16 -> toI16Reg W32 x
      MO_SS_Conv W32 W16 -> toI16Reg W32 x
      MO_XX_Conv W32 W16 -> toI16Reg W32 x

      MO_UU_Conv W64 W32 | not is32Bit -> conversionNop II64 x
      MO_SS_Conv W64 W32 | not is32Bit -> conversionNop II64 x
      MO_XX_Conv W64 W32 | not is32Bit -> conversionNop II64 x
      MO_UU_Conv W64 W16 | not is32Bit -> toI16Reg W64 x
      MO_SS_Conv W64 W16 | not is32Bit -> toI16Reg W64 x
      MO_XX_Conv W64 W16 | not is32Bit -> toI16Reg W64 x
      MO_UU_Conv W64 W8  | not is32Bit -> toI8Reg  W64 x
      MO_SS_Conv W64 W8  | not is32Bit -> toI8Reg  W64 x
      MO_XX_Conv W64 W8  | not is32Bit -> toI8Reg  W64 x

      MO_UU_Conv rep1 rep2 | rep1 == rep2 -> conversionNop (intFormat rep1) x
      MO_SS_Conv rep1 rep2 | rep1 == rep2 -> conversionNop (intFormat rep1) x
      MO_XX_Conv rep1 rep2 | rep1 == rep2 -> conversionNop (intFormat rep1) x

      -- widenings
      MO_UU_Conv W8  W32 -> integerExtend W8  W32 MOVZxL x
      MO_UU_Conv W16 W32 -> integerExtend W16 W32 MOVZxL x
      MO_UU_Conv W8  W16 -> integerExtend W8  W16 MOVZxL x

      MO_SS_Conv W8  W32 -> integerExtend W8  W32 MOVSxL x
      MO_SS_Conv W16 W32 -> integerExtend W16 W32 MOVSxL x
      MO_SS_Conv W8  W16 -> integerExtend W8  W16 MOVSxL x

Michal Terepeta's avatar
Michal Terepeta committed
806 807 808 809 810 811 812 813 814 815 816
      -- We don't care about the upper bits for MO_XX_Conv, so MOV is enough. However, on 32-bit we
      -- have 8-bit registers only for a few registers (as opposed to x86-64 where every register
      -- has 8-bit version). So for 32-bit code, we'll just zero-extend.
      MO_XX_Conv W8  W32
          | is32Bit   -> integerExtend W8 W32 MOVZxL x
          | otherwise -> integerExtend W8 W32 MOV x
      MO_XX_Conv W8  W16
          | is32Bit   -> integerExtend W8 W16 MOVZxL x
          | otherwise -> integerExtend W8 W16 MOV x
      MO_XX_Conv W16 W32 -> integerExtend W16 W32 MOV x

      MO_UU_Conv W8  W64 | not is32Bit -> integerExtend W8  W64 MOVZxL x
      MO_UU_Conv W16 W64 | not is32Bit -> integerExtend W16 W64 MOVZxL x
      MO_UU_Conv W32 W64 | not is32Bit -> integerExtend W32 W64 MOVZxL x
      MO_SS_Conv W8  W64 | not is32Bit -> integerExtend W8  W64 MOVSxL x
      MO_SS_Conv W16 W64 | not is32Bit -> integerExtend W16 W64 MOVSxL x
      MO_SS_Conv W32 W64 | not is32Bit -> integerExtend W32 W64 MOVSxL x
      -- For 32-to-64 bit zero extension, amd64 uses an ordinary movl.
      -- However, we don't want the register allocator to throw it
      -- away as an unnecessary reg-to-reg move, so we keep it in
      -- the form of a movzl and print it as a movl later.
      -- This doesn't apply to MO_XX_Conv since in this case we don't care about
      -- the upper bits. So we can just use MOV.
      MO_XX_Conv W8  W64 | not is32Bit -> integerExtend W8  W64 MOV x
      MO_XX_Conv W16 W64 | not is32Bit -> integerExtend W16 W64 MOV x
      MO_XX_Conv W32 W64 | not is32Bit -> integerExtend W32 W64 MOV x

      MO_FF_Conv W32 W64 -> coerceFP2FP W64 x


      MO_FF_Conv W64 W32 -> coerceFP2FP W32 x

      MO_FS_Conv from to -> coerceFP2Int from to x
      MO_SF_Conv from to -> coerceInt2FP from to x

      MO_V_Insert {}   -> needLlvm
      MO_V_Extract {}  -> needLlvm
      MO_V_Add {}      -> needLlvm
      MO_V_Sub {}      -> needLlvm
      MO_V_Mul {}      -> needLlvm
      MO_VS_Quot {}    -> needLlvm
      MO_VS_Rem {}     -> needLlvm
      MO_VS_Neg {}     -> needLlvm
      MO_VU_Quot {}    -> needLlvm
      MO_VU_Rem {}     -> needLlvm
      MO_VF_Insert {}  -> needLlvm
      MO_VF_Extract {} -> needLlvm
      MO_VF_Add {}     -> needLlvm
      MO_VF_Sub {}     -> needLlvm
      MO_VF_Mul {}     -> needLlvm
      MO_VF_Quot {}    -> needLlvm
      MO_VF_Neg {}     -> needLlvm

      _other -> pprPanic "getRegister" (pprMachOp mop)
   where
        triv_ucode :: (Format -> Operand -> Instr) -> Format -> NatM Register
        triv_ucode instr format = trivialUCode format (instr format) x

        -- signed or unsigned extension.
        integerExtend :: Width -> Width
                      -> (Format -> Operand -> Operand -> Instr)
                      -> CmmExpr -> NatM Register
        integerExtend from to instr expr = do
            (reg,e_code) <- if from == W8 then getByteReg expr
                                          else getSomeReg expr
            let
                code dst =
                  e_code `snocOL`
874 875
                  instr (intFormat from) (OpReg reg) (OpReg dst)
            return (Any (intFormat to) code)
876 877 878

        toI8Reg :: Width -> CmmExpr -> NatM Register
        toI8Reg new_rep expr
879
            = do codefn <- getAnyReg expr
880
                 return (Any (intFormat new_rep) codefn)
881 882 883 884 885 886
                -- HACK: use getAnyReg to get a byte-addressable register.
                -- If the source was a Fixed register, this will add the
                -- mov instruction to put it into the desired destination.
                -- We're assuming that the destination won't be a fixed
                -- non-byte-addressable register; it won't be, because all
                -- fixed registers are word-sized.
887

888
        toI16Reg = toI8Reg -- for now
889

890 891
        conversionNop :: Format -> CmmExpr -> NatM Register
        conversionNop new_format expr
892
            = do e_code <- getRegister' dflags is32Bit expr
893
                 return (swizzleRegisterRep e_code new_format)
894 895


896
getRegister' _ is32Bit (CmmMachOp mop [x, y]) = do -- dyadic MachOps
897
  case mop of
Ian Lynagh's avatar
Ian Lynagh committed
898 899 900 901
      MO_F_Eq _ -> condFltReg is32Bit EQQ x y
      MO_F_Ne _ -> condFltReg is32Bit NE  x y
      MO_F_Gt _ -> condFltReg is32Bit GTT x y
      MO_F_Ge _ -> condFltReg is32Bit GE  x y
902 903 904 905
      -- Invert comparison condition and swap operands
      -- See Note [SSE Parity Checks]
      MO_F_Lt _ -> condFltReg is32Bit GTT  y x
      MO_F_Le _ -> condFltReg is32Bit GE   y x
dterei's avatar
dterei committed
906 907 908 909 910 911 912 913 914 915 916 917 918

      MO_Eq _   -> condIntReg EQQ x y
      MO_Ne _   -> condIntReg NE  x y

      MO_S_Gt _ -> condIntReg GTT x y
      MO_S_Ge _ -> condIntReg GE  x y
      MO_S_Lt _ -> condIntReg LTT x y
      MO_S_Le _ -> condIntReg LE  x y

      MO_U_Gt _ -> condIntReg GU  x y
      MO_U_Ge _ -> condIntReg GEU x y
      MO_U_Lt _ -> condIntReg LU  x y
      MO_U_Le _ -> condIntReg LEU x y
919

920 921 922 923 924 925 926 927
      MO_F_Add w   -> trivialFCode_sse2 w ADD  x y

      MO_F_Sub w   -> trivialFCode_sse2 w SUB  x y

      MO_F_Quot w  -> trivialFCode_sse2 w FDIV x y

      MO_F_Mul w   -> trivialFCode_sse2 w MUL x y

928 929 930 931 932 933 934 935 936 937 938

      MO_Add rep -> add_code rep x y
      MO_Sub rep -> sub_code rep x y

      MO_S_Quot rep -> div_code rep True  True  x y
      MO_S_Rem  rep -> div_code rep True  False x y
      MO_U_Quot rep -> div_code rep False True  x y
      MO_U_Rem  rep -> div_code rep False False x y

      MO_S_MulMayOflo rep -> imulMayOflo rep x y

Michal Terepeta's avatar
Michal Terepeta committed
939
      MO_Mul W8  -> imulW8 x y
940 941 942 943 944
      MO_Mul rep -> triv_op rep IMUL
      MO_And rep -> triv_op rep AND
      MO_Or  rep -> triv_op rep OR
      MO_Xor rep -> triv_op rep XOR

945 946 947 948
        {- Shift ops on x86s have constraints on their source, it
           either has to be Imm, CL or 1
            => trivialCode is not restrictive enough (sigh.)
        -}
949 950 951 952
      MO_Shl rep   -> shift_code rep SHL x y {-False-}
      MO_U_Shr rep -> shift_code rep SHR x y {-False-}
      MO_S_Shr rep -> shift_code rep SAR x y {-False-}

953 954 955 956 957 958 959 960
      MO_V_Insert {}   -> needLlvm
      MO_V_Extract {}  -> needLlvm
      MO_V_Add {}      -> needLlvm
      MO_V_Sub {}      -> needLlvm
      MO_V_Mul {}      -> needLlvm
      MO_VS_Quot {}    -> needLlvm
      MO_VS_Rem {}     -> needLlvm
      MO_VS_Neg {}     -> needLlvm
961 962 963 964 965 966 967
      MO_VF_Insert {}  -> needLlvm
      MO_VF_Extract {} -> needLlvm
      MO_VF_Add {}     -> needLlvm
      MO_VF_Sub {}     -> needLlvm
      MO_VF_Mul {}     -> needLlvm
      MO_VF_Quot {}    -> needLlvm
      MO_VF_Neg {}     -> needLlvm
968

dterei's avatar
dterei committed
969
      _other -> pprPanic "getRegister(x86) - binary CmmMachOp (1)" (pprMachOp mop)
970 971 972
  where
    --------------------
    triv_op width instr = trivialCode width op (Just op) x y
973
                        where op   = instr (intFormat width)
974

Michal Terepeta's avatar
Michal Terepeta committed
975 976 977 978 979 980 981 982 983 984 985 986 987 988 989
    -- Special case for IMUL for bytes, since the result of IMULB will be in
    -- %ax, the split to %dx/%edx/%rdx and %ax/%eax/%rax happens only for wider
    -- values.
    imulW8 :: CmmExpr -> CmmExpr -> NatM Register
    imulW8 arg_a arg_b = do
        (a_reg, a_code) <- getNonClobberedReg arg_a
        b_code <- getAnyReg arg_b

        let code = a_code `appOL` b_code eax `appOL`
                   toOL [ IMUL2 format (OpReg a_reg) ]
            format = intFormat W8

        return (Fixed format eax code)


990 991 992 993
    imulMayOflo :: Width -> CmmExpr -> CmmExpr -> NatM Register
    imulMayOflo rep a b = do
         (a_reg, a_code) <- getNonClobberedReg a
         b_code <- getAnyReg b
994 995 996 997 998
         let
             shift_amt  = case rep of
                           W32 -> 31
                           W64 -> 63
                           _ -> panic "shift_amt"
999

1000
             format = intFormat rep
1001 1002
             code = a_code `appOL` b_code eax `appOL`
                        toOL [
1003 1004
                           IMUL2 format (OpReg a_reg),   -- result in %edx:%eax
                           SAR format (OpImm (ImmInt shift_amt)) (OpReg eax),
1005
                                -- sign extend lower part
1006
                           SUB format (OpReg edx) (OpReg eax)
1007
                                -- compare against upper
1008 1009
                           -- eax==0 if high part == sign extended low part
                        ]
1010
         return (Fixed format eax code)
1011 1012 1013

    --------------------
    shift_code :: Width
1014
               -> (Format -> Operand -> Operand -> Instr)
1015 1016 1017
               -> CmmExpr
               -> CmmExpr
               -> NatM Register
1018 1019

    {- Case1: shift length as immediate -}
dterei's avatar
dterei committed
1020
    shift_code width instr x (CmmLit lit) = do
1021 1022
          x_code <- getAnyReg x
          let
1023
               format = intFormat width
1024 1025
               code dst
                  = x_code dst `snocOL`
1026 1027
                    instr format (OpImm (litToImm lit)) (OpReg dst)
          return (Any format code)
1028

1029 1030 1031 1032
    {- Case2: shift length is complex (non-immediate)
      * y must go in %ecx.
      * we cannot do y first *and* put its result in %ecx, because
        %ecx might be clobbered by x.
1033
      * if we do y second, then x cannot be
1034 1035 1036 1037 1038
        in a clobbered reg.  Also, we cannot clobber x's reg
        with the instruction itself.
      * so we can either:
        - do y first, put its result in a fresh tmp, then copy it to %ecx later
        - do y second and put its result into %ecx.  x gets placed in a fresh
Gabor Greif's avatar
typos  
Gabor Greif committed
1039
          tmp.  This is likely to be better, because the reg alloc can
1040 1041 1042 1043 1044
          eliminate this reg->reg move here (it won't eliminate the other one,
          because the move is into the fixed %ecx).
    -}
    shift_code width instr x y{-amount-} = do
        x_code <- getAnyReg x
1045 1046
        let format = intFormat width
        tmp <- getNewRegNat format
1047
        y_code <- getAnyReg y
1048 1049 1050
        let
           code = x_code tmp `appOL`
                  y_code ecx `snocOL`
1051 1052
                  instr format (OpReg ecx) (OpReg tmp)
        return (Fixed format tmp code)
1053 1054 1055 1056

    --------------------
    add_code :: Width -> CmmExpr -> CmmExpr -> NatM Register
    add_code rep x (CmmLit (CmmInt y _))
1057
        | is32BitInteger y = add_int rep x y
1058 1059
    add_code rep x y = trivialCode rep (ADD format) (Just (ADD format)) x y
      where format = intFormat rep
1060 1061
    -- TODO: There are other interesting patterns we want to replace
    --     with a LEA, e.g. `(x + offset) + (y << shift)`.
1062 1063 1064 1065

    --------------------
    sub_code :: Width -> CmmExpr -> CmmExpr -> NatM Register
    sub_code rep x (CmmLit (CmmInt y _))
1066
        | is32BitInteger (-y) = add_int rep x (-y)
1067
    sub_code rep x y = trivialCode rep (SUB (intFormat rep)) Nothing x y
1068 1069 1070

    -- our three-operand add instruction:
    add_int width x y = do
1071 1072
        (x_reg, x_code) <- getSomeReg x
        let
1073
            format = intFormat width
1074 1075
            imm = ImmInt (fromInteger y)
            code dst
1076
               = x_code `snocOL`
1077
                 LEA format
1078
                        (OpAddr (AddrBaseIndex (EABaseReg x_reg) EAIndexNone imm))
1079
                        (OpReg dst)
1080
        --
1081
        return (Any format code)
1082 1083

    ----------------------
Michal Terepeta's avatar
Michal Terepeta committed
1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095

    -- See Note [DIV/IDIV for bytes]
    div_code W8 signed quotient x y = do
        let widen | signed    = MO_SS_Conv W8 W16
                  | otherwise = MO_UU_Conv W8 W16
        div_code
            W16
            signed
            quotient
            (CmmMachOp widen [x])
            (CmmMachOp widen [y])

    div_code width signed quotient x y = do
           (y_op, y_code) <- getRegOrMem y -- cannot be clobbered
           x_code <- getAnyReg x
           let
             format = intFormat width
             widen | signed    = CLTD format
                   | otherwise = XOR format (OpReg edx) (OpReg edx)

             instr | signed    = IDIV
                   | otherwise = DIV

             code = y_code `appOL`
                    x_code eax `appOL`
                    toOL [widen, instr format y_op]

             result | quotient  = eax
                    | otherwise = edx

           return (Fixed format result code)
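
    -- Illustrative expansion of a signed W32 quotient (schematic names):
    --
    --   <x_code>             -- x into %eax
    --   cltd                 -- sign-extend %eax into %edx
    --   idivl y_op           -- quotient -> %eax, remainder -> %edx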


getRegister' _ _ (CmmLoad mem pk)
  | isFloatType pk
  = do
    Amode addr mem_code <- getAmode mem
    loadFloatAmode  (typeWidth pk) addr mem_code

getRegister' _ is32Bit (CmmLoad mem pk)
  | is32Bit && not (isWord64 pk)
  = do
    code <- intLoadCode instr mem
    return (Any format code)
  where
    width = typeWidth pk
    format = intFormat width
    instr = case width of
                W8     -> MOVZxL II8
                _other -> MOV format
        -- We always zero-extend 8-bit loads, if we
        -- can't think of anything better.  This is because
        -- we can't guarantee access to an 8-bit variant of every register
        -- (esi and edi don't have 8-bit variants), so to make things
        -- simpler we do our 8-bit arithmetic with full 32-bit registers.

-- Simpler memory load code on x86_64
getRegister' _ is32Bit (CmmLoad mem pk)
 | not is32Bit