CodeGen.hs 151 KB
Newer Older
1
{-# LANGUAGE CPP, GADTs, NondecreasingIndentation #-}
2 3
{-# LANGUAGE TupleSections #-}
{-# LANGUAGE BangPatterns #-}
4

5 6
#if __GLASGOW_HASKELL__ <= 808
-- GHC 8.10 deprecates this flag, but GHC 8.8 needs it
7 8 9
-- The default iteration limit is a bit too low for the definitions
-- in this module.
{-# OPTIONS_GHC -fmax-pmcheck-iterations=10000000 #-}
10
#endif
11

12 13
{-# OPTIONS_GHC -Wno-incomplete-uni-patterns #-}

14 15 16 17 18 19 20 21 22
-----------------------------------------------------------------------------
--
-- Generating machine code (instruction selection)
--
-- (c) The University of Glasgow 1996-2004
--
-----------------------------------------------------------------------------

-- This is a big module, but, if you pay attention to
Ian Lynagh's avatar
Ian Lynagh committed
23
-- (a) the sectioning, and (b) the type signatures, the
24 25
-- structure should not be too overwhelming.

Sylvain Henry's avatar
Sylvain Henry committed
26
module GHC.CmmToAsm.X86.CodeGen (
27 28
        cmmTopCodeGen,
        generateJumpTableForInstr,
29
        extractUnwindPoints,
30
        invertCondBranches,
31 32
        InstrBlock
)
33 34 35 36 37 38

where

#include "HsVersions.h"

-- NCG stuff:
39
import GHC.Prelude
40

Sylvain Henry's avatar
Sylvain Henry committed
41 42 43
import GHC.CmmToAsm.X86.Instr
import GHC.CmmToAsm.X86.Cond
import GHC.CmmToAsm.X86.Regs
44
import GHC.CmmToAsm.X86.Ppr
Sylvain Henry's avatar
Sylvain Henry committed
45
import GHC.CmmToAsm.X86.RegInfo
46

47
import GHC.Platform.Regs
Sylvain Henry's avatar
Sylvain Henry committed
48
import GHC.CmmToAsm.CPrim
49
import GHC.CmmToAsm.Types
Sylvain Henry's avatar
Sylvain Henry committed
50 51 52 53 54 55 56 57 58
import GHC.Cmm.DebugBlock
   ( DebugBlock(..), UnwindPoint(..), UnwindTable
   , UnwindExpr(UwReg), toUnwindExpr
   )
import GHC.CmmToAsm.PIC
import GHC.CmmToAsm.Monad
   ( NatM, getNewRegNat, getNewLabelNat, setDeltaNat
   , getDeltaNat, getBlockIdNat, getPicBaseNat, getNewRegPairNat
   , getPicBaseMaybeNat, getDebugBlock, getFileId
59
   , addImmediateSuccessorNat, updateCfgNat, getConfig, getPlatform
60
   , getCfgWeights
Sylvain Henry's avatar
Sylvain Henry committed
61 62 63
   )
import GHC.CmmToAsm.CFG
import GHC.CmmToAsm.Format
64
import GHC.CmmToAsm.Config
Sylvain Henry's avatar
Sylvain Henry committed
65
import GHC.Platform.Reg
John Ericson's avatar
John Ericson committed
66
import GHC.Platform
67 68

-- Our intermediate code:
Sylvain Henry's avatar
Sylvain Henry committed
69
import GHC.Types.Basic
70
import GHC.Cmm.BlockId
71
import GHC.Unit.Types ( primUnitId )
72 73 74 75 76 77 78 79
import GHC.Cmm.Utils
import GHC.Cmm.Switch
import GHC.Cmm
import GHC.Cmm.Dataflow.Block
import GHC.Cmm.Dataflow.Collections
import GHC.Cmm.Dataflow.Graph
import GHC.Cmm.Dataflow.Label
import GHC.Cmm.CLabel
Sylvain Henry's avatar
Sylvain Henry committed
80
import GHC.Core          ( Tickish(..) )
Sylvain Henry's avatar
Sylvain Henry committed
81
import GHC.Types.SrcLoc  ( srcSpanFile, srcSpanStartLine, srcSpanStartCol )
82 83

-- The rest:
Sylvain Henry's avatar
Sylvain Henry committed
84
import GHC.Types.ForeignCall ( CCallConv(..) )
85 86
import GHC.Data.OrdList
import GHC.Utils.Outputable
87
import GHC.Utils.Panic
88
import GHC.Data.FastString
Sylvain Henry's avatar
Sylvain Henry committed
89
import GHC.Driver.Session
90
import GHC.Utils.Misc
Sylvain Henry's avatar
Sylvain Henry committed
91
import GHC.Types.Unique.Supply ( getUniqueM )
92

93
import Control.Monad
tibbe's avatar
tibbe committed
94
import Data.Bits
95
import Data.Foldable (fold)
96
import Data.Int
97
import Data.Maybe
dterei's avatar
dterei committed
98 99
import Data.Word

100 101
import qualified Data.Map as M

Ian Lynagh's avatar
Ian Lynagh committed
102 103
-- | Are we generating code for a 32-bit target?
is32BitPlatform :: NatM Bool
is32BitPlatform = target32Bit <$> getPlatform
Ian Lynagh's avatar
Ian Lynagh committed
106

107 108
-- | Is SSE2 (or newer) available on the target?
sse2Enabled :: NatM Bool
sse2Enabled = do
  sse <- ncgSseVersion <$> getConfig
  return (sse >= Just SSE2)
111

tibbe's avatar
tibbe committed
112 113
-- | Is SSE4.2 (or newer) available on the target?
sse4_2Enabled :: NatM Bool
sse4_2Enabled = do
  sse <- ncgSseVersion <$> getConfig
  return (sse >= Just SSE42)
116

117
-- | Generate native code for one top-level Cmm declaration.
--
-- A procedure is translated block by block; if a PIC base register is
-- in use (32-bit PIC code), the generated tops are additionally
-- threaded through 'initializePicBase_x86'.  Data sections pass
-- through untranslated, tagged with a byte alignment of 1.
cmmTopCodeGen
        :: RawCmmDecl
        -> NatM [NatCmmDecl (Alignment, RawCmmStatics) Instr]

cmmTopCodeGen (CmmProc info lab live graph) = do
  platform <- getPlatform
  (native_blockss, staticss)
      <- mapAndUnzipM basicBlockCodeGen (toBlockListEntryFirst graph)
  -- NB: query the PIC base only *after* generating the blocks, since
  -- block code generation may be what initializes it.
  mb_pic_base <- getPicBaseMaybeNat
  let proc = CmmProc info lab live (ListGraph (concat native_blockss))
      tops = proc : concat staticss
  case mb_pic_base of
      Nothing       -> return tops
      Just pic_base ->
          initializePicBase_x86 ArchX86 (platformOS platform) pic_base tops

cmmTopCodeGen (CmmData sec dat) =
  -- No instruction selection needed: the statics are emitted as-is.
  return [CmmData sec (mkAlignment 1, dat)]
136

137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164
{- Note [Verifying basic blocks]
   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

   We want to guarantee a few things about the results
   of instruction selection.

   Namely that each basic block consists of:
    * A (potentially empty) sequence of straight line instructions
  followed by
    * A (potentially empty) sequence of jump like instructions.

    We can verify this by going through the instructions and
    making sure that any non-jumpish instruction can't appear
    after a jumpish instruction.

    There are gotchas however:
    * CALLs are strictly speaking control flow but here we care
      not about them. Hence we treat them as regular instructions.

      It's safe for them to appear inside a basic block
      as (ignoring side effects inside the call) they will result in
      straight line code.

    * NEWBLOCK marks the start of a new basic block so can
      be followed by any instructions.
-}

-- Verifying basic blocks is cheap, but not cheap enough to enable it unconditionally.

-- | Check the invariant of Note [Verifying basic blocks]: within one
-- basic block no straight-line instruction may appear after a
-- jump-like instruction.  Only active in debug builds ('debugIsOn');
-- otherwise this is a no-op.  On violation it panics, printing the
-- offending instruction and the whole instruction list.
verifyBasicBlock :: Platform -> [Instr] -> ()
verifyBasicBlock platform instrs
  | debugIsOn     = go False instrs
  | otherwise     = ()
  where
    -- 'atEnd' is True once we have seen a jump-like instruction in the
    -- current block, i.e. we are in the block's terminator sequence.
    go _     [] = ()
    go atEnd (i:instr)
        = case i of
            -- Start a new basic block: reset the state.
            NEWBLOCK {} -> go False instr
            -- Calls are treated as straight-line code (see the Note),
            -- so they may appear mid-block but never terminate one.
            CALL {}     | atEnd -> faultyBlockWith i
                        | not atEnd -> go atEnd instr
            -- All instructions ok, check if we reached the end and continue.
            _ | not atEnd -> go (isJumpishInstr i) instr
              -- Only jumps allowed at the end of basic blocks.
              | otherwise -> if isJumpishInstr i
                                then go True instr
                                else faultyBlockWith i
    faultyBlockWith i
        = pprPanic "Non control flow instructions after end of basic block."
                   (pprInstr platform i <+> text "in:" $$ vcat (map (pprInstr platform) instrs))
187

188
-- | Generate native code for a single Cmm basic block.  Instruction
-- selection may itself introduce new basic blocks (marked NEWBLOCK)
-- and out-of-line data (LDATA), so one Cmm block can yield several
-- native blocks plus extra data declarations.
basicBlockCodeGen
        :: CmmBlock
        -> NatM ( [NatBasicBlock Instr]
                , [NatCmmDecl (Alignment, RawCmmStatics) Instr])

basicBlockCodeGen block = do
  let (_, nodes, tail)  = blockSplit block
      id = entryLabel block
      stmts = blockToList nodes
  -- Generate location directive: if this block carries a source note,
  -- emit a LOCATION pseudo-instruction for debug line information.
  dbg <- getDebugBlock (entryLabel block)
  loc_instrs <- case dblSourceTick =<< dbg of
    Just (SourceNote span name)
      -> do fileId <- getFileId (srcSpanFile span)
            let line = srcSpanStartLine span; col = srcSpanStartCol span
            return $ unitOL $ LOCATION fileId line col name
    _ -> return nilOL
  -- Straight-line statements first, then the terminator.  'mid_bid'
  -- is the block id the terminator belongs to, which may differ from
  -- 'id'; see Note [Keeping track of the current block].
  (mid_instrs,mid_bid) <- stmtsToInstrs id stmts
  (!tail_instrs,_) <- stmtToInstrs mid_bid tail
  let instrs = loc_instrs `appOL` mid_instrs `appOL` tail_instrs
  platform <- getPlatform
  -- Debug-only sanity check; 'return $!' forces the () so that the
  -- check actually runs.
  return $! verifyBasicBlock platform (fromOL instrs)
  instrs' <- fold <$> traverse addSpUnwindings instrs
  -- code generation may introduce new basic block boundaries, which
  -- are indicated by the NEWBLOCK instruction.  We must split up the
  -- instruction stream into basic blocks again.  Also, we extract
  -- LDATAs here too.
  let
        (top,other_blocks,statics) = foldrOL mkBlocks ([],[],[]) instrs'

        -- NEWBLOCK: the instructions accumulated so far start a new
        -- native block with the given id.
        mkBlocks (NEWBLOCK id) (instrs,blocks,statics)
          = ([], BasicBlock id instrs : blocks, statics)
        -- LDATA: hoist the data out of the instruction stream.
        mkBlocks (LDATA sec dat) (instrs,blocks,statics)
          = (instrs, blocks, CmmData sec dat:statics)
        mkBlocks instr (instrs,blocks,statics)
          = (instr:instrs, blocks, statics)
  return (BasicBlock id top : other_blocks, statics)

226
-- | Convert 'DELTA' instructions into 'UNWIND' instructions to capture changes
-- in the @sp@ register. See Note [What is this unwinding business?] in "GHC.Cmm.DebugBlock"
-- for details.
addSpUnwindings :: Instr -> NatM (OrdList Instr)
addSpUnwindings instr@(DELTA d) = do
    dwarfUnwind <- ncgDwarfUnwindings <$> getConfig
    if not dwarfUnwind
        then return (unitOL instr)
        else do
            uniq <- getUniqueM
            let lbl    = mkAsmTempLabel uniq
                unwind = M.singleton MachSp (Just (UwReg MachSp (negate d)))
            return (toOL [ instr, UNWIND lbl unwind ])
addSpUnwindings other = return (unitOL other)
238

239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291
{- Note [Keeping track of the current block]
   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

When generating instructions for Cmm we sometimes require
the current block for things like retry loops.

We also sometimes change the current block, if a MachOP
results in branching control flow.

Issues arise if we have two statements in the same block,
which both depend on the current block id *and* change the
basic block after them. This happens for atomic primops
in the X86 backend where we want to update the CFG data structure
when introducing new basic blocks.

For example in #17334 we got this Cmm code:

        c3Bf: // global
            (_s3t1::I64) = call MO_AtomicRMW W64 AMO_And(_s3sQ::P64 + 88, 18);
            (_s3t4::I64) = call MO_AtomicRMW W64 AMO_Or(_s3sQ::P64 + 88, 0);
            _s3sT::I64 = _s3sV::I64;
            goto c3B1;

This resulted in two new basic blocks being inserted:

        c3Bf:
                movl $18,%vI_n3Bo
                movq 88(%vI_s3sQ),%rax
                jmp _n3Bp
        n3Bp:
                ...
                cmpxchgq %vI_n3Bq,88(%vI_s3sQ)
                jne _n3Bp
                ...
                jmp _n3Bs
        n3Bs:
                ...
                cmpxchgq %vI_n3Bt,88(%vI_s3sQ)
                jne _n3Bs
                ...
                jmp _c3B1
        ...

Based on the Cmm we called stmtToInstrs we translated both atomic operations under
the assumption they would be placed into their Cmm basic block `c3Bf`.
However for the retry loop we introduce new labels, so this is not the case
for the second statement.
This resulted in a desync between the explicit control flow graph
we construct as a separate data type and the actual control flow graph in the code.

Instead we now return the new basic block if a statement causes a change
in the current block and use the block for all following statements.

292 293 294 295 296
For this reason genCCall is also split into two parts.  One for calls which
*won't* change the basic blocks in which successive instructions will be
placed (since they only evaluate CmmExpr, which can only contain MachOps, which
cannot introduce basic blocks in their lowerings).  A different one for calls
which *are* known to change the basic block.
297 298 299 300 301 302

-}

-- See Note [Keeping track of the current block] for why
-- we pass the BlockId.
stmtsToInstrs :: BlockId -- ^ Basic block these statements will start to be placed in.
              -> [CmmNode O O] -- ^ Cmm statements
              -> NatM (InstrBlock, BlockId) -- ^ Resulting instructions and final block id
stmtsToInstrs bid stmts = loop bid nilOL stmts
  where
    loop cur acc []     = return (acc, cur)
    loop cur acc (s:ss) = do
      (code, mb_new_bid) <- stmtToInstrs cur s
      -- If the statement introduced a new block, subsequent statements
      -- are placed in that block instead.
      let !next = fromMaybe cur mb_new_bid
      loop next (acc `appOL` code) ss
314

315 316
-- | `bid` refers to the current block and is used to update the CFG
--   if new blocks are inserted in the control flow.
-- See Note [Keeping track of the current block] for more details.
stmtToInstrs :: BlockId -- ^ Basic block this statement will start to be placed in.
             -> CmmNode e x
             -> NatM (InstrBlock, Maybe BlockId)
             -- ^ Instructions, and bid of new block if successive
             -- statements are placed in a different basic block.
stmtToInstrs bid stmt = do
  is32Bit <- is32BitPlatform
  platform <- getPlatform
  case stmt of
    -- Unsafe foreign calls are the one statement kind that may change
    -- the current block, so they get the bid and may return a new one.
    CmmUnsafeForeignCall target result_regs args
       -> genCCall is32Bit target result_regs args bid

    -- Every other statement stays in the current block (Nothing).
    _ -> (,Nothing) <$> case stmt of
      CmmComment s   -> return (unitOL (COMMENT s))
      CmmTick {}     -> return nilOL

      -- Emit an UNWIND pseudo-instruction describing the given
      -- register table, unless the table is empty.
      CmmUnwind regs -> do
        let to_unwind_entry :: (GlobalReg, Maybe CmmExpr) -> UnwindTable
            to_unwind_entry (reg, expr) = M.singleton reg (fmap (toUnwindExpr platform) expr)
        case foldMap to_unwind_entry regs of
          tbl | M.null tbl -> return nilOL
              | otherwise  -> do
                  lbl <- mkAsmTempLabel <$> getUniqueM
                  return $ unitOL $ UNWIND lbl tbl

      -- Dispatch on the type of the assigned register: float, 64-bit
      -- value on a 32-bit target, or plain integer.
      CmmAssign reg src
        | isFloatType ty         -> assignReg_FltCode format reg src
        | is32Bit && isWord64 ty -> assignReg_I64Code      reg src
        | otherwise              -> assignReg_IntCode format reg src
          where ty = cmmRegType platform reg
                format = cmmTypeFormat ty

      -- Same dispatch for stores, keyed on the stored value's type.
      CmmStore addr src
        | isFloatType ty         -> assignMem_FltCode format addr src
        | is32Bit && isWord64 ty -> assignMem_I64Code      addr src
        | otherwise              -> assignMem_IntCode format addr src
          where ty = cmmExprType platform src
                format = cmmTypeFormat ty

      CmmBranch id          -> return $ genBranch id

      -- We try to arrange blocks such that the likely branch is the fallthrough
      -- in GHC.Cmm.ContFlowOpt. So we can assume the condition is likely false here.
      CmmCondBranch arg true false _ -> genCondBranch bid true false arg
      CmmSwitch arg ids -> genSwitch arg ids
      CmmCall { cml_target = arg
              , cml_args_regs = gregs } -> genJump arg (jumpRegs platform gregs)
      _ ->
        panic "stmtToInstrs: statement should have been cps'd away"
367 368


369 370
-- | The real machine registers (if any) corresponding to the given
-- global registers; globals with no machine register are dropped.
jumpRegs :: Platform -> [GlobalReg] -> [Reg]
jumpRegs platform gregs = map RegReal (mapMaybe (globalRegMaybe platform) gregs)
371

372 373
--------------------------------------------------------------------------------
-- | 'InstrBlock's are the insn sequences generated by the insn selectors.
--      They are really trees of insns to facilitate fast appending, where a
--      left-to-right traversal yields the insns in the correct order.
--
--      ('OrdList' gives O(1) append, which matters because instruction
--      selection appends constantly.)
type InstrBlock
        = OrdList Instr
379 380 381 382


-- | Condition codes passed up the tree.
--
-- The 'InstrBlock' computes the condition; 'Cond' is the x86 condition
-- code to test afterwards.
-- NOTE(review): the meaning of the Bool field is not evident from this
-- module — confirm before relying on it.
data CondCode
        = CondCode Bool Cond InstrBlock
385 386 387


-- | a.k.a "Register64"
--      Reg is the lower 32-bit temporary which contains the result.
--      Use getHiVRegFromLo to find the other VRegUnique.
--
--      Rules of this simplified insn selection game are therefore that
--      the returned Reg may be modified
--
data ChildCode64
   = ChildCode64
        InstrBlock  -- ^ code computing the 64-bit value
        Reg         -- ^ low 32-bit half; high half via getHiVRegFromLo
398 399 400


-- | Register's passed up the tree.  If the stix code forces the register
--      to live in a pre-decided machine register, it comes out as @Fixed@;
--      otherwise, it comes out as @Any@, and the parent can decide which
--      register to put it in.
--
data Register
        = Fixed Format Reg InstrBlock       -- ^ value lives in this particular Reg
        | Any   Format (Reg -> InstrBlock)  -- ^ caller picks the destination Reg
408 409


410 411 412
-- | Replace a 'Register''s format, leaving the location/code untouched.
swizzleRegisterRep :: Register -> Format -> Register
swizzleRegisterRep reg format = case reg of
    Fixed _ loc code -> Fixed format loc code
    Any   _ codefn   -> Any   format codefn
413 414 415


-- | Grab the Reg for a CmmReg
getRegisterReg :: Platform  -> CmmReg -> Reg

getRegisterReg _   (CmmLocal (LocalReg u pk))
  = -- Assuming SSE2, Int/Word/Float/Double values can all live in
    -- (virtual) registers, so every local becomes a virtual reg.
   let fmt = cmmTypeFormat pk in
        RegVirtual (mkVirtualReg u fmt)

getRegisterReg platform  (CmmGlobal mid)
  = case globalRegMaybe platform mid of
        Just reg -> RegReal $ reg
        Nothing  -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid)
        -- By this stage, the only MagicIds remaining should be the
        -- ones which map to a real machine register on this
        -- platform.  Hence ...
430 431 432


-- | Memory addressing modes passed up the tree.
--
-- The 'InstrBlock' must run immediately before any use of the
-- 'AddrMode' (see the "Rule of the Game" comment that follows).
data Amode
        = Amode AddrMode InstrBlock
435 436

{-
Gabor Greif's avatar
Gabor Greif committed
437
Now, given a tree (the argument to a CmmLoad) that references memory,
438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455
produce a suitable addressing mode.

A Rule of the Game (tm) for Amodes: use of the addr bit must
immediately follow use of the code part, since the code part puts
values in registers which the addr then refers to.  So you can't put
anything in between, lest it overwrite some of those registers.  If
you need to do some other computation between the code part and use of
the addr bit, first store the effective address from the amode in a
temporary, then do the other computation, and then use the temporary:

    code
    LEA amode, tmp
    ... other computation ...
    ... (tmp) ...
-}


-- | Check whether an integer will fit in 32 bits.
--      A CmmInt is intended to be truncated to the appropriate
--      number of bits, so here we truncate it to Int64.  This is
--      important because e.g. -1 as a CmmInt might be either
--      -1 or 18446744073709551615.
--
is32BitInteger :: Integer -> Bool
is32BitInteger i = -0x80000000 <= i64 && i64 <= 0x7fffffff
  where i64 = fromIntegral i :: Int64


-- | Convert a BlockId to some CmmStatic data: a pointer to the block's
-- label, or a word of zeroes for an absent entry.
jumpTableEntry :: NCGConfig -> Maybe BlockId -> CmmStatic
jumpTableEntry config mb_block = case mb_block of
    Nothing      -> CmmStaticLit (CmmInt 0 (ncgWordWidth config))
    Just blockid -> CmmStaticLit (CmmLabel (blockLbl blockid))
471 472 473 474 475 476 477


-- -----------------------------------------------------------------------------
-- General things for putting together code sequences

-- Expand CmmRegOff.  ToDo: should we do it this way around, or convert
-- CmmExprs into CmmRegOff?
mangleIndexTree :: Platform -> CmmReg -> Int -> CmmExpr
mangleIndexTree platform reg off =
    let width = typeWidth (cmmRegType platform reg)
    in  CmmMachOp (MO_Add width) [CmmReg reg, CmmLit (CmmInt (fromIntegral off) width)]
482 483

-- | The dual to getAnyReg: compute an expression into a register, but
--      we don't mind which one it is.
getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
getSomeReg expr = do
  result <- getRegister expr
  case result of
    Fixed _ reg code -> return (reg, code)
    Any rep mkCode   -> do
        dst <- getNewRegNat rep
        return (dst, mkCode dst)
494 495 496 497 498 499


-- | Store a 64-bit value to memory on a 32-bit target, as two 32-bit
-- stores (low word first — little-endian layout).
assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock
assignMem_I64Code addrTree valueTree = do
  Amode addr addr_code <- getAmode addrTree
  ChildCode64 vcode rlo <- iselExpr64 valueTree
  let
        rhi = getHiVRegFromLo rlo

        -- Little-endian store
        mov_lo = MOV II32 (OpReg rlo) (OpAddr addr)
        mov_hi = MOV II32 (OpReg rhi) (OpAddr (fromJust (addrOffset addr 4)))
  -- NB: addr_code must run immediately before the stores that use
  -- 'addr' (the Amode Rule of the Game).
  return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)


-- | Assign a 64-bit value to a local register on a 32-bit target:
-- two 32-bit register-to-register moves (low and high halves).
-- Only local registers are valid lvalues here; anything else panics.
assignReg_I64Code :: CmmReg  -> CmmExpr -> NatM InstrBlock
assignReg_I64Code (CmmLocal (LocalReg u_dst _)) valueTree = do
   ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
   let
         r_dst_lo = RegVirtual $ mkVirtualReg u_dst II32
         r_dst_hi = getHiVRegFromLo r_dst_lo
         r_src_hi = getHiVRegFromLo r_src_lo
         mov_lo = MOV II32 (OpReg r_src_lo) (OpReg r_dst_lo)
         mov_hi = MOV II32 (OpReg r_src_hi) (OpReg r_dst_hi)
   return (
        vcode `snocOL` mov_lo `snocOL` mov_hi
     )

assignReg_I64Code _ _
   = panic "assignReg_I64Code(i386): invalid lvalue"


-- | Instruction selection for a 64-bit-wide expression on a 32-bit
-- target.  Returns a 'ChildCode64': the code plus the register holding
-- the low 32 bits (high half via getHiVRegFromLo).  Expressions not
-- matched by any equation below fall through to a panic.
iselExpr64        :: CmmExpr -> NatM ChildCode64
-- Literal: split into low and high 32-bit words and move each in.
iselExpr64 (CmmLit (CmmInt i _)) = do
  (rlo,rhi) <- getNewRegPairNat II32
  let
        r = fromIntegral (fromIntegral i :: Word32)
        q = fromIntegral (fromIntegral (i `shiftR` 32) :: Word32)
        code = toOL [
                MOV II32 (OpImm (ImmInteger r)) (OpReg rlo),
                MOV II32 (OpImm (ImmInteger q)) (OpReg rhi)
                ]
  return (ChildCode64 code rlo)

-- Load: two 32-bit loads, low word at the address, high word 4 bytes
-- further on (little-endian).
iselExpr64 (CmmLoad addrTree ty) | isWord64 ty = do
   Amode addr addr_code <- getAmode addrTree
   (rlo,rhi) <- getNewRegPairNat II32
   let
        mov_lo = MOV II32 (OpAddr addr) (OpReg rlo)
        mov_hi = MOV II32 (OpAddr (fromJust (addrOffset addr 4))) (OpReg rhi)
   return (
            ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
                        rlo
     )

-- Local 64-bit register: no code needed, just name its low half.
iselExpr64 (CmmReg (CmmLocal (LocalReg vu ty))) | isWord64 ty
   = return (ChildCode64 nilOL (RegVirtual $ mkVirtualReg vu II32))

-- we handle addition, but rather badly
-- Add with an immediate: ADD the low words, ADC (add-with-carry) the
-- high words.
iselExpr64 (CmmMachOp (MO_Add _) [e1, CmmLit (CmmInt i _)]) = do
   ChildCode64 code1 r1lo <- iselExpr64 e1
   (rlo,rhi) <- getNewRegPairNat II32
   let
        r = fromIntegral (fromIntegral i :: Word32)
        q = fromIntegral (fromIntegral (i `shiftR` 32) :: Word32)
        r1hi = getHiVRegFromLo r1lo
        code =  code1 `appOL`
                toOL [ MOV II32 (OpReg r1lo) (OpReg rlo),
                       ADD II32 (OpImm (ImmInteger r)) (OpReg rlo),
                       MOV II32 (OpReg r1hi) (OpReg rhi),
                       ADC II32 (OpImm (ImmInteger q)) (OpReg rhi) ]
   return (ChildCode64 code rlo)

-- General 64-bit addition: ADD low halves, ADC high halves.
iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
   ChildCode64 code1 r1lo <- iselExpr64 e1
   ChildCode64 code2 r2lo <- iselExpr64 e2
   (rlo,rhi) <- getNewRegPairNat II32
   let
        r1hi = getHiVRegFromLo r1lo
        r2hi = getHiVRegFromLo r2lo
        code =  code1 `appOL`
                code2 `appOL`
                toOL [ MOV II32 (OpReg r1lo) (OpReg rlo),
                       ADD II32 (OpReg r2lo) (OpReg rlo),
                       MOV II32 (OpReg r1hi) (OpReg rhi),
                       ADC II32 (OpReg r2hi) (OpReg rhi) ]
   return (ChildCode64 code rlo)

-- 64-bit subtraction: SUB low halves, SBB (subtract-with-borrow) high.
iselExpr64 (CmmMachOp (MO_Sub _) [e1,e2]) = do
   ChildCode64 code1 r1lo <- iselExpr64 e1
   ChildCode64 code2 r2lo <- iselExpr64 e2
   (rlo,rhi) <- getNewRegPairNat II32
   let
        r1hi = getHiVRegFromLo r1lo
        r2hi = getHiVRegFromLo r2lo
        code =  code1 `appOL`
                code2 `appOL`
                toOL [ MOV II32 (OpReg r1lo) (OpReg rlo),
                       SUB II32 (OpReg r2lo) (OpReg rlo),
                       MOV II32 (OpReg r1hi) (OpReg rhi),
                       SBB II32 (OpReg r2hi) (OpReg rhi) ]
   return (ChildCode64 code rlo)

-- Unsigned widening to 64 bits: compute the value into the low half
-- and zero the high half.
iselExpr64 (CmmMachOp (MO_UU_Conv _ W64) [expr]) = do
     fn <- getAnyReg expr
     r_dst_lo <-  getNewRegNat II32
     let r_dst_hi = getHiVRegFromLo r_dst_lo
         code = fn r_dst_lo
     return (
             ChildCode64 (code `snocOL`
                          MOV II32 (OpImm (ImmInt 0)) (OpReg r_dst_hi))
                          r_dst_lo
            )

-- Signed widening W32 -> W64: sign-extend via CLTD (eax -> edx:eax),
-- so the value is bounced through eax/edx.
iselExpr64 (CmmMachOp (MO_SS_Conv W32 W64) [expr]) = do
     fn <- getAnyReg expr
     r_dst_lo <-  getNewRegNat II32
     let r_dst_hi = getHiVRegFromLo r_dst_lo
         code = fn r_dst_lo
     return (
             ChildCode64 (code `snocOL`
                          MOV II32 (OpReg r_dst_lo) (OpReg eax) `snocOL`
                          CLTD II32 `snocOL`
                          MOV II32 (OpReg eax) (OpReg r_dst_lo) `snocOL`
                          MOV II32 (OpReg edx) (OpReg r_dst_hi))
                          r_dst_lo
            )

iselExpr64 expr
   = do
      platform <- getPlatform
      pprPanic "iselExpr64(i386)" (pdoc platform expr)
626 627 628 629


--------------------------------------------------------------------------------
-- | Select instructions computing an expression, passing the platform
-- and word-size flag down to the worker 'getRegister''.
getRegister :: CmmExpr -> NatM Register
getRegister expr = do
    platform <- getPlatform
    is32Bit  <- is32BitPlatform
    getRegister' platform is32Bit expr
633

634
getRegister' :: Platform -> Bool -> CmmExpr -> NatM Register
635

636
getRegister' platform is32Bit (CmmReg reg)
637 638 639 640 641 642
  = case reg of
        CmmGlobal PicBaseReg
         | is32Bit ->
            -- on x86_64, we have %rip for PicBaseReg, but it's not
            -- a full-featured register, it can only be used for
            -- rip-relative addressing.
643 644
            do reg' <- getPicBaseNat (archWordFormat is32Bit)
               return (Fixed (archWordFormat is32Bit) reg' nilOL)
645
        _ ->
646 647
            do
               let
648
                 fmt = cmmTypeFormat (cmmRegType platform reg)
649 650
                 format  = fmt
               --
651
               platform <- ncgPlatform <$> getConfig
652
               return (Fixed format
653
                             (getRegisterReg platform reg)
654 655
                             nilOL)

656

657 658
getRegister' platform is32Bit (CmmRegOff r n)
  = getRegister' platform is32Bit $ mangleIndexTree platform r n
659

660 661
getRegister' platform is32Bit (CmmMachOp (MO_AlignmentCheck align _) [e])
  = addAlignmentCheck align <$> getRegister' platform is32Bit e
Ben Gamari's avatar
Ben Gamari committed
662

663
-- for 32-bit architectures, support some 64 -> 32 bit conversions:
664 665
-- TO_W_(x), TO_W_(x >> 32)

666
getRegister' _ is32Bit (CmmMachOp (MO_UU_Conv W64 W32)
667 668
                     [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]])
 | is32Bit = do
669 670 671
  ChildCode64 code rlo <- iselExpr64 x
  return $ Fixed II32 (getHiVRegFromLo rlo) code

672
getRegister' _ is32Bit (CmmMachOp (MO_SS_Conv W64 W32)
673 674
                     [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]])
 | is32Bit = do
675 676 677
  ChildCode64 code rlo <- iselExpr64 x
  return $ Fixed II32 (getHiVRegFromLo rlo) code

678
getRegister' _ is32Bit (CmmMachOp (MO_UU_Conv W64 W32) [x])
679
 | is32Bit = do
680 681 682
  ChildCode64 code rlo <- iselExpr64 x
  return $ Fixed II32 rlo code

683
getRegister' _ is32Bit (CmmMachOp (MO_SS_Conv W64 W32) [x])
684
 | is32Bit = do
685
  ChildCode64 code rlo <- iselExpr64 x
686
  return $ Fixed II32 rlo code
687

688
getRegister' _ _ (CmmLit lit@(CmmFloat f w)) =
689
  float_const_sse2  where
690 691 692
  float_const_sse2
    | f == 0.0 = do
      let
693 694
          format = floatFormat w
          code dst = unitOL  (XOR format (OpReg dst) (OpReg dst))
695 696
        -- I don't know why there are xorpd, xorps, and pxor instructions.
        -- They all appear to do the same thing --SDM
697
      return (Any format code)
698 699

   | otherwise = do
700
      Amode addr code <- memConstant (mkAlignment $ widthInBytes w) lit
701
      loadFloatAmode w addr code
702 703

-- catch simple cases of zero- or sign-extended load
704
getRegister' _ _ (CmmMachOp (MO_UU_Conv W8 W32) [CmmLoad addr _]) = do
705 706 707
  code <- intLoadCode (MOVZxL II8) addr
  return (Any II32 code)

708
getRegister' _ _ (CmmMachOp (MO_SS_Conv W8 W32) [CmmLoad addr _]) = do
709 710 711
  code <- intLoadCode (MOVSxL II8) addr
  return (Any II32 code)

712
getRegister' _ _ (CmmMachOp (MO_UU_Conv W16 W32) [CmmLoad addr _]) = do
713 714 715
  code <- intLoadCode (MOVZxL II16) addr
  return (Any II32 code)

716
getRegister' _ _ (CmmMachOp (MO_SS_Conv W16 W32) [CmmLoad addr _]) = do
717 718 719 720
  code <- intLoadCode (MOVSxL II16) addr
  return (Any II32 code)

-- catch simple cases of zero- or sign-extended load
721
getRegister' _ is32Bit (CmmMachOp (MO_UU_Conv W8 W64) [CmmLoad addr _])
722
 | not is32Bit = do
723 724 725
  code <- intLoadCode (MOVZxL II8) addr
  return (Any II64 code)

726
getRegister' _ is32Bit (CmmMachOp (MO_SS_Conv W8 W64) [CmmLoad addr _])
727
 | not is32Bit = do
728 729 730
  code <- intLoadCode (MOVSxL II8) addr
  return (Any II64 code)

731
getRegister' _ is32Bit (CmmMachOp (MO_UU_Conv W16 W64) [CmmLoad addr _])
732
 | not is32Bit = do
733 734 735
  code <- intLoadCode (MOVZxL II16) addr
  return (Any II64 code)

736
getRegister' _ is32Bit (CmmMachOp (MO_SS_Conv W16 W64) [CmmLoad addr _])
737
 | not is32Bit = do
738 739 740
  code <- intLoadCode (MOVSxL II16) addr
  return (Any II64 code)

741
getRegister' _ is32Bit (CmmMachOp (MO_UU_Conv W32 W64) [CmmLoad addr _])
742
 | not is32Bit = do
743 744 745
  code <- intLoadCode (MOV II32) addr -- 32-bit loads zero-extend
  return (Any II64 code)

746
getRegister' _ is32Bit (CmmMachOp (MO_SS_Conv W32 W64) [CmmLoad addr _])
747
 | not is32Bit = do
748 749 750
  code <- intLoadCode (MOVSxL II32) addr
  return (Any II64 code)

751
getRegister' _ is32Bit (CmmMachOp (MO_Add W64) [CmmReg (CmmGlobal PicBaseReg),
752
                                     CmmLit displacement])
753 754
 | not is32Bit = do
      return $ Any II64 (\dst -> unitOL $
755 756
        LEA II64 (OpAddr (ripRel (litToImm displacement))) (OpReg dst))

-- Unary MachOps: negation, complement, and the whole family of
-- integer/float width conversions applied to a single operand.
getRegister' platform is32Bit (CmmMachOp mop [x]) = do -- unary MachOps
    case mop of
      -- Floating-point negation via the SSE2 code path.
      MO_F_Neg w  -> sse2NegCode w x

      MO_S_Neg w -> triv_ucode NEGI (intFormat w)
      MO_Not w   -> triv_ucode NOT  (intFormat w)

      -- Nop conversions: narrowing to a sub-register needs no code,
      -- only a byte/word-addressable register (see toI8Reg below).
      MO_UU_Conv W32 W8  -> toI8Reg  W32 x
      MO_SS_Conv W32 W8  -> toI8Reg  W32 x
      MO_XX_Conv W32 W8  -> toI8Reg  W32 x
      MO_UU_Conv W16 W8  -> toI8Reg  W16 x
      MO_SS_Conv W16 W8  -> toI8Reg  W16 x
      MO_XX_Conv W16 W8  -> toI8Reg  W16 x
      MO_UU_Conv W32 W16 -> toI16Reg W32 x
      MO_SS_Conv W32 W16 -> toI16Reg W32 x
      MO_XX_Conv W32 W16 -> toI16Reg W32 x

      -- Narrowing from 64 bit on a 64-bit target: also no code.
      MO_UU_Conv W64 W32 | not is32Bit -> conversionNop II64 x
      MO_SS_Conv W64 W32 | not is32Bit -> conversionNop II64 x
      MO_XX_Conv W64 W32 | not is32Bit -> conversionNop II64 x
      MO_UU_Conv W64 W16 | not is32Bit -> toI16Reg W64 x
      MO_SS_Conv W64 W16 | not is32Bit -> toI16Reg W64 x
      MO_XX_Conv W64 W16 | not is32Bit -> toI16Reg W64 x
      MO_UU_Conv W64 W8  | not is32Bit -> toI8Reg  W64 x
      MO_SS_Conv W64 W8  | not is32Bit -> toI8Reg  W64 x
      MO_XX_Conv W64 W8  | not is32Bit -> toI8Reg  W64 x

      -- Identity conversions: just retag the register's format.
      MO_UU_Conv rep1 rep2 | rep1 == rep2 -> conversionNop (intFormat rep1) x
      MO_SS_Conv rep1 rep2 | rep1 == rep2 -> conversionNop (intFormat rep1) x
      MO_XX_Conv rep1 rep2 | rep1 == rep2 -> conversionNop (intFormat rep1) x

      -- widenings
      MO_UU_Conv W8  W32 -> integerExtend W8  W32 MOVZxL x
      MO_UU_Conv W16 W32 -> integerExtend W16 W32 MOVZxL x
      MO_UU_Conv W8  W16 -> integerExtend W8  W16 MOVZxL x

      MO_SS_Conv W8  W32 -> integerExtend W8  W32 MOVSxL x
      MO_SS_Conv W16 W32 -> integerExtend W16 W32 MOVSxL x
      MO_SS_Conv W8  W16 -> integerExtend W8  W16 MOVSxL x

      -- We don't care about the upper bits for MO_XX_Conv, so MOV is enough. However, on 32-bit we
      -- have 8-bit registers only for a few registers (as opposed to x86-64 where every register
      -- has 8-bit version). So for 32-bit code, we'll just zero-extend.
      MO_XX_Conv W8  W32
          | is32Bit   -> integerExtend W8 W32 MOVZxL x
          | otherwise -> integerExtend W8 W32 MOV x
      MO_XX_Conv W8  W16
          | is32Bit   -> integerExtend W8 W16 MOVZxL x
          | otherwise -> integerExtend W8 W16 MOV x
      MO_XX_Conv W16 W32 -> integerExtend W16 W32 MOV x

      MO_UU_Conv W8  W64 | not is32Bit -> integerExtend W8  W64 MOVZxL x
      MO_UU_Conv W16 W64 | not is32Bit -> integerExtend W16 W64 MOVZxL x
      MO_UU_Conv W32 W64 | not is32Bit -> integerExtend W32 W64 MOVZxL x
      MO_SS_Conv W8  W64 | not is32Bit -> integerExtend W8  W64 MOVSxL x
      MO_SS_Conv W16 W64 | not is32Bit -> integerExtend W16 W64 MOVSxL x
      MO_SS_Conv W32 W64 | not is32Bit -> integerExtend W32 W64 MOVSxL x
      -- For 32-to-64 bit zero extension, amd64 uses an ordinary movl.
      -- However, we don't want the register allocator to throw it
      -- away as an unnecessary reg-to-reg move, so we keep it in
      -- the form of a movzl and print it as a movl later.
      -- This doesn't apply to MO_XX_Conv since in this case we don't care about
      -- the upper bits. So we can just use MOV.
      MO_XX_Conv W8  W64 | not is32Bit -> integerExtend W8  W64 MOV x
      MO_XX_Conv W16 W64 | not is32Bit -> integerExtend W16 W64 MOV x
      MO_XX_Conv W32 W64 | not is32Bit -> integerExtend W32 W64 MOV x

      -- Float width conversions.
      MO_FF_Conv W32 W64 -> coerceFP2FP W64 x

      MO_FF_Conv W64 W32 -> coerceFP2FP W32 x

      -- Float <-> signed integer conversions.
      MO_FS_Conv from to -> coerceFP2Int from to x
      MO_SF_Conv from to -> coerceInt2FP from to x

      -- Vector operations are not implemented in the NCG;
      -- they require the LLVM backend.
      MO_V_Insert {}   -> needLlvm
      MO_V_Extract {}  -> needLlvm
      MO_V_Add {}      -> needLlvm
      MO_V_Sub {}      -> needLlvm
      MO_V_Mul {}      -> needLlvm
      MO_VS_Quot {}    -> needLlvm
      MO_VS_Rem {}     -> needLlvm
      MO_VS_Neg {}     -> needLlvm
      MO_VU_Quot {}    -> needLlvm
      MO_VU_Rem {}     -> needLlvm
      MO_VF_Insert {}  -> needLlvm
      MO_VF_Extract {} -> needLlvm
      MO_VF_Add {}     -> needLlvm
      MO_VF_Sub {}     -> needLlvm
      MO_VF_Mul {}     -> needLlvm
      MO_VF_Quot {}    -> needLlvm
      MO_VF_Neg {}     -> needLlvm

      _other -> pprPanic "getRegister" (pprMachOp mop)
   where
        -- Emit a single unary instruction of the given format,
        -- operating in place on the operand of x.
        triv_ucode :: (Format -> Operand -> Instr) -> Format -> NatM Register
        triv_ucode instr format = trivialUCode format (instr format) x

        -- signed or unsigned extension.
        integerExtend :: Width -> Width
                      -> (Format -> Operand -> Operand -> Instr)
                      -> CmmExpr -> NatM Register
        integerExtend from to instr expr = do
            -- W8 sources must live in a byte-addressable register.
            (reg,e_code) <- if from == W8 then getByteReg expr
                                          else getSomeReg expr
            let
                code dst =
                  e_code `snocOL`
                  instr (intFormat from) (OpReg reg) (OpReg dst)
            return (Any (intFormat to) code)

        -- Narrow to a byte-sized view: no instruction needed, but the
        -- value must end up in a byte-addressable register.
        toI8Reg :: Width -> CmmExpr -> NatM Register
        toI8Reg new_rep expr
            = do codefn <- getAnyReg expr
                 return (Any (intFormat new_rep) codefn)
                -- HACK: use getAnyReg to get a byte-addressable register.
                -- If the source was a Fixed register, this will add the
                -- mov instruction to put it into the desired destination.
                -- We're assuming that the destination won't be a fixed
                -- non-byte-addressable register; it won't be, because all
                -- fixed registers are word-sized.

        toI16Reg = toI8Reg -- for now

        -- Re-label the result register with a new format without
        -- emitting any code.
        conversionNop :: Format -> CmmExpr -> NatM Register
        conversionNop new_format expr
            = do e_code <- getRegister' platform is32Bit expr
                 return (swizzleRegisterRep e_code new_format)
889
getRegister' _ is32Bit (CmmMachOp mop [x, y]) = do -- dyadic MachOps
890
  case mop of
Ian Lynagh's avatar
Ian Lynagh committed
891 892 893 894
      MO_F_Eq _ -> condFltReg is32Bit EQQ x y
      MO_F_Ne _ -> condFltReg is32Bit NE  x y
      MO_F_Gt _ -> condFltReg is32Bit GTT x y
      MO_F_Ge _ -> condFltReg is32Bit GE  x y
895 896 897 898
      -- Invert comparison condition and swap operands
      -- See Note [SSE Parity Checks]
      MO_F_Lt _ -> condFltReg is32Bit GTT  y x
      MO_F_Le _ -> condFltReg is32Bit GE   y x
dterei's avatar
dterei committed
899 900 901 902 903 904 905 906 907 908 909 910 911

      MO_Eq _   -> condIntReg EQQ x y
      MO_Ne _   -> condIntReg NE  x y

      MO_S_Gt _ -> condIntReg GTT x y
      MO_S_Ge _ -> condIntReg GE  x y
      MO_S_Lt _ -> condIntReg LTT x y
      MO_S_Le _ -> condIntReg LE  x y

      MO_U_Gt _ -> condIntReg GU  x y
      MO_U_Ge _ -> condIntReg GEU x y
      MO_U_Lt _ -> condIntReg LU  x y
      MO_U_Le _ -> condIntReg LEU x y
912