CodeGen.hs 120 KB
Newer Older
1 2
{-# LANGUAGE CPP, GADTs, NondecreasingIndentation #-}

3 4 5 6
-- The default iteration limit is a bit too low for the definitions
-- in this module.
{-# OPTIONS_GHC -fmax-pmcheck-iterations=10000000 #-}

7 8 9 10 11 12 13 14 15
-----------------------------------------------------------------------------
--
-- Generating machine code (instruction selection)
--
-- (c) The University of Glasgow 1996-2004
--
-----------------------------------------------------------------------------

-- This is a big module, but, if you pay attention to
Ian Lynagh's avatar
Ian Lynagh committed
16
-- (a) the sectioning, and (b) the type signatures, the
17 18
-- structure should not be too overwhelming.

19 20 21
module X86.CodeGen (
        cmmTopCodeGen,
        generateJumpTableForInstr,
22
        extractUnwindPoints,
23 24
        InstrBlock
)
25 26 27 28 29

where

#include "HsVersions.h"
#include "nativeGen/NCG.h"
Simon Marlow's avatar
Simon Marlow committed
30
#include "../includes/MachDeps.h"
31 32 33 34 35 36

-- NCG stuff:
import X86.Instr
import X86.Cond
import X86.Regs
import X86.RegInfo
37
import CodeGen.Platform
tibbe's avatar
tibbe committed
38
import CPrim
39 40
import Debug            ( DebugBlock(..), UnwindPoint(..), UnwindTable
                        , UnwindExpr(UwReg), toUnwindExpr )
41 42 43
import Instruction
import PIC
import NCGMonad
44
import Format
45 46 47 48 49 50
import Reg
import Platform

-- Our intermediate code:
import BasicTypes
import BlockId
51
import Module           ( primUnitId )
52
import PprCmm           ()
53
import CmmUtils
54
import CmmSwitch
55
import Cmm
56 57
import Hoopl.Block
import Hoopl.Graph
58
import CLabel
59 60
import CoreSyn          ( Tickish(..) )
import SrcLoc           ( srcSpanFile, srcSpanStartLine, srcSpanStartCol )
61 62

-- The rest:
63
import ForeignCall      ( CCallConv(..) )
64 65 66 67
import OrdList
import Outputable
import FastString
import DynFlags
68
import Util
69
import UniqSupply       ( getUniqueM )
70

71
import Control.Monad
tibbe's avatar
tibbe committed
72
import Data.Bits
73
import Data.Foldable (fold)
74
import Data.Int
75
import Data.Maybe
dterei's avatar
dterei committed
76 77
import Data.Word

78 79
import qualified Data.Map as M

Ian Lynagh's avatar
Ian Lynagh committed
80 81
-- | True when the target platform is 32-bit.
is32BitPlatform :: NatM Bool
is32BitPlatform = target32Bit . targetPlatform <$> getDynFlags

85 86
-- | True when SSE2 code generation is enabled in the dynamic flags.
sse2Enabled :: NatM Bool
sse2Enabled = isSse2Enabled <$> getDynFlags
89

tibbe's avatar
tibbe committed
90 91
-- | True when SSE4.2 code generation is enabled in the dynamic flags.
sse4_2Enabled :: NatM Bool
sse4_2Enabled = isSse4_2Enabled <$> getDynFlags
tibbe's avatar
tibbe committed
94

95 96 97 98
-- | Pick the SSE2 code generator when SSE2 is enabled, otherwise the
-- x87 one.
if_sse2 :: NatM a -> NatM a -> NatM a
if_sse2 sse2 x87 =
  sse2Enabled >>= \useSse2 -> if useSse2 then sse2 else x87
99

100
-- | Generate native code for one top-level Cmm declaration
-- (a procedure or a data section).
cmmTopCodeGen
        :: RawCmmDecl
        -> NatM [NatCmmDecl (Alignment, CmmStatics) Instr]

-- Procedures: code-generate every basic block, then, if a PIC base
-- register is in use, prepend the PIC base initialisation.
cmmTopCodeGen (CmmProc info lab live graph) = do
  let blocks = toBlockListEntryFirst graph  -- entry block must come first
  (nat_blocks,statics) <- mapAndUnzipM basicBlockCodeGen blocks
  picBaseMb <- getPicBaseMaybeNat
  dflags <- getDynFlags
  let proc = CmmProc info lab live (ListGraph $ concat nat_blocks)
      tops = proc : concat statics
      os   = platformOS $ targetPlatform dflags

  case picBaseMb of
      Just picBase -> initializePicBase_x86 ArchX86 os picBase tops
      Nothing -> return tops

-- Data sections need no instruction selection; alignment 1 is attached.
cmmTopCodeGen (CmmData sec dat) = do
  return [CmmData sec (1, dat)]  -- no translation, we just use CmmStatic
119 120


121
-- | Generate native code for one Cmm basic block.  Code generation may
-- split the block (see the NEWBLOCK handling below), so a list of basic
-- blocks is returned, along with any top-level data emitted on the way.
basicBlockCodeGen
        :: CmmBlock
        -> NatM ( [NatBasicBlock Instr]
                , [NatCmmDecl (Alignment, CmmStatics) Instr])

basicBlockCodeGen block = do
  let (_, nodes, tail)  = blockSplit block
      id = entryLabel block
      stmts = blockToList nodes
  -- Generate location directive
  dbg <- getDebugBlock (entryLabel block)
  loc_instrs <- case dblSourceTick =<< dbg of
    Just (SourceNote span name)
      -> do fileId <- getFileId (srcSpanFile span)
            let line = srcSpanStartLine span; col = srcSpanStartCol span
            return $ unitOL $ LOCATION fileId line col name
    _ -> return nilOL
  mid_instrs <- stmtsToInstrs stmts
  tail_instrs <- stmtToInstrs tail
  let instrs = loc_instrs `appOL` mid_instrs `appOL` tail_instrs
  -- Turn DELTA stack adjustments into unwind annotations.
  instrs' <- fold <$> traverse addSpUnwindings instrs
  -- code generation may introduce new basic block boundaries, which
  -- are indicated by the NEWBLOCK instruction.  We must split up the
  -- instruction stream into basic blocks again.  Also, we extract
  -- LDATAs here too.
  let
        (top,other_blocks,statics) = foldrOL mkBlocks ([],[],[]) instrs'

        mkBlocks (NEWBLOCK id) (instrs,blocks,statics)
          = ([], BasicBlock id instrs : blocks, statics)
        mkBlocks (LDATA sec dat) (instrs,blocks,statics)
          = (instrs, blocks, CmmData sec dat:statics)
        mkBlocks instr (instrs,blocks,statics)
          = (instr:instrs, blocks, statics)
  return (BasicBlock id top : other_blocks, statics)

157 158 159 160 161 162 163
-- | Convert 'DELTA' instructions into 'UNWIND' instructions to capture changes
-- in the @sp@ register. See Note [What is this unwinding business?] in Debug
-- for details.
addSpUnwindings :: Instr -> NatM (OrdList Instr)
addSpUnwindings instr@(DELTA d) = do
    dflags <- getDynFlags
    -- Only emit unwind info when debugging output is requested.
    if debugLevel dflags >= 1
        then do lbl <- mkAsmTempLabel <$> getUniqueM
                let unwind = M.singleton MachSp (Just $ UwReg MachSp $ negate d)
                return $ toOL [ instr, UNWIND lbl unwind ]
        else return (unitOL instr)
-- All other instructions pass through unchanged.
addSpUnwindings instr = return $ unitOL instr
169

170
-- | Code-generate a list of Cmm statements, concatenating the results.
stmtsToInstrs :: [CmmNode e x] -> NatM InstrBlock
stmtsToInstrs stmts = concatOL <$> mapM stmtToInstrs stmts


176
-- | Code-generate a single Cmm statement (open or closed node).
stmtToInstrs :: CmmNode e x -> NatM InstrBlock
stmtToInstrs stmt = do
  dflags <- getDynFlags
  is32Bit <- is32BitPlatform
  case stmt of
    CmmComment s   -> return (unitOL (COMMENT s))
    CmmTick {}     -> return nilOL

    -- Merge the per-register unwind updates into one table; emit
    -- nothing when the table ends up empty.
    CmmUnwind regs -> do
      let to_unwind_entry :: (GlobalReg, Maybe CmmExpr) -> UnwindTable
          to_unwind_entry (reg, expr) = M.singleton reg (fmap toUnwindExpr expr)
      case foldMap to_unwind_entry regs of
        tbl | M.null tbl -> return nilOL
            | otherwise  -> do
                lbl <- mkAsmTempLabel <$> getUniqueM
                return $ unitOL $ UNWIND lbl tbl

    -- Register assignment: dispatch on the type of the destination.
    -- 64-bit integers need special handling on 32-bit targets.
    CmmAssign reg src
      | isFloatType ty         -> assignReg_FltCode format reg src
      | is32Bit && isWord64 ty -> assignReg_I64Code      reg src
      | otherwise              -> assignReg_IntCode format reg src
        where ty = cmmRegType dflags reg
              format = cmmTypeFormat ty

    -- Memory store: same dispatch, but on the type of the stored value.
    CmmStore addr src
      | isFloatType ty         -> assignMem_FltCode format addr src
      | is32Bit && isWord64 ty -> assignMem_I64Code      addr src
      | otherwise              -> assignMem_IntCode format addr src
        where ty = cmmExprType dflags src
              format = cmmTypeFormat ty

    CmmUnsafeForeignCall target result_regs args
       -> genCCall dflags is32Bit target result_regs args

    CmmBranch id          -> genBranch id
    CmmCondBranch arg true false _ -> do
      b1 <- genCondJump true arg
      b2 <- genBranch false
      return (b1 `appOL` b2)
    CmmSwitch arg ids -> do dflags <- getDynFlags
                            genSwitch dflags arg ids
    CmmCall { cml_target = arg
            , cml_args_regs = gregs } -> do
                                dflags <- getDynFlags
                                genJump arg (jumpRegs dflags gregs)
    _ ->
      panic "stmtToInstrs: statement should have been cps'd away"
223 224


225 226
-- | The real registers that a jump's live global registers map to on
-- this platform; globals with no hardware register are dropped.
jumpRegs :: DynFlags -> [GlobalReg] -> [Reg]
jumpRegs dflags gregs = map RegReal (mapMaybe (globalRegMaybe platform) gregs)
    where platform = targetPlatform dflags
228

229 230
--------------------------------------------------------------------------------
-- | 'InstrBlock's are the insn sequences generated by the insn selectors.
--      They are really trees of insns to facilitate fast appending, where a
--      left-to-right traversal yields the insns in the correct order.
--
type InstrBlock
        = OrdList Instr


-- | Condition codes passed up the tree.
--
data CondCode
        = CondCode Bool Cond InstrBlock


-- | a.k.a "Register64"
--      Reg is the lower 32-bit temporary which contains the result.
--      Use getHiVRegFromLo to find the other VRegUnique.
--
--      Rules of this simplified insn selection game are therefore that
--      the returned Reg may be modified
--
data ChildCode64
   = ChildCode64
        InstrBlock
        Reg


-- | Register's passed up the tree.  If the stix code forces the register
--      to live in a pre-decided machine register, it comes out as @Fixed@;
--      otherwise, it comes out as @Any@, and the parent can decide which
--      register to put it in.
--
data Register
        = Fixed Format Reg InstrBlock
        | Any   Format (Reg -> InstrBlock)
265 266


267 268 269
-- | Re-tag a 'Register' with a different 'Format', leaving its code
-- (or code function) untouched.
swizzleRegisterRep :: Register -> Format -> Register
swizzleRegisterRep register format = case register of
    Fixed _ reg code -> Fixed format reg code
    Any _ codefn     -> Any   format codefn
270 271 272


-- | Grab the Reg for a CmmReg
getRegisterReg :: Platform -> Bool -> CmmReg -> Reg

-- Local registers become virtual registers.  Without SSE2,
-- floating-point values are kept in x87 format (FF80).
getRegisterReg _ use_sse2 (CmmLocal (LocalReg u pk))
  = let fmt = cmmTypeFormat pk in
    if isFloatFormat fmt && not use_sse2
       then RegVirtual (mkVirtualReg u FF80)
       else RegVirtual (mkVirtualReg u fmt)

-- Global registers must map to a real machine register at this stage.
getRegisterReg platform _ (CmmGlobal mid)
  = case globalRegMaybe platform mid of
        Just reg -> RegReal $ reg
        Nothing  -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid)
        -- By this stage, the only MagicIds remaining should be the
        -- ones which map to a real machine register on this
        -- platform.  Hence ...
288 289 290


-- | Memory addressing modes passed up the tree.
data Amode
        = Amode AddrMode InstrBlock

{-
Now, given a tree (the argument to an CmmLoad) that references memory,
produce a suitable addressing mode.

A Rule of the Game (tm) for Amodes: use of the addr bit must
immediately follow use of the code part, since the code part puts
values in registers which the addr then refers to.  So you can't put
anything in between, lest it overwrite some of those registers.  If
you need to do some other computation between the code part and use of
the addr bit, first store the effective address from the amode in a
temporary, then do the other computation, and then use the temporary:

    code
    LEA amode, tmp
    ... other computation ...
    ... (tmp) ...
-}


-- | Check whether an integer will fit in 32 bits.
--      A CmmInt is intended to be truncated to the appropriate
--      number of bits, so here we truncate it to Int64.  This is
--      important because e.g. -1 as a CmmInt might be either
--      -1 or 18446744073709551615.
--
is32BitInteger :: Integer -> Bool
is32BitInteger i = -0x80000000 <= truncated && truncated <= 0x7fffffff
  where truncated = fromIntegral i :: Int64


-- | Convert a BlockId to some CmmStatic data
jumpTableEntry :: DynFlags -> Maybe BlockId -> CmmStatic
jumpTableEntry dflags Nothing    = CmmStaticLit (CmmInt 0 (wordWidth dflags))
jumpTableEntry _      (Just bid) = CmmStaticLit (CmmLabel (blockLbl bid))
329 330 331 332 333 334 335


-- -----------------------------------------------------------------------------
-- General things for putting together code sequences

-- Expand CmmRegOff.  ToDo: should we do it this way around, or convert
-- CmmExprs into CmmRegOff?
mangleIndexTree :: DynFlags -> CmmReg -> Int -> CmmExpr
mangleIndexTree dflags reg off =
    CmmMachOp (MO_Add width) [CmmReg reg, CmmLit (CmmInt (fromIntegral off) width)]
  where
    width = typeWidth (cmmRegType dflags reg)
340 341

-- | The dual to getAnyReg: compute an expression into a register, but
--      we don't mind which one it is.
getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
getSomeReg expr = do
  register <- getRegister expr
  case register of
    Fixed _ reg code -> return (reg, code)
    Any rep code     -> do
        tmp <- getNewRegNat rep
        return (tmp, code tmp)
352 353 354 355 356 357


-- | Store a 64-bit value to memory on a 32-bit target: two 32-bit
-- stores, low word at the address and high word 4 bytes above it.
assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock
assignMem_I64Code addrTree valueTree = do
  Amode addr addr_code <- getAmode addrTree
  ChildCode64 vcode rlo <- iselExpr64 valueTree
  let
        rhi = getHiVRegFromLo rlo

        -- Little-endian store
        mov_lo = MOV II32 (OpReg rlo) (OpAddr addr)
        mov_hi = MOV II32 (OpReg rhi) (OpAddr (fromJust (addrOffset addr 4)))
  return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)


-- | Assign a 64-bit value to a local register on a 32-bit target by
-- moving the low and high words into the destination's register pair.
assignReg_I64Code :: CmmReg  -> CmmExpr -> NatM InstrBlock
assignReg_I64Code (CmmLocal (LocalReg u_dst _)) valueTree = do
   ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
   let
         r_dst_lo = RegVirtual $ mkVirtualReg u_dst II32
         r_dst_hi = getHiVRegFromLo r_dst_lo
         r_src_hi = getHiVRegFromLo r_src_lo
         mov_lo = MOV II32 (OpReg r_src_lo) (OpReg r_dst_lo)
         mov_hi = MOV II32 (OpReg r_src_hi) (OpReg r_dst_hi)
   return (
        vcode `snocOL` mov_lo `snocOL` mov_hi
     )

-- Only local registers can be 64-bit lvalues here.
assignReg_I64Code _ _
   = panic "assignReg_I64Code(i386): invalid lvalue"


-- | Instruction selection for a 64-bit-wide expression on a 32-bit
-- target.  Results come back as a 'ChildCode64': code plus the low
-- 32-bit register (the high register is found via getHiVRegFromLo).
iselExpr64        :: CmmExpr -> NatM ChildCode64

-- Integer literal: load the two 32-bit halves as immediates.
iselExpr64 (CmmLit (CmmInt i _)) = do
  (rlo,rhi) <- getNewRegPairNat II32
  let
        r = fromIntegral (fromIntegral i :: Word32)
        q = fromIntegral (fromIntegral (i `shiftR` 32) :: Word32)
        code = toOL [
                MOV II32 (OpImm (ImmInteger r)) (OpReg rlo),
                MOV II32 (OpImm (ImmInteger q)) (OpReg rhi)
                ]
  return (ChildCode64 code rlo)

-- 64-bit load: two 32-bit loads, high word at offset 4 (little-endian).
iselExpr64 (CmmLoad addrTree ty) | isWord64 ty = do
   Amode addr addr_code <- getAmode addrTree
   (rlo,rhi) <- getNewRegPairNat II32
   let
        mov_lo = MOV II32 (OpAddr addr) (OpReg rlo)
        mov_hi = MOV II32 (OpAddr (fromJust (addrOffset addr 4))) (OpReg rhi)
   return (
            ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
                        rlo
     )

-- Local 64-bit register: no code needed, just name the low register.
iselExpr64 (CmmReg (CmmLocal (LocalReg vu ty))) | isWord64 ty
   = return (ChildCode64 nilOL (RegVirtual $ mkVirtualReg vu II32))

-- we handle addition, but rather badly

-- Add a literal: ADD/ADC with the literal's two halves as immediates.
iselExpr64 (CmmMachOp (MO_Add _) [e1, CmmLit (CmmInt i _)]) = do
   ChildCode64 code1 r1lo <- iselExpr64 e1
   (rlo,rhi) <- getNewRegPairNat II32
   let
        r = fromIntegral (fromIntegral i :: Word32)
        q = fromIntegral (fromIntegral (i `shiftR` 32) :: Word32)
        r1hi = getHiVRegFromLo r1lo
        code =  code1 `appOL`
                toOL [ MOV II32 (OpReg r1lo) (OpReg rlo),
                       ADD II32 (OpImm (ImmInteger r)) (OpReg rlo),
                       MOV II32 (OpReg r1hi) (OpReg rhi),
                       ADC II32 (OpImm (ImmInteger q)) (OpReg rhi) ]
   return (ChildCode64 code rlo)

-- General addition: ADD the low words, ADC the high words (carry chain).
iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
   ChildCode64 code1 r1lo <- iselExpr64 e1
   ChildCode64 code2 r2lo <- iselExpr64 e2
   (rlo,rhi) <- getNewRegPairNat II32
   let
        r1hi = getHiVRegFromLo r1lo
        r2hi = getHiVRegFromLo r2lo
        code =  code1 `appOL`
                code2 `appOL`
                toOL [ MOV II32 (OpReg r1lo) (OpReg rlo),
                       ADD II32 (OpReg r2lo) (OpReg rlo),
                       MOV II32 (OpReg r1hi) (OpReg rhi),
                       ADC II32 (OpReg r2hi) (OpReg rhi) ]
   return (ChildCode64 code rlo)

-- Subtraction: SUB the low words, SBB the high words (borrow chain).
iselExpr64 (CmmMachOp (MO_Sub _) [e1,e2]) = do
   ChildCode64 code1 r1lo <- iselExpr64 e1
   ChildCode64 code2 r2lo <- iselExpr64 e2
   (rlo,rhi) <- getNewRegPairNat II32
   let
        r1hi = getHiVRegFromLo r1lo
        r2hi = getHiVRegFromLo r2lo
        code =  code1 `appOL`
                code2 `appOL`
                toOL [ MOV II32 (OpReg r1lo) (OpReg rlo),
                       SUB II32 (OpReg r2lo) (OpReg rlo),
                       MOV II32 (OpReg r1hi) (OpReg rhi),
                       SBB II32 (OpReg r2hi) (OpReg rhi) ]
   return (ChildCode64 code rlo)

-- Zero-extension to 64 bits: compute the value into the low register
-- and zero the high register.
iselExpr64 (CmmMachOp (MO_UU_Conv _ W64) [expr]) = do
     fn <- getAnyReg expr
     r_dst_lo <-  getNewRegNat II32
     let r_dst_hi = getHiVRegFromLo r_dst_lo
         code = fn r_dst_lo
     return (
             ChildCode64 (code `snocOL`
                          MOV II32 (OpImm (ImmInt 0)) (OpReg r_dst_hi))
                          r_dst_lo
            )

iselExpr64 expr
   = pprPanic "iselExpr64(i386)" (ppr expr)
468 469 470 471


--------------------------------------------------------------------------------
-- | Produce a 'Register' for an expression, fetching the dynamic flags
-- and target word size before dispatching to the worker.
getRegister :: CmmExpr -> NatM Register
getRegister e = do
    dflags  <- getDynFlags
    is32Bit <- is32BitPlatform
    getRegister' dflags is32Bit e
475

476
-- | Worker for 'getRegister', parameterised over the dynamic flags and
-- whether the target is 32-bit.
getRegister' :: DynFlags -> Bool -> CmmExpr -> NatM Register

getRegister' dflags is32Bit (CmmReg reg)
  = case reg of
        CmmGlobal PicBaseReg
         | is32Bit ->
            -- on x86_64, we have %rip for PicBaseReg, but it's not
            -- a full-featured register, it can only be used for
            -- rip-relative addressing.
            do reg' <- getPicBaseNat (archWordFormat is32Bit)
               return (Fixed (archWordFormat is32Bit) reg' nilOL)
        _ ->
            do use_sse2 <- sse2Enabled
               let
                 fmt = cmmTypeFormat (cmmRegType dflags reg)
                 -- without SSE2, floats live in x87 format
                 format | not use_sse2 && isFloatFormat fmt = FF80
                        | otherwise                         = fmt
               --
               let platform = targetPlatform dflags
               return (Fixed format
                             (getRegisterReg platform use_sse2 reg)
                             nilOL)
498 499


500 501
-- Register-plus-offset: expand to an explicit addition and retry.
getRegister' dflags is32Bit (CmmRegOff r n)
  = getRegister' dflags is32Bit $ mangleIndexTree dflags r n
502 503 504 505

-- for 32-bit architectures, support some 64 -> 32 bit conversions:
-- TO_W_(x), TO_W_(x >> 32)

-- High word: select the 64-bit expression, return its (fixed) high
-- virtual register.
getRegister' _ is32Bit (CmmMachOp (MO_UU_Conv W64 W32)
                     [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]])
 | is32Bit = do
  ChildCode64 code rlo <- iselExpr64 x
  return $ Fixed II32 (getHiVRegFromLo rlo) code

getRegister' _ is32Bit (CmmMachOp (MO_SS_Conv W64 W32)
                     [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]])
 | is32Bit = do
  ChildCode64 code rlo <- iselExpr64 x
  return $ Fixed II32 (getHiVRegFromLo rlo) code

-- Low word: the low virtual register is the result itself.
getRegister' _ is32Bit (CmmMachOp (MO_UU_Conv W64 W32) [x])
 | is32Bit = do
  ChildCode64 code rlo <- iselExpr64 x
  return $ Fixed II32 rlo code

getRegister' _ is32Bit (CmmMachOp (MO_SS_Conv W64 W32) [x])
 | is32Bit = do
  ChildCode64 code rlo <- iselExpr64 x
  return $ Fixed II32 rlo code
527

528
-- Float literals.  With SSE2, zero is materialised by XOR'ing the
-- destination with itself; other constants are loaded from memory.
-- The x87 path additionally special-cases 0.0 and 1.0, which have
-- dedicated load instructions.
getRegister' _ _ (CmmLit lit@(CmmFloat f w)) =
  if_sse2 float_const_sse2 float_const_x87
 where
  float_const_sse2
    | f == 0.0 = do
      let
          format = floatFormat w
          code dst = unitOL  (XOR format (OpReg dst) (OpReg dst))
        -- I don't know why there are xorpd, xorps, and pxor instructions.
        -- They all appear to do the same thing --SDM
      return (Any format code)

    | otherwise = do
      Amode addr code <- memConstant (widthInBytes w) lit
      loadFloatAmode True w addr code

  float_const_x87 = case w of
    W64
      | f == 0.0 ->
        let code dst = unitOL (GLDZ dst)
        in  return (Any FF80 code)

      | f == 1.0 ->
        let code dst = unitOL (GLD1 dst)
        in  return (Any FF80 code)

    _otherwise -> do
      Amode addr code <- memConstant (widthInBytes w) lit
      loadFloatAmode False w addr code
557 558

-- catch simple cases of zero- or sign-extended load
getRegister' _ _ (CmmMachOp (MO_UU_Conv W8 W32) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVZxL II8) addr
  return (Any II32 code)

getRegister' _ _ (CmmMachOp (MO_SS_Conv W8 W32) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVSxL II8) addr
  return (Any II32 code)

getRegister' _ _ (CmmMachOp (MO_UU_Conv W16 W32) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVZxL II16) addr
  return (Any II32 code)

getRegister' _ _ (CmmMachOp (MO_SS_Conv W16 W32) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVSxL II16) addr
  return (Any II32 code)

-- catch simple cases of zero- or sign-extended load
-- (64-bit destinations, so 64-bit targets only)
getRegister' _ is32Bit (CmmMachOp (MO_UU_Conv W8 W64) [CmmLoad addr _])
 | not is32Bit = do
  code <- intLoadCode (MOVZxL II8) addr
  return (Any II64 code)

getRegister' _ is32Bit (CmmMachOp (MO_SS_Conv W8 W64) [CmmLoad addr _])
 | not is32Bit = do
  code <- intLoadCode (MOVSxL II8) addr
  return (Any II64 code)

getRegister' _ is32Bit (CmmMachOp (MO_UU_Conv W16 W64) [CmmLoad addr _])
 | not is32Bit = do
  code <- intLoadCode (MOVZxL II16) addr
  return (Any II64 code)

getRegister' _ is32Bit (CmmMachOp (MO_SS_Conv W16 W64) [CmmLoad addr _])
 | not is32Bit = do
  code <- intLoadCode (MOVSxL II16) addr
  return (Any II64 code)

getRegister' _ is32Bit (CmmMachOp (MO_UU_Conv W32 W64) [CmmLoad addr _])
 | not is32Bit = do
  code <- intLoadCode (MOV II32) addr -- 32-bit loads zero-extend
  return (Any II64 code)

getRegister' _ is32Bit (CmmMachOp (MO_SS_Conv W32 W64) [CmmLoad addr _])
 | not is32Bit = do
  code <- intLoadCode (MOVSxL II32) addr
  return (Any II64 code)

606
-- PIC base plus a literal displacement on x86_64: fold the addition
-- into a single rip-relative LEA.
getRegister' _ is32Bit (CmmMachOp (MO_Add W64) [CmmReg (CmmGlobal PicBaseReg),
                                     CmmLit displacement])
 | not is32Bit = do
      return $ Any II64 (\dst -> unitOL $
        LEA II64 (OpAddr (ripRel (litToImm displacement))) (OpReg dst))

612
getRegister' dflags is32Bit (CmmMachOp mop [x]) = do -- unary MachOps
    sse2 <- sse2Enabled
    case mop of
      MO_F_Neg w
         | sse2      -> sse2NegCode w x
         | otherwise -> trivialUFCode FF80 (GNEG FF80) x

      MO_S_Neg w -> triv_ucode NEGI (intFormat w)
      MO_Not w   -> triv_ucode NOT  (intFormat w)

      -- Nop conversions
      MO_UU_Conv W32 W8  -> toI8Reg  W32 x
      MO_SS_Conv W32 W8  -> toI8Reg  W32 x
      MO_UU_Conv W16 W8  -> toI8Reg  W16 x
      MO_SS_Conv W16 W8  -> toI8Reg  W16 x
      MO_UU_Conv W32 W16 -> toI16Reg W32 x
      MO_SS_Conv W32 W16 -> toI16Reg W32 x

      MO_UU_Conv W64 W32 | not is32Bit -> conversionNop II64 x
      MO_SS_Conv W64 W32 | not is32Bit -> conversionNop II64 x
      MO_UU_Conv W64 W16 | not is32Bit -> toI16Reg W64 x
      MO_SS_Conv W64 W16 | not is32Bit -> toI16Reg W64 x
      MO_UU_Conv W64 W8  | not is32Bit -> toI8Reg  W64 x
      MO_SS_Conv W64 W8  | not is32Bit -> toI8Reg  W64 x

      MO_UU_Conv rep1 rep2 | rep1 == rep2 -> conversionNop (intFormat rep1) x
      MO_SS_Conv rep1 rep2 | rep1 == rep2 -> conversionNop (intFormat rep1) x

      -- widenings
      MO_UU_Conv W8  W32 -> integerExtend W8  W32 MOVZxL x
      MO_UU_Conv W16 W32 -> integerExtend W16 W32 MOVZxL x
      MO_UU_Conv W8  W16 -> integerExtend W8  W16 MOVZxL x

      MO_SS_Conv W8  W32 -> integerExtend W8  W32 MOVSxL x
      MO_SS_Conv W16 W32 -> integerExtend W16 W32 MOVSxL x
      MO_SS_Conv W8  W16 -> integerExtend W8  W16 MOVSxL x

      MO_UU_Conv W8  W64 | not is32Bit -> integerExtend W8  W64 MOVZxL x
      MO_UU_Conv W16 W64 | not is32Bit -> integerExtend W16 W64 MOVZxL x
      MO_UU_Conv W32 W64 | not is32Bit -> integerExtend W32 W64 MOVZxL x
      MO_SS_Conv W8  W64 | not is32Bit -> integerExtend W8  W64 MOVSxL x
      MO_SS_Conv W16 W64 | not is32Bit -> integerExtend W16 W64 MOVSxL x
      MO_SS_Conv W32 W64 | not is32Bit -> integerExtend W32 W64 MOVSxL x
        -- for 32-to-64 bit zero extension, amd64 uses an ordinary movl.
        -- However, we don't want the register allocator to throw it
        -- away as an unnecessary reg-to-reg move, so we keep it in
        -- the form of a movzl and print it as a movl later.

      MO_FF_Conv W32 W64
        | sse2      -> coerceFP2FP W64 x
        | otherwise -> conversionNop FF80 x

      MO_FF_Conv W64 W32 -> coerceFP2FP W32 x

      MO_FS_Conv from to -> coerceFP2Int from to x
      MO_SF_Conv from to -> coerceInt2FP from to x

      -- Vector operations are not handled by the NCG; LLVM is required.
      MO_V_Insert {}   -> needLlvm
      MO_V_Extract {}  -> needLlvm
      MO_V_Add {}      -> needLlvm
      MO_V_Sub {}      -> needLlvm
      MO_V_Mul {}      -> needLlvm
      MO_VS_Quot {}    -> needLlvm
      MO_VS_Rem {}     -> needLlvm
      MO_VS_Neg {}     -> needLlvm
      MO_VU_Quot {}    -> needLlvm
      MO_VU_Rem {}     -> needLlvm
      MO_VF_Insert {}  -> needLlvm
      MO_VF_Extract {} -> needLlvm
      MO_VF_Add {}     -> needLlvm
      MO_VF_Sub {}     -> needLlvm
      MO_VF_Mul {}     -> needLlvm
      MO_VF_Quot {}    -> needLlvm
      MO_VF_Neg {}     -> needLlvm

      _other -> pprPanic "getRegister" (pprMachOp mop)
   where
        triv_ucode :: (Format -> Operand -> Instr) -> Format -> NatM Register
        triv_ucode instr format = trivialUCode format (instr format) x

        -- signed or unsigned extension.
        integerExtend :: Width -> Width
                      -> (Format -> Operand -> Operand -> Instr)
                      -> CmmExpr -> NatM Register
        integerExtend from to instr expr = do
            (reg,e_code) <- if from == W8 then getByteReg expr
                                          else getSomeReg expr
            let
                code dst =
                  e_code `snocOL`
                  instr (intFormat from) (OpReg reg) (OpReg dst)
            return (Any (intFormat to) code)

        toI8Reg :: Width -> CmmExpr -> NatM Register
        toI8Reg new_rep expr
            = do codefn <- getAnyReg expr
                 return (Any (intFormat new_rep) codefn)
                -- HACK: use getAnyReg to get a byte-addressable register.
                -- If the source was a Fixed register, this will add the
                -- mov instruction to put it into the desired destination.
                -- We're assuming that the destination won't be a fixed
                -- non-byte-addressable register; it won't be, because all
                -- fixed registers are word-sized.

        toI16Reg = toI8Reg -- for now

        conversionNop :: Format -> CmmExpr -> NatM Register
        conversionNop new_format expr
            = do e_code <- getRegister' dflags is32Bit expr
                 return (swizzleRegisterRep e_code new_format)
722 723


724
getRegister' _ is32Bit (CmmMachOp mop [x, y]) = do -- dyadic MachOps
725 726
  sse2 <- sse2Enabled
  case mop of
Ian Lynagh's avatar
Ian Lynagh committed
727 728 729 730 731 732
      MO_F_Eq _ -> condFltReg is32Bit EQQ x y
      MO_F_Ne _ -> condFltReg is32Bit NE  x y
      MO_F_Gt _ -> condFltReg is32Bit GTT x y
      MO_F_Ge _ -> condFltReg is32Bit GE  x y
      MO_F_Lt _ -> condFltReg is32Bit LTT x y
      MO_F_Le _ -> condFltReg is32Bit LE  x y
dterei's avatar
dterei committed
733 734 735 736 737 738 739 740 741 742 743 744 745

      MO_Eq _   -> condIntReg EQQ x y
      MO_Ne _   -> condIntReg NE  x y

      MO_S_Gt _ -> condIntReg GTT x y
      MO_S_Ge _ -> condIntReg GE  x y
      MO_S_Lt _ -> condIntReg LTT x y
      MO_S_Le _ -> condIntReg LE  x y

      MO_U_Gt _ -> condIntReg GU  x y
      MO_U_Ge _ -> condIntReg GEU x y
      MO_U_Lt _ -> condIntReg LU  x y
      MO_U_Le _ -> condIntReg LEU x y
746

747
      MO_F_Add w  | sse2      -> trivialFCode_sse2 w ADD  x y
dterei's avatar
dterei committed
748
                  | otherwise -> trivialFCode_x87    GADD x y
749
      MO_F_Sub w  | sse2      -> trivialFCode_sse2 w SUB  x y
dterei's avatar
dterei committed
750
                  | otherwise -> trivialFCode_x87    GSUB x y
751
      MO_F_Quot w | sse2      -> trivialFCode_sse2 w FDIV x y
dterei's avatar
dterei committed
752
                  | otherwise -> trivialFCode_x87    GDIV x y
753
      MO_F_Mul w  | sse2      -> trivialFCode_sse2 w MUL x y
dterei's avatar
dterei committed
754
                  | otherwise -> trivialFCode_x87    GMUL x y
755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770

      MO_Add rep -> add_code rep x y
      MO_Sub rep -> sub_code rep x y

      MO_S_Quot rep -> div_code rep True  True  x y
      MO_S_Rem  rep -> div_code rep True  False x y
      MO_U_Quot rep -> div_code rep False True  x y
      MO_U_Rem  rep -> div_code rep False False x y

      MO_S_MulMayOflo rep -> imulMayOflo rep x y

      MO_Mul rep -> triv_op rep IMUL
      MO_And rep -> triv_op rep AND
      MO_Or  rep -> triv_op rep OR
      MO_Xor rep -> triv_op rep XOR

771 772 773 774
        {- Shift ops on x86s have constraints on their source, it
           either has to be Imm, CL or 1
            => trivialCode is not restrictive enough (sigh.)
        -}
775 776 777 778
      MO_Shl rep   -> shift_code rep SHL x y {-False-}
      MO_U_Shr rep -> shift_code rep SHR x y {-False-}
      MO_S_Shr rep -> shift_code rep SAR x y {-False-}

779 780 781 782 783 784 785 786 787 788 789 790 791 792 793
      MO_V_Insert {}   -> needLlvm
      MO_V_Extract {}  -> needLlvm
      MO_V_Add {}      -> needLlvm
      MO_V_Sub {}      -> needLlvm
      MO_V_Mul {}      -> needLlvm
      MO_VS_Quot {}    -> needLlvm
      MO_VS_Rem {}     -> needLlvm
      MO_VS_Neg {}     -> needLlvm
      MO_VF_Insert {}  -> needLlvm
      MO_VF_Extract {} -> needLlvm
      MO_VF_Add {}     -> needLlvm
      MO_VF_Sub {}     -> needLlvm
      MO_VF_Mul {}     -> needLlvm
      MO_VF_Quot {}    -> needLlvm
      MO_VF_Neg {}     -> needLlvm
794

dterei's avatar
dterei committed
795
      _other -> pprPanic "getRegister(x86) - binary CmmMachOp (1)" (pprMachOp mop)
796 797 798
  where
    --------------------
    triv_op width instr = trivialCode width op (Just op) x y
799
                        where op   = instr (intFormat width)
800 801 802 803 804

    imulMayOflo :: Width -> CmmExpr -> CmmExpr -> NatM Register
    imulMayOflo rep a b = do
         (a_reg, a_code) <- getNonClobberedReg a
         b_code <- getAnyReg b
805 806 807 808 809
         let
             shift_amt  = case rep of
                           W32 -> 31
                           W64 -> 63
                           _ -> panic "shift_amt"
810

811
             format = intFormat rep
812 813
             code = a_code `appOL` b_code eax `appOL`
                        toOL [
814 815
                           IMUL2 format (OpReg a_reg),   -- result in %edx:%eax
                           SAR format (OpImm (ImmInt shift_amt)) (OpReg eax),
816
                                -- sign extend lower part
817
                           SUB format (OpReg edx) (OpReg eax)
818
                                -- compare against upper
819 820
                           -- eax==0 if high part == sign extended low part
                        ]
821
         return (Fixed format eax code)
822 823 824

    --------------------
    shift_code :: Width
825
               -> (Format -> Operand -> Operand -> Instr)
826 827 828
               -> CmmExpr
               -> CmmExpr
               -> NatM Register
829 830

    {- Case1: shift length as immediate -}
dterei's avatar
dterei committed
831
    shift_code width instr x (CmmLit lit) = do
832 833
          x_code <- getAnyReg x
          let
834
               format = intFormat width
835 836
               code dst
                  = x_code dst `snocOL`
837 838
                    instr format (OpImm (litToImm lit)) (OpReg dst)
          return (Any format code)
839

840 841 842 843
    {- Case2: shift length is complex (non-immediate)
      * y must go in %ecx.
      * we cannot do y first *and* put its result in %ecx, because
        %ecx might be clobbered by x.
844
      * if we do y second, then x cannot be
845 846 847 848 849
        in a clobbered reg.  Also, we cannot clobber x's reg
        with the instruction itself.
      * so we can either:
        - do y first, put its result in a fresh tmp, then copy it to %ecx later
        - do y second and put its result into %ecx.  x gets placed in a fresh
Gabor Greif's avatar
typos  
Gabor Greif committed
850
          tmp.  This is likely to be better, because the reg alloc can
851 852 853 854 855
          eliminate this reg->reg move here (it won't eliminate the other one,
          because the move is into the fixed %ecx).
    -}
    shift_code width instr x y{-amount-} = do
        x_code <- getAnyReg x
856 857
        let format = intFormat width
        tmp <- getNewRegNat format
858
        y_code <- getAnyReg y
859 860 861
        let
           code = x_code tmp `appOL`
                  y_code ecx `snocOL`
862 863
                  instr format (OpReg ecx) (OpReg tmp)
        return (Fixed format tmp code)
864 865 866 867

    --------------------
    add_code :: Width -> CmmExpr -> CmmExpr -> NatM Register
    add_code rep x (CmmLit (CmmInt y _))
868
        | is32BitInteger y = add_int rep x y
869 870
    add_code rep x y = trivialCode rep (ADD format) (Just (ADD format)) x y
      where format = intFormat rep
871 872
    -- TODO: There are other interesting patterns we want to replace
    --     with a LEA, e.g. `(x + offset) + (y << shift)`.
873 874 875 876

    --------------------
    sub_code :: Width -> CmmExpr -> CmmExpr -> NatM Register
    sub_code rep x (CmmLit (CmmInt y _))
877
        | is32BitInteger (-y) = add_int rep x (-y)
878
    sub_code rep x y = trivialCode rep (SUB (intFormat rep)) Nothing x y
879 880 881

    -- our three-operand add instruction:
    add_int width x y = do
882 883
        (x_reg, x_code) <- getSomeReg x
        let
884
            format = intFormat width
885 886
            imm = ImmInt (fromInteger y)
            code dst
887
               = x_code `snocOL`
888
                 LEA format
889
                        (OpAddr (AddrBaseIndex (EABaseReg x_reg) EAIndexNone imm))
890
                        (OpReg dst)
891
        --
892
        return (Any format code)
893 894 895

    ----------------------
    div_code width signed quotient x y = do
896 897 898
           (y_op, y_code) <- getRegOrMem y -- cannot be clobbered
           x_code <- getAnyReg x
           let
899 900 901
             format = intFormat width
             widen | signed    = CLTD format
                   | otherwise = XOR format (OpReg edx) (OpReg edx)
902

903 904
             instr | signed    = IDIV
                   | otherwise = DIV
905

906 907
             code = y_code `appOL`
                    x_code eax `appOL`
908
                    toOL [widen, instr format y_op]
909

910 911
             result | quotient  = eax
                    | otherwise = edx
912

913
           return (Fixed format result code)
914 915


916
-- Floating-point loads: compute the address once, then let
-- 'loadFloatAmode' pick the SSE2 or x87 load sequence depending on
-- whether SSE2 is enabled (matching the SSE2/x87 split used for the
-- float arithmetic ops above).
getRegister' _ _ (CmmLoad mem pk)
  | isFloatType pk
  = do
    Amode addr mem_code <- getAmode mem
    use_sse2 <- sse2Enabled
    loadFloatAmode use_sse2 (typeWidth pk) addr mem_code
922

923
-- Integer loads on i386.  Word64 loads are excluded here: on a 32-bit
-- target they don't fit in a single register and are handled elsewhere.
getRegister' _ is32Bit (CmmLoad mem pk)
  | is32Bit && not (isWord64 pk)
  = do
    code <- intLoadCode instr mem
    return (Any format code)
  where
    width = typeWidth pk
    format = intFormat width
    instr = case width of
                W8     -> MOVZxL II8
                _other -> MOV format
        -- We always zero-extend 8-bit loads, if we
        -- can't think of anything better.  This is because
        -- we can't guarantee access to an 8-bit variant of every register
        -- (esi and edi don't have 8-bit variants), so to make things
        -- simpler we do our 8-bit arithmetic with full 32-bit registers.
939 940

-- Simpler memory load code on x86_64: every width has a plain MOV form,
-- so no zero-extension special case is needed (unlike the i386 clause
-- above).
getRegister' _ is32Bit (CmmLoad mem pk)
 | not is32Bit
  = do
    code <- intLoadCode (MOV format) mem
    return (Any format code)
  where format = intFormat $ typeWidth pk
947

948
-- Literal zero: materialise it with a self-XOR instead of a MOV of an
-- immediate (shorter encoding, and a well-known zeroing idiom).
getRegister' _ is32Bit (CmmLit (CmmInt 0 width))
  = let
        format = intFormat width

        -- x86_64: 32-bit xor is one byte shorter, and zero-extends to 64 bits
        format1 = if is32Bit then format
                           else case format of
                                II64 -> II32
                                _ -> format
        code dst
           = unitOL (XOR format1 (OpReg dst) (OpReg dst))
    in
        return (Any format code)
961 962 963 964

  -- optimisation for loading small literals on x86_64: take advantage
  -- of the automatic zero-extension from 32 to 64 bits, because the 32-bit
  -- instruction forms are shorter.
getRegister' dflags is32Bit (CmmLit lit)
  | not is32Bit, isWord64 (cmmLitType dflags lit), not (isBigLit lit)
  = let
        imm = litToImm lit
        -- 32-bit MOV; the hardware zero-extends into the full 64-bit dst.
        code dst = unitOL (MOV II32 (OpImm imm) (OpReg dst))
    in
        return (Any II64 code)
  where
   -- "big" = does not fit in an *unsigned* 32-bit immediate.
   isBigLit (CmmInt i _) = i < 0 || i > 0xffffffff
   isBigLit _ = False
        -- note1: not the same as (not.is32BitLit), because that checks for
        -- signed literals that fit in 32 bits, but we want unsigned
        -- literals here.
        -- note2: all labels are small, because we're assuming the
        -- small memory model (see gcc docs, -mcmodel=small).
980

981
-- Generic literal case: a single MOV of the literal (as an immediate)
-- into any destination register, at the literal's natural format.
getRegister' dflags _ (CmmLit lit)
  = do let format = cmmTypeFormat (cmmLitType dflags lit)
           imm = litToImm lit
           code dst = unitOL (MOV format (OpImm imm) (OpReg dst))
       return (Any format code)
986

987 988 989
-- Fallthrough: vector expressions are only supported by the LLVM
-- backend; anything else reaching here is a compiler bug.
getRegister' _ _ other
    | isVecExpr other  = needLlvm
    | otherwise        = pprPanic "getRegister(x86)" (ppr other)
990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013


-- | Build load code for an integer memory access: resolve the addressing
-- mode for @addrExpr@, then append one @mkLoad@ instruction moving from
-- that address into the (as-yet-unknown) destination register.
intLoadCode :: (Operand -> Operand -> Instr) -> CmmExpr
   -> NatM (Reg -> InstrBlock)
intLoadCode mkLoad addrExpr = do
  Amode src addr_code <- getAmode addrExpr
  let load dst = addr_code `snocOL` mkLoad (OpAddr src) (OpReg dst)
  return load

-- | Compute an expression into *any* register, adding the appropriate
-- move instruction if necessary (see 'anyReg').
getAnyReg :: CmmExpr -> NatM (Reg -> InstrBlock)
getAnyReg expr = getRegister expr >>= anyReg

-- | Turn a 'Register' into destination-parameterised code.  An 'Any'
-- register already is one; a 'Fixed' register needs a trailing
-- reg-to-reg move into the requested destination.
anyReg :: Register -> NatM (Reg -> InstrBlock)
anyReg register = return $ case register of
  Any _ code          -> code
  Fixed rep reg fcode -> \dst -> fcode `snocOL` reg2reg rep reg dst

-- A bit like getSomeReg, but we want a reg that can be byte-addressed.
-- Fixed registers might not be byte-addressable, so we make sure we've
-- got a temporary, inserting an extra reg copy if necessary.
getByteReg :: CmmExpr -> NatM (Reg, InstrBlock)
getByteReg expr = do
Ian Lynagh's avatar
Ian Lynagh committed
1014 1015