CodeGen.hs 95.7 KB
Newer Older
1
{-# LANGUAGE CPP, GADTs #-}
2 3 4 5 6 7 8 9 10 11

-----------------------------------------------------------------------------
--
-- Generating machine code (instruction selection)
--
-- (c) The University of Glasgow 1996-2004
--
-----------------------------------------------------------------------------

-- This is a big module, but, if you pay attention to
12 13
-- (a) the sectioning, and (b) the type signatures,
-- the structure should not be too overwhelming.
14

15 16 17 18 19
module PPC.CodeGen (
        cmmTopCodeGen,
        generateJumpTableForInstr,
        InstrBlock
)
20 21 22 23 24

where

#include "HsVersions.h"
#include "nativeGen/NCG.h"
Simon Marlow's avatar
Simon Marlow committed
25
#include "../includes/MachDeps.h"
26 27

-- NCG stuff:
28
import CodeGen.Platform
29 30 31
import PPC.Instr
import PPC.Cond
import PPC.Regs
tibbe's avatar
tibbe committed
32
import CPrim
33 34 35
import NCGMonad
import Instruction
import PIC
36
import Format
37 38
import RegClass
import Reg
39
import TargetReg
40 41 42 43
import Platform

-- Our intermediate code:
import BlockId
44
import PprCmm           ( pprExpr )
45 46
import Cmm
import CmmUtils
47
import CmmSwitch
48
import CLabel
49 50
import Hoopl.Block
import Hoopl.Graph
51 52 53 54 55 56

-- The rest:
import OrdList
import Outputable
import DynFlags

57
import Control.Monad    ( mapAndUnzipM, when )
58 59 60
import Data.Bits
import Data.Word

61 62
import BasicTypes
import FastString
63
import Util
64

65 66 67 68 69 70 71 72
-- -----------------------------------------------------------------------------
-- Top-level of the instruction selector

-- | 'InstrBlock's are the insn sequences generated by the insn selectors.
-- They are really trees of insns to facilitate fast appending, where a
-- left-to-right traversal (pre-order?) yields the insns in the correct
-- order.

73
cmmTopCodeGen
Simon Peyton Jones's avatar
Simon Peyton Jones committed
74 75
        :: RawCmmDecl
        -> NatM [NatCmmDecl CmmStatics Instr]
76

77 78
cmmTopCodeGen (CmmProc info lab live graph) = do
  let blocks = toBlockListEntryFirst graph
79
  (nat_blocks,statics) <- mapAndUnzipM basicBlockCodeGen blocks
80
  dflags <- getDynFlags
81
  let proc = CmmProc info lab live (ListGraph $ concat nat_blocks)
82 83
      tops = proc : concat statics
      os   = platformOS $ targetPlatform dflags
84 85
      arch = platformArch $ targetPlatform dflags
  case arch of
86 87
    ArchPPC | os == OSAIX -> return tops
            | otherwise -> do
88 89 90 91 92 93 94 95 96 97 98
      picBaseMb <- getPicBaseMaybeNat
      case picBaseMb of
           Just picBase -> initializePicBase_ppc arch os picBase tops
           Nothing -> return tops
    ArchPPC_64 ELF_V1 -> return tops
                      -- generating function descriptor is handled in
                      -- pretty printer
    ArchPPC_64 ELF_V2 -> return tops
                      -- generating function prologue is handled in
                      -- pretty printer
    _          -> panic "PPC.cmmTopCodeGen: unknown arch"
99

Ian Lynagh's avatar
Ian Lynagh committed
100
cmmTopCodeGen (CmmData sec dat) = do
101 102
  return [CmmData sec dat]  -- no translation, we just use CmmStatic

103
basicBlockCodeGen
104
        :: Block CmmNode C C
105
        -> NatM ( [NatBasicBlock Instr]
Simon Peyton Jones's avatar
Simon Peyton Jones committed
106
                , [NatCmmDecl CmmStatics Instr])
107

108
basicBlockCodeGen block = do
Peter Wortmann's avatar
Peter Wortmann committed
109 110
  let (_, nodes, tail)  = blockSplit block
      id = entryLabel block
111 112 113 114
      stmts = blockToList nodes
  mid_instrs <- stmtsToInstrs stmts
  tail_instrs <- stmtToInstrs tail
  let instrs = mid_instrs `appOL` tail_instrs
115 116 117 118 119
  -- code generation may introduce new basic block boundaries, which
  -- are indicated by the NEWBLOCK instruction.  We must split up the
  -- instruction stream into basic blocks again.  Also, we extract
  -- LDATAs here too.
  let
120 121 122 123 124 125 126 127
        (top,other_blocks,statics) = foldrOL mkBlocks ([],[],[]) instrs

        mkBlocks (NEWBLOCK id) (instrs,blocks,statics)
          = ([], BasicBlock id instrs : blocks, statics)
        mkBlocks (LDATA sec dat) (instrs,blocks,statics)
          = (instrs, blocks, CmmData sec dat:statics)
        mkBlocks instr (instrs,blocks,statics)
          = (instr:instrs, blocks, statics)
128 129
  return (BasicBlock id top : other_blocks, statics)

130
stmtsToInstrs :: [CmmNode e x] -> NatM InstrBlock
131 132 133 134
stmtsToInstrs stmts
   = do instrss <- mapM stmtToInstrs stmts
        return (concatOL instrss)

135
stmtToInstrs :: CmmNode e x -> NatM InstrBlock
136
stmtToInstrs stmt = do
137
  dflags <- getDynFlags
138
  case stmt of
139
    CmmComment s   -> return (unitOL (COMMENT s))
Peter Wortmann's avatar
Peter Wortmann committed
140
    CmmTick {}     -> return nilOL
141
    CmmUnwind {}   -> return nilOL
142 143

    CmmAssign reg src
144
      | isFloatType ty -> assignReg_FltCode format reg src
145 146
      | target32Bit (targetPlatform dflags) &&
        isWord64 ty    -> assignReg_I64Code      reg src
147
      | otherwise        -> assignReg_IntCode format reg src
148
        where ty = cmmRegType dflags reg
149
              format = cmmTypeFormat ty
150 151

    CmmStore addr src
152
      | isFloatType ty -> assignMem_FltCode format addr src
153 154
      | target32Bit (targetPlatform dflags) &&
        isWord64 ty      -> assignMem_I64Code      addr src
155
      | otherwise        -> assignMem_IntCode format addr src
156
        where ty = cmmExprType dflags src
157
              format = cmmTypeFormat ty
158

159
    CmmUnsafeForeignCall target result_regs args
160 161
       -> genCCall target result_regs args

162
    CmmBranch id          -> genBranch id
163 164 165 166
    CmmCondBranch arg true false _ -> do
      b1 <- genCondJump true arg
      b2 <- genBranch false
      return (b1 `appOL` b2)
167 168
    CmmSwitch arg ids -> do dflags <- getDynFlags
                            genSwitch dflags arg ids
169 170 171
    CmmCall { cml_target = arg } -> genJump arg
    _ ->
      panic "stmtToInstrs: statement should have been cps'd away"
172 173 174 175


--------------------------------------------------------------------------------
-- | 'InstrBlock's are the insn sequences generated by the insn selectors.
176 177
--      They are really trees of insns to facilitate fast appending, where a
--      left-to-right traversal yields the insns in the correct order.
178
--
179 180
type InstrBlock
        = OrdList Instr
181 182 183


-- | Register's passed up the tree.  If the stix code forces the register
184 185 186
--      to live in a pre-decided machine register, it comes out as @Fixed@;
--      otherwise, it comes out as @Any@, and the parent can decide which
--      register to put it in.
187 188
--
data Register
189 190
        = Fixed Format Reg InstrBlock
        | Any   Format (Reg -> InstrBlock)
191 192


193 194 195
swizzleRegisterRep :: Register -> Format -> Register
swizzleRegisterRep (Fixed _ reg code) format = Fixed format reg code
swizzleRegisterRep (Any _ codefn)     format = Any   format codefn
196 197 198


-- | Grab the Reg for a CmmReg
199
getRegisterReg :: Platform -> CmmReg -> Reg
200

201
getRegisterReg _ (CmmLocal (LocalReg u pk))
202
  = RegVirtual $ mkVirtualReg u (cmmTypeFormat pk)
203

204 205
getRegisterReg platform (CmmGlobal mid)
  = case globalRegMaybe platform mid of
206
        Just reg -> RegReal reg
207 208 209 210
        Nothing  -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid)
        -- By this stage, the only MagicIds remaining should be the
        -- ones which map to a real machine register on this
        -- platform.  Hence ...
211 212

-- | Convert a BlockId to some CmmStatic data
213 214 215
jumpTableEntry :: DynFlags -> Maybe BlockId -> CmmStatic
jumpTableEntry dflags Nothing = CmmStaticLit (CmmInt 0 (wordWidth dflags))
jumpTableEntry _ (Just blockid) = CmmStaticLit (CmmLabel blockLabel)
216
    where blockLabel = blockLbl blockid
217 218 219 220 221 222 223 224



-- -----------------------------------------------------------------------------
-- General things for putting together code sequences

-- Expand CmmRegOff.  ToDo: should we do it this way around, or convert
-- CmmExprs into CmmRegOff?
225 226
mangleIndexTree :: DynFlags -> CmmExpr -> CmmExpr
mangleIndexTree dflags (CmmRegOff reg off)
227
  = CmmMachOp (MO_Add width) [CmmReg reg, CmmLit (CmmInt (fromIntegral off) width)]
228
  where width = typeWidth (cmmRegType dflags reg)
229

230
mangleIndexTree _ _
231
        = panic "PPC.CodeGen.mangleIndexTree: no match"
232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248

-- -----------------------------------------------------------------------------
--  Code gen for 64-bit arithmetic on 32-bit platforms

{-
Simple support for generating 64-bit code (ie, 64 bit values and 64
bit assignments) on 32-bit platforms.  Unlike the main code generator
we merely shoot for generating working code as simply as possible, and
pay little attention to code quality.  Specifically, there is no
attempt to deal cleverly with the fixed-vs-floating register
distinction; all values are generated into (pairs of) floating
registers, even if this would mean some redundant reg-reg moves as a
result.  Only one of the VRegUniques is returned, since it will be
of the VRegUniqueLo form, and the upper-half VReg can be determined
by applying getHiVRegFromLo to it.
-}

249 250 251 252 253 254 255 256
data ChildCode64        -- a.k.a "Register64"
      = ChildCode64
           InstrBlock   -- code
           Reg          -- the lower 32-bit temporary which contains the
                        -- result; use getHiVRegFromLo to find the other
                        -- VRegUnique.  Rules of this simplified insn
                        -- selection game are therefore that the returned
                        -- Reg may be modified
257 258


259
-- | Compute an expression into a register, but
260
--      we don't mind which one it is.
261 262 263 264 265
getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
getSomeReg expr = do
  r <- getRegister expr
  case r of
    Any rep code -> do
266 267 268 269
        tmp <- getNewRegNat rep
        return (tmp, code tmp)
    Fixed _ reg code ->
        return (reg, code)
270 271 272

getI64Amodes :: CmmExpr -> NatM (AddrMode, AddrMode, InstrBlock)
getI64Amodes addrTree = do
273
    Amode hi_addr addr_code <- getAmode D addrTree
274 275 276 277 278 279 280 281 282 283 284
    case addrOffset hi_addr 4 of
        Just lo_addr -> return (hi_addr, lo_addr, addr_code)
        Nothing      -> do (hi_ptr, code) <- getSomeReg addrTree
                           return (AddrRegImm hi_ptr (ImmInt 0),
                                   AddrRegImm hi_ptr (ImmInt 4),
                                   code)


assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock
assignMem_I64Code addrTree valueTree = do
        (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
285 286 287
        ChildCode64 vcode rlo <- iselExpr64 valueTree
        let
                rhi = getHiVRegFromLo rlo
288

289 290 291 292
                -- Big-endian store
                mov_hi = ST II32 rhi hi_addr
                mov_lo = ST II32 rlo lo_addr
        return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
293 294 295


assignReg_I64Code :: CmmReg  -> CmmExpr -> NatM InstrBlock
296
assignReg_I64Code (CmmLocal (LocalReg u_dst _)) valueTree = do
297
   ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
298
   let
299
         r_dst_lo = RegVirtual $ mkVirtualReg u_dst II32
300 301 302 303 304 305 306 307
         r_dst_hi = getHiVRegFromLo r_dst_lo
         r_src_hi = getHiVRegFromLo r_src_lo
         mov_lo = MR r_dst_lo r_src_lo
         mov_hi = MR r_dst_hi r_src_hi
   return (
        vcode `snocOL` mov_lo `snocOL` mov_hi
     )

308
assignReg_I64Code _ _
309 310 311 312 313 314 315 316 317
   = panic "assignReg_I64Code(powerpc): invalid lvalue"


iselExpr64        :: CmmExpr -> NatM ChildCode64
iselExpr64 (CmmLoad addrTree ty) | isWord64 ty = do
    (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
    (rlo, rhi) <- getNewRegPairNat II32
    let mov_hi = LD II32 rhi hi_addr
        mov_lo = LD II32 rlo lo_addr
318
    return $ ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
319 320 321
                         rlo

iselExpr64 (CmmReg (CmmLocal (LocalReg vu ty))) | isWord64 ty
322
   = return (ChildCode64 nilOL (RegVirtual $ mkVirtualReg vu II32))
323 324 325 326

iselExpr64 (CmmLit (CmmInt i _)) = do
  (rlo,rhi) <- getNewRegPairNat II32
  let
327
        half0 = fromIntegral (fromIntegral i :: Word16)
328 329 330
        half1 = fromIntegral (fromIntegral (i `shiftR` 16) :: Word16)
        half2 = fromIntegral (fromIntegral (i `shiftR` 32) :: Word16)
        half3 = fromIntegral (fromIntegral (i `shiftR` 48) :: Word16)
331 332 333 334 335

        code = toOL [
                LIS rlo (ImmInt half1),
                OR rlo rlo (RIImm $ ImmInt half0),
                LIS rhi (ImmInt half3),
336
                OR rhi rhi (RIImm $ ImmInt half2)
337
                ]
338 339 340 341 342 343 344
  return (ChildCode64 code rlo)

iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
   ChildCode64 code1 r1lo <- iselExpr64 e1
   ChildCode64 code2 r2lo <- iselExpr64 e2
   (rlo,rhi) <- getNewRegPairNat II32
   let
345 346 347 348 349 350
        r1hi = getHiVRegFromLo r1lo
        r2hi = getHiVRegFromLo r2lo
        code =  code1 `appOL`
                code2 `appOL`
                toOL [ ADDC rlo r1lo r2lo,
                       ADDE rhi r1hi r2hi ]
351 352
   return (ChildCode64 code rlo)

353 354 355 356 357 358 359 360 361
iselExpr64 (CmmMachOp (MO_Sub _) [e1,e2]) = do
   ChildCode64 code1 r1lo <- iselExpr64 e1
   ChildCode64 code2 r2lo <- iselExpr64 e2
   (rlo,rhi) <- getNewRegPairNat II32
   let
        r1hi = getHiVRegFromLo r1lo
        r2hi = getHiVRegFromLo r2lo
        code =  code1 `appOL`
                code2 `appOL`
362
                toOL [ SUBFC rlo r2lo (RIReg r1lo),
363 364 365
                       SUBFE rhi r2hi r1hi ]
   return (ChildCode64 code rlo)

366 367 368 369 370 371 372 373
iselExpr64 (CmmMachOp (MO_UU_Conv W32 W64) [expr]) = do
    (expr_reg,expr_code) <- getSomeReg expr
    (rlo, rhi) <- getNewRegPairNat II32
    let mov_hi = LI rhi (ImmInt 0)
        mov_lo = MR rlo expr_reg
    return $ ChildCode64 (expr_code `snocOL` mov_lo `snocOL` mov_hi)
                         rlo
iselExpr64 expr
374
   = pprPanic "iselExpr64(powerpc)" (pprExpr expr)
375 376 377 378



getRegister :: CmmExpr -> NatM Register
379
getRegister e = do dflags <- getDynFlags
380 381 382
                   getRegister' dflags e

getRegister' :: DynFlags -> CmmExpr -> NatM Register
383

384
getRegister' dflags (CmmReg (CmmGlobal PicBaseReg))
385 386 387 388
  | OSAIX <- platformOS (targetPlatform dflags) = do
        let code dst = toOL [ LD II32 dst tocAddr ]
            tocAddr = AddrRegImm toc (ImmLit (text "ghc_toc_table[TC]"))
        return (Any II32 code)
389
  | target32Bit (targetPlatform dflags) = do
390 391
      reg <- getPicBaseNat $ archWordFormat (target32Bit (targetPlatform dflags))
      return (Fixed (archWordFormat (target32Bit (targetPlatform dflags)))
392 393
                    reg nilOL)
  | otherwise = return (Fixed II64 toc nilOL)
pho@cielonegro.org's avatar
pho@cielonegro.org committed
394

395
getRegister' dflags (CmmReg reg)
396
  = return (Fixed (cmmTypeFormat (cmmRegType dflags reg))
397
                  (getRegisterReg (targetPlatform dflags) reg) nilOL)
398

399
getRegister' dflags tree@(CmmRegOff _ _)
400
  = getRegister' dflags (mangleIndexTree dflags tree)
401 402 403 404

    -- for 32-bit architectuers, support some 64 -> 32 bit conversions:
    -- TO_W_(x), TO_W_(x >> 32)

405 406 407
getRegister' dflags (CmmMachOp (MO_UU_Conv W64 W32)
                     [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]])
 | target32Bit (targetPlatform dflags) = do
408 409 410
  ChildCode64 code rlo <- iselExpr64 x
  return $ Fixed II32 (getHiVRegFromLo rlo) code

411 412 413
getRegister' dflags (CmmMachOp (MO_SS_Conv W64 W32)
                     [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]])
 | target32Bit (targetPlatform dflags) = do
414 415 416
  ChildCode64 code rlo <- iselExpr64 x
  return $ Fixed II32 (getHiVRegFromLo rlo) code

417 418
getRegister' dflags (CmmMachOp (MO_UU_Conv W64 W32) [x])
 | target32Bit (targetPlatform dflags) = do
419 420 421
  ChildCode64 code rlo <- iselExpr64 x
  return $ Fixed II32 rlo code

422 423
getRegister' dflags (CmmMachOp (MO_SS_Conv W64 W32) [x])
 | target32Bit (targetPlatform dflags) = do
424
  ChildCode64 code rlo <- iselExpr64 x
425
  return $ Fixed II32 rlo code
426

427
getRegister' dflags (CmmLoad mem pk)
428
 | not (isWord64 pk) = do
429
        let platform = targetPlatform dflags
430
        Amode addr addr_code <- getAmode D mem
431
        let code dst = ASSERT((targetClassOfReg platform dst == RcDouble) == isFloatType pk)
432 433
                       addr_code `snocOL` LD format dst addr
        return (Any format code)
434 435 436 437 438
 | not (target32Bit (targetPlatform dflags)) = do
        Amode addr addr_code <- getAmode DS mem
        let code dst = addr_code `snocOL` LD II64 dst addr
        return (Any II64 code)

439
          where format = cmmTypeFormat pk
440 441

-- catch simple cases of zero- or sign-extended load
442
getRegister' _ (CmmMachOp (MO_UU_Conv W8 W32) [CmmLoad mem _]) = do
443
    Amode addr addr_code <- getAmode D mem
444 445
    return (Any II32 (\dst -> addr_code `snocOL` LD II8 dst addr))

446 447 448 449
getRegister' _ (CmmMachOp (MO_UU_Conv W8 W64) [CmmLoad mem _]) = do
    Amode addr addr_code <- getAmode D mem
    return (Any II64 (\dst -> addr_code `snocOL` LD II8 dst addr))

450 451
-- Note: there is no Load Byte Arithmetic instruction, so no signed case here

452
getRegister' _ (CmmMachOp (MO_UU_Conv W16 W32) [CmmLoad mem _]) = do
453
    Amode addr addr_code <- getAmode D mem
454 455
    return (Any II32 (\dst -> addr_code `snocOL` LD II16 dst addr))

456
getRegister' _ (CmmMachOp (MO_SS_Conv W16 W32) [CmmLoad mem _]) = do
457
    Amode addr addr_code <- getAmode D mem
458 459
    return (Any II32 (\dst -> addr_code `snocOL` LA II16 dst addr))

460 461 462 463 464 465 466 467 468 469 470 471 472
getRegister' _ (CmmMachOp (MO_UU_Conv W16 W64) [CmmLoad mem _]) = do
    Amode addr addr_code <- getAmode D mem
    return (Any II64 (\dst -> addr_code `snocOL` LD II16 dst addr))

getRegister' _ (CmmMachOp (MO_SS_Conv W16 W64) [CmmLoad mem _]) = do
    Amode addr addr_code <- getAmode D mem
    return (Any II64 (\dst -> addr_code `snocOL` LA II16 dst addr))

getRegister' _ (CmmMachOp (MO_UU_Conv W32 W64) [CmmLoad mem _]) = do
    Amode addr addr_code <- getAmode D mem
    return (Any II64 (\dst -> addr_code `snocOL` LD II32 dst addr))

getRegister' _ (CmmMachOp (MO_SS_Conv W32 W64) [CmmLoad mem _]) = do
473 474
    -- lwa is DS-form. See Note [Power instruction format]
    Amode addr addr_code <- getAmode DS mem
475 476
    return (Any II64 (\dst -> addr_code `snocOL` LA II32 dst addr))

477
getRegister' dflags (CmmMachOp mop [x]) -- unary MachOps
478 479 480 481 482 483 484 485 486 487 488 489 490
  = case mop of
      MO_Not rep   -> triv_ucode_int rep NOT

      MO_F_Neg w   -> triv_ucode_float w FNEG
      MO_S_Neg w   -> triv_ucode_int   w NEG

      MO_FF_Conv W64 W32 -> trivialUCode  FF32 FRSP x
      MO_FF_Conv W32 W64 -> conversionNop FF64 x

      MO_FS_Conv from to -> coerceFP2Int from to x
      MO_SF_Conv from to -> coerceInt2FP from to x

      MO_SS_Conv from to
491
        | from == to    -> conversionNop (intFormat to) x
492 493

        -- narrowing is a nop: we treat the high bits as undefined
494 495
      MO_SS_Conv W64 to
        | arch32    -> panic "PPC.CodeGen.getRegister no 64 bit int register"
496
        | otherwise -> conversionNop (intFormat to) x
497
      MO_SS_Conv W32 to
498
        | arch32    -> conversionNop (intFormat to) x
499 500 501 502 503
        | otherwise -> case to of
            W64 -> triv_ucode_int to (EXTS II32)
            W16 -> conversionNop II16 x
            W8  -> conversionNop II8 x
            _   -> panic "PPC.CodeGen.getRegister: no match"
504 505 506 507 508
      MO_SS_Conv W16 W8 -> conversionNop II8 x
      MO_SS_Conv W8  to -> triv_ucode_int to (EXTS II8)
      MO_SS_Conv W16 to -> triv_ucode_int to (EXTS II16)

      MO_UU_Conv from to
509
        | from == to -> conversionNop (intFormat to) x
510
        -- narrowing is a nop: we treat the high bits as undefined
511 512
      MO_UU_Conv W64 to
        | arch32    -> panic "PPC.CodeGen.getRegister no 64 bit target"
513
        | otherwise -> conversionNop (intFormat to) x
514
      MO_UU_Conv W32 to
515
        | arch32    -> conversionNop (intFormat to) x
516 517 518 519 520 521
        | otherwise ->
          case to of
           W64 -> trivialCode to False AND x (CmmLit (CmmInt 4294967295 W64))
           W16 -> conversionNop II16 x
           W8  -> conversionNop II8 x
           _   -> panic "PPC.CodeGen.getRegister: no match"
522 523
      MO_UU_Conv W16 W8 -> conversionNop II8 x
      MO_UU_Conv W8 to  -> trivialCode to False AND x (CmmLit (CmmInt 255 W32))
524 525
      MO_UU_Conv W16 to -> trivialCode to False AND x (CmmLit (CmmInt 65535 W32))
      _ -> panic "PPC.CodeGen.getRegister: no match"
526 527

    where
528 529
        triv_ucode_int   width instr = trivialUCode (intFormat    width) instr x
        triv_ucode_float width instr = trivialUCode (floatFormat  width) instr x
530

531
        conversionNop new_format expr
532
            = do e_code <- getRegister' dflags expr
533
                 return (swizzleRegisterRep e_code new_format)
534
        arch32 = target32Bit $ targetPlatform dflags
535

536
getRegister' dflags (CmmMachOp mop [x, y]) -- dyadic PrimOps
537
  = case mop of
538 539 540 541 542 543
      MO_F_Eq _ -> condFltReg EQQ x y
      MO_F_Ne _ -> condFltReg NE  x y
      MO_F_Gt _ -> condFltReg GTT x y
      MO_F_Ge _ -> condFltReg GE  x y
      MO_F_Lt _ -> condFltReg LTT x y
      MO_F_Le _ -> condFltReg LE  x y
544

545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566
      MO_Eq rep -> condIntReg EQQ  (extendUExpr dflags rep x)
                                   (extendUExpr dflags rep y)
      MO_Ne rep -> condIntReg NE   (extendUExpr dflags rep x)
                                   (extendUExpr dflags rep y)

      MO_S_Gt rep -> condIntReg GTT  (extendSExpr dflags rep x)
                                     (extendSExpr dflags rep y)
      MO_S_Ge rep -> condIntReg GE   (extendSExpr dflags rep x)
                                     (extendSExpr dflags rep y)
      MO_S_Lt rep -> condIntReg LTT  (extendSExpr dflags rep x)
                                     (extendSExpr dflags rep y)
      MO_S_Le rep -> condIntReg LE   (extendSExpr dflags rep x)
                                     (extendSExpr dflags rep y)

      MO_U_Gt rep -> condIntReg GU   (extendUExpr dflags rep x)
                                     (extendUExpr dflags rep y)
      MO_U_Ge rep -> condIntReg GEU  (extendUExpr dflags rep x)
                                     (extendUExpr dflags rep y)
      MO_U_Lt rep -> condIntReg LU   (extendUExpr dflags rep x)
                                     (extendUExpr dflags rep y)
      MO_U_Le rep -> condIntReg LEU  (extendUExpr dflags rep x)
                                     (extendUExpr dflags rep y)
567 568 569 570 571

      MO_F_Add w  -> triv_float w FADD
      MO_F_Sub w  -> triv_float w FSUB
      MO_F_Mul w  -> triv_float w FMUL
      MO_F_Quot w -> triv_float w FDIV
572

573 574 575 576
         -- optimize addition with 32-bit immediate
         -- (needed for PIC)
      MO_Add W32 ->
        case y of
577
          CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate W32 True imm
578 579 580 581 582 583 584 585 586 587 588 589 590 591
            -> trivialCode W32 True ADD x (CmmLit $ CmmInt imm immrep)
          CmmLit lit
            -> do
                (src, srcCode) <- getSomeReg x
                let imm = litToImm lit
                    code dst = srcCode `appOL` toOL [
                                    ADDIS dst src (HA imm),
                                    ADD dst dst (RIImm (LO imm))
                                ]
                return (Any II32 code)
          _ -> trivialCode W32 True ADD x y

      MO_Add rep -> trivialCode rep True ADD x y
      MO_Sub rep ->
592
        case y of
593 594
          CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate rep True (-imm)
            -> trivialCode rep True ADD x (CmmLit $ CmmInt (-imm) immrep)
595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612
          _ -> case x of
                 CmmLit (CmmInt imm _)
                   | Just _ <- makeImmediate rep True imm
                   -- subfi ('substract from' with immediate) doesn't exist
                   -> trivialCode rep True SUBFC y x
                 _ -> trivialCodeNoImm' (intFormat rep) SUBF y x

      MO_Mul rep -> shiftMulCode rep True MULL x y
      MO_S_MulMayOflo rep -> do
        (src1, code1) <- getSomeReg x
        (src2, code2) <- getSomeReg y
        let
          format = intFormat rep
          code dst = code1 `appOL` code2
                       `appOL` toOL [ MULLO format dst src1 src2
                                    , MFOV  format dst
                                    ]
        return (Any format code)
613

614
      MO_S_Quot rep -> trivialCodeNoImmSign (intFormat rep) True DIV
615
                (extendSExpr dflags rep x) (extendSExpr dflags rep y)
616
      MO_U_Quot rep -> trivialCodeNoImmSign (intFormat rep) False DIV
617 618
                (extendUExpr dflags rep x) (extendUExpr dflags rep y)

619
      MO_S_Rem rep -> remainderCode rep True (extendSExpr dflags rep x)
620
                                             (extendSExpr dflags rep y)
621 622
      MO_U_Rem rep -> remainderCode rep False (extendUExpr dflags rep x)
                                              (extendUExpr dflags rep y)
623

624 625 626 627 628 629 630 631 632 633
      MO_And rep   -> case y of
        (CmmLit (CmmInt imm _)) | imm == -8 || imm == -4
            -> do
                (src, srcCode) <- getSomeReg x
                let clear_mask = if imm == -4 then 2 else 3
                    fmt = intFormat rep
                    code dst = srcCode
                               `appOL` unitOL (CLRRI fmt dst src clear_mask)
                return (Any fmt code)
        _ -> trivialCode rep False AND x y
634 635 636
      MO_Or rep    -> trivialCode rep False OR x y
      MO_Xor rep   -> trivialCode rep False XOR x y

637 638 639
      MO_Shl rep   -> shiftMulCode rep False SL x y
      MO_S_Shr rep -> shiftMulCode rep False SRA (extendSExpr dflags rep x) y
      MO_U_Shr rep -> shiftMulCode rep False SR (extendUExpr dflags rep x) y
640
      _         -> panic "PPC.CodeGen.getRegister: no match"
641 642

  where
643 644
    triv_float :: Width -> (Format -> Reg -> Reg -> Reg -> Instr) -> NatM Register
    triv_float width instr = trivialCodeNoImm (floatFormat width) instr x y
645

646
getRegister' _ (CmmLit (CmmInt i rep))
647 648
  | Just imm <- makeImmediate rep True i
  = let
649
        code dst = unitOL (LI dst imm)
650
    in
651
        return (Any (intFormat rep) code)
652

653
getRegister' _ (CmmLit (CmmFloat f frep)) = do
654
    lbl <- getNewLabelNat
655
    dflags <- getDynFlags
656
    dynRef <- cmmMakeDynamicReference dflags DataReference lbl
657
    Amode addr addr_code <- getAmode D dynRef
658
    let format = floatFormat frep
659
        code dst =
660 661
            LDATA (Section ReadOnlyData lbl)
                  (Statics lbl [CmmStaticLit (CmmFloat f frep)])
662 663
            `consOL` (addr_code `snocOL` LD format dst addr)
    return (Any format code)
664

665
getRegister' dflags (CmmLit lit)
666
  | target32Bit (targetPlatform dflags)
667
  = let rep = cmmLitType dflags lit
668 669 670 671 672
        imm = litToImm lit
        code dst = toOL [
              LIS dst (HA imm),
              ADD dst dst (RIImm (LO imm))
          ]
673
    in return (Any (cmmTypeFormat rep) code)
674 675 676 677 678 679
  | otherwise
  = do lbl <- getNewLabelNat
       dflags <- getDynFlags
       dynRef <- cmmMakeDynamicReference dflags DataReference lbl
       Amode addr addr_code <- getAmode D dynRef
       let rep = cmmLitType dflags lit
680
           format = cmmTypeFormat rep
681
           code dst =
682
            LDATA (Section ReadOnlyData lbl) (Statics lbl [CmmStaticLit lit])
683 684
            `consOL` (addr_code `snocOL` LD format dst addr)
       return (Any format code)
685

686
getRegister' _ other = pprPanic "getRegister(ppc)" (pprExpr other)
687

688
    -- extend?Rep: wrap integer expression of type rep
689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712
    -- in a conversion to II32 or II64 resp.
extendSExpr :: DynFlags -> Width -> CmmExpr -> CmmExpr
extendSExpr dflags W32 x
 | target32Bit (targetPlatform dflags) = x

extendSExpr dflags W64 x
 | not (target32Bit (targetPlatform dflags)) = x

extendSExpr dflags rep x =
    let size = if target32Bit $ targetPlatform dflags
               then W32
               else W64
    in CmmMachOp (MO_SS_Conv rep size) [x]

extendUExpr :: DynFlags -> Width -> CmmExpr -> CmmExpr
extendUExpr dflags W32 x
 | target32Bit (targetPlatform dflags) = x
extendUExpr dflags W64 x
 | not (target32Bit (targetPlatform dflags)) = x
extendUExpr dflags rep x =
    let size = if target32Bit $ targetPlatform dflags
               then W32
               else W64
    in CmmMachOp (MO_UU_Conv rep size) [x]
713 714 715 716

-- -----------------------------------------------------------------------------
--  The 'Amode' type: Memory addressing modes passed up the tree.

717 718
data Amode
        = Amode AddrMode InstrBlock
719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737

{-
Now, given a tree (the argument to an CmmLoad) that references memory,
produce a suitable addressing mode.

A Rule of the Game (tm) for Amodes: use of the addr bit must
immediately follow use of the code part, since the code part puts
values in registers which the addr then refers to.  So you can't put
anything in between, lest it overwrite some of those registers.  If
you need to do some other computation between the code part and use of
the addr bit, first store the effective address from the amode in a
temporary, then do the other computation, and then use the temporary:

    code
    LEA amode, tmp
    ... other computation ...
    ... (tmp) ...
-}

738 739
{- Note [Power instruction format]
In some instructions the 16 bit offset must be a multiple of 4, i.e.
Gabor Greif's avatar
Gabor Greif committed
740
the two least significant bits must be zero. The "Power ISA" specification
741 742 743 744 745
calls these instruction formats "DS-FORM" and the instructions with
arbitrary 16 bit offsets are "D-FORM".

The Power ISA specification document can be obtained from www.power.org.
-}
746
data InstrForm = D | DS
747

748 749 750 751 752 753
getAmode :: InstrForm -> CmmExpr -> NatM Amode
getAmode inf tree@(CmmRegOff _ _)
  = do dflags <- getDynFlags
       getAmode inf (mangleIndexTree dflags tree)

getAmode _ (CmmMachOp (MO_Sub W32) [x, CmmLit (CmmInt i _)])
754 755 756 757 758 759
  | Just off <- makeImmediate W32 True (-i)
  = do
        (reg, code) <- getSomeReg x
        return (Amode (AddrRegImm reg off) code)


760
getAmode _ (CmmMachOp (MO_Add W32) [x, CmmLit (CmmInt i _)])
761 762 763 764 765
  | Just off <- makeImmediate W32 True i
  = do
        (reg, code) <- getSomeReg x
        return (Amode (AddrRegImm reg off) code)

766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804
getAmode D (CmmMachOp (MO_Sub W64) [x, CmmLit (CmmInt i _)])
  | Just off <- makeImmediate W64 True (-i)
  = do
        (reg, code) <- getSomeReg x
        return (Amode (AddrRegImm reg off) code)


getAmode D (CmmMachOp (MO_Add W64) [x, CmmLit (CmmInt i _)])
  | Just off <- makeImmediate W64 True i
  = do
        (reg, code) <- getSomeReg x
        return (Amode (AddrRegImm reg off) code)

getAmode DS (CmmMachOp (MO_Sub W64) [x, CmmLit (CmmInt i _)])
  | Just off <- makeImmediate W64 True (-i)
  = do
        (reg, code) <- getSomeReg x
        (reg', off', code')  <-
                     if i `mod` 4 == 0
                      then do return (reg, off, code)
                      else do
                           tmp <- getNewRegNat II64
                           return (tmp, ImmInt 0,
                                  code `snocOL` ADD tmp reg (RIImm off))
        return (Amode (AddrRegImm reg' off') code')

getAmode DS (CmmMachOp (MO_Add W64) [x, CmmLit (CmmInt i _)])
  | Just off <- makeImmediate W64 True i
  = do
        (reg, code) <- getSomeReg x
        (reg', off', code')  <-
                     if i `mod` 4 == 0
                      then do return (reg, off, code)
                      else do
                           tmp <- getNewRegNat II64
                           return (tmp, ImmInt 0,
                                  code `snocOL` ADD tmp reg (RIImm off))
        return (Amode (AddrRegImm reg' off') code')

805 806
   -- optimize addition with 32-bit immediate
   -- (needed for PIC)
807
getAmode _ (CmmMachOp (MO_Add W32) [x, CmmLit lit])
808
  = do
809
        dflags <- getDynFlags
810 811
        (src, srcCode) <- getSomeReg x
        let imm = litToImm lit
812 813 814 815 816 817 818 819 820 821 822 823 824 825
        case () of
            _ | OSAIX <- platformOS (targetPlatform dflags)
              , isCmmLabelType lit ->
                    -- HA16/LO16 relocations on labels not supported on AIX
                    return (Amode (AddrRegImm src imm) srcCode)
              | otherwise -> do
                    tmp <- getNewRegNat II32
                    let code = srcCode `snocOL` ADDIS tmp src (HA imm)
                    return (Amode (AddrRegImm tmp (LO imm)) code)
  where
      isCmmLabelType (CmmLabel {})        = True
      isCmmLabelType (CmmLabelOff {})     = True
      isCmmLabelType (CmmLabelDiffOff {}) = True
      isCmmLabelType _                    = False
826

827
getAmode _ (CmmLit lit)
828
  = do
829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852
        dflags <- getDynFlags
        case platformArch $ targetPlatform dflags of
             ArchPPC -> do
                 tmp <- getNewRegNat II32
                 let imm = litToImm lit
                     code = unitOL (LIS tmp (HA imm))
                 return (Amode (AddrRegImm tmp (LO imm)) code)
             _        -> do -- TODO: Load from TOC,
                            -- see getRegister' _ (CmmLit lit)
                 tmp <- getNewRegNat II64
                 let imm = litToImm lit
                     code =  toOL [
                          LIS tmp (HIGHESTA imm),
                          OR tmp tmp (RIImm (HIGHERA imm)),
                          SL  II64 tmp tmp (RIImm (ImmInt 32)),
                          ORIS tmp tmp (HA imm)
                          ]
                 return (Amode (AddrRegImm tmp (LO imm)) code)

getAmode _ (CmmMachOp (MO_Add W32) [x, y])
  = do
        (regX, codeX) <- getSomeReg x
        (regY, codeY) <- getSomeReg y
        return (Amode (AddrRegReg regX regY) (codeX `appOL` codeY))
853

854
getAmode _ (CmmMachOp (MO_Add W64) [x, y])
855 856 857 858
  = do
        (regX, codeX) <- getSomeReg x
        (regY, codeY) <- getSomeReg y
        return (Amode (AddrRegReg regX regY) (codeX `appOL` codeY))
859

860
getAmode _ other
861 862 863 864 865 866 867 868
  = do
        (reg, code) <- getSomeReg other
        let
            off  = ImmInt 0
        return (Amode (AddrRegImm reg off) code)


--  The 'CondCode' type:  Condition codes passed up the tree.
869 870
data CondCode
        = CondCode Bool Cond InstrBlock
871 872 873 874 875 876

-- Set up a condition code for a conditional branch.

getCondCode :: CmmExpr -> NatM CondCode

-- almost the same as everywhere else - but we need to
877
-- extend small integers to 32 bit or 64 bit first
878 879

getCondCode (CmmMachOp mop [x, y])
880 881 882
  = do
    dflags <- getDynFlags
    case mop of
883 884 885 886 887 888 889 890 891 892 893 894 895 896
      MO_F_Eq W32 -> condFltCode EQQ x y
      MO_F_Ne W32 -> condFltCode NE  x y
      MO_F_Gt W32 -> condFltCode GTT x y
      MO_F_Ge W32 -> condFltCode GE  x y
      MO_F_Lt W32 -> condFltCode LTT x y
      MO_F_Le W32 -> condFltCode LE  x y

      MO_F_Eq W64 -> condFltCode EQQ x y
      MO_F_Ne W64 -> condFltCode NE  x y
      MO_F_Gt W64 -> condFltCode GTT x y
      MO_F_Ge W64 -> condFltCode GE  x y
      MO_F_Lt W64 -> condFltCode LTT x y
      MO_F_Le W64 -> condFltCode LE  x y

897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918
      MO_Eq rep -> condIntCode EQQ  (extendUExpr dflags rep x)
                                    (extendUExpr dflags rep y)
      MO_Ne rep -> condIntCode NE   (extendUExpr dflags rep x)
                                    (extendUExpr dflags rep y)

      MO_S_Gt rep -> condIntCode GTT  (extendSExpr dflags rep x)
                                      (extendSExpr dflags rep y)
      MO_S_Ge rep -> condIntCode GE   (extendSExpr dflags rep x)
                                      (extendSExpr dflags rep y)
      MO_S_Lt rep -> condIntCode LTT  (extendSExpr dflags rep x)
                                      (extendSExpr dflags rep y)
      MO_S_Le rep -> condIntCode LE   (extendSExpr dflags rep x)
                                      (extendSExpr dflags rep y)

      MO_U_Gt rep -> condIntCode GU   (extendSExpr dflags rep x)
                                      (extendSExpr dflags rep y)
      MO_U_Ge rep -> condIntCode GEU  (extendSExpr dflags rep x)
                                      (extendSExpr dflags rep y)
      MO_U_Lt rep -> condIntCode LU   (extendSExpr dflags rep x)
                                      (extendSExpr dflags rep y)
      MO_U_Le rep -> condIntCode LEU  (extendSExpr dflags rep x)
                                      (extendSExpr dflags rep y)
919

920
      _ -> pprPanic "getCondCode(powerpc)" (pprMachOp mop)
921

922
getCondCode _ = panic "getCondCode(2)(powerpc)"
923 924 925 926 927 928 929


-- @cond(Int|Flt)Code@: Turn a boolean expression into a condition, to be
-- passed back up the tree.

condIntCode, condFltCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode

930 931 932 933 934 935 936 937 938 939 940
-- optimize pointer tag checks. Operation andi. sets condition register
-- so cmpi ..., 0 is redundant.
condIntCode cond (CmmMachOp (MO_And _) [x, CmmLit (CmmInt imm rep)])
                 (CmmLit (CmmInt 0 _))
  | not $ condUnsigned cond,
    Just src2 <- makeImmediate rep False imm
  = do
      (src1, code) <- getSomeReg x
      let code' = code `snocOL` AND r0 src1 (RIImm src2)
      return (CondCode False cond code')

941 942 943 944
condIntCode cond x (CmmLit (CmmInt y rep))
  | Just src2 <- makeImmediate rep (not $ condUnsigned cond) y
  = do
        (src1, code) <- getSomeReg x
945
        dflags <- getDynFlags
946
        let format = archWordFormat $ target32Bit $ targetPlatform dflags
947
            code' = code `snocOL`
948
              (if condUnsigned cond then CMPL else CMP) format src1 (RIImm src2)
949 950 951 952 953
        return (CondCode False cond code')

condIntCode cond x y = do
    (src1, code1) <- getSomeReg x
    (src2, code2) <- getSomeReg y
954
    dflags <- getDynFlags
955
    let format = archWordFormat $ target32Bit $ targetPlatform dflags
956
        code' = code1 `appOL` code2 `snocOL`
957
          (if condUnsigned cond then CMPL else CMP) format src1 (RIReg src2)
958 959 960 961 962 963
    return (CondCode False cond code')

condFltCode cond x y = do
    (src1, code1) <- getSomeReg x
    (src2, code2) <- getSomeReg y
    let
964 965
        code'  = code1 `appOL` code2 `snocOL` FCMP src1 src2
        code'' = case cond of -- twiddle CR to handle unordered case
966
                    GE -> code' `snocOL` CRNOR ltbit eqbit gtbit
967 968
                    LE -> code' `snocOL` CRNOR gtbit eqbit ltbit
                    _ -> code'
969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986
                 where
                    ltbit = 0 ; eqbit = 2 ; gtbit = 1
    return (CondCode True cond code'')



-- -----------------------------------------------------------------------------
-- Generating assignments

-- Assignments are really at the heart of the whole code generation
-- business.  Almost all top-level nodes of any real importance are
-- assignments, which correspond to loads, stores, or register
-- transfers.  If we're really lucky, some of the register transfers
-- will go away, because we can use the destination register to
-- complete the code generation for the right hand side.  This only
-- fails when the right hand side is forced into a fixed register
-- (e.g. the result of a call).

987 988
assignMem_IntCode :: Format -> CmmExpr -> CmmExpr -> NatM InstrBlock
assignReg_IntCode :: Format -> CmmReg  -> CmmExpr -> NatM InstrBlock
989

990 991
assignMem_FltCode :: Format -> CmmExpr -> CmmExpr -> NatM InstrBlock
assignReg_FltCode :: Format -> CmmReg  -> CmmExpr -> NatM InstrBlock
992 993 994

assignMem_IntCode pk addr src = do
    (srcReg, code) <- getSomeReg src
995 996 997
    Amode dstAddr addr_code <- case pk of
                                II64 -> getAmode DS addr
                                _    -> getAmode D  addr
998 999 1000 1001 1002
    return $ code `appOL` addr_code `snocOL` ST pk srcReg dstAddr

-- dst is a reg, but src could be anything
assignReg_IntCode _ reg src
    = do
1003 1004
        dflags <- getDynFlags
        let dst = getRegisterReg (targetPlatform dflags) reg
1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023
        r <- getRegister src
        return $ case r of
            Any _ code         -> code dst
            Fixed _ freg fcode -> fcode `snocOL` MR dst freg



-- Easy, isn't it?
assignMem_FltCode = assignMem_IntCode
assignReg_FltCode = assignReg_IntCode



genJump :: CmmExpr{-the branch target-} -> NatM InstrBlock

genJump (CmmLit (CmmLabel lbl))
  = return (unitOL $ JMP lbl)

genJump tree
1024 1025
  = do
        dflags <- getDynFlags
1026
        genJump' tree (platformToGCP (targetPlatform dflags))
1027 1028 1029 1030

genJump' :: CmmExpr -> GenCCallPlatform -> NatM InstrBlock

genJump' tree (GCPLinux64ELF 1)
1031 1032
  = do
        (target,code) <- getSomeReg tree
1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046
        return (code
               `snocOL` LD II64 r11 (AddrRegImm target (ImmInt 0))
               `snocOL` LD II64 toc (AddrRegImm target (ImmInt 8))
               `snocOL` MTCTR r11
               `snocOL` LD II64 r11 (AddrRegImm target (ImmInt 16))
               `snocOL` BCTR [] Nothing)

genJump' tree (GCPLinux64ELF 2)
  = do
        (target,code) <- getSomeReg tree
        return (code
               `snocOL` MR r12 target
               `snocOL` MTCTR r12
               `snocOL` BCTR [] Nothing)
1047

1048 1049 1050 1051
genJump' tree _
  = do
        (target,code) <- getSomeReg tree
        return (code `snocOL` MTCTR target `snocOL` BCTR [] Nothing)
1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069

-- -----------------------------------------------------------------------------
--  Unconditional branches
genBranch :: BlockId -> NatM InstrBlock
genBranch = return . toOL . mkJumpInstr


-- -----------------------------------------------------------------------------
--  Conditional jumps

{-
Conditional jumps are always to local labels, so we can use branch
instructions.  We peek at the arguments to decide what kind of
comparison to do.
-}


genCondJump
1070
    :: BlockId      -- the branch target
1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086
    -> CmmExpr      -- the condition on which to branch
    -> NatM InstrBlock

genCondJump id bool = do
  CondCode _ cond code <- getCondCode bool
  return (code `snocOL` BCC cond id)



-- -----------------------------------------------------------------------------
--  Generating C calls

-- Now the biggest nightmare---calls.  Most of the nastiness is buried in
-- @get_arg@, which moves the arguments to the correct registers/stack
-- locations.  Apart from that, the code is easy.

1087
genCCall :: ForeignTarget      -- function to call
1088 1089
         -> [CmmFormal]        -- where to put the result
         -> [CmmActual]        -- arguments (of mixed type)
1090
         -> NatM InstrBlock
1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217
genCCall (PrimTarget MO_WriteBarrier) _ _
 = return $ unitOL LWSYNC

genCCall (PrimTarget MO_Touch) _ _
 = return $ nilOL

genCCall (PrimTarget (MO_Prefetch_Data _)) _ _
 = return $ nilOL

genCCall (PrimTarget (MO_Clz width)) [dst] [src]
 = do dflags <- getDynFlags
      let platform = targetPlatform dflags
          reg_dst = getRegisterReg platform (CmmLocal dst)
      if target32Bit platform && width == W64
        then do
          ChildCode64 code vr_lo <- iselExpr64 src
          lbl1 <- getBlockIdNat
          lbl2 <- getBlockIdNat
          lbl3 <- getBlockIdNat
          let vr_hi = getHiVRegFromLo vr_lo
              cntlz = toOL [ CMPL II32 vr_hi (RIImm (ImmInt 0))
                           , BCC NE lbl2
                           , BCC ALWAYS lbl1

                           , NEWBLOCK lbl1
                           , CNTLZ II32 reg_dst vr_lo
                           , ADD reg_dst reg_dst (RIImm (ImmInt 32))
                           , BCC ALWAYS lbl3

                           , NEWBLOCK lbl2
                           , CNTLZ II32 reg_dst vr_hi
                           , BCC ALWAYS lbl3

                           , NEWBLOCK lbl3
                           ]
          return $ code `appOL` cntlz
        else do
          let format = if width == W64 then II64 else II32
          (s_reg, s_code) <- getSomeReg src
          (pre, reg , post) <-
            case width of
              W64 -> return (nilOL, s_reg, nilOL)
              W32 -> return (nilOL, s_reg, nilOL)
              W16 -> do
                reg_tmp <- getNewRegNat format
                return
                  ( unitOL $ AND reg_tmp s_reg (RIImm (ImmInt 65535))
                  , reg_tmp
                  , unitOL $ ADD reg_dst reg_dst (RIImm (ImmInt (-16)))
                  )
              W8  -> do
                reg_tmp <- getNewRegNat format
                return
                  ( unitOL $ AND reg_tmp s_reg (RIImm (ImmInt 255))
                  , reg_tmp
                  , unitOL $ ADD reg_dst reg_dst (RIImm (ImmInt (-24)))
                  )
              _   -> panic "genCall: Clz wrong format"
          let cntlz = unitOL (CNTLZ format reg_dst reg)
          return $ s_code `appOL` pre `appOL` cntlz `appOL` post

genCCall (PrimTarget (MO_Ctz width)) [dst] [src]
 = do dflags <- getDynFlags
      let platform = targetPlatform dflags
          reg_dst = getRegisterReg platform (CmmLocal dst)
      if target32Bit platform && width == W64
        then do
          let format = II32
          ChildCode64 code vr_lo <- iselExpr64 src
          lbl1 <- getBlockIdNat
          lbl2 <- getBlockIdNat
          lbl3 <- getBlockIdNat
          x' <- getNewRegNat format
          x'' <- getNewRegNat format
          r' <- getNewRegNat format
          cnttzlo <- cnttz format reg_dst vr_lo
          let vr_hi = getHiVRegFromLo vr_lo
              cnttz64 = toOL [ CMPL format vr_lo (RIImm (ImmInt 0))
                             , BCC NE lbl2
                             , BCC ALWAYS lbl1

                             , NEWBLOCK lbl1
                             , ADD x' vr_hi (RIImm (ImmInt (-1)))
                             , ANDC x'' x' vr_hi
                             , CNTLZ format r' x''
                               -- 32 + (32 - clz(x''))
                             , SUBFC reg_dst r' (RIImm (ImmInt 64))
                             , BCC ALWAYS lbl3

                             , NEWBLOCK lbl2
                             ]
                        `appOL` cnttzlo `appOL`
                        toOL [ BCC ALWAYS lbl3

                             , NEWBLOCK lbl3
                             ]
          return $ code `appOL` cnttz64
        else do
          let format = if width == W64 then II64 else II32
          (s_reg, s_code) <- getSomeReg src
          (reg_ctz, pre_code) <-
            case width of
              W64 -> return (s_reg, nilOL)
              W32 -> return (s_reg, nilOL)
              W16 -> do
                reg_tmp <- getNewRegNat format
                return (reg_tmp, unitOL $ ORIS reg_tmp s_reg (ImmInt 1))
              W8  -> do
                reg_tmp <- getNewRegNat format
                return (reg_tmp, unitOL $ OR reg_tmp s_reg (RIImm (ImmInt 256)))
              _   -> panic "genCall: Ctz wrong format"
          ctz_code <- cnttz format reg_dst reg_ctz
          return $ s_code `appOL` pre_code `appOL` ctz_code
        where
          -- cnttz(x) = sizeof(x) - cntlz(~x & (x - 1))
          -- see Henry S. Warren, Hacker's Delight, p 107
          cnttz format dst src = do
            let format_bits = 8 * formatInBytes format
            x' <- getNewRegNat format
            x'' <- getNewRegNat format
            r' <- getNewRegNat format
            return $ toOL [ ADD x' src (RIImm (ImmInt (-1)))
                          , ANDC x'' x' src
                          , CNTLZ format r' x''
                          , SUBFC dst r' (RIImm (ImmInt (format_bits)))
                          ]

1218
genCCall target dest_regs argsAndHints
1219
 = do dflags <- getDynFlags
1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235
      let platform = targetPlatform dflags
      case target of
        PrimTarget (MO_S_QuotRem  width) -> divOp1 platform True  width
                                                   dest_regs argsAndHints
        PrimTarget (MO_U_QuotRem  width) -> divOp1 platform False width
                                                   dest_regs argsAndHints
        PrimTarget (MO_U_QuotRem2 width) -> divOp2 platform width dest_regs
                                                   argsAndHints
        PrimTarget (MO_U_Mul2 width) -> multOp2 platform width dest_regs
                                                argsAndHints
        PrimTarget (MO_Add2 _) -> add2Op platform dest_regs argsAndHints
        PrimTarget (MO_SubWordC _) -> subcOp platform dest_regs argsAndHints
        PrimTarget (MO_AddIntC width) -> addSubCOp ADDO platform width
                                                   dest_regs argsAndHints
        PrimTarget (MO_SubIntC width) -> addSubCOp SUBFO platform width
                                                   dest_regs argsAndHints
1236 1237
        PrimTarget MO_F64_Fabs -> fabs platform dest_regs argsAndHints
        PrimTarget MO_F32_Fabs -> fabs platform dest_regs argsAndHints
1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387