Instr.hs 27.7 KB
Newer Older
1 2
{-# LANGUAGE CPP #-}

3 4
{-# OPTIONS_GHC -Wno-incomplete-uni-patterns #-}

5 6 7 8 9 10 11 12
-----------------------------------------------------------------------------
--
-- Machine-dependent assembly language
--
-- (c) The University of Glasgow 1993-2004
--
-----------------------------------------------------------------------------

13 14
#include "HsVersions.h"

15
module PPC.Instr (
16
    archWordFormat,
17 18
    RI(..),
    Instr(..),
19
    stackFrameHeaderSize,
20
    maxSpillSlots,
21 22
    allocMoreStack,
    makeFarBranches
23 24 25
)

where
26 27

import GhcPrelude
28

29
import PPC.Regs
30 31
import PPC.Cond
import Instruction
32
import Format
33
import TargetReg
34 35 36
import RegClass
import Reg

37
import GHC.Platform.Regs
38 39 40
import GHC.Cmm.BlockId
import GHC.Cmm.Dataflow.Collections
import GHC.Cmm.Dataflow.Label
41
import DynFlags
42 43
import GHC.Cmm
import GHC.Cmm.Info
44
import FastString
45
import GHC.Cmm.CLabel
46
import Outputable
John Ericson's avatar
John Ericson committed
47
import GHC.Platform
48
import UniqFM (listToUFM, lookupUFM)
49
import UniqSupply
50

51 52 53
import Control.Monad (replicateM)
import Data.Maybe (fromMaybe)

54
--------------------------------------------------------------------------------
55
-- Format of a PPC memory address.
56
--
57 58
archWordFormat :: Bool -> Format
archWordFormat is32Bit
59 60
 | is32Bit   = II32
 | otherwise = II64
61

62 63 64

-- | Instruction instance for powerpc
instance Instruction Instr where
65 66 67 68 69 70 71 72 73 74 75 76
        regUsageOfInstr         = ppc_regUsageOfInstr
        patchRegsOfInstr        = ppc_patchRegsOfInstr
        isJumpishInstr          = ppc_isJumpishInstr
        jumpDestsOfInstr        = ppc_jumpDestsOfInstr
        patchJumpInstr          = ppc_patchJumpInstr
        mkSpillInstr            = ppc_mkSpillInstr
        mkLoadInstr             = ppc_mkLoadInstr
        takeDeltaInstr          = ppc_takeDeltaInstr
        isMetaInstr             = ppc_isMetaInstr
        mkRegRegMoveInstr _     = ppc_mkRegRegMoveInstr
        takeRegRegMoveInstr     = ppc_takeRegRegMoveInstr
        mkJumpInstr             = ppc_mkJumpInstr
77 78 79 80
        mkStackAllocInstr       = ppc_mkStackAllocInstr
        mkStackDeallocInstr     = ppc_mkStackDeallocInstr


81
ppc_mkStackAllocInstr :: Platform -> Int -> [Instr]
82
ppc_mkStackAllocInstr platform amount
83
  = ppc_mkStackAllocInstr' platform (-amount)
84

85
ppc_mkStackDeallocInstr :: Platform -> Int -> [Instr]
86
ppc_mkStackDeallocInstr platform amount
87 88
  = ppc_mkStackAllocInstr' platform amount

89
ppc_mkStackAllocInstr' :: Platform -> Int -> [Instr]
90
ppc_mkStackAllocInstr' platform amount
91 92 93 94 95 96 97 98 99 100 101
  | fits16Bits amount
  = [ LD fmt r0 (AddrRegImm sp zero)
    , STU fmt r0 (AddrRegImm sp immAmount)
    ]
  | otherwise
  = [ LD fmt r0 (AddrRegImm sp zero)
    , ADDIS tmp sp (HA immAmount)
    , ADD tmp tmp (RIImm (LO immAmount))
    , STU fmt r0 (AddrRegReg sp tmp)
    ]
  where
102
    fmt = intFormat $ widthFromBytes (platformWordSizeInBytes platform)
103 104 105
    zero = ImmInt 0
    tmp = tmpReg platform
    immAmount = ImmInt amount
106

107 108 109
--
-- See note [extra spill slots] in X86/Instr.hs
--
110 111 112 113
allocMoreStack
  :: Platform
  -> Int
  -> NatCmmDecl statics PPC.Instr.Instr
114
  -> UniqSM (NatCmmDecl statics PPC.Instr.Instr, [(BlockId,BlockId)])
115

116
allocMoreStack _ _ top@(CmmData _ _) = return (top,[])
117 118 119 120 121 122 123 124 125
allocMoreStack platform slots (CmmProc info lbl live (ListGraph code)) = do
    let
        infos   = mapKeys info
        entries = case code of
                    [] -> infos
                    BasicBlock entry _ : _ -- first block is the entry point
                        | entry `elem` infos -> infos
                        | otherwise          -> entry : infos

126
    uniqs <- replicateM (length entries) getUniqueM
127 128 129 130 131 132 133 134

    let
        delta = ((x + stackAlign - 1) `quot` stackAlign) * stackAlign -- round up
            where x = slots * spillSlotSize -- sp delta

        alloc   = mkStackAllocInstr   platform delta
        dealloc = mkStackDeallocInstr platform delta

135 136
        retargetList = (zip entries (map mkBlockId uniqs))

137
        new_blockmap :: LabelMap BlockId
138
        new_blockmap = mapFromList retargetList
139 140 141

        insert_stack_insns (BasicBlock id insns)
            | Just new_blockid <- mapLookup id new_blockmap
142
                = [ BasicBlock id $ alloc ++ [BCC ALWAYS new_blockid Nothing]
143 144 145 146 147 148 149 150 151 152 153 154
                  , BasicBlock new_blockid block'
                  ]
            | otherwise
                = [ BasicBlock id block' ]
            where
              block' = foldr insert_dealloc [] insns

        insert_dealloc insn r
            -- BCTR might or might not be a non-local jump. For
            -- "labeled-goto" we use JMP, and for "computed-goto" we
            -- use MTCTR followed by BCTR. See 'PPC.CodeGen.genJump'.
            = case insn of
155 156 157 158 159 160
                JMP _ _           -> dealloc ++ (insn : r)
                BCTR [] Nothing _ -> dealloc ++ (insn : r)
                BCTR ids label rs -> BCTR (map (fmap retarget) ids) label rs : r
                BCCFAR cond b p   -> BCCFAR cond (retarget b) p : r
                BCC    cond b p   -> BCC    cond (retarget b) p : r
                _                 -> insn : r
161 162 163 164 165 166 167 168 169 170 171
            -- BL and BCTRL are call-like instructions rather than
            -- jumps, and are used only for C calls.

        retarget :: BlockId -> BlockId
        retarget b
            = fromMaybe b (mapLookup b new_blockmap)

        new_code
            = concatMap insert_stack_insns code

    -- in
172
    return (CmmProc info lbl live (ListGraph new_code),retargetList)
173

174 175 176 177 178 179 180 181

-- -----------------------------------------------------------------------------
-- Machine's assembly language

-- We have a few common "instructions" (nearly all the pseudo-ops) but
-- mostly all of 'Instr' is machine-specific.

-- Register or immediate
182 183 184
data RI
    = RIReg Reg
    | RIImm Imm
185 186

data Instr
187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205
    -- comment pseudo-op
    = COMMENT FastString

    -- some static data spat out during code
    -- generation.  Will be extracted before
    -- pretty-printing.
    | LDATA   Section CmmStatics

    -- start a new basic block.  Useful during
    -- codegen, removed later.  Preceding
    -- instruction should be a jump, as per the
    -- invariants for a BasicBlock (see Cmm).
    | NEWBLOCK BlockId

    -- specify current stack offset for
    -- benefit of subsequent passes
    | DELTA   Int

    -- Loads and stores.
206
    | LD      Format Reg AddrMode   -- Load format, dst, src
207
    | LDFAR   Format Reg AddrMode   -- Load format, dst, src 32 bit offset
208
    | LDR     Format Reg AddrMode   -- Load and reserve format, dst, src
209 210
    | LA      Format Reg AddrMode   -- Load arithmetic format, dst, src
    | ST      Format Reg AddrMode   -- Store format, src, dst
211
    | STFAR   Format Reg AddrMode   -- Store format, src, dst 32 bit offset
212
    | STU     Format Reg AddrMode   -- Store with Update format, src, dst
213
    | STC     Format Reg AddrMode   -- Store conditional format, src, dst
214 215 216 217
    | LIS     Reg Imm               -- Load Immediate Shifted dst, src
    | LI      Reg Imm               -- Load Immediate dst, src
    | MR      Reg Reg               -- Move Register dst, src -- also for fmr

218 219
    | CMP     Format Reg RI         -- format, src1, src2
    | CMPL    Format Reg RI         -- format, src1, src2
220

221 222 223 224 225 226
    | BCC     Cond BlockId (Maybe Bool) -- cond, block, hint
    | BCCFAR  Cond BlockId (Maybe Bool) -- cond, block, hint
                                    --   hint:
                                    --    Just True:  branch likely taken
                                    --    Just False: branch likely not taken
                                    --    Nothing:    no hint
227
    | JMP     CLabel [Reg]          -- same as branch,
228
                                    -- but with CLabel instead of block ID
229
                                    -- and live global registers
230
    | MTCTR   Reg
231 232 233
    | BCTR    [Maybe BlockId] (Maybe CLabel) [Reg]
                                    -- with list of local destinations, and
                                    -- jump table location if necessary
234 235 236 237
    | BL      CLabel [Reg]          -- with list of argument regs
    | BCTRL   [Reg]

    | ADD     Reg Reg RI            -- dst, src1, src2
238
    | ADDO    Reg Reg Reg           -- add and set overflow
239
    | ADDC    Reg Reg Reg           -- (carrying) dst, src1, src2
240 241
    | ADDE    Reg Reg Reg           -- (extended) dst, src1, src2
    | ADDZE   Reg Reg               -- (to zero extended) dst, src
242 243
    | ADDIS   Reg Reg Imm           -- Add Immediate Shifted dst, src1, src2
    | SUBF    Reg Reg Reg           -- dst, src1, src2 ; dst = src2 - src1
244 245 246 247 248 249 250 251 252
    | SUBFO   Reg Reg Reg           -- subtract from and set overflow
    | SUBFC   Reg Reg RI            -- (carrying) dst, src1, src2 ;
                                    -- dst = src2 - src1
    | SUBFE   Reg Reg Reg           -- (extended) dst, src1, src2 ;
                                    -- dst = src2 - src1
    | MULL    Format Reg Reg RI
    | MULLO   Format Reg Reg Reg    -- multiply and set overflow
    | MFOV    Format Reg            -- move overflow bit (1|33) to register
                                    -- pseudo-instruction; pretty printed as
253
                                    -- mfxer dst
254 255 256
                                    -- extr[w|d]i dst, dst, 1, [1|33]
    | MULHU   Format Reg Reg Reg
    | DIV     Format Bool Reg Reg Reg
257
    | AND     Reg Reg RI            -- dst, src1, src2
258
    | ANDC    Reg Reg Reg           -- AND with complement, dst = src1 & ~ src2
259
    | NAND    Reg Reg Reg           -- dst, src1, src2
260
    | OR      Reg Reg RI            -- dst, src1, src2
261
    | ORIS    Reg Reg Imm           -- OR Immediate Shifted dst, src1, src2
262 263 264
    | XOR     Reg Reg RI            -- dst, src1, src2
    | XORIS   Reg Reg Imm           -- XOR Immediate Shifted dst, src1, src2

265
    | EXTS    Format Reg Reg
266
    | CNTLZ   Format Reg Reg
267 268 269 270

    | NEG     Reg Reg
    | NOT     Reg Reg

271 272 273
    | SL      Format Reg Reg RI            -- shift left
    | SR      Format Reg Reg RI            -- shift right
    | SRA     Format Reg Reg RI            -- shift right arithmetic
274 275

    | RLWINM  Reg Reg Int Int Int   -- Rotate Left Word Immediate then AND with Mask
276
    | CLRLI   Format Reg Reg Int    -- clear left immediate (extended mnemonic)
277
    | CLRRI   Format Reg Reg Int    -- clear right immediate (extended mnemonic)
278

279 280 281 282
    | FADD    Format Reg Reg Reg
    | FSUB    Format Reg Reg Reg
    | FMUL    Format Reg Reg Reg
    | FDIV    Format Reg Reg Reg
283
    | FABS    Reg Reg               -- abs is the same for single and double
284
    | FNEG    Reg Reg               -- negate is the same for single and double prec.
285 286 287 288

    | FCMP    Reg Reg

    | FCTIWZ  Reg Reg           -- convert to integer word
289 290
    | FCTIDZ  Reg Reg           -- convert to integer double word
    | FCFID   Reg Reg           -- convert from integer double word
291 292 293 294 295 296 297 298 299
    | FRSP    Reg Reg           -- reduce to single precision
                                -- (but destination is a FP register)

    | CRNOR   Int Int Int       -- condition register nor
    | MFCR    Reg               -- move from condition register

    | MFLR    Reg               -- move from link register
    | FETCHPC Reg               -- pseudo-instruction:
                                -- bcl to next insn, mflr reg
300 301
    | HWSYNC                    -- heavy weight sync
    | ISYNC                     -- instruction synchronize
302
    | LWSYNC                    -- memory barrier
303 304 305
    | NOP                       -- no operation, PowerPC 64 bit
                                -- needs this as place holder to
                                -- reload TOC pointer
306 307

-- | Get the registers that are being used by this instruction.
308 309 310 311
-- regUsage doesn't need to do any trickery for jumps and such.
-- Just state precisely the regs read and written by that insn.
-- The consequences of control flow transfers, as far as register
-- allocation goes, are taken care of by the register allocator.
312
--
313
ppc_regUsageOfInstr :: Platform -> Instr -> RegUsage
314
ppc_regUsageOfInstr platform instr
315
 = case instr of
316
    LD      _ reg addr       -> usage (regAddr addr, [reg])
317
    LDFAR   _ reg addr       -> usage (regAddr addr, [reg])
318
    LDR     _ reg addr       -> usage (regAddr addr, [reg])
319 320
    LA      _ reg addr       -> usage (regAddr addr, [reg])
    ST      _ reg addr       -> usage (reg : regAddr addr, [])
321
    STFAR   _ reg addr       -> usage (reg : regAddr addr, [])
322
    STU     _ reg addr       -> usage (reg : regAddr addr, [])
323
    STC     _ reg addr       -> usage (reg : regAddr addr, [])
324 325 326 327 328
    LIS     reg _            -> usage ([], [reg])
    LI      reg _            -> usage ([], [reg])
    MR      reg1 reg2        -> usage ([reg2], [reg1])
    CMP     _ reg ri         -> usage (reg : regRI ri,[])
    CMPL    _ reg ri         -> usage (reg : regRI ri,[])
329 330
    BCC     _ _ _            -> noUsage
    BCCFAR  _ _ _            -> noUsage
331
    JMP     _ regs           -> usage (regs, [])
332
    MTCTR   reg              -> usage ([reg],[])
333
    BCTR    _ _ regs         -> usage (regs, [])
334 335 336 337
    BL      _ params         -> usage (params, callClobberedRegs platform)
    BCTRL   params           -> usage (params, callClobberedRegs platform)

    ADD     reg1 reg2 ri     -> usage (reg2 : regRI ri, [reg1])
338
    ADDO    reg1 reg2 reg3   -> usage ([reg2,reg3], [reg1])
339 340
    ADDC    reg1 reg2 reg3   -> usage ([reg2,reg3], [reg1])
    ADDE    reg1 reg2 reg3   -> usage ([reg2,reg3], [reg1])
341
    ADDZE   reg1 reg2        -> usage ([reg2], [reg1])
342 343
    ADDIS   reg1 reg2 _      -> usage ([reg2], [reg1])
    SUBF    reg1 reg2 reg3   -> usage ([reg2,reg3], [reg1])
344 345
    SUBFO   reg1 reg2 reg3   -> usage ([reg2,reg3], [reg1])
    SUBFC   reg1 reg2 ri     -> usage (reg2 : regRI ri, [reg1])
346
    SUBFE   reg1 reg2 reg3   -> usage ([reg2,reg3], [reg1])
347 348 349 350 351 352 353
    MULL    _ reg1 reg2 ri   -> usage (reg2 : regRI ri, [reg1])
    MULLO   _ reg1 reg2 reg3 -> usage ([reg2,reg3], [reg1])
    MFOV    _ reg            -> usage ([], [reg])
    MULHU   _ reg1 reg2 reg3 -> usage ([reg2,reg3], [reg1])
    DIV     _ _ reg1 reg2 reg3
                             -> usage ([reg2,reg3], [reg1])

354
    AND     reg1 reg2 ri    -> usage (reg2 : regRI ri, [reg1])
355
    ANDC    reg1 reg2 reg3  -> usage ([reg2,reg3], [reg1])
356
    NAND    reg1 reg2 reg3  -> usage ([reg2,reg3], [reg1])
357
    OR      reg1 reg2 ri    -> usage (reg2 : regRI ri, [reg1])
358
    ORIS    reg1 reg2 _     -> usage ([reg2], [reg1])
359 360 361
    XOR     reg1 reg2 ri    -> usage (reg2 : regRI ri, [reg1])
    XORIS   reg1 reg2 _     -> usage ([reg2], [reg1])
    EXTS    _  reg1 reg2    -> usage ([reg2], [reg1])
362
    CNTLZ   _  reg1 reg2    -> usage ([reg2], [reg1])
363 364
    NEG     reg1 reg2       -> usage ([reg2], [reg1])
    NOT     reg1 reg2       -> usage ([reg2], [reg1])
365 366 367
    SL      _ reg1 reg2 ri  -> usage (reg2 : regRI ri, [reg1])
    SR      _ reg1 reg2 ri  -> usage (reg2 : regRI ri, [reg1])
    SRA     _ reg1 reg2 ri  -> usage (reg2 : regRI ri, [reg1])
368
    RLWINM  reg1 reg2 _ _ _ -> usage ([reg2], [reg1])
369
    CLRLI   _ reg1 reg2 _   -> usage ([reg2], [reg1])
370
    CLRRI   _ reg1 reg2 _   -> usage ([reg2], [reg1])
371 372 373 374 375

    FADD    _ r1 r2 r3      -> usage ([r2,r3], [r1])
    FSUB    _ r1 r2 r3      -> usage ([r2,r3], [r1])
    FMUL    _ r1 r2 r3      -> usage ([r2,r3], [r1])
    FDIV    _ r1 r2 r3      -> usage ([r2,r3], [r1])
376
    FABS    r1 r2           -> usage ([r2], [r1])
377 378 379
    FNEG    r1 r2           -> usage ([r2], [r1])
    FCMP    r1 r2           -> usage ([r1,r2], [])
    FCTIWZ  r1 r2           -> usage ([r2], [r1])
380 381
    FCTIDZ  r1 r2           -> usage ([r2], [r1])
    FCFID   r1 r2           -> usage ([r2], [r1])
382 383 384 385 386
    FRSP    r1 r2           -> usage ([r2], [r1])
    MFCR    reg             -> usage ([], [reg])
    MFLR    reg             -> usage ([], [reg])
    FETCHPC reg             -> usage ([], [reg])
    _                       -> noUsage
387
  where
388
    usage (src, dst) = RU (filter (interesting platform) src)
389
                          (filter (interesting platform) dst)
390 391 392 393
    regAddr (AddrRegReg r1 r2) = [r1, r2]
    regAddr (AddrRegImm r1 _)  = [r1]

    regRI (RIReg r) = [r]
394
    regRI  _        = []
395

396 397
interesting :: Platform -> Reg -> Bool
interesting _        (RegVirtual _)              = True
thomie's avatar
thomie committed
398
interesting platform (RegReal (RealRegSingle i)) = freeReg platform i
399 400
interesting _        (RegReal (RealRegPair{}))
    = panic "PPC.Instr.interesting: no reg pairs on this arch"
401 402 403 404



-- | Apply a given mapping to all the register references in this
405
-- instruction.
406
ppc_patchRegsOfInstr :: Instr -> (Reg -> Reg) -> Instr
407
ppc_patchRegsOfInstr instr env
408
 = case instr of
409
    LD      fmt reg addr    -> LD fmt (env reg) (fixAddr addr)
410
    LDFAR   fmt reg addr    -> LDFAR fmt (env reg) (fixAddr addr)
411
    LDR     fmt reg addr    -> LDR fmt (env reg) (fixAddr addr)
412 413
    LA      fmt reg addr    -> LA fmt (env reg) (fixAddr addr)
    ST      fmt reg addr    -> ST fmt (env reg) (fixAddr addr)
414
    STFAR   fmt reg addr    -> STFAR fmt (env reg) (fixAddr addr)
415
    STU     fmt reg addr    -> STU fmt (env reg) (fixAddr addr)
416
    STC     fmt reg addr    -> STC fmt (env reg) (fixAddr addr)
417 418 419
    LIS     reg imm         -> LIS (env reg) imm
    LI      reg imm         -> LI (env reg) imm
    MR      reg1 reg2       -> MR (env reg1) (env reg2)
420 421
    CMP     fmt reg ri      -> CMP fmt (env reg) (fixRI ri)
    CMPL    fmt reg ri      -> CMPL fmt (env reg) (fixRI ri)
422 423
    BCC     cond lbl p      -> BCC cond lbl p
    BCCFAR  cond lbl p      -> BCCFAR cond lbl p
424
    JMP     l regs          -> JMP l regs -- global regs will not be remapped
425
    MTCTR   reg             -> MTCTR (env reg)
426
    BCTR    targets lbl rs  -> BCTR targets lbl rs
427 428 429
    BL      imm argRegs     -> BL imm argRegs    -- argument regs
    BCTRL   argRegs         -> BCTRL argRegs     -- cannot be remapped
    ADD     reg1 reg2 ri    -> ADD (env reg1) (env reg2) (fixRI ri)
430
    ADDO    reg1 reg2 reg3  -> ADDO (env reg1) (env reg2) (env reg3)
431 432
    ADDC    reg1 reg2 reg3  -> ADDC (env reg1) (env reg2) (env reg3)
    ADDE    reg1 reg2 reg3  -> ADDE (env reg1) (env reg2) (env reg3)
433
    ADDZE   reg1 reg2       -> ADDZE (env reg1) (env reg2)
434 435
    ADDIS   reg1 reg2 imm   -> ADDIS (env reg1) (env reg2) imm
    SUBF    reg1 reg2 reg3  -> SUBF (env reg1) (env reg2) (env reg3)
436 437
    SUBFO   reg1 reg2 reg3  -> SUBFO (env reg1) (env reg2) (env reg3)
    SUBFC   reg1 reg2 ri    -> SUBFC (env reg1) (env reg2) (fixRI ri)
438
    SUBFE   reg1 reg2 reg3  -> SUBFE (env reg1) (env reg2) (env reg3)
439 440 441 442 443 444 445 446 447 448
    MULL    fmt reg1 reg2 ri
                            -> MULL fmt (env reg1) (env reg2) (fixRI ri)
    MULLO   fmt reg1 reg2 reg3
                            -> MULLO fmt (env reg1) (env reg2) (env reg3)
    MFOV    fmt reg         -> MFOV fmt (env reg)
    MULHU   fmt reg1 reg2 reg3
                            -> MULHU fmt (env reg1) (env reg2) (env reg3)
    DIV     fmt sgn reg1 reg2 reg3
                            -> DIV fmt sgn (env reg1) (env reg2) (env reg3)

449
    AND     reg1 reg2 ri    -> AND (env reg1) (env reg2) (fixRI ri)
450
    ANDC    reg1 reg2 reg3  -> ANDC (env reg1) (env reg2) (env reg3)
451
    NAND    reg1 reg2 reg3  -> NAND (env reg1) (env reg2) (env reg3)
452
    OR      reg1 reg2 ri    -> OR  (env reg1) (env reg2) (fixRI ri)
453
    ORIS    reg1 reg2 imm   -> ORIS (env reg1) (env reg2) imm
454 455
    XOR     reg1 reg2 ri    -> XOR (env reg1) (env reg2) (fixRI ri)
    XORIS   reg1 reg2 imm   -> XORIS (env reg1) (env reg2) imm
456
    EXTS    fmt reg1 reg2   -> EXTS fmt (env reg1) (env reg2)
457
    CNTLZ   fmt reg1 reg2   -> CNTLZ fmt (env reg1) (env reg2)
458 459
    NEG     reg1 reg2       -> NEG (env reg1) (env reg2)
    NOT     reg1 reg2       -> NOT (env reg1) (env reg2)
460 461 462 463 464 465
    SL      fmt reg1 reg2 ri
                            -> SL fmt (env reg1) (env reg2) (fixRI ri)
    SR      fmt reg1 reg2 ri
                            -> SR fmt (env reg1) (env reg2) (fixRI ri)
    SRA     fmt reg1 reg2 ri
                            -> SRA fmt (env reg1) (env reg2) (fixRI ri)
466 467
    RLWINM  reg1 reg2 sh mb me
                            -> RLWINM (env reg1) (env reg2) sh mb me
468
    CLRLI   fmt reg1 reg2 n -> CLRLI fmt (env reg1) (env reg2) n
469
    CLRRI   fmt reg1 reg2 n -> CLRRI fmt (env reg1) (env reg2) n
470 471 472 473
    FADD    fmt r1 r2 r3    -> FADD fmt (env r1) (env r2) (env r3)
    FSUB    fmt r1 r2 r3    -> FSUB fmt (env r1) (env r2) (env r3)
    FMUL    fmt r1 r2 r3    -> FMUL fmt (env r1) (env r2) (env r3)
    FDIV    fmt r1 r2 r3    -> FDIV fmt (env r1) (env r2) (env r3)
474
    FABS    r1 r2           -> FABS (env r1) (env r2)
475 476 477
    FNEG    r1 r2           -> FNEG (env r1) (env r2)
    FCMP    r1 r2           -> FCMP (env r1) (env r2)
    FCTIWZ  r1 r2           -> FCTIWZ (env r1) (env r2)
478 479
    FCTIDZ  r1 r2           -> FCTIDZ (env r1) (env r2)
    FCFID   r1 r2           -> FCFID (env r1) (env r2)
480 481 482 483 484
    FRSP    r1 r2           -> FRSP (env r1) (env r2)
    MFCR    reg             -> MFCR (env reg)
    MFLR    reg             -> MFLR (env reg)
    FETCHPC reg             -> FETCHPC (env reg)
    _                       -> instr
485 486 487 488 489
  where
    fixAddr (AddrRegReg r1 r2) = AddrRegReg (env r1) (env r2)
    fixAddr (AddrRegImm r1 i)  = AddrRegImm (env r1) i

    fixRI (RIReg r) = RIReg (env r)
490
    fixRI other     = other
491 492 493


--------------------------------------------------------------------------------
494 495 496
-- | Checks whether this instruction is a jump/branch instruction.
-- One that can change the flow of control in a way that the
-- register allocator needs to worry about.
497 498 499
ppc_isJumpishInstr :: Instr -> Bool
ppc_isJumpishInstr instr
 = case instr of
500 501 502 503 504 505 506 507 508 509 510 511 512 513
    BCC{}       -> True
    BCCFAR{}    -> True
    BCTR{}      -> True
    BCTRL{}     -> True
    BL{}        -> True
    JMP{}       -> True
    _           -> False


-- | Checks whether this instruction is a jump/branch instruction.
-- One that can change the flow of control in a way that the
-- register allocator needs to worry about.
ppc_jumpDestsOfInstr :: Instr -> [BlockId]
ppc_jumpDestsOfInstr insn
514
  = case insn of
515 516 517 518
        BCC _ id _       -> [id]
        BCCFAR _ id _    -> [id]
        BCTR targets _ _ -> [id | Just id <- targets]
        _                -> []
519 520


521
-- | Change the destination of this jump instruction.
522 523
-- Used in the linear allocator when adding fixup blocks for join
-- points.
524 525 526
ppc_patchJumpInstr :: Instr -> (BlockId -> BlockId) -> Instr
ppc_patchJumpInstr insn patchF
  = case insn of
527 528
        BCC cc id p     -> BCC cc (patchF id) p
        BCCFAR cc id p  -> BCCFAR cc (patchF id) p
529
        BCTR ids lbl rs -> BCTR (map (fmap patchF) ids) lbl rs
530
        _               -> insn
531 532 533 534 535 536


-- -----------------------------------------------------------------------------

-- | An instruction to spill a register into a spill slot.
ppc_mkSpillInstr
537
   :: DynFlags
538 539 540
   -> Reg       -- register to spill
   -> Int       -- current stack delta
   -> Int       -- spill slot to use
541 542
   -> Instr

543 544
ppc_mkSpillInstr dflags reg delta slot
  = let platform = targetPlatform dflags
545
        off      = spillSlotToOffset dflags slot
546
        arch     = platformArch platform
547
    in
548
    let fmt = case targetClassOfReg platform reg of
549 550 551
                RcInteger -> case arch of
                                ArchPPC -> II32
                                _       -> II64
552
                RcDouble  -> FF64
553
                _         -> panic "PPC.Instr.mkSpillInstr: no match"
554 555 556 557 558
        instr = case makeImmediate W32 True (off-delta) of
                Just _  -> ST
                Nothing -> STFAR -- pseudo instruction: 32 bit offsets

    in instr fmt reg (AddrRegImm sp (ImmInt (off-delta)))
559 560 561


ppc_mkLoadInstr
562
   :: DynFlags
563 564 565
   -> Reg       -- register to load
   -> Int       -- current stack delta
   -> Int       -- spill slot to use
566 567
   -> Instr

568 569
ppc_mkLoadInstr dflags reg delta slot
  = let platform = targetPlatform dflags
570
        off      = spillSlotToOffset dflags slot
571
        arch     = platformArch platform
572
    in
573
    let fmt = case targetClassOfReg platform reg of
574 575 576
                RcInteger ->  case arch of
                                 ArchPPC -> II32
                                 _       -> II64
577
                RcDouble  -> FF64
578
                _         -> panic "PPC.Instr.mkLoadInstr: no match"
579 580 581 582 583
        instr = case makeImmediate W32 True (off-delta) of
                Just _  -> LD
                Nothing -> LDFAR -- pseudo instruction: 32 bit offsets

    in instr fmt reg (AddrRegImm sp (ImmInt (off-delta)))
584 585


586 587 588 589 590 591
-- | The size of a minimal stackframe header including minimal
-- parameter save area.
stackFrameHeaderSize :: DynFlags -> Int
stackFrameHeaderSize dflags
  = case platformOS platform of
      OSAIX    -> 24 + 8 * 4
592 593 594 595 596 597
      _ -> case platformArch platform of
                             -- header + parameter save area
             ArchPPC           -> 64 -- TODO: check ABI spec
             ArchPPC_64 ELF_V1 -> 48 + 8 * 8
             ArchPPC_64 ELF_V2 -> 32 + 8 * 8
             _ -> panic "PPC.stackFrameHeaderSize: not defined for this OS"
598 599
     where platform = targetPlatform dflags

600 601 602 603 604 605 606
-- | The maximum number of bytes required to spill a register. PPC32
-- has 32-bit GPRs and 64-bit FPRs, while PPC64 has 64-bit GPRs and
-- 64-bit FPRs. So the maximum is 8 regardless of platforms unlike
-- x86. Note that AltiVec's vector registers are 128-bit wide so we
-- must not use this to spill them.
spillSlotSize :: Int
spillSlotSize = 8
607

608
-- | The number of spill slots available without allocating more.
609 610
maxSpillSlots :: DynFlags -> Int
maxSpillSlots dflags
611 612
    = ((rESERVED_C_STACK_BYTES dflags - stackFrameHeaderSize dflags)
       `div` spillSlotSize) - 1
613 614 615
--     = 0 -- useful for testing allocMoreStack

-- | The number of bytes that the stack pointer should be aligned
616 617
-- to. This is 16 both on PPC32 and PPC64 ELF (see ELF processor
-- specific supplements).
618 619 620 621
stackAlign :: Int
stackAlign = 16

-- | Convert a spill slot number to a *byte* offset, with no sign.
622 623 624
spillSlotToOffset :: DynFlags -> Int -> Int
spillSlotToOffset dflags slot
   = stackFrameHeaderSize dflags + spillSlotSize * slot
625

626 627 628 629

--------------------------------------------------------------------------------
-- | See if this instruction is telling us the current C stack delta
ppc_takeDeltaInstr
630 631 632
    :: Instr
    -> Maybe Int

633 634
ppc_takeDeltaInstr instr
 = case instr of
635 636
     DELTA i  -> Just i
     _        -> Nothing
637 638 639


ppc_isMetaInstr
640 641 642
    :: Instr
    -> Bool

643 644
ppc_isMetaInstr instr
 = case instr of
645 646 647 648 649
    COMMENT{}   -> True
    LDATA{}     -> True
    NEWBLOCK{}  -> True
    DELTA{}     -> True
    _           -> False
650 651 652


-- | Copy the value in a register to another one.
653
-- Must work for all register classes.
654
ppc_mkRegRegMoveInstr
655 656 657
    :: Reg
    -> Reg
    -> Instr
658 659

ppc_mkRegRegMoveInstr src dst
660
    = MR dst src
661 662 663 664


-- | Make an unconditional jump instruction.
ppc_mkJumpInstr
665 666
    :: BlockId
    -> [Instr]
667

668
ppc_mkJumpInstr id
669
    = [BCC ALWAYS id Nothing]
670 671 672


-- | Take the source and destination from this reg -> reg move instruction
673
-- or Nothing if it's not one
674 675 676
ppc_takeRegRegMoveInstr :: Instr -> Maybe (Reg,Reg)
ppc_takeRegRegMoveInstr (MR dst src) = Just (src,dst)
ppc_takeRegRegMoveInstr _  = Nothing
677 678 679 680 681 682 683 684

-- -----------------------------------------------------------------------------
-- Making far branches

-- Conditional branches on PowerPC are limited to +-32KB; if our Procs get too
-- big, we have to work around this limitation.

makeFarBranches
685
        :: LabelMap CmmStatics
686
        -> [NatBasicBlock Instr]
687 688
        -> [NatBasicBlock Instr]
makeFarBranches info_env blocks
689 690 691 692 693 694 695 696 697
    | last blockAddresses < nearLimit = blocks
    | otherwise = zipWith handleBlock blockAddresses blocks
    where
        blockAddresses = scanl (+) 0 $ map blockLen blocks
        blockLen (BasicBlock _ instrs) = length instrs

        handleBlock addr (BasicBlock id instrs)
                = BasicBlock id (zipWith makeFar [addr..] instrs)

698 699
        makeFar _ (BCC ALWAYS tgt _) = BCC ALWAYS tgt Nothing
        makeFar addr (BCC cond tgt p)
700
            | abs (addr - targetAddr) >= nearLimit
701
            = BCCFAR cond tgt p
702
            | otherwise
703
            = BCC cond tgt p
704 705 706
            where Just targetAddr = lookupUFM blockAddressMap tgt
        makeFar _ other            = other

707 708 709 710 711
        -- 8192 instructions are allowed; let's keep some distance, as
        -- we have a few pseudo-insns that are pretty-printed as
        -- multiple instructions, and it's just not worth the effort
        -- to calculate things exactly
        nearLimit = 7000 - mapSize info_env * maxRetInfoTableSizeW
712 713

        blockAddressMap = listToUFM $ zip (map blockId blocks) blockAddresses