AsmCodeGen.lhs 41.8 KB
Newer Older
1 2 3
-- -----------------------------------------------------------------------------
--
-- (c) The University of Glasgow 1993-2004
4
--
5 6 7
-- This is the top-level module in the native code generator.
--
-- -----------------------------------------------------------------------------
8 9

\begin{code}
10
{-# LANGUAGE GADTs #-}
11
module AsmCodeGen ( nativeCodeGen ) where
12

13
#include "HsVersions.h"
14
#include "nativeGen/NCG.h"
15

16

17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34
import qualified X86.CodeGen
import qualified X86.Regs
import qualified X86.Instr
import qualified X86.Ppr

import qualified SPARC.CodeGen
import qualified SPARC.Regs
import qualified SPARC.Instr
import qualified SPARC.Ppr
import qualified SPARC.ShortcutJump
import qualified SPARC.CodeGen.Expand

import qualified PPC.CodeGen
import qualified PPC.Cond
import qualified PPC.Regs
import qualified PPC.RegInfo
import qualified PPC.Instr
import qualified PPC.Ppr
35 36

import RegAlloc.Liveness
37
import qualified RegAlloc.Linear.Main           as Linear
38

39 40 41 42
import qualified GraphColor                     as Color
import qualified RegAlloc.Graph.Main            as Color
import qualified RegAlloc.Graph.Stats           as Color
import qualified RegAlloc.Graph.TrivColorable   as Color
43

44
import TargetReg
45
import Platform
46
import Config
47 48 49 50
import Instruction
import PIC
import Reg
import NCGMonad
51

52
import BlockId
53
import CgUtils          ( fixStgRegisters )
54 55 56
import Cmm
import CmmUtils
import Hoopl
Simon Marlow's avatar
Simon Marlow committed
57
import CmmOpt           ( cmmMachOpFold )
58
import PprCmm
59
import CLabel
60

61
import UniqFM
62
import UniqSupply
63 64
import DynFlags
import Util
65

66
import BasicTypes       ( Alignment )
67
import Digraph
68
import qualified Pretty
69
import BufWrite
70
import Outputable
71
import FastString
72
import UniqSet
73
import ErrUtils
74
import Module
75 76
import Stream (Stream)
import qualified Stream
77

78 79
-- DEBUGGING ONLY
--import OrdList
80

81
import Data.List
82
import Data.Maybe
83
import Control.Exception
84
import Control.Monad
85
import System.IO
86

87 88 89
{-
The native-code generator has machine-independent and
machine-dependent modules.
90

91 92 93 94
This module ("AsmCodeGen") is the top-level machine-independent
module.  Before entering machine-dependent land, we do some
machine-independent optimisations (defined below) on the
'CmmStmts's.
95

96 97 98 99 100 101 102 103 104
We convert to the machine-specific 'Instr' datatype with
'cmmCodeGen', assuming an infinite supply of registers.  We then use
a machine-independent register allocator ('regAlloc') to rejoin
reality.  Obviously, 'regAlloc' has machine-specific helper
functions (see about "RegAllocInfo" below).

Finally, we order the basic blocks of the function so as to minimise
the number of jumps between blocks, by utilising fallthrough wherever
possible.
105 106

The machine-dependent bits break down as follows:
107 108

  * ["MachRegs"]  Everything about the target platform's machine
109 110 111
    registers (and immediate operands, and addresses, which tend to
    intermingle/interact with registers).

112
  * ["MachInstrs"]  Includes the 'Instr' datatype (possibly should
113
    have a module of its own), plus a miscellany of other things
114
    (e.g., 'targetDoubleSize', 'smStablePtrTable', ...)
115

116
  * ["MachCodeGen"]  is where 'Cmm' stuff turns into
117
    machine instructions.
118

119
  * ["PprMach"] 'pprInstr' turns an 'Instr' into text (well, really
120
    a 'SDoc').
121

122 123
  * ["RegAllocInfo"] In the register allocator, we manipulate
    'MRegsState's, which are 'BitSet's, one bit per machine register.
124 125
    When we want to say something about a specific machine register
    (e.g., ``it gets clobbered by this instruction''), we set/unset
126
    its bit.  Obviously, we do this 'BitSet' thing for efficiency
127 128
    reasons.

129
    The 'RegAllocInfo' module collects together the machine-specific
130 131
    info needed to do register allocation.

132 133
   * ["RegisterAlloc"] The (machine-independent) register allocator.
-}
134

135 136
-- -----------------------------------------------------------------------------
-- Top-level of the native codegen
137

138
data NcgImpl statics instr jumpDest = NcgImpl {
Simon Peyton Jones's avatar
Simon Peyton Jones committed
139
    cmmTopCodeGen             :: RawCmmDecl -> NatM [NatCmmDecl statics instr],
140
    generateJumpTableForInstr :: instr -> Maybe (NatCmmDecl statics instr),
141 142
    getJumpDestBlockId        :: jumpDest -> Maybe BlockId,
    canShortcut               :: instr -> Maybe jumpDest,
143
    shortcutStatics           :: (BlockId -> Maybe jumpDest) -> statics -> statics,
144
    shortcutJump              :: (BlockId -> Maybe jumpDest) -> instr -> instr,
145
    pprNatCmmDecl             :: NatCmmDecl statics instr -> SDoc,
146 147
    maxSpillSlots             :: Int,
    allocatableRegs           :: [RealReg],
Simon Peyton Jones's avatar
Simon Peyton Jones committed
148 149
    ncg_x86fp_kludge          :: [NatCmmDecl statics instr] -> [NatCmmDecl statics instr],
    ncgExpandTop              :: [NatCmmDecl statics instr] -> [NatCmmDecl statics instr],
150
    ncgAllocMoreStack         :: Int -> NatCmmDecl statics instr -> NatCmmDecl statics instr,
151 152 153
    ncgMakeFarBranches        :: [NatBasicBlock instr] -> [NatBasicBlock instr]
    }

154
--------------------
155 156
nativeCodeGen :: DynFlags -> Handle -> UniqSupply -> Stream IO RawCmmGroup ()
              -> IO UniqSupply
157
nativeCodeGen dflags h us cmms
158
 = let platform = targetPlatform dflags
159
       nCG' :: (Outputable statics, Outputable instr, Instruction instr) => NcgImpl statics instr jumpDest -> IO UniqSupply
160
       nCG' ncgImpl = nativeCodeGen' dflags ncgImpl h us cmms
161 162
       x86NcgImpl = NcgImpl {
                         cmmTopCodeGen             = X86.CodeGen.cmmTopCodeGen
163
                        ,generateJumpTableForInstr = X86.CodeGen.generateJumpTableForInstr dflags
164 165
                        ,getJumpDestBlockId        = X86.Instr.getJumpDestBlockId
                        ,canShortcut               = X86.Instr.canShortcut
166
                        ,shortcutStatics           = X86.Instr.shortcutStatics
167
                        ,shortcutJump              = X86.Instr.shortcutJump
Simon Peyton Jones's avatar
Simon Peyton Jones committed
168
                        ,pprNatCmmDecl              = X86.Ppr.pprNatCmmDecl
169 170
                        ,maxSpillSlots             = X86.Instr.maxSpillSlots dflags
                        ,allocatableRegs           = X86.Regs.allocatableRegs platform
171
                        ,ncg_x86fp_kludge          = id
172
                        ,ncgAllocMoreStack         = X86.Instr.allocMoreStack platform
173 174 175
                        ,ncgExpandTop              = id
                        ,ncgMakeFarBranches        = id
                    }
176
   in case platformArch platform of
177 178 179 180 181
                 ArchX86    -> nCG' (x86NcgImpl { ncg_x86fp_kludge = map x86fp_kludge })
                 ArchX86_64 -> nCG' x86NcgImpl
                 ArchPPC ->
                     nCG' $ NcgImpl {
                          cmmTopCodeGen             = PPC.CodeGen.cmmTopCodeGen
182
                         ,generateJumpTableForInstr = PPC.CodeGen.generateJumpTableForInstr dflags
183 184
                         ,getJumpDestBlockId        = PPC.RegInfo.getJumpDestBlockId
                         ,canShortcut               = PPC.RegInfo.canShortcut
185
                         ,shortcutStatics           = PPC.RegInfo.shortcutStatics
186
                         ,shortcutJump              = PPC.RegInfo.shortcutJump
Simon Peyton Jones's avatar
Simon Peyton Jones committed
187
                         ,pprNatCmmDecl              = PPC.Ppr.pprNatCmmDecl
188 189
                         ,maxSpillSlots             = PPC.Instr.maxSpillSlots dflags
                         ,allocatableRegs           = PPC.Regs.allocatableRegs platform
190
                         ,ncg_x86fp_kludge          = id
191
                         ,ncgAllocMoreStack         = noAllocMoreStack
192 193 194 195 196 197
                         ,ncgExpandTop              = id
                         ,ncgMakeFarBranches        = makeFarBranches
                     }
                 ArchSPARC ->
                     nCG' $ NcgImpl {
                          cmmTopCodeGen             = SPARC.CodeGen.cmmTopCodeGen
198
                         ,generateJumpTableForInstr = SPARC.CodeGen.generateJumpTableForInstr dflags
199 200
                         ,getJumpDestBlockId        = SPARC.ShortcutJump.getJumpDestBlockId
                         ,canShortcut               = SPARC.ShortcutJump.canShortcut
201
                         ,shortcutStatics           = SPARC.ShortcutJump.shortcutStatics
202
                         ,shortcutJump              = SPARC.ShortcutJump.shortcutJump
Simon Peyton Jones's avatar
Simon Peyton Jones committed
203
                         ,pprNatCmmDecl              = SPARC.Ppr.pprNatCmmDecl
204 205
                         ,maxSpillSlots             = SPARC.Instr.maxSpillSlots dflags
                         ,allocatableRegs           = SPARC.Regs.allocatableRegs
206
                         ,ncg_x86fp_kludge          = id
207
                         ,ncgAllocMoreStack         = noAllocMoreStack
208 209 210
                         ,ncgExpandTop              = map SPARC.CodeGen.Expand.expandTop
                         ,ncgMakeFarBranches        = id
                     }
211
                 ArchARM _ _ _ ->
Simon Marlow's avatar
Simon Marlow committed
212
                     panic "nativeCodeGen: No NCG for ARM"
213 214
                 ArchPPC_64 ->
                     panic "nativeCodeGen: No NCG for PPC 64"
215 216 217 218 219 220
                 ArchAlpha ->
                     panic "nativeCodeGen: No NCG for Alpha"
                 ArchMipseb ->
                     panic "nativeCodeGen: No NCG for mipseb"
                 ArchMipsel ->
                     panic "nativeCodeGen: No NCG for mipsel"
Ian Lynagh's avatar
Ian Lynagh committed
221 222
                 ArchUnknown ->
                     panic "nativeCodeGen: No NCG for unknown arch"
223

224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240

--
-- Allocating more stack space for spilling is currently only
-- supported for the linear register allocator on x86/x86_64, the rest
-- default to the panic below.  To support allocating extra stack on
-- more platforms provide a definition of ncgAllocMoreStack.
--
noAllocMoreStack :: Int -> NatCmmDecl statics instr -> NatCmmDecl statics instr
noAllocMoreStack amount _
  = panic $   "Register allocator: out of stack slots (need " ++ show amount ++ ")\n"
        ++  "   If you are trying to compile SHA1.hs from the crypto library then this\n"
        ++  "   is a known limitation in the linear allocator.\n"
        ++  "\n"
        ++  "   Try enabling the graph colouring allocator with -fregs-graph instead."
        ++  "   You can still file a bug report if you like.\n"


241 242 243 244 245 246 247
type NativeGenState statics instr = (BufHandle, NativeGenAcc statics instr)
type NativeGenAcc statics instr
        = ([[CLabel]],
           [([NatCmmDecl statics instr],
             Maybe [Color.RegAllocStats statics instr],
             Maybe [Linear.RegAllocStats])])

Ian Lynagh's avatar
Ian Lynagh committed
248
nativeCodeGen' :: (Outputable statics, Outputable instr, Instruction instr)
249
               => DynFlags
250
               -> NcgImpl statics instr jumpDest
251
               -> Handle -> UniqSupply -> Stream IO RawCmmGroup () -> IO UniqSupply
252
nativeCodeGen' dflags ncgImpl h us cmms
253
 = do
254
        let platform = targetPlatform dflags
255
            split_cmms  = Stream.map add_split cmms
256 257 258 259
        -- BufHandle is a performance hack.  We could hide it inside
        -- Pretty if it weren't for the fact that we do lots of little
        -- printDocs here (in order to do codegen in constant space).
        bufh <- newBufHandle h
260
        ((imports, prof), us') <- cmmNativeGenStream dflags ncgImpl us split_cmms (bufh, ([], [])) 0
261
        bFlush bufh
262

263 264 265 266 267 268
        let (native, colorStats, linearStats)
                = unzip3 prof

        -- dump native code
        dumpIfSet_dyn dflags
                Opt_D_dump_asm "Asm code"
269
                (vcat $ map (pprNatCmmDecl ncgImpl) $ concat native)
270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300

        -- dump global NCG stats for graph coloring allocator
        (case concat $ catMaybes colorStats of
          []    -> return ()
          stats -> do
                -- build the global register conflict graph
                let graphGlobal
                        = foldl Color.union Color.initGraph
                        $ [ Color.raGraph stat
                                | stat@Color.RegAllocStatsStart{} <- stats]

                dumpSDoc dflags Opt_D_dump_asm_stats "NCG stats"
                        $ Color.pprStats stats graphGlobal

                dumpIfSet_dyn dflags
                        Opt_D_dump_asm_conflicts "Register conflict graph"
                        $ Color.dotGraph
                                (targetRegDotColor platform)
                                (Color.trivColorable platform
                                        (targetVirtualRegSqueeze platform)
                                        (targetRealRegSqueeze platform))
                        $ graphGlobal)


        -- dump global NCG stats for linear allocator
        (case concat $ catMaybes linearStats of
                []      -> return ()
                stats   -> dumpSDoc dflags Opt_D_dump_asm_stats "NCG stats"
                                $ Linear.pprStats (concat native) stats)

        -- write out the imports
Ian Lynagh's avatar
Ian Lynagh committed
301
        Pretty.printDoc Pretty.LeftMode (pprCols dflags) h
Ian Lynagh's avatar
Ian Lynagh committed
302
                $ withPprStyleDoc dflags (mkCodeStyle AsmStyle)
303 304
                $ makeImportsDoc dflags (concat imports)

305
        return us'
306

307
 where  add_split tops
ian@well-typed.com's avatar
ian@well-typed.com committed
308
                | gopt Opt_SplitObjs dflags = split_marker : tops
309
                | otherwise                 = tops
310

311 312
        split_marker = CmmProc mapEmpty mkSplitMarkerLabel []
                               (ofBlockList (panic "split_marker_entry") [])
313

Simon Marlow's avatar
Simon Marlow committed
314
cmmNativeGenStream :: (Outputable statics, Outputable instr, Instruction instr)
315 316 317 318
              => DynFlags
              -> NcgImpl statics instr jumpDest
              -> UniqSupply
              -> Stream IO RawCmmGroup ()
319
              -> NativeGenState statics instr
320
              -> Int
321
              -> IO (NativeGenAcc statics instr, UniqSupply)
322

323
cmmNativeGenStream dflags ncgImpl us cmm_stream ngs@(h, nga) count
324 325 326
 = do
        r <- Stream.runStream cmm_stream
        case r of
327 328 329 330
          Left () ->
            case nga of
            (impAcc, profAcc) ->
              return ((reverse impAcc, reverse profAcc), us)
331
          Right (cmms, cmm_stream') -> do
332 333
            (nga',us') <- cmmNativeGens dflags ncgImpl us cmms ngs count
            cmmNativeGenStream dflags ncgImpl us' cmm_stream' (h, nga') count
334

335 336
-- | Do native code generation on all these cmms.
--
Ian Lynagh's avatar
Ian Lynagh committed
337
cmmNativeGens :: (Outputable statics, Outputable instr, Instruction instr)
338
              => DynFlags
339
              -> NcgImpl statics instr jumpDest
dterei's avatar
dterei committed
340
              -> UniqSupply
Simon Peyton Jones's avatar
Simon Peyton Jones committed
341
              -> [RawCmmDecl]
342
              -> NativeGenState statics instr
dterei's avatar
dterei committed
343
              -> Int
344
              -> IO (NativeGenAcc statics instr, UniqSupply)
dterei's avatar
dterei committed
345

346 347
cmmNativeGens _ _ us [] (_, nga) _
        = return (nga, us)
348

349
cmmNativeGens dflags ncgImpl us (cmm : cmms) (h, (impAcc, profAcc)) count
350
 = do
351
        (us', native, imports, colorStats, linearStats)
Simon Marlow's avatar
Simon Marlow committed
352
                <- {-# SCC "cmmNativeGen" #-} cmmNativeGen dflags ncgImpl us cmm count
353

Simon Marlow's avatar
Simon Marlow committed
354
        {-# SCC "pprNativeCode" #-} Pretty.bufLeftRender h
Ian Lynagh's avatar
Ian Lynagh committed
355
                $ withPprStyleDoc dflags (mkCodeStyle AsmStyle)
356
                $ vcat $ map (pprNatCmmDecl ncgImpl) native
357

358
        let !lsPprNative =
359 360
                if  dopt Opt_D_dump_asm       dflags
                 || dopt Opt_D_dump_asm_stats dflags
361 362
                        then native
                        else []
363

364
        let !count' = count + 1
365

366
        -- force evaulation all this stuff to avoid space leaks
367
        {-# SCC "seqString" #-} evaluate $ seqString (showSDoc dflags $ vcat $ map ppr imports)
368

369
        cmmNativeGens dflags ncgImpl
370 371 372 373
            us' cmms (h,
                      ((imports : impAcc),
                       ((lsPprNative, colorStats, linearStats) : profAcc)))
                     count'
374

375
 where  seqString []            = ()
376
        seqString (x:xs)        = x `seq` seqString xs
377 378


379
-- | Complete native code generation phase for a single top-level chunk of Cmm.
380 381
--      Dumping the output of each stage along the way.
--      Global conflict graph and NGC stats
382
cmmNativeGen
Ian Lynagh's avatar
Ian Lynagh committed
383
        :: (Outputable statics, Outputable instr, Instruction instr)
384
    => DynFlags
385
    -> NcgImpl statics instr jumpDest
386 387 388 389 390 391 392 393
        -> UniqSupply
        -> RawCmmDecl                                   -- ^ the cmm to generate code for
        -> Int                                          -- ^ sequence number of this top thing
        -> IO   ( UniqSupply
                , [NatCmmDecl statics instr]                -- native code
                , [CLabel]                                  -- things imported by this cmm
                , Maybe [Color.RegAllocStats statics instr] -- stats for the coloring register allocator
                , Maybe [Linear.RegAllocStats])             -- stats for the linear register allocators
394

395
cmmNativeGen dflags ncgImpl us cmm count
396
 = do
397
        let platform = targetPlatform dflags
398

399 400 401
        -- rewrite assignments to global regs
        let fixed_cmm =
                {-# SCC "fixStgRegisters" #-}
402
                fixStgRegisters dflags cmm
403

404 405 406 407
        -- cmm to cmm optimisations
        let (opt_cmm, imports) =
                {-# SCC "cmmToCmm" #-}
                cmmToCmm dflags fixed_cmm
408

409 410
        dumpIfSet_dyn dflags
                Opt_D_dump_opt_cmm "Optimised Cmm"
411
                (pprCmmGroup [opt_cmm])
412

413 414 415 416 417 418 419
        -- generate native code from cmm
        let ((native, lastMinuteImports), usGen) =
                {-# SCC "genMachCode" #-}
                initUs us $ genMachCode dflags (cmmTopCodeGen ncgImpl) opt_cmm

        dumpIfSet_dyn dflags
                Opt_D_dump_asm_native "Native code"
420
                (vcat $ map (pprNatCmmDecl ncgImpl) native)
421 422 423 424 425

        -- tag instructions with register liveness information
        let (withLiveness, usLive) =
                {-# SCC "regLiveness" #-}
                initUs usGen
426
                        $ mapM (regLiveness platform)
427 428 429 430
                        $ map natCmmTopToLive native

        dumpIfSet_dyn dflags
                Opt_D_dump_asm_liveness "Liveness annotations added"
Ian Lynagh's avatar
Ian Lynagh committed
431
                (vcat $ map ppr withLiveness)
432 433 434

        -- allocate registers
        (alloced, usAlloc, ppr_raStatsColor, ppr_raStatsLinear) <-
ian@well-typed.com's avatar
ian@well-typed.com committed
435 436
         if ( gopt Opt_RegsGraph dflags
           || gopt Opt_RegsIterative dflags)
437 438 439 440 441 442
          then do
                -- the regs usable for allocation
                let (alloc_regs :: UniqFM (UniqSet RealReg))
                        = foldr (\r -> plusUFM_C unionUniqSets
                                        $ unitUFM (targetClassOfRealReg platform r) (unitUniqSet r))
                                emptyUFM
443
                        $ allocatableRegs ncgImpl
444 445 446 447 448 449 450 451

                -- do the graph coloring register allocation
                let ((alloced, regAllocStats), usAlloc)
                        = {-# SCC "RegAlloc" #-}
                          initUs usLive
                          $ Color.regAlloc
                                dflags
                                alloc_regs
452
                                (mkUniqSet [0 .. maxSpillSlots ncgImpl])
453 454 455 456 457
                                withLiveness

                -- dump out what happened during register allocation
                dumpIfSet_dyn dflags
                        Opt_D_dump_asm_regalloc "Registers allocated"
458
                        (vcat $ map (pprNatCmmDecl ncgImpl) alloced)
459 460 461 462 463 464

                dumpIfSet_dyn dflags
                        Opt_D_dump_asm_regalloc_stages "Build/spill stages"
                        (vcat   $ map (\(stage, stats)
                                        -> text "# --------------------------"
                                        $$ text "#  cmm " <> int count <> text " Stage " <> int stage
Ian Lynagh's avatar
Ian Lynagh committed
465
                                        $$ ppr stats)
466 467 468
                                $ zip [0..] regAllocStats)

                let mPprStats =
469
                        if dopt Opt_D_dump_asm_stats dflags
470 471 472 473 474 475 476 477 478 479 480
                         then Just regAllocStats else Nothing

                -- force evaluation of the Maybe to avoid space leak
                mPprStats `seq` return ()

                return  ( alloced, usAlloc
                        , mPprStats
                        , Nothing)

          else do
                -- do linear register allocation
481 482 483 484 485 486 487 488 489
                let reg_alloc proc = do
                       (alloced, maybe_more_stack, ra_stats) <-
                               Linear.regAlloc dflags proc
                       case maybe_more_stack of
                         Nothing -> return ( alloced, ra_stats )
                         Just amount ->
                           return ( ncgAllocMoreStack ncgImpl amount alloced
                                  , ra_stats )

490 491 492 493
                let ((alloced, regAllocStats), usAlloc)
                        = {-# SCC "RegAlloc" #-}
                          initUs usLive
                          $ liftM unzip
494
                          $ mapM reg_alloc withLiveness
495 496 497

                dumpIfSet_dyn dflags
                        Opt_D_dump_asm_regalloc "Registers allocated"
498
                        (vcat $ map (pprNatCmmDecl ncgImpl) alloced)
499 500

                let mPprStats =
501
                        if dopt Opt_D_dump_asm_stats dflags
502 503 504 505 506 507 508 509
                         then Just (catMaybes regAllocStats) else Nothing

                -- force evaluation of the Maybe to avoid space leak
                mPprStats `seq` return ()

                return  ( alloced, usAlloc
                        , Nothing
                        , mPprStats)
510

511 512 513 514 515 516 517
        ---- x86fp_kludge.  This pass inserts ffree instructions to clear
        ---- the FPU stack on x86.  The x86 ABI requires that the FPU stack
        ---- is clear, and library functions can return odd results if it
        ---- isn't.
        ----
        ---- NB. must happen before shortcutBranches, because that
        ---- generates JXX_GBLs which we can't fix up in x86fp_kludge.
518
        let kludged = {-# SCC "x86fp_kludge" #-} ncg_x86fp_kludge ncgImpl alloced
519 520

        ---- generate jump tables
521 522
        let tabled      =
                {-# SCC "generateJumpTables" #-}
523
                generateJumpTables ncgImpl kludged
524

525 526 527 528
        ---- shortcut branches
        let shorted     =
                {-# SCC "shortcutBranches" #-}
                shortcutBranches dflags ncgImpl tabled
529

530 531 532 533
        ---- sequence blocks
        let sequenced   =
                {-# SCC "sequenceBlocks" #-}
                map (sequenceTop ncgImpl) shorted
534

535
        ---- expansion of SPARC synthetic instrs
536 537
        let expanded =
                {-# SCC "sparc_expand" #-}
538
                ncgExpandTop ncgImpl sequenced
539

540 541
        dumpIfSet_dyn dflags
                Opt_D_dump_asm_expanded "Synthetic instructions expanded"
542
                (vcat $ map (pprNatCmmDecl ncgImpl) expanded)
543

544 545 546 547 548
        return  ( usAlloc
                , expanded
                , lastMinuteImports ++ imports
                , ppr_raStatsColor
                , ppr_raStatsLinear)
549

550

Simon Peyton Jones's avatar
Simon Peyton Jones committed
551
x86fp_kludge :: NatCmmDecl (Alignment, CmmStatics) X86.Instr.Instr -> NatCmmDecl (Alignment, CmmStatics) X86.Instr.Instr
552
x86fp_kludge top@(CmmData _ _) = top
553 554
x86fp_kludge (CmmProc info lbl live (ListGraph code)) =
        CmmProc info lbl live (ListGraph $ X86.Instr.i386_insert_ffrees code)
555

556

557
-- | Build a doc for all the imports.
558
--
559
makeImportsDoc :: DynFlags -> [CLabel] -> SDoc
560
makeImportsDoc dflags imports
561
 = dyld_stubs imports
562
            $$
563 564 565 566
            -- On recent versions of Darwin, the linker supports
            -- dead-stripping of code and data on a per-symbol basis.
            -- There's a hack to make this work in PprMach.pprNatCmmDecl.
            (if platformHasSubsectionsViaSymbols (targetPlatform dflags)
567 568 569
             then text ".subsections_via_symbols"
             else empty)
            $$
570 571 572 573 574 575
                -- On recent GNU ELF systems one can mark an object file
                -- as not requiring an executable stack. If all objects
                -- linked into a program have this note then the program
                -- will not use an executable stack, which is good for
                -- security. GHC generated code does not need an executable
                -- stack so add the note in:
576
            (if platformHasGnuNonexecStack (targetPlatform dflags)
577 578 579
             then text ".section .note.GNU-stack,\"\",@progbits"
             else empty)
            $$
580
                -- And just because every other compiler does, lets stick in
581
                -- an identifier directive: .ident "GHC x.y.z"
582
            (if platformHasIdentDirective (targetPlatform dflags)
583 584 585
             then let compilerIdent = text "GHC" <+> text cProjectVersion
                   in text ".ident" <+> doubleQuotes compilerIdent
             else empty)
586

587
 where
588 589
        -- Generate "symbol stubs" for all external symbols that might
        -- come from a dynamic library.
590 591
        dyld_stubs :: [CLabel] -> SDoc
{-      dyld_stubs imps = vcat $ map pprDyldSymbolStub $
592 593 594 595 596 597 598 599 600
                                    map head $ group $ sort imps-}

        platform = targetPlatform dflags
        arch = platformArch platform
        os   = platformOS   platform

        -- (Hack) sometimes two Labels pretty-print the same, but have
        -- different uniques; so we compare their text versions...
        dyld_stubs imps
Ian Lynagh's avatar
Ian Lynagh committed
601
                | needImportedSymbols dflags arch os
602
                = vcat $
Ian Lynagh's avatar
Ian Lynagh committed
603 604
                        (pprGotDeclaration dflags arch os :) $
                        map ( pprImportedSymbol dflags platform . fst . head) $
605 606 607 608 609
                        groupBy (\(_,a) (_,b) -> a == b) $
                        sortBy (\(_,a) (_,b) -> compare a b) $
                        map doPpr $
                        imps
                | otherwise
610
                = empty
611

Ian Lynagh's avatar
Ian Lynagh committed
612
        doPpr lbl = (lbl, renderWithStyle dflags (pprCLabel platform lbl) astyle)
613
        astyle = mkCodeStyle AsmStyle
614 615


616 617 618 619 620 621 622 623 624
-- -----------------------------------------------------------------------------
-- Sequencing the basic blocks

-- Cmm BasicBlocks are self-contained entities: they always end in a
-- jump, either non-local or to another basic block in the same proc.
-- In this phase, we attempt to place the basic blocks in a sequence
-- such that as many of the local jumps as possible turn into
-- fallthroughs.

625 626
sequenceTop
        :: Instruction instr
Simon Peyton Jones's avatar
Simon Peyton Jones committed
627
    => NcgImpl statics instr jumpDest -> NatCmmDecl statics instr -> NatCmmDecl statics instr
628

629
sequenceTop _       top@(CmmData _ _) = top
630 631
sequenceTop ncgImpl (CmmProc info lbl live (ListGraph blocks)) =
  CmmProc info lbl live (ListGraph $ ncgMakeFarBranches ncgImpl $ sequenceBlocks info blocks)
632 633 634 635 636 637 638 639

-- The algorithm is very simple (and stupid): we make a graph out of
-- the blocks where there is an edge from one block to another iff the
-- first block ends by jumping to the second.  Then we topologically
-- sort this graph.  Then traverse the list: for each block, we first
-- output the block, then if it has an out edge, we move the
-- destination of the out edge to the front of the list, and continue.

640
-- FYI, the classic layout for basic blocks uses postorder DFS; this
641
-- algorithm is implemented in Hoopl.
642

643 644
sequenceBlocks
        :: Instruction instr
645 646
        => BlockEnv i
        -> [NatBasicBlock instr]
647
        -> [NatBasicBlock instr]
648

649 650 651
sequenceBlocks _ [] = []
sequenceBlocks infos (entry:blocks) =
  seqBlocks infos (mkNode entry : reverse (flattenSCCs (sccBlocks blocks)))
652 653
  -- the first block is the entry point ==> it must remain at the start.

654

655 656 657 658
sccBlocks
        :: Instruction instr
        => [NatBasicBlock instr]
        -> [SCC ( NatBasicBlock instr
659 660
                , BlockId
                , [BlockId])]
661

662
sccBlocks blocks = stronglyConnCompFromEdgedVerticesR (map mkNode blocks)
663

664 665
-- we're only interested in the last instruction of
-- the block, and only if it has a single destination.
666 667
getOutEdges
        :: Instruction instr
668
        => [instr] -> [BlockId]
669

670 671
getOutEdges instrs
        = case jumpDestsOfInstr (last instrs) of
672
                [one] -> [one]
673
                _many -> []
674

dterei's avatar
dterei committed
675 676
mkNode :: (Instruction t)
       => GenBasicBlock t
677 678 679 680 681 682 683 684 685 686 687
       -> (GenBasicBlock t, BlockId, [BlockId])
mkNode block@(BasicBlock id instrs) = (block, id, getOutEdges instrs)

seqBlocks :: BlockEnv i -> [(GenBasicBlock t1, BlockId, [BlockId])]
                        -> [GenBasicBlock t1]
seqBlocks _ [] = []
seqBlocks infos ((block,_,[]) : rest)
  = block : seqBlocks infos rest
seqBlocks infos ((block@(BasicBlock id instrs),_,[next]) : rest)
  | can_fallthrough = BasicBlock id (init instrs) : seqBlocks infos rest'
  | otherwise       = block : seqBlocks infos rest'
688
  where
689 690
        can_fallthrough = not (mapMember next infos) && can_reorder
        (can_reorder, rest') = reorder next [] rest
691 692
          -- TODO: we should do a better job for cycles; try to maximise the
          -- fallthroughs within a loop.
693
seqBlocks _ _ = panic "AsmCodegen:seqBlocks"
694

dterei's avatar
dterei committed
695 696
reorder :: (Eq a) => a -> [(t, a, t1)] -> [(t, a, t1)] -> (Bool, [(t, a, t1)])
reorder  _ accum [] = (False, reverse accum)
697 698 699 700
reorder id accum (b@(block,id',out) : rest)
  | id == id'  = (True, (block,id,out) : reverse accum ++ rest)
  | otherwise  = reorder id (b:accum) rest

701 702 703 704 705 706 707

-- -----------------------------------------------------------------------------
-- Making far branches

-- Conditional branches on PowerPC are limited to +-32KB; if our Procs get too
-- big, we have to work around this limitation.

708
makeFarBranches
709 710
        :: [NatBasicBlock PPC.Instr.Instr]
        -> [NatBasicBlock PPC.Instr.Instr]
711 712 713 714 715 716
makeFarBranches blocks
    | last blockAddresses < nearLimit = blocks
    | otherwise = zipWith handleBlock blockAddresses blocks
    where
        blockAddresses = scanl (+) 0 $ map blockLen blocks
        blockLen (BasicBlock _ instrs) = length instrs
717

718 719
        handleBlock addr (BasicBlock id instrs)
                = BasicBlock id (zipWith makeFar [addr..] instrs)
720

721 722
        makeFar _ (PPC.Instr.BCC PPC.Cond.ALWAYS tgt) = PPC.Instr.BCC PPC.Cond.ALWAYS tgt
        makeFar addr (PPC.Instr.BCC cond tgt)
723
            | abs (addr - targetAddr) >= nearLimit
724
            = PPC.Instr.BCCFAR cond tgt
725
            | otherwise
726
            = PPC.Instr.BCC cond tgt
727
            where Just targetAddr = lookupUFM blockAddressMap tgt
728
        makeFar _ other            = other
729

730 731 732 733 734
        nearLimit = 7000 -- 8192 instructions are allowed; let's keep some
                         -- distance, as we have a few pseudo-insns that are
                         -- pretty-printed as multiple instructions,
                         -- and it's just not worth the effort to calculate
                         -- things exactly
735

736 737
        blockAddressMap = listToUFM $ zip (map blockId blocks) blockAddresses

738 739 740 741 742 743
-- -----------------------------------------------------------------------------
-- Generate jump tables

-- Analyzes all native code and generates data sections for all jump
-- table instructions.
generateJumpTables
744 745 746
        :: NcgImpl statics instr jumpDest
        -> [NatCmmDecl statics instr] -> [NatCmmDecl statics instr]
generateJumpTables ncgImpl xs = concatMap f xs
747
    where f p@(CmmProc _ _ _ (ListGraph xs)) = p : concatMap g xs
748
          f p = [p]
749
          g (BasicBlock _ xs) = catMaybes (map (generateJumpTableForInstr ncgImpl) xs)
750

751 752 753
-- -----------------------------------------------------------------------------
-- Shortcut branches

754
shortcutBranches
755
        :: DynFlags
756
    -> NcgImpl statics instr jumpDest
757 758
        -> [NatCmmDecl statics instr]
        -> [NatCmmDecl statics instr]
759

760
shortcutBranches dflags ncgImpl tops
761
  | optLevel dflags < 1 = tops    -- only with -O or higher
762
  | otherwise           = map (apply_mapping ncgImpl mapping) tops'
763
  where
764
    (tops', mappings) = mapAndUnzip (build_mapping ncgImpl) tops
765 766
    mapping = foldr plusUFM emptyUFM mappings

767
build_mapping :: NcgImpl statics instr jumpDest
768 769
              -> GenCmmDecl d (BlockEnv t) (ListGraph instr)
              -> (GenCmmDecl d (BlockEnv t) (ListGraph instr), UniqFM jumpDest)
770
build_mapping _ top@(CmmData _ _) = (top, emptyUFM)
771 772 773 774
build_mapping _ (CmmProc info lbl live (ListGraph []))
  = (CmmProc info lbl live (ListGraph []), emptyUFM)
build_mapping ncgImpl (CmmProc info lbl live (ListGraph (head:blocks)))
  = (CmmProc info lbl live (ListGraph (head:others)), mapping)
775 776 777 778 779
        -- drop the shorted blocks, but don't ever drop the first one,
        -- because it is pointed to by a global label.
  where
    -- find all the blocks that just consist of a jump that can be
    -- shorted.
780 781 782
    -- Don't completely eliminate loops here -- that can leave a dangling jump!
    (_, shortcut_blocks, others) = foldl split (emptyBlockSet, [], []) blocks
    split (s, shortcut_blocks, others) b@(BasicBlock id [insn])
783 784
        | Just jd <- canShortcut ncgImpl insn,
          Just dest <- getJumpDestBlockId ncgImpl jd,
785
          not (has_info id),
786
          (setMember dest s) || dest == id -- loop checks
787 788
        = (s, shortcut_blocks, b : others)
    split (s, shortcut_blocks, others) (BasicBlock id [insn])
789 790
        | Just dest <- canShortcut ncgImpl insn,
          not (has_info id)
791
        = (setInsert id s, (id,dest) : shortcut_blocks, others)
792 793
    split (s, shortcut_blocks, others) other = (s, shortcut_blocks, other : others)

794 795
    -- do not eliminate blocks that have an info table
    has_info l = mapMember l info
796 797 798 799

    -- build a mapping from BlockId to JumpDest for shorting branches
    mapping = foldl add emptyUFM shortcut_blocks
    add ufm (id,dest) = addToUFM ufm id dest
800

801
apply_mapping :: NcgImpl statics instr jumpDest
802
              -> UniqFM jumpDest
Simon Peyton Jones's avatar
Simon Peyton Jones committed
803 804
              -> GenCmmDecl statics h (ListGraph instr)
              -> GenCmmDecl statics h (ListGraph instr)
805
apply_mapping ncgImpl ufm (CmmData sec statics)
806
  = CmmData sec (shortcutStatics ncgImpl (lookupUFM ufm) statics)
807 808
apply_mapping ncgImpl ufm (CmmProc info lbl live (ListGraph blocks))
  = CmmProc info lbl live (ListGraph $ map short_bb blocks)
809 810
  where
    short_bb (BasicBlock id insns) = BasicBlock id $! map short_insn insns
811
    short_insn i = shortcutJump ncgImpl (lookupUFM ufm) i
812 813 814
                 -- shortcutJump should apply the mapping repeatedly,
                 -- just in case we can short multiple branches.

815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834
-- -----------------------------------------------------------------------------
-- Instruction selection

-- Native code instruction selection for a chunk of stix code.  For
-- this part of the computation, we switch from the UniqSM monad to
-- the NatM monad.  The latter carries not only a Unique, but also an
-- Int denoting the current C stack pointer offset in the generated
-- code; this is needed for creating correct spill offsets on
-- architectures which don't offer, or for which it would be
-- prohibitively expensive to employ, a frame pointer register.  Viz,
-- x86.

-- The offset is measured in bytes, and indicates the difference
-- between the current (simulated) C stack-ptr and the value it was at
-- the beginning of the block.  For stacks which grow down, this value
-- should be either zero or negative.

-- Switching between the two monads whilst carrying along the same
-- Unique supply breaks abstraction.  Is that bad?

835 836
genMachCode
        :: DynFlags
Simon Peyton Jones's avatar
Simon Peyton Jones committed
837
        -> (RawCmmDecl -> NatM [NatCmmDecl statics instr])
838 839 840 841
        -> RawCmmDecl
        -> UniqSM
                ( [NatCmmDecl statics instr]
                , [CLabel])
842

843
genMachCode dflags cmmTopCodeGen cmm_top
844 845 846 847 848 849
  = do  { initial_us <- getUs
        ; let initial_st           = mkNatM_State initial_us 0 dflags
              (new_tops, final_st) = initNat initial_st (cmmTopCodeGen cmm_top)
              final_delta          = natm_delta final_st
              final_imports        = natm_imports final_st
        ; if   final_delta == 0
850 851 852
          then return (new_tops, final_imports)
          else pprPanic "genMachCode: nonzero final delta" (int final_delta)
    }
853

854 855 856 857 858 859 860
-- -----------------------------------------------------------------------------
-- Generic Cmm optimiser

{-
Here we do:

  (a) Constant folding
861
  (c) Position independent code and dynamic linking
862 863 864
        (i)  introduce the appropriate indirections
             and position independent refs
        (ii) compile a list of imported symbols
865
  (d) Some arch-specific optimizations
866

Simon Marlow's avatar
Simon Marlow committed
867
(a) will be moving to the new Hoopl pipeline, however, (c) and
868 869 870 871
(d) are only needed by the native backend and will continue to live
here.

Ideas for other things we could do (put these in Hoopl please!):
872 873

  - shortcut jumps-to-jumps
874 875 876
  - simple CSE: if an expr is assigned to a temp, then replace later occs of
    that expr with the temp, until the expr is no longer valid (can push through
    temp assignments, and certain assigns to mem...)