AsmCodeGen.hs 42.9 KB
Newer Older
1 2 3
-- -----------------------------------------------------------------------------
--
-- (c) The University of Glasgow 1993-2004
4
--
5 6 7
-- This is the top-level module in the native code generator.
--
-- -----------------------------------------------------------------------------
8

9 10
{-# LANGUAGE BangPatterns, CPP, GADTs, ScopedTypeVariables, UnboxedTuples #-}

11
module AsmCodeGen ( nativeCodeGen ) where
12

13
#include "HsVersions.h"
14
#include "nativeGen/NCG.h"
15

16

17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33
import qualified X86.CodeGen
import qualified X86.Regs
import qualified X86.Instr
import qualified X86.Ppr

import qualified SPARC.CodeGen
import qualified SPARC.Regs
import qualified SPARC.Instr
import qualified SPARC.Ppr
import qualified SPARC.ShortcutJump
import qualified SPARC.CodeGen.Expand

import qualified PPC.CodeGen
import qualified PPC.Regs
import qualified PPC.RegInfo
import qualified PPC.Instr
import qualified PPC.Ppr
34 35

import RegAlloc.Liveness
36
import qualified RegAlloc.Linear.Main           as Linear
37

38 39 40 41
import qualified GraphColor                     as Color
import qualified RegAlloc.Graph.Main            as Color
import qualified RegAlloc.Graph.Stats           as Color
import qualified RegAlloc.Graph.TrivColorable   as Color
42

43
import TargetReg
44
import Platform
45
import Config
46 47 48 49
import Instruction
import PIC
import Reg
import NCGMonad
50
import Debug
51

52
import BlockId
53
import CgUtils          ( fixStgRegisters )
54 55 56
import Cmm
import CmmUtils
import Hoopl
Simon Marlow's avatar
Simon Marlow committed
57
import CmmOpt           ( cmmMachOpFold )
58
import PprCmm
59
import CLabel
60

61
import UniqFM
62
import UniqSupply
63 64
import DynFlags
import Util
65

66
import BasicTypes       ( Alignment )
67
import Digraph
68
import qualified Pretty
69
import BufWrite
70
import Outputable
71
import FastString
72
import UniqSet
73
import ErrUtils
74
import Module
75 76
import Stream (Stream)
import qualified Stream
77

78 79
-- DEBUGGING ONLY
--import OrdList
80

81
import Data.List
82
import Data.Maybe
83
import Control.Exception
84
#if __GLASGOW_HASKELL__ < 709
Austin Seipp's avatar
Austin Seipp committed
85
import Control.Applicative (Applicative(..))
86
#endif
87
import Control.Monad
88
import System.IO
89

90 91 92
{-
The native-code generator has machine-independent and
machine-dependent modules.
93

94 95 96 97
This module ("AsmCodeGen") is the top-level machine-independent
module.  Before entering machine-dependent land, we do some
machine-independent optimisations (defined below) on the
'CmmStmts's.
98

99 100 101 102 103 104 105 106 107
We convert to the machine-specific 'Instr' datatype with
'cmmCodeGen', assuming an infinite supply of registers.  We then use
a machine-independent register allocator ('regAlloc') to rejoin
reality.  Obviously, 'regAlloc' has machine-specific helper
functions (see about "RegAllocInfo" below).

Finally, we order the basic blocks of the function so as to minimise
the number of jumps between blocks, by utilising fallthrough wherever
possible.
108 109

The machine-dependent bits break down as follows:
110 111

  * ["MachRegs"]  Everything about the target platform's machine
112 113 114
    registers (and immediate operands, and addresses, which tend to
    intermingle/interact with registers).

115
  * ["MachInstrs"]  Includes the 'Instr' datatype (possibly should
116
    have a module of its own), plus a miscellany of other things
117
    (e.g., 'targetDoubleSize', 'smStablePtrTable', ...)
118

119
  * ["MachCodeGen"]  is where 'Cmm' stuff turns into
120
    machine instructions.
121

122
  * ["PprMach"] 'pprInstr' turns an 'Instr' into text (well, really
123
    a 'SDoc').
124

125 126
  * ["RegAllocInfo"] In the register allocator, we manipulate
    'MRegsState's, which are 'BitSet's, one bit per machine register.
127 128
    When we want to say something about a specific machine register
    (e.g., ``it gets clobbered by this instruction''), we set/unset
129
    its bit.  Obviously, we do this 'BitSet' thing for efficiency
130 131
    reasons.

132
    The 'RegAllocInfo' module collects together the machine-specific
133 134
    info needed to do register allocation.

135 136
   * ["RegisterAlloc"] The (machine-independent) register allocator.
-}
137

138 139
-- -----------------------------------------------------------------------------
-- Top-level of the native codegen
140

141
data NcgImpl statics instr jumpDest = NcgImpl {
Simon Peyton Jones's avatar
Simon Peyton Jones committed
142
    cmmTopCodeGen             :: RawCmmDecl -> NatM [NatCmmDecl statics instr],
143
    generateJumpTableForInstr :: instr -> Maybe (NatCmmDecl statics instr),
144 145
    getJumpDestBlockId        :: jumpDest -> Maybe BlockId,
    canShortcut               :: instr -> Maybe jumpDest,
146
    shortcutStatics           :: (BlockId -> Maybe jumpDest) -> statics -> statics,
147
    shortcutJump              :: (BlockId -> Maybe jumpDest) -> instr -> instr,
148
    pprNatCmmDecl             :: NatCmmDecl statics instr -> SDoc,
149 150
    maxSpillSlots             :: Int,
    allocatableRegs           :: [RealReg],
Simon Peyton Jones's avatar
Simon Peyton Jones committed
151 152
    ncg_x86fp_kludge          :: [NatCmmDecl statics instr] -> [NatCmmDecl statics instr],
    ncgExpandTop              :: [NatCmmDecl statics instr] -> [NatCmmDecl statics instr],
153
    ncgAllocMoreStack         :: Int -> NatCmmDecl statics instr -> UniqSM (NatCmmDecl statics instr),
154
    ncgMakeFarBranches        :: BlockEnv CmmStatics -> [NatBasicBlock instr] -> [NatBasicBlock instr]
155 156
    }

157
--------------------
158
nativeCodeGen :: DynFlags -> Module -> ModLocation -> Handle -> UniqSupply
159
              -> Stream IO RawCmmGroup ()
160
              -> IO UniqSupply
161
nativeCodeGen dflags this_mod modLoc h us cmms
162
 = let platform = targetPlatform dflags
163 164
       nCG' :: (Outputable statics, Outputable instr, Instruction instr)
            => NcgImpl statics instr jumpDest -> IO UniqSupply
165
       nCG' ncgImpl = nativeCodeGen' dflags this_mod modLoc ncgImpl h us cmms
166
   in case platformArch platform of
167 168 169 170 171
      ArchX86     -> nCG' (x86NcgImpl    dflags)
      ArchX86_64  -> nCG' (x86_64NcgImpl dflags)
      ArchPPC     -> nCG' (ppcNcgImpl    dflags)
      ArchSPARC   -> nCG' (sparcNcgImpl  dflags)
      ArchARM {}  -> panic "nativeCodeGen: No NCG for ARM"
Colin Watson's avatar
Colin Watson committed
172
      ArchARM64   -> panic "nativeCodeGen: No NCG for ARM64"
173 174 175 176 177
      ArchPPC_64  -> panic "nativeCodeGen: No NCG for PPC 64"
      ArchAlpha   -> panic "nativeCodeGen: No NCG for Alpha"
      ArchMipseb  -> panic "nativeCodeGen: No NCG for mipseb"
      ArchMipsel  -> panic "nativeCodeGen: No NCG for mipsel"
      ArchUnknown -> panic "nativeCodeGen: No NCG for unknown arch"
thoughtpolice's avatar
thoughtpolice committed
178
      ArchJavaScript -> panic "nativeCodeGen: No NCG for JavaScript"
179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198

x86NcgImpl :: DynFlags -> NcgImpl (Alignment, CmmStatics) X86.Instr.Instr X86.Instr.JumpDest
x86NcgImpl dflags
 = (x86_64NcgImpl dflags) { ncg_x86fp_kludge = map x86fp_kludge }

x86_64NcgImpl :: DynFlags -> NcgImpl (Alignment, CmmStatics) X86.Instr.Instr X86.Instr.JumpDest
x86_64NcgImpl dflags
 = NcgImpl {
        cmmTopCodeGen             = X86.CodeGen.cmmTopCodeGen
       ,generateJumpTableForInstr = X86.CodeGen.generateJumpTableForInstr dflags
       ,getJumpDestBlockId        = X86.Instr.getJumpDestBlockId
       ,canShortcut               = X86.Instr.canShortcut
       ,shortcutStatics           = X86.Instr.shortcutStatics
       ,shortcutJump              = X86.Instr.shortcutJump
       ,pprNatCmmDecl             = X86.Ppr.pprNatCmmDecl
       ,maxSpillSlots             = X86.Instr.maxSpillSlots dflags
       ,allocatableRegs           = X86.Regs.allocatableRegs platform
       ,ncg_x86fp_kludge          = id
       ,ncgAllocMoreStack         = X86.Instr.allocMoreStack platform
       ,ncgExpandTop              = id
199
       ,ncgMakeFarBranches        = const id
200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217
   }
    where platform = targetPlatform dflags

ppcNcgImpl :: DynFlags -> NcgImpl CmmStatics PPC.Instr.Instr PPC.RegInfo.JumpDest
ppcNcgImpl dflags
 = NcgImpl {
        cmmTopCodeGen             = PPC.CodeGen.cmmTopCodeGen
       ,generateJumpTableForInstr = PPC.CodeGen.generateJumpTableForInstr dflags
       ,getJumpDestBlockId        = PPC.RegInfo.getJumpDestBlockId
       ,canShortcut               = PPC.RegInfo.canShortcut
       ,shortcutStatics           = PPC.RegInfo.shortcutStatics
       ,shortcutJump              = PPC.RegInfo.shortcutJump
       ,pprNatCmmDecl             = PPC.Ppr.pprNatCmmDecl
       ,maxSpillSlots             = PPC.Instr.maxSpillSlots dflags
       ,allocatableRegs           = PPC.Regs.allocatableRegs platform
       ,ncg_x86fp_kludge          = id
       ,ncgAllocMoreStack         = PPC.Instr.allocMoreStack platform
       ,ncgExpandTop              = id
218
       ,ncgMakeFarBranches        = PPC.Instr.makeFarBranches
219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236
   }
    where platform = targetPlatform dflags

sparcNcgImpl :: DynFlags -> NcgImpl CmmStatics SPARC.Instr.Instr SPARC.ShortcutJump.JumpDest
sparcNcgImpl dflags
 = NcgImpl {
        cmmTopCodeGen             = SPARC.CodeGen.cmmTopCodeGen
       ,generateJumpTableForInstr = SPARC.CodeGen.generateJumpTableForInstr dflags
       ,getJumpDestBlockId        = SPARC.ShortcutJump.getJumpDestBlockId
       ,canShortcut               = SPARC.ShortcutJump.canShortcut
       ,shortcutStatics           = SPARC.ShortcutJump.shortcutStatics
       ,shortcutJump              = SPARC.ShortcutJump.shortcutJump
       ,pprNatCmmDecl             = SPARC.Ppr.pprNatCmmDecl
       ,maxSpillSlots             = SPARC.Instr.maxSpillSlots dflags
       ,allocatableRegs           = SPARC.Regs.allocatableRegs
       ,ncg_x86fp_kludge          = id
       ,ncgAllocMoreStack         = noAllocMoreStack
       ,ncgExpandTop              = map SPARC.CodeGen.Expand.expandTop
237
       ,ncgMakeFarBranches        = const id
238
   }
239 240 241 242 243 244 245

--
-- Allocating more stack space for spilling is currently only
-- supported for the linear register allocator on x86/x86_64, the rest
-- default to the panic below.  To support allocating extra stack on
-- more platforms provide a definition of ncgAllocMoreStack.
--
246
noAllocMoreStack :: Int -> NatCmmDecl statics instr -> UniqSM (NatCmmDecl statics instr)
247 248 249 250 251 252 253 254 255
noAllocMoreStack amount _
  = panic $   "Register allocator: out of stack slots (need " ++ show amount ++ ")\n"
        ++  "   If you are trying to compile SHA1.hs from the crypto library then this\n"
        ++  "   is a known limitation in the linear allocator.\n"
        ++  "\n"
        ++  "   Try enabling the graph colouring allocator with -fregs-graph instead."
        ++  "   You can still file a bug report if you like.\n"


256 257 258 259 260 261 262 263 264 265 266 267 268 269
-- | Data accumulated during code generation. Mostly about statistics,
-- but also collects debug data for DWARF generation.
data NativeGenAcc statics instr
  = NGS { ngs_imports     :: ![[CLabel]]
        , ngs_natives     :: ![[NatCmmDecl statics instr]]
             -- ^ Native code generated, for statistics. This might
             -- hold a lot of data, so it is important to clear this
             -- field as early as possible if it isn't actually
             -- required.
        , ngs_colorStats  :: ![[Color.RegAllocStats statics instr]]
        , ngs_linearStats :: ![[Linear.RegAllocStats]]
        , ngs_labels      :: ![Label]
        , ngs_debug       :: ![DebugBlock]
        }
270

Ian Lynagh's avatar
Ian Lynagh committed
271
nativeCodeGen' :: (Outputable statics, Outputable instr, Instruction instr)
272
               => DynFlags
273
               -> Module -> ModLocation
274
               -> NcgImpl statics instr jumpDest
275
               -> Handle
276 277 278
               -> UniqSupply
               -> Stream IO RawCmmGroup ()
               -> IO UniqSupply
279
nativeCodeGen' dflags this_mod modLoc ncgImpl h us cmms
280
 = do
281
        let split_cmms  = Stream.map add_split cmms
282 283 284
        -- BufHandle is a performance hack.  We could hide it inside
        -- Pretty if it weren't for the fact that we do lots of little
        -- printDocs here (in order to do codegen in constant space).
285
        bufh <- newBufHandle h
286 287 288
        (ngs, us') <- cmmNativeGenStream dflags this_mod modLoc ncgImpl bufh us
                                         split_cmms (NGS [] [] [] [] [] [])
        finishNativeGen dflags bufh ngs
289 290 291 292 293 294 295 296 297 298 299 300

        return us'

 where  add_split tops
                | gopt Opt_SplitObjs dflags = split_marker : tops
                | otherwise                 = tops

        split_marker = CmmProc mapEmpty mkSplitMarkerLabel []
                               (ofBlockList (panic "split_marker_entry") [])


finishNativeGen :: Instruction instr
301 302 303
                => DynFlags
                -> BufHandle
                -> NativeGenAcc statics instr
304
                -> IO ()
305
finishNativeGen dflags bufh@(BufHandle _ _ h) ngs
306
 = do
307
        bFlush bufh
308

309
        let platform = targetPlatform dflags
310 311

        -- dump global NCG stats for graph coloring allocator
312 313 314
        let stats = concat (ngs_colorStats ngs)
        when (not (null stats)) $ do

315 316 317 318 319 320
                -- build the global register conflict graph
                let graphGlobal
                        = foldl Color.union Color.initGraph
                        $ [ Color.raGraph stat
                                | stat@Color.RegAllocStatsStart{} <- stats]

321
                dump_stats (Color.pprStats stats graphGlobal)
322 323 324 325 326 327 328 329

                dumpIfSet_dyn dflags
                        Opt_D_dump_asm_conflicts "Register conflict graph"
                        $ Color.dotGraph
                                (targetRegDotColor platform)
                                (Color.trivColorable platform
                                        (targetVirtualRegSqueeze platform)
                                        (targetRealRegSqueeze platform))
330
                        $ graphGlobal
331 332 333


        -- dump global NCG stats for linear allocator
334 335 336
        let linearStats = concat (ngs_linearStats ngs)
        when (not (null linearStats)) $
          dump_stats (Linear.pprStats (concat (ngs_natives ngs)) linearStats)
337 338

        -- write out the imports
Ian Lynagh's avatar
Ian Lynagh committed
339
        Pretty.printDoc Pretty.LeftMode (pprCols dflags) h
Ian Lynagh's avatar
Ian Lynagh committed
340
                $ withPprStyleDoc dflags (mkCodeStyle AsmStyle)
341
                $ makeImportsDoc dflags (concat (ngs_imports ngs))
342 343
  where
    dump_stats = dumpSDoc dflags alwaysQualify Opt_D_dump_asm_stats "NCG stats"
344

Simon Marlow's avatar
Simon Marlow committed
345
cmmNativeGenStream :: (Outputable statics, Outputable instr, Instruction instr)
346
              => DynFlags
347
              -> Module -> ModLocation
348 349
              -> NcgImpl statics instr jumpDest
              -> BufHandle
350 351
              -> UniqSupply
              -> Stream IO RawCmmGroup ()
352 353
              -> NativeGenAcc statics instr
              -> IO (NativeGenAcc statics instr, UniqSupply)
354

355
cmmNativeGenStream dflags this_mod modLoc ncgImpl h us cmm_stream ngs
356 357
 = do r <- Stream.runStream cmm_stream
      case r of
358
          Left () ->
359 360 361 362 363 364
              return (ngs { ngs_imports = reverse $ ngs_imports ngs
                          , ngs_natives = reverse $ ngs_natives ngs
                          , ngs_colorStats = reverse $ ngs_colorStats ngs
                          , ngs_linearStats = reverse $ ngs_linearStats ngs
                          },
                      us)
365
          Right (cmms, cmm_stream') -> do
366 367 368 369 370 371 372

              -- Generate debug information
              let debugFlag = gopt Opt_Debug dflags
                  !ndbgs | debugFlag = cmmDebugGen modLoc cmms
                         | otherwise = []

              -- Generate native code
373
              (ngs',us') <- cmmNativeGens dflags this_mod ncgImpl h us cmms ngs 0
374 375 376 377 378 379 380 381 382 383 384

              -- Link native code information into debug blocks
              let !ldbgs = cmmDebugLink (ngs_labels ngs') ndbgs
              dumpIfSet_dyn dflags Opt_D_dump_debug "Debug Infos"
                (vcat $ map ppr ldbgs)

              -- Strip references to native code unless we want to dump it later
              let ngs'' = ngs' { ngs_debug  = ngs_debug ngs' ++ ldbgs
                               , ngs_labels = [] }
              cmmNativeGenStream dflags this_mod modLoc ncgImpl h us'
                  cmm_stream' ngs''
385

386 387
-- | Do native code generation on all these cmms.
--
Ian Lynagh's avatar
Ian Lynagh committed
388
cmmNativeGens :: (Outputable statics, Outputable instr, Instruction instr)
389
              => DynFlags
390
              -> Module
391 392
              -> NcgImpl statics instr jumpDest
              -> BufHandle
dterei's avatar
dterei committed
393
              -> UniqSupply
Simon Peyton Jones's avatar
Simon Peyton Jones committed
394
              -> [RawCmmDecl]
395 396 397
              -> NativeGenAcc statics instr
              -> Int
              -> IO (NativeGenAcc statics instr, UniqSupply)
398

399
cmmNativeGens _ _ _ _ us [] ngs !_
400
        = return (ngs, us)
401

402
cmmNativeGens dflags this_mod ncgImpl h us (cmm : cmms) ngs count
403
 = do
404
        (us', native, imports, colorStats, linearStats)
405
                <- {-# SCC "cmmNativeGen" #-} cmmNativeGen dflags this_mod ncgImpl us cmm count
406

407 408
        emitNativeCode dflags h $ vcat $
          map (pprNatCmmDecl ncgImpl) native
409

Gabor Greif's avatar
typo  
Gabor Greif committed
410
        -- force evaluation all this stuff to avoid space leaks
411
        {-# SCC "seqString" #-} evaluate $ seqString (showSDoc dflags $ vcat $ map ppr imports)
412

413 414 415 416 417 418 419 420 421 422 423 424
        let !labels' = if gopt Opt_Debug dflags
                       then cmmDebugLabels isMetaInstr native else []
            !natives' = if dopt Opt_D_dump_asm_stats dflags
                        then native : ngs_natives ngs else []
            mCon = maybe id (:)
            ngs' = ngs{ ngs_imports     = imports : ngs_imports ngs
                      , ngs_natives     = natives'
                      , ngs_colorStats  = colorStats `mCon` ngs_colorStats ngs
                      , ngs_linearStats = linearStats `mCon` ngs_linearStats ngs
                      , ngs_labels      = ngs_labels ngs ++ labels'
                      }
        cmmNativeGens dflags this_mod ncgImpl h us' cmms ngs' (count + 1)
425

426
 where  seqString []            = ()
427
        seqString (x:xs)        = x `seq` seqString xs
428 429


430 431 432 433 434 435 436 437 438 439 440
emitNativeCode :: DynFlags -> BufHandle -> SDoc -> IO ()
emitNativeCode dflags h sdoc = do

        {-# SCC "pprNativeCode" #-} Pretty.bufLeftRender h
                $ withPprStyleDoc dflags (mkCodeStyle AsmStyle) sdoc

        -- dump native code
        dumpIfSet_dyn dflags
                Opt_D_dump_asm "Asm code"
                sdoc

441
-- | Complete native code generation phase for a single top-level chunk of Cmm.
442 443
--      Dumping the output of each stage along the way.
--      Global conflict graph and NGC stats
444
cmmNativeGen
Ian Lynagh's avatar
Ian Lynagh committed
445
        :: (Outputable statics, Outputable instr, Instruction instr)
446
    => DynFlags
447
    -> Module
448
    -> NcgImpl statics instr jumpDest
449 450 451 452 453 454 455 456
        -> UniqSupply
        -> RawCmmDecl                                   -- ^ the cmm to generate code for
        -> Int                                          -- ^ sequence number of this top thing
        -> IO   ( UniqSupply
                , [NatCmmDecl statics instr]                -- native code
                , [CLabel]                                  -- things imported by this cmm
                , Maybe [Color.RegAllocStats statics instr] -- stats for the coloring register allocator
                , Maybe [Linear.RegAllocStats])             -- stats for the linear register allocators
457

458
cmmNativeGen dflags this_mod ncgImpl us cmm count
459
 = do
460
        let platform = targetPlatform dflags
461

462 463 464
        -- rewrite assignments to global regs
        let fixed_cmm =
                {-# SCC "fixStgRegisters" #-}
465
                fixStgRegisters dflags cmm
466

467 468 469
        -- cmm to cmm optimisations
        let (opt_cmm, imports) =
                {-# SCC "cmmToCmm" #-}
470
                cmmToCmm dflags this_mod fixed_cmm
471

472 473
        dumpIfSet_dyn dflags
                Opt_D_dump_opt_cmm "Optimised Cmm"
474
                (pprCmmGroup [opt_cmm])
475

476 477 478
        -- generate native code from cmm
        let ((native, lastMinuteImports), usGen) =
                {-# SCC "genMachCode" #-}
479
                initUs us $ genMachCode dflags this_mod (cmmTopCodeGen ncgImpl) opt_cmm
480 481 482

        dumpIfSet_dyn dflags
                Opt_D_dump_asm_native "Native code"
483
                (vcat $ map (pprNatCmmDecl ncgImpl) native)
484 485 486 487 488

        -- tag instructions with register liveness information
        let (withLiveness, usLive) =
                {-# SCC "regLiveness" #-}
                initUs usGen
489
                        $ mapM (regLiveness platform)
490 491 492 493
                        $ map natCmmTopToLive native

        dumpIfSet_dyn dflags
                Opt_D_dump_asm_liveness "Liveness annotations added"
Ian Lynagh's avatar
Ian Lynagh committed
494
                (vcat $ map ppr withLiveness)
495 496 497

        -- allocate registers
        (alloced, usAlloc, ppr_raStatsColor, ppr_raStatsLinear) <-
498 499 500 501
         if False
           -- Disabled, see #7679, #8657
           --  ( gopt Opt_RegsGraph dflags
           --  || gopt Opt_RegsIterative dflags)
502 503 504 505 506 507
          then do
                -- the regs usable for allocation
                let (alloc_regs :: UniqFM (UniqSet RealReg))
                        = foldr (\r -> plusUFM_C unionUniqSets
                                        $ unitUFM (targetClassOfRealReg platform r) (unitUniqSet r))
                                emptyUFM
508
                        $ allocatableRegs ncgImpl
509 510 511 512 513 514 515 516

                -- do the graph coloring register allocation
                let ((alloced, regAllocStats), usAlloc)
                        = {-# SCC "RegAlloc" #-}
                          initUs usLive
                          $ Color.regAlloc
                                dflags
                                alloc_regs
517
                                (mkUniqSet [0 .. maxSpillSlots ncgImpl])
518 519 520 521 522
                                withLiveness

                -- dump out what happened during register allocation
                dumpIfSet_dyn dflags
                        Opt_D_dump_asm_regalloc "Registers allocated"
523
                        (vcat $ map (pprNatCmmDecl ncgImpl) alloced)
524 525 526 527 528 529

                dumpIfSet_dyn dflags
                        Opt_D_dump_asm_regalloc_stages "Build/spill stages"
                        (vcat   $ map (\(stage, stats)
                                        -> text "# --------------------------"
                                        $$ text "#  cmm " <> int count <> text " Stage " <> int stage
Ian Lynagh's avatar
Ian Lynagh committed
530
                                        $$ ppr stats)
531 532 533
                                $ zip [0..] regAllocStats)

                let mPprStats =
534
                        if dopt Opt_D_dump_asm_stats dflags
535 536 537 538 539 540 541 542 543 544 545
                         then Just regAllocStats else Nothing

                -- force evaluation of the Maybe to avoid space leak
                mPprStats `seq` return ()

                return  ( alloced, usAlloc
                        , mPprStats
                        , Nothing)

          else do
                -- do linear register allocation
546 547 548 549 550
                let reg_alloc proc = do
                       (alloced, maybe_more_stack, ra_stats) <-
                               Linear.regAlloc dflags proc
                       case maybe_more_stack of
                         Nothing -> return ( alloced, ra_stats )
551 552 553
                         Just amount -> do
                           alloced' <- ncgAllocMoreStack ncgImpl amount alloced
                           return (alloced', ra_stats )
554

555 556 557 558
                let ((alloced, regAllocStats), usAlloc)
                        = {-# SCC "RegAlloc" #-}
                          initUs usLive
                          $ liftM unzip
559
                          $ mapM reg_alloc withLiveness
560 561 562

                dumpIfSet_dyn dflags
                        Opt_D_dump_asm_regalloc "Registers allocated"
563
                        (vcat $ map (pprNatCmmDecl ncgImpl) alloced)
564 565

                let mPprStats =
566
                        if dopt Opt_D_dump_asm_stats dflags
567 568 569 570 571 572 573 574
                         then Just (catMaybes regAllocStats) else Nothing

                -- force evaluation of the Maybe to avoid space leak
                mPprStats `seq` return ()

                return  ( alloced, usAlloc
                        , Nothing
                        , mPprStats)
575

576 577 578 579 580 581 582
        ---- x86fp_kludge.  This pass inserts ffree instructions to clear
        ---- the FPU stack on x86.  The x86 ABI requires that the FPU stack
        ---- is clear, and library functions can return odd results if it
        ---- isn't.
        ----
        ---- NB. must happen before shortcutBranches, because that
        ---- generates JXX_GBLs which we can't fix up in x86fp_kludge.
583
        let kludged = {-# SCC "x86fp_kludge" #-} ncg_x86fp_kludge ncgImpl alloced
584 585

        ---- generate jump tables
586 587
        let tabled      =
                {-# SCC "generateJumpTables" #-}
588
                generateJumpTables ncgImpl kludged
589

590 591 592 593
        ---- shortcut branches
        let shorted     =
                {-# SCC "shortcutBranches" #-}
                shortcutBranches dflags ncgImpl tabled
594

595 596 597 598
        ---- sequence blocks
        let sequenced   =
                {-# SCC "sequenceBlocks" #-}
                map (sequenceTop ncgImpl) shorted
599

600
        ---- expansion of SPARC synthetic instrs
601 602
        let expanded =
                {-# SCC "sparc_expand" #-}
603
                ncgExpandTop ncgImpl sequenced
604

605 606
        dumpIfSet_dyn dflags
                Opt_D_dump_asm_expanded "Synthetic instructions expanded"
607
                (vcat $ map (pprNatCmmDecl ncgImpl) expanded)
608

609 610 611 612 613
        return  ( usAlloc
                , expanded
                , lastMinuteImports ++ imports
                , ppr_raStatsColor
                , ppr_raStatsLinear)
614

615

Simon Peyton Jones's avatar
Simon Peyton Jones committed
616
x86fp_kludge :: NatCmmDecl (Alignment, CmmStatics) X86.Instr.Instr -> NatCmmDecl (Alignment, CmmStatics) X86.Instr.Instr
617
x86fp_kludge top@(CmmData _ _) = top
618 619
x86fp_kludge (CmmProc info lbl live (ListGraph code)) =
        CmmProc info lbl live (ListGraph $ X86.Instr.i386_insert_ffrees code)
620

621

622
-- | Build a doc for all the imports.
623
--
624
makeImportsDoc :: DynFlags -> [CLabel] -> SDoc
625
makeImportsDoc dflags imports
626
 = dyld_stubs imports
627
            $$
628 629 630
            -- On recent versions of Darwin, the linker supports
            -- dead-stripping of code and data on a per-symbol basis.
            -- There's a hack to make this work in PprMach.pprNatCmmDecl.
ian@well-typed.com's avatar
ian@well-typed.com committed
631
            (if platformHasSubsectionsViaSymbols platform
632
             then text ".subsections_via_symbols"
633
             else Outputable.empty)
634
            $$
635 636 637 638 639 640
                -- On recent GNU ELF systems one can mark an object file
                -- as not requiring an executable stack. If all objects
                -- linked into a program have this note then the program
                -- will not use an executable stack, which is good for
                -- security. GHC generated code does not need an executable
                -- stack so add the note in:
ian@well-typed.com's avatar
ian@well-typed.com committed
641
            (if platformHasGnuNonexecStack platform
642
             then text ".section .note.GNU-stack,\"\",@progbits"
643
             else Outputable.empty)
644
            $$
Gabor Greif's avatar
Gabor Greif committed
645
                -- And just because every other compiler does, let's stick in
646
                -- an identifier directive: .ident "GHC x.y.z"
ian@well-typed.com's avatar
ian@well-typed.com committed
647
            (if platformHasIdentDirective platform
648 649
             then let compilerIdent = text "GHC" <+> text cProjectVersion
                   in text ".ident" <+> doubleQuotes compilerIdent
650
             else Outputable.empty)
651

652
 where
ian@well-typed.com's avatar
ian@well-typed.com committed
653 654 655 656
        platform = targetPlatform dflags
        arch = platformArch platform
        os   = platformOS   platform

657 658
        -- Generate "symbol stubs" for all external symbols that might
        -- come from a dynamic library.
659 660
        dyld_stubs :: [CLabel] -> SDoc
{-      dyld_stubs imps = vcat $ map pprDyldSymbolStub $
661 662 663 664
                                    map head $ group $ sort imps-}
        -- (Hack) sometimes two Labels pretty-print the same, but have
        -- different uniques; so we compare their text versions...
        dyld_stubs imps
Ian Lynagh's avatar
Ian Lynagh committed
665
                | needImportedSymbols dflags arch os
666
                = vcat $
Ian Lynagh's avatar
Ian Lynagh committed
667 668
                        (pprGotDeclaration dflags arch os :) $
                        map ( pprImportedSymbol dflags platform . fst . head) $
669 670 671 672 673
                        groupBy (\(_,a) (_,b) -> a == b) $
                        sortBy (\(_,a) (_,b) -> compare a b) $
                        map doPpr $
                        imps
                | otherwise
674
                = Outputable.empty
675

Ian Lynagh's avatar
Ian Lynagh committed
676
        doPpr lbl = (lbl, renderWithStyle dflags (pprCLabel platform lbl) astyle)
677
        astyle = mkCodeStyle AsmStyle
678 679


680 681 682 683 684 685 686 687 688
-- -----------------------------------------------------------------------------
-- Sequencing the basic blocks

-- Cmm BasicBlocks are self-contained entities: they always end in a
-- jump, either non-local or to another basic block in the same proc.
-- In this phase, we attempt to place the basic blocks in a sequence
-- such that as many of the local jumps as possible turn into
-- fallthroughs.

689 690
sequenceTop
        :: Instruction instr
Simon Peyton Jones's avatar
Simon Peyton Jones committed
691
    => NcgImpl statics instr jumpDest -> NatCmmDecl statics instr -> NatCmmDecl statics instr
692

693
sequenceTop _       top@(CmmData _ _) = top
694
sequenceTop ncgImpl (CmmProc info lbl live (ListGraph blocks)) =
695
  CmmProc info lbl live (ListGraph $ ncgMakeFarBranches ncgImpl info $ sequenceBlocks info blocks)
696 697 698 699 700 701 702 703

-- The algorithm is very simple (and stupid): we make a graph out of
-- the blocks where there is an edge from one block to another iff the
-- first block ends by jumping to the second.  Then we topologically
-- sort this graph.  Then traverse the list: for each block, we first
-- output the block, then if it has an out edge, we move the
-- destination of the out edge to the front of the list, and continue.

704
-- FYI, the classic layout for basic blocks uses postorder DFS; this
705
-- algorithm is implemented in Hoopl.
706

707 708
sequenceBlocks
        :: Instruction instr
709 710
        => BlockEnv i
        -> [NatBasicBlock instr]
711
        -> [NatBasicBlock instr]
712

713 714 715
sequenceBlocks _ [] = []
sequenceBlocks infos (entry:blocks) =
  seqBlocks infos (mkNode entry : reverse (flattenSCCs (sccBlocks blocks)))
716 717
  -- the first block is the entry point ==> it must remain at the start.

718

719 720 721 722
sccBlocks
        :: Instruction instr
        => [NatBasicBlock instr]
        -> [SCC ( NatBasicBlock instr
723 724
                , BlockId
                , [BlockId])]
725

726
sccBlocks blocks = stronglyConnCompFromEdgedVerticesR (map mkNode blocks)
727

728 729
-- we're only interested in the last instruction of
-- the block, and only if it has a single destination.
730 731
getOutEdges
        :: Instruction instr
732
        => [instr] -> [BlockId]
733

734 735
getOutEdges instrs
        = case jumpDestsOfInstr (last instrs) of
736
                [one] -> [one]
737
                _many -> []
738

dterei's avatar
dterei committed
739 740
mkNode :: (Instruction t)
       => GenBasicBlock t
741 742 743 744 745 746 747 748 749 750 751
       -> (GenBasicBlock t, BlockId, [BlockId])
mkNode block@(BasicBlock id instrs) = (block, id, getOutEdges instrs)

seqBlocks :: BlockEnv i -> [(GenBasicBlock t1, BlockId, [BlockId])]
                        -> [GenBasicBlock t1]
seqBlocks _ [] = []
seqBlocks infos ((block,_,[]) : rest)
  = block : seqBlocks infos rest
seqBlocks infos ((block@(BasicBlock id instrs),_,[next]) : rest)
  | can_fallthrough = BasicBlock id (init instrs) : seqBlocks infos rest'
  | otherwise       = block : seqBlocks infos rest'
752
  where
753 754
        can_fallthrough = not (mapMember next infos) && can_reorder
        (can_reorder, rest') = reorder next [] rest
755 756
          -- TODO: we should do a better job for cycles; try to maximise the
          -- fallthroughs within a loop.
757
seqBlocks _ _ = panic "AsmCodegen:seqBlocks"
758

dterei's avatar
dterei committed
759 760
reorder :: (Eq a) => a -> [(t, a, t1)] -> [(t, a, t1)] -> (Bool, [(t, a, t1)])
reorder  _ accum [] = (False, reverse accum)
761 762 763 764
reorder id accum (b@(block,id',out) : rest)
  | id == id'  = (True, (block,id,out) : reverse accum ++ rest)
  | otherwise  = reorder id (b:accum) rest

765

766 767 768 769 770 771
-- -----------------------------------------------------------------------------
-- Generate jump tables

-- Analyzes all native code and generates data sections for all jump
-- table instructions.
generateJumpTables
772 773 774
        :: NcgImpl statics instr jumpDest
        -> [NatCmmDecl statics instr] -> [NatCmmDecl statics instr]
generateJumpTables ncgImpl xs = concatMap f xs
775
    where f p@(CmmProc _ _ _ (ListGraph xs)) = p : concatMap g xs
776
          f p = [p]
777
          g (BasicBlock _ xs) = catMaybes (map (generateJumpTableForInstr ncgImpl) xs)
778

779 780 781
-- -----------------------------------------------------------------------------
-- Shortcut branches

782
shortcutBranches
783
        :: DynFlags
784
    -> NcgImpl statics instr jumpDest
785 786
        -> [NatCmmDecl statics instr]
        -> [NatCmmDecl statics instr]
787

788
shortcutBranches dflags ncgImpl tops
789
  | optLevel dflags < 1 = tops    -- only with -O or higher
790
  | otherwise           = map (apply_mapping ncgImpl mapping) tops'
791
  where
792
    (tops', mappings) = mapAndUnzip (build_mapping ncgImpl) tops
793 794
    mapping = foldr plusUFM emptyUFM mappings

795
build_mapping :: NcgImpl statics instr jumpDest
796 797
              -> GenCmmDecl d (BlockEnv t) (ListGraph instr)
              -> (GenCmmDecl d (BlockEnv t) (ListGraph instr), UniqFM jumpDest)
798
build_mapping _ top@(CmmData _ _) = (top, emptyUFM)
799 800 801 802
build_mapping _ (CmmProc info lbl live (ListGraph []))
  = (CmmProc info lbl live (ListGraph []), emptyUFM)
build_mapping ncgImpl (CmmProc info lbl live (ListGraph (head:blocks)))
  = (CmmProc info lbl live (ListGraph (head:others)), mapping)
803 804 805 806 807
        -- drop the shorted blocks, but don't ever drop the first one,
        -- because it is pointed to by a global label.
  where
    -- find all the blocks that just consist of a jump that can be
    -- shorted.
808 809 810
    -- Don't completely eliminate loops here -- that can leave a dangling jump!
    (_, shortcut_blocks, others) = foldl split (emptyBlockSet, [], []) blocks
    split (s, shortcut_blocks, others) b@(BasicBlock id [insn])
811 812
        | Just jd <- canShortcut ncgImpl insn,
          Just dest <- getJumpDestBlockId ncgImpl jd,
813
          not (has_info id),
814
          (setMember dest s) || dest == id -- loop checks
815 816
        = (s, shortcut_blocks, b : others)
    split (s, shortcut_blocks, others) (BasicBlock id [insn])
817 818
        | Just dest <- canShortcut ncgImpl insn,
          not (has_info id)
819
        = (setInsert id s, (id,dest) : shortcut_blocks, others)
820 821
    split (s, shortcut_blocks, others) other = (s, shortcut_blocks, other : others)

822 823
    -- do not eliminate blocks that have an info table
    has_info l = mapMember l info
824 825 826 827

    -- build a mapping from BlockId to JumpDest for shorting branches
    mapping = foldl add emptyUFM shortcut_blocks
    add ufm (id,dest) = addToUFM ufm id dest
828

829
apply_mapping :: NcgImpl statics instr jumpDest
830
              -> UniqFM jumpDest
Simon Peyton Jones's avatar
Simon Peyton Jones committed
831 832
              -> GenCmmDecl statics h (ListGraph instr)
              -> GenCmmDecl statics h (ListGraph instr)