AsmCodeGen.hs 48.9 KB
Newer Older
1 2 3
-- -----------------------------------------------------------------------------
--
-- (c) The University of Glasgow 1993-2004
4
--
5 6 7
-- This is the top-level module in the native code generator.
--
-- -----------------------------------------------------------------------------
8

9 10
{-# LANGUAGE BangPatterns, CPP, GADTs, ScopedTypeVariables, UnboxedTuples #-}

tjakway's avatar
tjakway committed
11 12 13 14 15 16 17 18 19 20 21 22
module AsmCodeGen (
                    -- * Module entry point
                    nativeCodeGen

                    -- * Test-only exports: see trac #12744
                    -- used by testGraphNoSpills, which needs to access
                    -- the register allocator intermediate data structures
                    -- cmmNativeGen emits
                  , cmmNativeGen
                  , NcgImpl(..)
                  , x86NcgImpl
                  ) where
23

24
#include "HsVersions.h"
25
#include "nativeGen/NCG.h"
26

27

28 29
import GhcPrelude

30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46
import qualified X86.CodeGen
import qualified X86.Regs
import qualified X86.Instr
import qualified X86.Ppr

import qualified SPARC.CodeGen
import qualified SPARC.Regs
import qualified SPARC.Instr
import qualified SPARC.Ppr
import qualified SPARC.ShortcutJump
import qualified SPARC.CodeGen.Expand

import qualified PPC.CodeGen
import qualified PPC.Regs
import qualified PPC.RegInfo
import qualified PPC.Instr
import qualified PPC.Ppr
47 48

import RegAlloc.Liveness
49
import qualified RegAlloc.Linear.Main           as Linear
50

51 52 53 54
import qualified GraphColor                     as Color
import qualified RegAlloc.Graph.Main            as Color
import qualified RegAlloc.Graph.Stats           as Color
import qualified RegAlloc.Graph.TrivColorable   as Color
55

56
import AsmUtils
57
import TargetReg
58
import Platform
59
import BlockLayout
60
import Config
61 62 63 64
import Instruction
import PIC
import Reg
import NCGMonad
65
import CFG
Peter Wortmann's avatar
Peter Wortmann committed
66
import Dwarf
67
import Debug
68

69
import BlockId
70
import CgUtils          ( fixStgRegisters )
71 72
import Cmm
import CmmUtils
73 74 75
import Hoopl.Collections
import Hoopl.Label
import Hoopl.Block
Simon Marlow's avatar
Simon Marlow committed
76
import CmmOpt           ( cmmMachOpFold )
77
import PprCmm
78
import CLabel
79

80
import UniqFM
81
import UniqSupply
82 83
import DynFlags
import Util
84

85
import BasicTypes       ( Alignment )
86
import qualified Pretty
87
import BufWrite
88
import Outputable
89
import FastString
90
import UniqSet
91
import ErrUtils
92
import Module
93 94
import Stream (Stream)
import qualified Stream
95

96 97
-- DEBUGGING ONLY
--import OrdList
98

99
import Data.List
100
import Data.Maybe
101
import Data.Ord         ( comparing )
102
import Control.Exception
103
import Control.Monad
104
import System.IO
105

106 107 108
{-
The native-code generator has machine-independent and
machine-dependent modules.
109

110 111 112 113
This module ("AsmCodeGen") is the top-level machine-independent
module.  Before entering machine-dependent land, we do some
machine-independent optimisations (defined below) on the
'CmmStmts's.
114

115 116 117 118 119 120 121 122 123
We convert to the machine-specific 'Instr' datatype with
'cmmCodeGen', assuming an infinite supply of registers.  We then use
a machine-independent register allocator ('regAlloc') to rejoin
reality.  Obviously, 'regAlloc' has machine-specific helper
functions (see about "RegAllocInfo" below).

Finally, we order the basic blocks of the function so as to minimise
the number of jumps between blocks, by utilising fallthrough wherever
possible.
124 125

The machine-dependent bits break down as follows:
126 127

  * ["MachRegs"]  Everything about the target platform's machine
128 129 130
    registers (and immediate operands, and addresses, which tend to
    intermingle/interact with registers).

131
  * ["MachInstrs"]  Includes the 'Instr' datatype (possibly should
132
    have a module of its own), plus a miscellany of other things
133
    (e.g., 'targetDoubleSize', 'smStablePtrTable', ...)
134

135
  * ["MachCodeGen"]  is where 'Cmm' stuff turns into
136
    machine instructions.
137

138
  * ["PprMach"] 'pprInstr' turns an 'Instr' into text (well, really
139
    a 'SDoc').
140

141 142
  * ["RegAllocInfo"] In the register allocator, we manipulate
    'MRegsState's, which are 'BitSet's, one bit per machine register.
143 144
    When we want to say something about a specific machine register
    (e.g., ``it gets clobbered by this instruction''), we set/unset
145
    its bit.  Obviously, we do this 'BitSet' thing for efficiency
146 147
    reasons.

148
    The 'RegAllocInfo' module collects together the machine-specific
149 150
    info needed to do register allocation.

151 152
   * ["RegisterAlloc"] The (machine-independent) register allocator.
-}
153

154
--------------------
155
nativeCodeGen :: DynFlags -> Module -> ModLocation -> Handle -> UniqSupply
156
              -> Stream IO RawCmmGroup ()
157
              -> IO UniqSupply
158
nativeCodeGen dflags this_mod modLoc h us cmms
159
 = let platform = targetPlatform dflags
160 161
       nCG' :: ( Outputable statics, Outputable instr
               , Outputable jumpDest, Instruction instr)
162
            => NcgImpl statics instr jumpDest -> IO UniqSupply
163
       nCG' ncgImpl = nativeCodeGen' dflags this_mod modLoc ncgImpl h us cmms
164
   in case platformArch platform of
165 166 167 168
      ArchX86       -> nCG' (x86NcgImpl    dflags)
      ArchX86_64    -> nCG' (x86_64NcgImpl dflags)
      ArchPPC       -> nCG' (ppcNcgImpl    dflags)
      ArchSPARC     -> nCG' (sparcNcgImpl  dflags)
169
      ArchSPARC64   -> panic "nativeCodeGen: No NCG for SPARC64"
170 171 172 173 174 175 176 177
      ArchARM {}    -> panic "nativeCodeGen: No NCG for ARM"
      ArchARM64     -> panic "nativeCodeGen: No NCG for ARM64"
      ArchPPC_64 _  -> nCG' (ppcNcgImpl    dflags)
      ArchAlpha     -> panic "nativeCodeGen: No NCG for Alpha"
      ArchMipseb    -> panic "nativeCodeGen: No NCG for mipseb"
      ArchMipsel    -> panic "nativeCodeGen: No NCG for mipsel"
      ArchUnknown   -> panic "nativeCodeGen: No NCG for unknown arch"
      ArchJavaScript-> panic "nativeCodeGen: No NCG for JavaScript"
178

179 180
x86NcgImpl :: DynFlags -> NcgImpl (Alignment, CmmStatics)
                                  X86.Instr.Instr X86.Instr.JumpDest
181 182 183
x86NcgImpl dflags
 = (x86_64NcgImpl dflags) { ncg_x86fp_kludge = map x86fp_kludge }

184 185
x86_64NcgImpl :: DynFlags -> NcgImpl (Alignment, CmmStatics)
                                  X86.Instr.Instr X86.Instr.JumpDest
186 187 188 189 190 191 192 193 194 195 196 197 198 199
x86_64NcgImpl dflags
 = NcgImpl {
        cmmTopCodeGen             = X86.CodeGen.cmmTopCodeGen
       ,generateJumpTableForInstr = X86.CodeGen.generateJumpTableForInstr dflags
       ,getJumpDestBlockId        = X86.Instr.getJumpDestBlockId
       ,canShortcut               = X86.Instr.canShortcut
       ,shortcutStatics           = X86.Instr.shortcutStatics
       ,shortcutJump              = X86.Instr.shortcutJump
       ,pprNatCmmDecl             = X86.Ppr.pprNatCmmDecl
       ,maxSpillSlots             = X86.Instr.maxSpillSlots dflags
       ,allocatableRegs           = X86.Regs.allocatableRegs platform
       ,ncg_x86fp_kludge          = id
       ,ncgAllocMoreStack         = X86.Instr.allocMoreStack platform
       ,ncgExpandTop              = id
200
       ,ncgMakeFarBranches        = const id
201
       ,extractUnwindPoints       = X86.CodeGen.extractUnwindPoints
202
       ,invertCondBranches        = X86.CodeGen.invertCondBranches
203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220
   }
    where platform = targetPlatform dflags

ppcNcgImpl :: DynFlags -> NcgImpl CmmStatics PPC.Instr.Instr PPC.RegInfo.JumpDest
ppcNcgImpl dflags
 = NcgImpl {
        cmmTopCodeGen             = PPC.CodeGen.cmmTopCodeGen
       ,generateJumpTableForInstr = PPC.CodeGen.generateJumpTableForInstr dflags
       ,getJumpDestBlockId        = PPC.RegInfo.getJumpDestBlockId
       ,canShortcut               = PPC.RegInfo.canShortcut
       ,shortcutStatics           = PPC.RegInfo.shortcutStatics
       ,shortcutJump              = PPC.RegInfo.shortcutJump
       ,pprNatCmmDecl             = PPC.Ppr.pprNatCmmDecl
       ,maxSpillSlots             = PPC.Instr.maxSpillSlots dflags
       ,allocatableRegs           = PPC.Regs.allocatableRegs platform
       ,ncg_x86fp_kludge          = id
       ,ncgAllocMoreStack         = PPC.Instr.allocMoreStack platform
       ,ncgExpandTop              = id
221
       ,ncgMakeFarBranches        = PPC.Instr.makeFarBranches
222
       ,extractUnwindPoints       = const []
223
       ,invertCondBranches        = \_ _ -> id
224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241
   }
    where platform = targetPlatform dflags

sparcNcgImpl :: DynFlags -> NcgImpl CmmStatics SPARC.Instr.Instr SPARC.ShortcutJump.JumpDest
sparcNcgImpl dflags
 = NcgImpl {
        cmmTopCodeGen             = SPARC.CodeGen.cmmTopCodeGen
       ,generateJumpTableForInstr = SPARC.CodeGen.generateJumpTableForInstr dflags
       ,getJumpDestBlockId        = SPARC.ShortcutJump.getJumpDestBlockId
       ,canShortcut               = SPARC.ShortcutJump.canShortcut
       ,shortcutStatics           = SPARC.ShortcutJump.shortcutStatics
       ,shortcutJump              = SPARC.ShortcutJump.shortcutJump
       ,pprNatCmmDecl             = SPARC.Ppr.pprNatCmmDecl
       ,maxSpillSlots             = SPARC.Instr.maxSpillSlots dflags
       ,allocatableRegs           = SPARC.Regs.allocatableRegs
       ,ncg_x86fp_kludge          = id
       ,ncgAllocMoreStack         = noAllocMoreStack
       ,ncgExpandTop              = map SPARC.CodeGen.Expand.expandTop
242
       ,ncgMakeFarBranches        = const id
243
       ,extractUnwindPoints       = const []
244
       ,invertCondBranches        = \_ _ -> id
245
   }
246 247 248 249 250 251 252

--
-- Allocating more stack space for spilling is currently only
-- supported for the linear register allocator on x86/x86_64, the rest
-- default to the panic below.  To support allocating extra stack on
-- more platforms provide a definition of ncgAllocMoreStack.
--
253 254
noAllocMoreStack :: Int -> NatCmmDecl statics instr
                 -> UniqSM (NatCmmDecl statics instr, [(BlockId,BlockId)])
255 256 257 258 259 260 261 262 263
noAllocMoreStack amount _
  = panic $   "Register allocator: out of stack slots (need " ++ show amount ++ ")\n"
        ++  "   If you are trying to compile SHA1.hs from the crypto library then this\n"
        ++  "   is a known limitation in the linear allocator.\n"
        ++  "\n"
        ++  "   Try enabling the graph colouring allocator with -fregs-graph instead."
        ++  "   You can still file a bug report if you like.\n"


264 265 266 267 268 269 270 271 272 273 274 275 276
-- | Data accumulated during code generation. Mostly about statistics,
-- but also collects debug data for DWARF generation.
data NativeGenAcc statics instr
  = NGS { ngs_imports     :: ![[CLabel]]
        , ngs_natives     :: ![[NatCmmDecl statics instr]]
             -- ^ Native code generated, for statistics. This might
             -- hold a lot of data, so it is important to clear this
             -- field as early as possible if it isn't actually
             -- required.
        , ngs_colorStats  :: ![[Color.RegAllocStats statics instr]]
        , ngs_linearStats :: ![[Linear.RegAllocStats]]
        , ngs_labels      :: ![Label]
        , ngs_debug       :: ![DebugBlock]
277
        , ngs_dwarfFiles  :: !DwarfFiles
278 279 280
        , ngs_unwinds     :: !(LabelMap [UnwindPoint])
             -- ^ see Note [Unwinding information in the NCG]
             -- and Note [What is this unwinding business?] in Debug.
281
        }
282

283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307
{-
Note [Unwinding information in the NCG]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Unwind information is a type of metadata which allows a debugging tool
to reconstruct the values of machine registers at the time a procedure was
entered. For the most part, the production of unwind information is handled by
the Cmm stage, where it is represented by CmmUnwind nodes.

Unfortunately, the Cmm stage doesn't know everything necessary to produce
accurate unwinding information. For instance, the x86-64 calling convention
requires that the stack pointer be aligned to 16 bytes, which in turn means that
GHC must sometimes add padding to $sp prior to performing a foreign call. When
this happens unwind information must be updated accordingly.
For this reason, we make the NCG backends responsible for producing
unwinding tables (with the extractUnwindPoints function in NcgImpl).

We accumulate the produced unwind tables over CmmGroups in the ngs_unwinds
field of NativeGenAcc. This is a label map which contains an entry for each
procedure, containing a list of unwinding points (e.g. a label and an associated
unwinding table).

See also Note [What is this unwinding business?] in Debug.
-}

308 309
nativeCodeGen' :: (Outputable statics, Outputable instr,Outputable jumpDest,
                   Instruction instr)
310
               => DynFlags
311
               -> Module -> ModLocation
312
               -> NcgImpl statics instr jumpDest
313
               -> Handle
314 315 316
               -> UniqSupply
               -> Stream IO RawCmmGroup ()
               -> IO UniqSupply
317
nativeCodeGen' dflags this_mod modLoc ncgImpl h us cmms
318
 = do
319 320 321
        -- BufHandle is a performance hack.  We could hide it inside
        -- Pretty if it weren't for the fact that we do lots of little
        -- printDocs here (in order to do codegen in constant space).
322
        bufh <- newBufHandle h
323
        let ngs0 = NGS [] [] [] [] [] [] emptyUFM mapEmpty
324
        (ngs, us') <- cmmNativeGenStream dflags this_mod modLoc ncgImpl bufh us
325
                                         cmms ngs0
Peter Wortmann's avatar
Peter Wortmann committed
326
        finishNativeGen dflags modLoc bufh us' ngs
327 328

finishNativeGen :: Instruction instr
329
                => DynFlags
Peter Wortmann's avatar
Peter Wortmann committed
330
                -> ModLocation
331
                -> BufHandle
Peter Wortmann's avatar
Peter Wortmann committed
332
                -> UniqSupply
333
                -> NativeGenAcc statics instr
Peter Wortmann's avatar
Peter Wortmann committed
334 335
                -> IO UniqSupply
finishNativeGen dflags modLoc bufh@(BufHandle _ _ h) us ngs
336
 = do
Peter Wortmann's avatar
Peter Wortmann committed
337
        -- Write debug data and finish
Ben Gamari's avatar
Ben Gamari committed
338
        let emitDw = debugLevel dflags > 0
Peter Wortmann's avatar
Peter Wortmann committed
339 340 341 342
        us' <- if not emitDw then return us else do
          (dwarf, us') <- dwarfGen dflags modLoc us (ngs_debug ngs)
          emitNativeCode dflags bufh dwarf
          return us'
343
        bFlush bufh
344

345
        -- dump global NCG stats for graph coloring allocator
346 347 348
        let stats = concat (ngs_colorStats ngs)
        when (not (null stats)) $ do

Peter Wortmann's avatar
Peter Wortmann committed
349 350
          -- build the global register conflict graph
          let graphGlobal
351
                  = foldl' Color.union Color.initGraph
Peter Wortmann's avatar
Peter Wortmann committed
352 353
                  $ [ Color.raGraph stat
                          | stat@Color.RegAllocStatsStart{} <- stats]
354

Peter Wortmann's avatar
Peter Wortmann committed
355
          dump_stats (Color.pprStats stats graphGlobal)
356

Peter Wortmann's avatar
Peter Wortmann committed
357 358 359 360 361 362 363 364 365
          let platform = targetPlatform dflags
          dumpIfSet_dyn dflags
                  Opt_D_dump_asm_conflicts "Register conflict graph"
                  $ Color.dotGraph
                          (targetRegDotColor platform)
                          (Color.trivColorable platform
                                  (targetVirtualRegSqueeze platform)
                                  (targetRealRegSqueeze platform))
                  $ graphGlobal
366 367 368


        -- dump global NCG stats for linear allocator
369 370 371
        let linearStats = concat (ngs_linearStats ngs)
        when (not (null linearStats)) $
          dump_stats (Linear.pprStats (concat (ngs_natives ngs)) linearStats)
372 373

        -- write out the imports
374
        printSDocLn Pretty.LeftMode dflags h (mkCodeStyle AsmStyle)
375
                $ makeImportsDoc dflags (concat (ngs_imports ngs))
Peter Wortmann's avatar
Peter Wortmann committed
376
        return us'
377 378
  where
    dump_stats = dumpSDoc dflags alwaysQualify Opt_D_dump_asm_stats "NCG stats"
379

380 381
cmmNativeGenStream :: (Outputable statics, Outputable instr
                      ,Outputable jumpDest, Instruction instr)
382
              => DynFlags
383
              -> Module -> ModLocation
384 385
              -> NcgImpl statics instr jumpDest
              -> BufHandle
386 387
              -> UniqSupply
              -> Stream IO RawCmmGroup ()
388 389
              -> NativeGenAcc statics instr
              -> IO (NativeGenAcc statics instr, UniqSupply)
390

391
cmmNativeGenStream dflags this_mod modLoc ncgImpl h us cmm_stream ngs
392 393
 = do r <- Stream.runStream cmm_stream
      case r of
394 395 396 397 398 399 400 401 402 403
        Left () ->
          return (ngs { ngs_imports = reverse $ ngs_imports ngs
                      , ngs_natives = reverse $ ngs_natives ngs
                      , ngs_colorStats = reverse $ ngs_colorStats ngs
                      , ngs_linearStats = reverse $ ngs_linearStats ngs
                      },
                  us)
        Right (cmms, cmm_stream') -> do

          -- Generate debug information
404
          let debugFlag = debugLevel dflags > 0
405 406 407 408
              !ndbgs | debugFlag = cmmDebugGen modLoc cmms
                     | otherwise = []
              dbgMap = debugToMap ndbgs

Ben Gamari's avatar
Ben Gamari committed
409
          -- Generate native code
410
          (ngs',us') <- cmmNativeGens dflags this_mod modLoc ncgImpl h
Ben Gamari's avatar
Ben Gamari committed
411
                                             dbgMap us cmms ngs 0
412 413

          -- Link native code information into debug blocks
414 415
          -- See Note [What is this unwinding business?] in Debug.
          let !ldbgs = cmmDebugLink (ngs_labels ngs') (ngs_unwinds ngs') ndbgs
416 417 418
          dumpIfSet_dyn dflags Opt_D_dump_debug "Debug Infos"
            (vcat $ map ppr ldbgs)

Ben Gamari's avatar
Ben Gamari committed
419 420 421 422
          -- Accumulate debug information for emission in finishNativeGen.
          let ngs'' = ngs' { ngs_debug = ngs_debug ngs' ++ ldbgs, ngs_labels = [] }

          cmmNativeGenStream dflags this_mod modLoc ncgImpl h us'
423
              cmm_stream' ngs''
424

425 426
-- | Do native code generation on all these cmms.
--
427
cmmNativeGens :: forall statics instr jumpDest.
428 429
                 (Outputable statics, Outputable instr
                 ,Outputable jumpDest, Instruction instr)
430
              => DynFlags
431
              -> Module -> ModLocation
432 433
              -> NcgImpl statics instr jumpDest
              -> BufHandle
434
              -> LabelMap DebugBlock
dterei's avatar
dterei committed
435
              -> UniqSupply
Simon Peyton Jones's avatar
Simon Peyton Jones committed
436
              -> [RawCmmDecl]
437 438 439
              -> NativeGenAcc statics instr
              -> Int
              -> IO (NativeGenAcc statics instr, UniqSupply)
440

441 442
cmmNativeGens dflags this_mod modLoc ncgImpl h dbgMap = go
  where
443 444
    go :: UniqSupply -> [RawCmmDecl]
       -> NativeGenAcc statics instr -> Int
445
       -> IO (NativeGenAcc statics instr, UniqSupply)
446

447 448 449 450
    go us [] ngs !_ =
        return (ngs, us)

    go us (cmm : cmms) ngs count = do
451
        let fileIds = ngs_dwarfFiles ngs
452
        (us', fileIds', native, imports, colorStats, linearStats, unwinds)
453 454 455 456
          <- {-# SCC "cmmNativeGen" #-}
             cmmNativeGen dflags this_mod modLoc ncgImpl us fileIds dbgMap
                          cmm count

457 458 459
        -- Generate .file directives for every new file that has been
        -- used. Note that it is important that we generate these in
        -- ascending order, as Clang's 3.6 assembler complains.
niteria's avatar
niteria committed
460 461 462
        let newFileIds = sortBy (comparing snd) $
                         nonDetEltsUFM $ fileIds' `minusUFM` fileIds
            -- See Note [Unique Determinism and code generation]
463
            pprDecl (f,n) = text "\t.file " <> ppr n <+>
464
                            doubleQuotes (ftext f)
465

466
        emitNativeCode dflags h $ vcat $
467
          map pprDecl newFileIds ++
468
          map (pprNatCmmDecl ncgImpl) native
469

Gabor Greif's avatar
typo  
Gabor Greif committed
470
        -- force evaluation all this stuff to avoid space leaks
471
        {-# SCC "seqString" #-} evaluate $ seqString (showSDoc dflags $ vcat $ map ppr imports)
472

473
        let !labels' = if debugLevel dflags > 0
474 475 476
                       then cmmDebugLabels isMetaInstr native else []
            !natives' = if dopt Opt_D_dump_asm_stats dflags
                        then native : ngs_natives ngs else []
477

478 479 480 481 482 483
            mCon = maybe id (:)
            ngs' = ngs{ ngs_imports     = imports : ngs_imports ngs
                      , ngs_natives     = natives'
                      , ngs_colorStats  = colorStats `mCon` ngs_colorStats ngs
                      , ngs_linearStats = linearStats `mCon` ngs_linearStats ngs
                      , ngs_labels      = ngs_labels ngs ++ labels'
484
                      , ngs_dwarfFiles  = fileIds'
485
                      , ngs_unwinds     = ngs_unwinds ngs `mapUnion` unwinds
486
                      }
487
        go us' cmms ngs' (count + 1)
488

489 490
    seqString []            = ()
    seqString (x:xs)        = x `seq` seqString xs
491 492


493 494 495
emitNativeCode :: DynFlags -> BufHandle -> SDoc -> IO ()
emitNativeCode dflags h sdoc = do

496 497
        {-# SCC "pprNativeCode" #-} bufLeftRenderSDoc dflags h
                                      (mkCodeStyle AsmStyle) sdoc
498 499 500 501 502 503

        -- dump native code
        dumpIfSet_dyn dflags
                Opt_D_dump_asm "Asm code"
                sdoc

504
-- | Complete native code generation phase for a single top-level chunk of Cmm.
505 506
--      Dumping the output of each stage along the way.
--      Global conflict graph and NGC stats
507
cmmNativeGen
508 509
    :: forall statics instr jumpDest. (Instruction instr,
        Outputable statics, Outputable instr, Outputable jumpDest)
510
    => DynFlags
511
    -> Module -> ModLocation
512
    -> NcgImpl statics instr jumpDest
513
        -> UniqSupply
514 515
        -> DwarfFiles
        -> LabelMap DebugBlock
516 517 518
        -> RawCmmDecl                                   -- ^ the cmm to generate code for
        -> Int                                          -- ^ sequence number of this top thing
        -> IO   ( UniqSupply
519
                , DwarfFiles
520 521 522
                , [NatCmmDecl statics instr]                -- native code
                , [CLabel]                                  -- things imported by this cmm
                , Maybe [Color.RegAllocStats statics instr] -- stats for the coloring register allocator
523 524 525
                , Maybe [Linear.RegAllocStats]              -- stats for the linear register allocators
                , LabelMap [UnwindPoint]                    -- unwinding information for blocks
                )
526

527
cmmNativeGen dflags this_mod modLoc ncgImpl us fileIds dbgMap cmm count
528
 = do
529
        let platform = targetPlatform dflags
530

531 532 533
        -- rewrite assignments to global regs
        let fixed_cmm =
                {-# SCC "fixStgRegisters" #-}
534
                fixStgRegisters dflags cmm
535

536 537 538
        -- cmm to cmm optimisations
        let (opt_cmm, imports) =
                {-# SCC "cmmToCmm" #-}
539
                cmmToCmm dflags this_mod fixed_cmm
540

541 542
        dumpIfSet_dyn dflags
                Opt_D_dump_opt_cmm "Optimised Cmm"
543
                (pprCmmGroup [opt_cmm])
544

545 546 547
        let cmmCfg = {-# SCC "getCFG" #-}
                     getCfgProc (cfgWeightInfo dflags) opt_cmm

548
        -- generate native code from cmm
549
        let ((native, lastMinuteImports, fileIds', nativeCfgWeights), usGen) =
550
                {-# SCC "genMachCode" #-}
551 552
                initUs us $ genMachCode dflags this_mod modLoc
                                        (cmmTopCodeGen ncgImpl)
553 554
                                        fileIds dbgMap opt_cmm cmmCfg

555 556 557

        dumpIfSet_dyn dflags
                Opt_D_dump_asm_native "Native code"
558
                (vcat $ map (pprNatCmmDecl ncgImpl) native)
559

560 561 562 563
        dumpIfSet_dyn dflags
                Opt_D_dump_cfg_weights "CFG Weights"
                (pprEdgeWeights nativeCfgWeights)

564
        -- tag instructions with register liveness information
565 566 567 568
        -- also drops dead code
        let livenessCfg = if (backendMaintainsCfg dflags)
                                then Just nativeCfgWeights
                                else Nothing
569 570 571
        let (withLiveness, usLive) =
                {-# SCC "regLiveness" #-}
                initUs usGen
572
                        $ mapM (cmmTopLiveness livenessCfg platform) native
573 574 575

        dumpIfSet_dyn dflags
                Opt_D_dump_asm_liveness "Liveness annotations added"
Ian Lynagh's avatar
Ian Lynagh committed
576
                (vcat $ map ppr withLiveness)
577 578

        -- allocate registers
579
        (alloced, usAlloc, ppr_raStatsColor, ppr_raStatsLinear, raStats, stack_updt_blks) <-
580 581
         if ( gopt Opt_RegsGraph dflags
           || gopt Opt_RegsIterative dflags )
582 583 584 585 586 587
          then do
                -- the regs usable for allocation
                let (alloc_regs :: UniqFM (UniqSet RealReg))
                        = foldr (\r -> plusUFM_C unionUniqSets
                                        $ unitUFM (targetClassOfRealReg platform r) (unitUniqSet r))
                                emptyUFM
588
                        $ allocatableRegs ncgImpl
589 590

                -- do the graph coloring register allocation
591
                let ((alloced, maybe_more_stack, regAllocStats), usAlloc)
592
                        = {-# SCC "RegAlloc-color" #-}
593 594 595 596
                          initUs usLive
                          $ Color.regAlloc
                                dflags
                                alloc_regs
597
                                (mkUniqSet [0 .. maxSpillSlots ncgImpl])
598
                                (maxSpillSlots ncgImpl)
599
                                withLiveness
600 601 602 603 604 605 606 607 608 609 610
                                livenessCfg

                let ((alloced', stack_updt_blks), usAlloc')
                        = initUs usAlloc $
                                case maybe_more_stack of
                                Nothing     -> return (alloced, [])
                                Just amount -> do
                                    (alloced',stack_updt_blks) <- unzip <$>
                                                (mapM ((ncgAllocMoreStack ncgImpl) amount) alloced)
                                    return (alloced', concat stack_updt_blks )

611 612 613 614

                -- dump out what happened during register allocation
                dumpIfSet_dyn dflags
                        Opt_D_dump_asm_regalloc "Registers allocated"
615
                        (vcat $ map (pprNatCmmDecl ncgImpl) alloced)
616 617 618 619 620 621

                dumpIfSet_dyn dflags
                        Opt_D_dump_asm_regalloc_stages "Build/spill stages"
                        (vcat   $ map (\(stage, stats)
                                        -> text "# --------------------------"
                                        $$ text "#  cmm " <> int count <> text " Stage " <> int stage
Ian Lynagh's avatar
Ian Lynagh committed
622
                                        $$ ppr stats)
623 624 625
                                $ zip [0..] regAllocStats)

                let mPprStats =
626
                        if dopt Opt_D_dump_asm_stats dflags
627 628 629 630 631
                         then Just regAllocStats else Nothing

                -- force evaluation of the Maybe to avoid space leak
                mPprStats `seq` return ()

632
                return  ( alloced', usAlloc'
633
                        , mPprStats
634
                        , Nothing
635
                        , [], stack_updt_blks)
636 637 638

          else do
                -- do linear register allocation
639 640 641 642
                let reg_alloc proc = do
                       (alloced, maybe_more_stack, ra_stats) <-
                               Linear.regAlloc dflags proc
                       case maybe_more_stack of
643
                         Nothing -> return ( alloced, ra_stats, [] )
644
                         Just amount -> do
645 646 647
                           (alloced',stack_updt_blks) <-
                               ncgAllocMoreStack ncgImpl amount alloced
                           return (alloced', ra_stats, stack_updt_blks )
648

649
                let ((alloced, regAllocStats, stack_updt_blks), usAlloc)
650
                        = {-# SCC "RegAlloc-linear" #-}
651
                          initUs usLive
652
                          $ liftM unzip3
653
                          $ mapM reg_alloc withLiveness
654 655 656

                dumpIfSet_dyn dflags
                        Opt_D_dump_asm_regalloc "Registers allocated"
657
                        (vcat $ map (pprNatCmmDecl ncgImpl) alloced)
658 659

                let mPprStats =
660
                        if dopt Opt_D_dump_asm_stats dflags
661 662 663 664 665 666 667
                         then Just (catMaybes regAllocStats) else Nothing

                -- force evaluation of the Maybe to avoid space leak
                mPprStats `seq` return ()

                return  ( alloced, usAlloc
                        , Nothing
668 669 670 671 672 673 674 675 676 677 678 679 680 681
                        , mPprStats, (catMaybes regAllocStats)
                        , concat stack_updt_blks )

        -- Fixupblocks the register allocator inserted (from, regMoves, to)
        let cfgRegAllocUpdates :: [(BlockId,BlockId,BlockId)]
            cfgRegAllocUpdates = (concatMap Linear.ra_fixupList raStats)

        let cfgWithFixupBlks =
                addNodesBetween nativeCfgWeights cfgRegAllocUpdates

        -- Insert stack update blocks
        let postRegCFG =
                foldl' (\m (from,to) -> addImmediateSuccessor from to m )
                       cfgWithFixupBlks stack_updt_blks
682

683 684 685 686 687 688 689
        ---- x86fp_kludge.  This pass inserts ffree instructions to clear
        ---- the FPU stack on x86.  The x86 ABI requires that the FPU stack
        ---- is clear, and library functions can return odd results if it
        ---- isn't.
        ----
        ---- NB. must happen before shortcutBranches, because that
        ---- generates JXX_GBLs which we can't fix up in x86fp_kludge.
690
        let kludged = {-# SCC "x86fp_kludge" #-} ncg_x86fp_kludge ncgImpl alloced
691 692

        ---- generate jump tables
693 694
        let tabled      =
                {-# SCC "generateJumpTables" #-}
695
                generateJumpTables ncgImpl kludged
696

697 698 699 700 701
        dumpIfSet_dyn dflags
                Opt_D_dump_cfg_weights "CFG Update information"
                ( text "stack:" <+> ppr stack_updt_blks $$
                  text "linearAlloc:" <+> ppr cfgRegAllocUpdates )

702
        ---- shortcut branches
703
        let (shorted, postShortCFG)     =
704
                {-# SCC "shortcutBranches" #-}
705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723
                shortcutBranches dflags ncgImpl tabled postRegCFG

        let optimizedCFG =
                optimizeCFG (cfgWeightInfo dflags) cmm postShortCFG

        dumpIfSet_dyn dflags
                Opt_D_dump_cfg_weights "CFG Final Weights"
                ( pprEdgeWeights optimizedCFG )

        --TODO: Partially check validity of the cfg.
        let getBlks (CmmProc _info _lbl _live (ListGraph blocks)) = blocks
            getBlks _ = []

        when ( backendMaintainsCfg dflags &&
                (gopt Opt_DoAsmLinting dflags || debugIsOn )) $ do
                let blocks = concatMap getBlks shorted
                let labels = setFromList $ fmap blockId blocks :: LabelSet
                return $! seq (sanityCheckCfg optimizedCFG labels $
                                text "cfg not in lockstep") ()
724

725
        ---- sequence blocks
726 727 728
        let sequenced :: [NatCmmDecl statics instr]
            sequenced =
                checkLayout shorted $
729
                {-# SCC "sequenceBlocks" #-}
730 731 732 733 734 735 736 737 738 739 740 741 742 743
                map (BlockLayout.sequenceTop
                        dflags
                        ncgImpl optimizedCFG)
                    shorted

        let branchOpt :: [NatCmmDecl statics instr]
            branchOpt =
                {-# SCC "invertCondBranches" #-}
                map invert sequenced
              where
                invertConds = (invertCondBranches ncgImpl) optimizedCFG
                invert top@CmmData {} = top
                invert (CmmProc info lbl live (ListGraph blocks)) =
                    CmmProc info lbl live (ListGraph $ invertConds info blocks)
744

745
        ---- expansion of SPARC synthetic instrs
746 747
        let expanded =
                {-# SCC "sparc_expand" #-}
748 749
                ncgExpandTop ncgImpl branchOpt
                --ncgExpandTop ncgImpl sequenced
750

751 752
        dumpIfSet_dyn dflags
                Opt_D_dump_asm_expanded "Synthetic instructions expanded"
753
                (vcat $ map (pprNatCmmDecl ncgImpl) expanded)
754

755 756 757 758 759 760 761 762 763
        -- generate unwinding information from cmm
        let unwinds :: BlockMap [UnwindPoint]
            unwinds =
                {-# SCC "unwindingInfo" #-}
                foldl' addUnwind mapEmpty expanded
              where
                addUnwind acc proc =
                    acc `mapUnion` computeUnwinding dflags ncgImpl proc

764
        return  ( usAlloc
765
                , fileIds'
766 767 768
                , expanded
                , lastMinuteImports ++ imports
                , ppr_raStatsColor
769 770
                , ppr_raStatsLinear
                , unwinds )
771

772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789
-- | Make sure all blocks we want the layout algorithm to place have been placed.
checkLayout :: [NatCmmDecl statics instr] -> [NatCmmDecl statics instr]
            -> [NatCmmDecl statics instr]
checkLayout procsUnsequenced procsSequenced =
        ASSERT2(setNull diff,
                ppr "Block sequencing dropped blocks:" <> ppr diff)
        procsSequenced
  where
        blocks1 = foldl' (setUnion) setEmpty $
                        map getBlockIds procsUnsequenced :: LabelSet
        blocks2 = foldl' (setUnion) setEmpty $
                        map getBlockIds procsSequenced
        diff = setDifference blocks1 blocks2

        getBlockIds (CmmData _ _) = setEmpty
        getBlockIds (CmmProc _ _ _ (ListGraph blocks)) =
                setFromList $ map blockId blocks

790

Simon Peyton Jones's avatar
Simon Peyton Jones committed
791
x86fp_kludge :: NatCmmDecl (Alignment, CmmStatics) X86.Instr.Instr -> NatCmmDecl (Alignment, CmmStatics) X86.Instr.Instr
792
x86fp_kludge top@(CmmData _ _) = top
793 794
x86fp_kludge (CmmProc info lbl live (ListGraph code)) =
        CmmProc info lbl live (ListGraph $ X86.Instr.i386_insert_ffrees code)
795

796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817
-- | Compute unwinding tables for the blocks of a procedure
computeUnwinding :: Instruction instr
                 => DynFlags -> NcgImpl statics instr jumpDest
                 -> NatCmmDecl statics instr
                    -- ^ the native code generated for the procedure
                 -> LabelMap [UnwindPoint]
                    -- ^ unwinding tables for all points of all blocks of the
                    -- procedure
computeUnwinding dflags _ _
  | debugLevel dflags == 0         = mapEmpty
computeUnwinding _ _ (CmmData _ _) = mapEmpty
computeUnwinding _ ncgImpl (CmmProc _ _ _ (ListGraph blks)) =
    -- In general we would need to push unwinding information down the
    -- block-level call-graph to ensure that we fully account for all
    -- relevant register writes within a procedure.
    --
    -- However, the only unwinding information that we care about in GHC is for
    -- Sp. The fact that CmmLayoutStack already ensures that we have unwind
    -- information at the beginning of every block means that there is no need
    -- to perform this sort of push-down.
    mapFromList [ (blk_lbl, extractUnwindPoints ncgImpl instrs)
                | BasicBlock blk_lbl instrs <- blks ]
818

819
-- | Build a doc for all the imports.
820
--
821
makeImportsDoc :: DynFlags -> [CLabel] -> SDoc
822
makeImportsDoc dflags imports
823
 = dyld_stubs imports
824
            $$
825 826 827
            -- On recent versions of Darwin, the linker supports
            -- dead-stripping of code and data on a per-symbol basis.
            -- There's a hack to make this work in PprMach.pprNatCmmDecl.
ian@well-typed.com's avatar
ian@well-typed.com committed
828
            (if platformHasSubsectionsViaSymbols platform
829
             then text ".subsections_via_symbols"
830
             else Outputable.empty)
831
            $$
832 833 834 835 836 837
                -- On recent GNU ELF systems one can mark an object file
                -- as not requiring an executable stack. If all objects
                -- linked into a program have this note then the program
                -- will not use an executable stack, which is good for
                -- security. GHC generated code does not need an executable
                -- stack so add the note in:
ian@well-typed.com's avatar
ian@well-typed.com committed
838
            (if platformHasGnuNonexecStack platform
839
             then text ".section .note.GNU-stack,\"\"," <> sectionType "progbits"
840
             else Outputable.empty)
841
            $$
Gabor Greif's avatar
Gabor Greif committed
842
                -- And just because every other compiler does, let's stick in
843
                -- an identifier directive: .ident "GHC x.y.z"
ian@well-typed.com's avatar
ian@well-typed.com committed
844
            (if platformHasIdentDirective platform
845 846
             then let compilerIdent = text "GHC" <+> text cProjectVersion
                   in text ".ident" <+> doubleQuotes compilerIdent
847
             else Outputable.empty)
848

849
 where
ian@well-typed.com's avatar
ian@well-typed.com committed
850 851 852 853
        platform = targetPlatform dflags
        arch = platformArch platform
        os   = platformOS   platform

854 855
        -- Generate "symbol stubs" for all external symbols that might
        -- come from a dynamic library.
856 857
        dyld_stubs :: [CLabel] -> SDoc
{-      dyld_stubs imps = vcat $ map pprDyldSymbolStub $
858 859 860 861
                                    map head $ group $ sort imps-}
        -- (Hack) sometimes two Labels pretty-print the same, but have
        -- different uniques; so we compare their text versions...
        dyld_stubs imps
Ian Lynagh's avatar
Ian Lynagh committed
862
                | needImportedSymbols dflags arch os
863
                = vcat $
Ian Lynagh's avatar
Ian Lynagh committed
864 865
                        (pprGotDeclaration dflags arch os :) $
                        map ( pprImportedSymbol dflags platform . fst . head) $
866 867 868 869 870
                        groupBy (\(_,a) (_,b) -> a == b) $
                        sortBy (\(_,a) (_,b) -> compare a b) $
                        map doPpr $
                        imps
                | otherwise
871
                = Outputable.empty
872

Ian Lynagh's avatar
Ian Lynagh committed
873
        doPpr lbl = (lbl, renderWithStyle dflags (pprCLabel platform lbl) astyle)
874
        astyle = mkCodeStyle AsmStyle
875

876 877 878 879 880 881
-- -----------------------------------------------------------------------------
-- Generate jump tables

-- Analyzes all native code and generates data sections for all jump
-- table instructions.
generateJumpTables
882 883 884
        :: NcgImpl statics instr jumpDest
        -> [NatCmmDecl statics instr] -> [NatCmmDecl statics instr]
generateJumpTables ncgImpl xs = concatMap f xs
885
    where f p@(CmmProc _ _ _ (ListGraph xs)) = p : concatMap g xs
886
          f p = [p]
887
          g (BasicBlock _ xs) = catMaybes (map (generateJumpTableForInstr ncgImpl) xs)
888

889 890 891
-- -----------------------------------------------------------------------------
-- Shortcut branches

892
shortcutBranches
893
        :: forall statics instr jumpDest. (Outputable jumpDest) => DynFlags
894
        -> NcgImpl statics instr jumpDest
895
        -> [NatCmmDecl statics instr]
896 897
        -> CFG
        -> ([NatCmmDecl statics instr],CFG)
898

899
shortcutBranches dflags ncgImpl tops weights
900
  | gopt Opt_AsmShortcutting dflags
901 902
  = ( map (apply_mapping ncgImpl mapping) tops'
    , shortcutWeightMap weights mappingBid )
903
  | otherwise
904
  = (tops, weights)
905
  where
906
    (tops', mappings) = mapAndUnzip (build_mapping ncgImpl) tops
907 908
    mapping = mapUnions mappings :: LabelMap jumpDest
    mappingBid = fmap (getJumpDestBlockId ncgImpl) mapping
909

910 911
build_mapping :: forall instr t d statics jumpDest.
                 NcgImpl statics instr jumpDest
Michal Terepeta's avatar