CmmToAsm.hs 51 KB
Newer Older
1 2 3
-- -----------------------------------------------------------------------------
--
-- (c) The University of Glasgow 1993-2004
4
--
5 6 7
-- This is the top-level module in the native code generator.
--
-- -----------------------------------------------------------------------------
8

9 10
{-# LANGUAGE BangPatterns, CPP, GADTs, ScopedTypeVariables, PatternSynonyms,
    DeriveFunctor #-}
11 12 13 14

#if !defined(GHC_LOADED_INTO_GHCI)
{-# LANGUAGE UnboxedTuples #-}
#endif
15

16 17
{-# OPTIONS_GHC -Wno-incomplete-record-updates #-}

Sylvain Henry's avatar
Sylvain Henry committed
18
module GHC.CmmToAsm (
19 20 21 22 23 24 25 26 27 28 29
                    -- * Module entry point
                    nativeCodeGen

                    -- * Test-only exports: see trac #12744
                    -- used by testGraphNoSpills, which needs to access
                    -- the register allocator intermediate data structures
                    -- cmmNativeGen emits
                  , cmmNativeGen
                  , NcgImpl(..)
                  , x86NcgImpl
                  ) where
30

31
#include "HsVersions.h"
32

33 34
import GhcPrelude

Sylvain Henry's avatar
Sylvain Henry committed
35 36 37 38
import qualified GHC.CmmToAsm.X86.CodeGen as X86.CodeGen
import qualified GHC.CmmToAsm.X86.Regs    as X86.Regs
import qualified GHC.CmmToAsm.X86.Instr   as X86.Instr
import qualified GHC.CmmToAsm.X86.Ppr     as X86.Ppr
39

Sylvain Henry's avatar
Sylvain Henry committed
40 41 42 43 44 45
import qualified GHC.CmmToAsm.SPARC.CodeGen as SPARC.CodeGen
import qualified GHC.CmmToAsm.SPARC.Regs    as SPARC.Regs
import qualified GHC.CmmToAsm.SPARC.Instr   as SPARC.Instr
import qualified GHC.CmmToAsm.SPARC.Ppr     as SPARC.Ppr
import qualified GHC.CmmToAsm.SPARC.ShortcutJump   as SPARC.ShortcutJump
import qualified GHC.CmmToAsm.SPARC.CodeGen.Expand as SPARC.CodeGen.Expand
46

Sylvain Henry's avatar
Sylvain Henry committed
47 48 49 50 51
import qualified GHC.CmmToAsm.PPC.CodeGen as PPC.CodeGen
import qualified GHC.CmmToAsm.PPC.Regs    as PPC.Regs
import qualified GHC.CmmToAsm.PPC.RegInfo as PPC.RegInfo
import qualified GHC.CmmToAsm.PPC.Instr   as PPC.Instr
import qualified GHC.CmmToAsm.PPC.Ppr     as PPC.Ppr
52

Sylvain Henry's avatar
Sylvain Henry committed
53 54
import GHC.CmmToAsm.Reg.Liveness
import qualified GHC.CmmToAsm.Reg.Linear                as Linear
55

Sylvain Henry's avatar
Sylvain Henry committed
56 57 58 59
import qualified GraphColor                             as Color
import qualified GHC.CmmToAsm.Reg.Graph                 as Color
import qualified GHC.CmmToAsm.Reg.Graph.Stats           as Color
import qualified GHC.CmmToAsm.Reg.Graph.TrivColorable   as Color
60

61
import AsmUtils
Sylvain Henry's avatar
Sylvain Henry committed
62
import GHC.CmmToAsm.Reg.Target
John Ericson's avatar
John Ericson committed
63
import GHC.Platform
Sylvain Henry's avatar
Sylvain Henry committed
64
import GHC.CmmToAsm.BlockLayout as BlockLayout
65
import Config
Sylvain Henry's avatar
Sylvain Henry committed
66 67 68 69 70 71
import GHC.CmmToAsm.Instr
import GHC.CmmToAsm.PIC
import GHC.Platform.Reg
import GHC.CmmToAsm.Monad
import GHC.CmmToAsm.CFG
import GHC.CmmToAsm.Dwarf
72
import GHC.CmmToAsm.Config
73
import GHC.Cmm.DebugBlock
74

75
import GHC.Cmm.BlockId
76
import GHC.StgToCmm.CgUtils ( fixStgRegisters )
77 78 79 80 81 82 83 84
import GHC.Cmm
import GHC.Cmm.Utils
import GHC.Cmm.Dataflow.Collections
import GHC.Cmm.Dataflow.Label
import GHC.Cmm.Dataflow.Block
import GHC.Cmm.Opt           ( cmmMachOpFold )
import GHC.Cmm.Ppr
import GHC.Cmm.CLabel
85

86
import UniqFM
87
import UniqSupply
Sylvain Henry's avatar
Sylvain Henry committed
88
import GHC.Driver.Session
89
import Util
90

91
import BasicTypes       ( Alignment )
92
import qualified Pretty
93
import BufWrite
94
import Outputable
95
import FastString
96
import UniqSet
97
import ErrUtils
98
import Module
99 100
import Stream (Stream)
import qualified Stream
101

102 103
-- DEBUGGING ONLY
--import OrdList
104

105
import Data.List
106
import Data.Maybe
107
import Data.Ord         ( comparing )
108
import Control.Exception
109
import Control.Monad
110
import System.IO
111

112 113 114
{-
The native-code generator has machine-independent and
machine-dependent modules.
115

116 117 118 119
This module ("AsmCodeGen") is the top-level machine-independent
module.  Before entering machine-dependent land, we do some
machine-independent optimisations (defined below) on the
'CmmStmts's.
120

121 122 123 124 125 126 127 128 129
We convert to the machine-specific 'Instr' datatype with
'cmmCodeGen', assuming an infinite supply of registers.  We then use
a machine-independent register allocator ('regAlloc') to rejoin
reality.  Obviously, 'regAlloc' has machine-specific helper
functions (see about "RegAllocInfo" below).

Finally, we order the basic blocks of the function so as to minimise
the number of jumps between blocks, by utilising fallthrough wherever
possible.
130 131

The machine-dependent bits break down as follows:
132 133

  * ["MachRegs"]  Everything about the target platform's machine
134 135 136
    registers (and immediate operands, and addresses, which tend to
    intermingle/interact with registers).

137
  * ["MachInstrs"]  Includes the 'Instr' datatype (possibly should
138
    have a module of its own), plus a miscellany of other things
139
    (e.g., 'targetDoubleSize', 'smStablePtrTable', ...)
140

141
  * ["MachCodeGen"]  is where 'Cmm' stuff turns into
142
    machine instructions.
143

144
  * ["PprMach"] 'pprInstr' turns an 'Instr' into text (well, really
145
    a 'SDoc').
146

147 148
  * ["RegAllocInfo"] In the register allocator, we manipulate
    'MRegsState's, which are 'BitSet's, one bit per machine register.
149 150
    When we want to say something about a specific machine register
    (e.g., ``it gets clobbered by this instruction''), we set/unset
151
    its bit.  Obviously, we do this 'BitSet' thing for efficiency
152 153
    reasons.

154
    The 'RegAllocInfo' module collects together the machine-specific
155 156
    info needed to do register allocation.

157 158
   * ["RegisterAlloc"] The (machine-independent) register allocator.
-}
159

160
--------------------
161 162 163
nativeCodeGen :: forall a . DynFlags -> Module -> ModLocation -> Handle -> UniqSupply
              -> Stream IO RawCmmGroup a
              -> IO a
164
nativeCodeGen dflags this_mod modLoc h us cmms
165
 = let platform = targetPlatform dflags
166 167
       nCG' :: ( Outputable statics, Outputable instr
               , Outputable jumpDest, Instruction instr)
168
            => NcgImpl statics instr jumpDest -> IO a
169
       nCG' ncgImpl = nativeCodeGen' dflags this_mod modLoc ncgImpl h us cmms
170
   in case platformArch platform of
171 172 173
      ArchX86       -> nCG' (x86NcgImpl    dflags)
      ArchX86_64    -> nCG' (x86_64NcgImpl dflags)
      ArchPPC       -> nCG' (ppcNcgImpl    dflags)
174
      ArchS390X     -> panic "nativeCodeGen: No NCG for S390X"
175
      ArchSPARC     -> nCG' (sparcNcgImpl  dflags)
176
      ArchSPARC64   -> panic "nativeCodeGen: No NCG for SPARC64"
177 178 179 180 181 182 183 184
      ArchARM {}    -> panic "nativeCodeGen: No NCG for ARM"
      ArchARM64     -> panic "nativeCodeGen: No NCG for ARM64"
      ArchPPC_64 _  -> nCG' (ppcNcgImpl    dflags)
      ArchAlpha     -> panic "nativeCodeGen: No NCG for Alpha"
      ArchMipseb    -> panic "nativeCodeGen: No NCG for mipseb"
      ArchMipsel    -> panic "nativeCodeGen: No NCG for mipsel"
      ArchUnknown   -> panic "nativeCodeGen: No NCG for unknown arch"
      ArchJavaScript-> panic "nativeCodeGen: No NCG for JavaScript"
185

186
x86NcgImpl :: DynFlags -> NcgImpl (Alignment, RawCmmStatics)
187
                                  X86.Instr.Instr X86.Instr.JumpDest
188
x86NcgImpl dflags
189
 = (x86_64NcgImpl dflags)
190

191
x86_64NcgImpl :: DynFlags -> NcgImpl (Alignment, RawCmmStatics)
192
                                  X86.Instr.Instr X86.Instr.JumpDest
193 194
x86_64NcgImpl dflags
 = NcgImpl {
195 196 197
        ncgConfig                 = config
       ,cmmTopCodeGen             = X86.CodeGen.cmmTopCodeGen
       ,generateJumpTableForInstr = X86.CodeGen.generateJumpTableForInstr config
198 199 200 201
       ,getJumpDestBlockId        = X86.Instr.getJumpDestBlockId
       ,canShortcut               = X86.Instr.canShortcut
       ,shortcutStatics           = X86.Instr.shortcutStatics
       ,shortcutJump              = X86.Instr.shortcutJump
202 203
       ,pprNatCmmDecl             = X86.Ppr.pprNatCmmDecl config
       ,maxSpillSlots             = X86.Instr.maxSpillSlots config
204 205 206
       ,allocatableRegs           = X86.Regs.allocatableRegs platform
       ,ncgAllocMoreStack         = X86.Instr.allocMoreStack platform
       ,ncgExpandTop              = id
207
       ,ncgMakeFarBranches        = const id
208
       ,extractUnwindPoints       = X86.CodeGen.extractUnwindPoints
209
       ,invertCondBranches        = X86.CodeGen.invertCondBranches
210
   }
211 212 213
    where
      config   = initConfig dflags
      platform = ncgPlatform config
214

215
ppcNcgImpl :: DynFlags -> NcgImpl RawCmmStatics PPC.Instr.Instr PPC.RegInfo.JumpDest
216 217
ppcNcgImpl dflags
 = NcgImpl {
218 219 220
        ncgConfig                 = config
       ,cmmTopCodeGen             = PPC.CodeGen.cmmTopCodeGen
       ,generateJumpTableForInstr = PPC.CodeGen.generateJumpTableForInstr config
221 222 223 224
       ,getJumpDestBlockId        = PPC.RegInfo.getJumpDestBlockId
       ,canShortcut               = PPC.RegInfo.canShortcut
       ,shortcutStatics           = PPC.RegInfo.shortcutStatics
       ,shortcutJump              = PPC.RegInfo.shortcutJump
225 226
       ,pprNatCmmDecl             = PPC.Ppr.pprNatCmmDecl config
       ,maxSpillSlots             = PPC.Instr.maxSpillSlots config
227 228 229
       ,allocatableRegs           = PPC.Regs.allocatableRegs platform
       ,ncgAllocMoreStack         = PPC.Instr.allocMoreStack platform
       ,ncgExpandTop              = id
230
       ,ncgMakeFarBranches        = PPC.Instr.makeFarBranches
231
       ,extractUnwindPoints       = const []
232
       ,invertCondBranches        = \_ _ -> id
233
   }
234 235 236
    where
      config   = initConfig dflags
      platform = ncgPlatform config
237

238
sparcNcgImpl :: DynFlags -> NcgImpl RawCmmStatics SPARC.Instr.Instr SPARC.ShortcutJump.JumpDest
239 240
sparcNcgImpl dflags
 = NcgImpl {
241 242
        ncgConfig                 = config
       ,cmmTopCodeGen             = SPARC.CodeGen.cmmTopCodeGen
243 244 245 246 247
       ,generateJumpTableForInstr = SPARC.CodeGen.generateJumpTableForInstr dflags
       ,getJumpDestBlockId        = SPARC.ShortcutJump.getJumpDestBlockId
       ,canShortcut               = SPARC.ShortcutJump.canShortcut
       ,shortcutStatics           = SPARC.ShortcutJump.shortcutStatics
       ,shortcutJump              = SPARC.ShortcutJump.shortcutJump
248 249
       ,pprNatCmmDecl             = SPARC.Ppr.pprNatCmmDecl config
       ,maxSpillSlots             = SPARC.Instr.maxSpillSlots config
250 251 252
       ,allocatableRegs           = SPARC.Regs.allocatableRegs
       ,ncgAllocMoreStack         = noAllocMoreStack
       ,ncgExpandTop              = map SPARC.CodeGen.Expand.expandTop
253
       ,ncgMakeFarBranches        = const id
254
       ,extractUnwindPoints       = const []
255
       ,invertCondBranches        = \_ _ -> id
256
   }
257 258
    where
      config   = initConfig dflags
259 260 261 262 263 264 265

--
-- Allocating more stack space for spilling is currently only
-- supported for the linear register allocator on x86/x86_64, the rest
-- default to the panic below.  To support allocating extra stack on
-- more platforms provide a definition of ncgAllocMoreStack.
--
266 267
noAllocMoreStack :: Int -> NatCmmDecl statics instr
                 -> UniqSM (NatCmmDecl statics instr, [(BlockId,BlockId)])
268 269 270 271 272 273 274 275 276
noAllocMoreStack amount _
  = panic $   "Register allocator: out of stack slots (need " ++ show amount ++ ")\n"
        ++  "   If you are trying to compile SHA1.hs from the crypto library then this\n"
        ++  "   is a known limitation in the linear allocator.\n"
        ++  "\n"
        ++  "   Try enabling the graph colouring allocator with -fregs-graph instead."
        ++  "   You can still file a bug report if you like.\n"


277 278 279 280 281 282 283 284 285 286 287 288 289
-- | Data accumulated during code generation. Mostly about statistics,
-- but also collects debug data for DWARF generation.
data NativeGenAcc statics instr
  = NGS { ngs_imports     :: ![[CLabel]]
        , ngs_natives     :: ![[NatCmmDecl statics instr]]
             -- ^ Native code generated, for statistics. This might
             -- hold a lot of data, so it is important to clear this
             -- field as early as possible if it isn't actually
             -- required.
        , ngs_colorStats  :: ![[Color.RegAllocStats statics instr]]
        , ngs_linearStats :: ![[Linear.RegAllocStats]]
        , ngs_labels      :: ![Label]
        , ngs_debug       :: ![DebugBlock]
290
        , ngs_dwarfFiles  :: !DwarfFiles
291 292 293
        , ngs_unwinds     :: !(LabelMap [UnwindPoint])
             -- ^ see Note [Unwinding information in the NCG]
             -- and Note [What is this unwinding business?] in Debug.
294
        }
295

296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320
{-
Note [Unwinding information in the NCG]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Unwind information is a type of metadata which allows a debugging tool
to reconstruct the values of machine registers at the time a procedure was
entered. For the most part, the production of unwind information is handled by
the Cmm stage, where it is represented by CmmUnwind nodes.

Unfortunately, the Cmm stage doesn't know everything necessary to produce
accurate unwinding information. For instance, the x86-64 calling convention
requires that the stack pointer be aligned to 16 bytes, which in turn means that
GHC must sometimes add padding to $sp prior to performing a foreign call. When
this happens unwind information must be updated accordingly.
For this reason, we make the NCG backends responsible for producing
unwinding tables (with the extractUnwindPoints function in NcgImpl).

We accumulate the produced unwind tables over CmmGroups in the ngs_unwinds
field of NativeGenAcc. This is a label map which contains an entry for each
procedure, containing a list of unwinding points (e.g. a label and an associated
unwinding table).

See also Note [What is this unwinding business?] in Debug.
-}

321 322
nativeCodeGen' :: (Outputable statics, Outputable instr,Outputable jumpDest,
                   Instruction instr)
323
               => DynFlags
324
               -> Module -> ModLocation
325
               -> NcgImpl statics instr jumpDest
326
               -> Handle
327
               -> UniqSupply
328 329
               -> Stream IO RawCmmGroup a
               -> IO a
330
nativeCodeGen' dflags this_mod modLoc ncgImpl h us cmms
331
 = do
332 333 334
        -- BufHandle is a performance hack.  We could hide it inside
        -- Pretty if it weren't for the fact that we do lots of little
        -- printDocs here (in order to do codegen in constant space).
335
        bufh <- newBufHandle h
336
        let ngs0 = NGS [] [] [] [] [] [] emptyUFM mapEmpty
337
        (ngs, us', a) <- cmmNativeGenStream dflags this_mod modLoc ncgImpl bufh us
338
                                         cmms ngs0
339 340
        _ <- finishNativeGen dflags modLoc bufh us' ngs
        return a
341 342

finishNativeGen :: Instruction instr
343
                => DynFlags
Peter Wortmann's avatar
Peter Wortmann committed
344
                -> ModLocation
345
                -> BufHandle
Peter Wortmann's avatar
Peter Wortmann committed
346
                -> UniqSupply
347
                -> NativeGenAcc statics instr
Peter Wortmann's avatar
Peter Wortmann committed
348 349
                -> IO UniqSupply
finishNativeGen dflags modLoc bufh@(BufHandle _ _ h) us ngs
350
 = withTimingSilent dflags (text "NCG") (`seq` ()) $ do
Peter Wortmann's avatar
Peter Wortmann committed
351
        -- Write debug data and finish
Ben Gamari's avatar
Ben Gamari committed
352
        let emitDw = debugLevel dflags > 0
Peter Wortmann's avatar
Peter Wortmann committed
353 354 355 356
        us' <- if not emitDw then return us else do
          (dwarf, us') <- dwarfGen dflags modLoc us (ngs_debug ngs)
          emitNativeCode dflags bufh dwarf
          return us'
357
        bFlush bufh
358

359
        -- dump global NCG stats for graph coloring allocator
360
        let stats = concat (ngs_colorStats ngs)
361
        unless (null stats) $ do
362

Peter Wortmann's avatar
Peter Wortmann committed
363 364
          -- build the global register conflict graph
          let graphGlobal
365
                  = foldl' Color.union Color.initGraph
Peter Wortmann's avatar
Peter Wortmann committed
366 367
                  $ [ Color.raGraph stat
                          | stat@Color.RegAllocStatsStart{} <- stats]
368

Peter Wortmann's avatar
Peter Wortmann committed
369
          dump_stats (Color.pprStats stats graphGlobal)
370

Peter Wortmann's avatar
Peter Wortmann committed
371 372 373
          let platform = targetPlatform dflags
          dumpIfSet_dyn dflags
                  Opt_D_dump_asm_conflicts "Register conflict graph"
Sylvain Henry's avatar
Sylvain Henry committed
374
                  FormatText
Peter Wortmann's avatar
Peter Wortmann committed
375 376 377 378 379 380
                  $ Color.dotGraph
                          (targetRegDotColor platform)
                          (Color.trivColorable platform
                                  (targetVirtualRegSqueeze platform)
                                  (targetRealRegSqueeze platform))
                  $ graphGlobal
381 382 383


        -- dump global NCG stats for linear allocator
384
        let linearStats = concat (ngs_linearStats ngs)
385
        unless (null linearStats) $
386
          dump_stats (Linear.pprStats (concat (ngs_natives ngs)) linearStats)
387 388

        -- write out the imports
389
        printSDocLn Pretty.LeftMode dflags h (mkCodeStyle AsmStyle)
390
                $ makeImportsDoc dflags (concat (ngs_imports ngs))
Peter Wortmann's avatar
Peter Wortmann committed
391
        return us'
392
  where
Sylvain Henry's avatar
Sylvain Henry committed
393 394 395
    dump_stats = dumpAction dflags (mkDumpStyle dflags alwaysQualify)
                   (dumpOptionsFromFlag Opt_D_dump_asm_stats) "NCG stats"
                   FormatText
396

397 398
cmmNativeGenStream :: (Outputable statics, Outputable instr
                      ,Outputable jumpDest, Instruction instr)
399
              => DynFlags
400
              -> Module -> ModLocation
401 402
              -> NcgImpl statics instr jumpDest
              -> BufHandle
403
              -> UniqSupply
404
              -> Stream IO RawCmmGroup a
405
              -> NativeGenAcc statics instr
406
              -> IO (NativeGenAcc statics instr, UniqSupply, a)
407

408
cmmNativeGenStream dflags this_mod modLoc ncgImpl h us cmm_stream ngs
409 410
 = do r <- Stream.runStream cmm_stream
      case r of
411
        Left a ->
412 413 414 415 416
          return (ngs { ngs_imports = reverse $ ngs_imports ngs
                      , ngs_natives = reverse $ ngs_natives ngs
                      , ngs_colorStats = reverse $ ngs_colorStats ngs
                      , ngs_linearStats = reverse $ ngs_linearStats ngs
                      },
417 418
                  us,
                  a)
419
        Right (cmms, cmm_stream') -> do
420
          (us', ngs'') <-
421
            withTimingSilent
422
                dflags
423
                ncglabel (\(a, b) -> a `seq` b `seq` ()) $ do
424 425 426 427 428 429 430 431 432 433 434 435 436
              -- Generate debug information
              let debugFlag = debugLevel dflags > 0
                  !ndbgs | debugFlag = cmmDebugGen modLoc cmms
                         | otherwise = []
                  dbgMap = debugToMap ndbgs

              -- Generate native code
              (ngs',us') <- cmmNativeGens dflags this_mod modLoc ncgImpl h
                                               dbgMap us cmms ngs 0

              -- Link native code information into debug blocks
              -- See Note [What is this unwinding business?] in Debug.
              let !ldbgs = cmmDebugLink (ngs_labels ngs') (ngs_unwinds ngs') ndbgs
437
              unless (null ldbgs) $
Sylvain Henry's avatar
Sylvain Henry committed
438
                dumpIfSet_dyn dflags Opt_D_dump_debug "Debug Infos" FormatText
439
                  (vcat $ map ppr ldbgs)
440 441 442 443

              -- Accumulate debug information for emission in finishNativeGen.
              let ngs'' = ngs' { ngs_debug = ngs_debug ngs' ++ ldbgs, ngs_labels = [] }
              return (us', ngs'')
Ben Gamari's avatar
Ben Gamari committed
444 445

          cmmNativeGenStream dflags this_mod modLoc ncgImpl h us'
446
              cmm_stream' ngs''
447

448 449
    where ncglabel = text "NCG"

450 451
-- | Do native code generation on all these cmms.
--
452
cmmNativeGens :: forall statics instr jumpDest.
453 454
                 (Outputable statics, Outputable instr
                 ,Outputable jumpDest, Instruction instr)
455
              => DynFlags
456
              -> Module -> ModLocation
457 458
              -> NcgImpl statics instr jumpDest
              -> BufHandle
459
              -> LabelMap DebugBlock
dterei's avatar
dterei committed
460
              -> UniqSupply
Simon Peyton Jones's avatar
Simon Peyton Jones committed
461
              -> [RawCmmDecl]
462 463 464
              -> NativeGenAcc statics instr
              -> Int
              -> IO (NativeGenAcc statics instr, UniqSupply)
465

466 467
cmmNativeGens dflags this_mod modLoc ncgImpl h dbgMap = go
  where
468 469
    go :: UniqSupply -> [RawCmmDecl]
       -> NativeGenAcc statics instr -> Int
470
       -> IO (NativeGenAcc statics instr, UniqSupply)
471

472 473 474 475
    go us [] ngs !_ =
        return (ngs, us)

    go us (cmm : cmms) ngs count = do
476
        let fileIds = ngs_dwarfFiles ngs
477
        (us', fileIds', native, imports, colorStats, linearStats, unwinds)
478 479 480 481
          <- {-# SCC "cmmNativeGen" #-}
             cmmNativeGen dflags this_mod modLoc ncgImpl us fileIds dbgMap
                          cmm count

482 483 484
        -- Generate .file directives for every new file that has been
        -- used. Note that it is important that we generate these in
        -- ascending order, as Clang's 3.6 assembler complains.
485 486 487
        let newFileIds = sortBy (comparing snd) $
                         nonDetEltsUFM $ fileIds' `minusUFM` fileIds
            -- See Note [Unique Determinism and code generation]
488
            pprDecl (f,n) = text "\t.file " <> ppr n <+>
489
                            pprFilePathString (unpackFS f)
490

491
        emitNativeCode dflags h $ vcat $
492
          map pprDecl newFileIds ++
493
          map (pprNatCmmDecl ncgImpl) native
494

Gabor Greif's avatar
typo  
Gabor Greif committed
495
        -- force evaluation all this stuff to avoid space leaks
496
        {-# SCC "seqString" #-} evaluate $ seqList (showSDoc dflags $ vcat $ map ppr imports) ()
497

498
        let !labels' = if debugLevel dflags > 0
499 500 501
                       then cmmDebugLabels isMetaInstr native else []
            !natives' = if dopt Opt_D_dump_asm_stats dflags
                        then native : ngs_natives ngs else []
502

503 504 505 506 507 508
            mCon = maybe id (:)
            ngs' = ngs{ ngs_imports     = imports : ngs_imports ngs
                      , ngs_natives     = natives'
                      , ngs_colorStats  = colorStats `mCon` ngs_colorStats ngs
                      , ngs_linearStats = linearStats `mCon` ngs_linearStats ngs
                      , ngs_labels      = ngs_labels ngs ++ labels'
509
                      , ngs_dwarfFiles  = fileIds'
510
                      , ngs_unwinds     = ngs_unwinds ngs `mapUnion` unwinds
511
                      }
512
        go us' cmms ngs' (count + 1)
513

514

515 516 517
emitNativeCode :: DynFlags -> BufHandle -> SDoc -> IO ()
emitNativeCode dflags h sdoc = do

518 519
        {-# SCC "pprNativeCode" #-} bufLeftRenderSDoc dflags h
                                      (mkCodeStyle AsmStyle) sdoc
520 521 522

        -- dump native code
        dumpIfSet_dyn dflags
Sylvain Henry's avatar
Sylvain Henry committed
523
                Opt_D_dump_asm "Asm code" FormatASM
524 525
                sdoc

526
-- | Complete native code generation phase for a single top-level chunk of Cmm.
527 528
--      Dumping the output of each stage along the way.
--      Global conflict graph and NGC stats
529
cmmNativeGen
530 531
    :: forall statics instr jumpDest. (Instruction instr,
        Outputable statics, Outputable instr, Outputable jumpDest)
532
    => DynFlags
533
    -> Module -> ModLocation
534
    -> NcgImpl statics instr jumpDest
535
        -> UniqSupply
536 537
        -> DwarfFiles
        -> LabelMap DebugBlock
538 539 540
        -> RawCmmDecl                                   -- ^ the cmm to generate code for
        -> Int                                          -- ^ sequence number of this top thing
        -> IO   ( UniqSupply
541
                , DwarfFiles
542 543 544
                , [NatCmmDecl statics instr]                -- native code
                , [CLabel]                                  -- things imported by this cmm
                , Maybe [Color.RegAllocStats statics instr] -- stats for the coloring register allocator
545 546 547
                , Maybe [Linear.RegAllocStats]              -- stats for the linear register allocators
                , LabelMap [UnwindPoint]                    -- unwinding information for blocks
                )
548

549
cmmNativeGen dflags this_mod modLoc ncgImpl us fileIds dbgMap cmm count
550
 = do
551 552
        let config   = ncgConfig ncgImpl
        let platform = ncgPlatform config
553

554 555 556 557
        let proc_name = case cmm of
                (CmmProc _ entry_label _ _) -> ppr entry_label
                _                           -> text "DataChunk"

558 559 560
        -- rewrite assignments to global regs
        let fixed_cmm =
                {-# SCC "fixStgRegisters" #-}
561
                fixStgRegisters dflags cmm
562

563 564 565
        -- cmm to cmm optimisations
        let (opt_cmm, imports) =
                {-# SCC "cmmToCmm" #-}
566
                cmmToCmm dflags this_mod fixed_cmm
567

568
        dumpIfSet_dyn dflags
Sylvain Henry's avatar
Sylvain Henry committed
569
                Opt_D_dump_opt_cmm "Optimised Cmm" FormatCMM
570
                (pprCmmGroup [opt_cmm])
571

572 573 574
        let cmmCfg = {-# SCC "getCFG" #-}
                     getCfgProc (cfgWeightInfo dflags) opt_cmm

575
        -- generate native code from cmm
576
        let ((native, lastMinuteImports, fileIds', nativeCfgWeights), usGen) =
577
                {-# SCC "genMachCode" #-}
578 579
                initUs us $ genMachCode dflags this_mod modLoc
                                        (cmmTopCodeGen ncgImpl)
580 581
                                        fileIds dbgMap opt_cmm cmmCfg

582
        dumpIfSet_dyn dflags
Sylvain Henry's avatar
Sylvain Henry committed
583
                Opt_D_dump_asm_native "Native code" FormatASM
584
                (vcat $ map (pprNatCmmDecl ncgImpl) native)
585

586
        maybeDumpCfg dflags (Just nativeCfgWeights) "CFG Weights - Native" proc_name
587

588
        -- tag instructions with register liveness information
589 590
        -- also drops dead code. We don't keep the cfg in sync on
        -- some backends, so don't use it there.
591
        let livenessCfg = if backendMaintainsCfg platform
592 593
                                then Just nativeCfgWeights
                                else Nothing
594 595 596
        let (withLiveness, usLive) =
                {-# SCC "regLiveness" #-}
                initUs usGen
597
                        $ mapM (cmmTopLiveness livenessCfg platform) native
598 599 600

        dumpIfSet_dyn dflags
                Opt_D_dump_asm_liveness "Liveness annotations added"
Sylvain Henry's avatar
Sylvain Henry committed
601
                FormatCMM
Ian Lynagh's avatar
Ian Lynagh committed
602
                (vcat $ map ppr withLiveness)
603 604

        -- allocate registers
605
        (alloced, usAlloc, ppr_raStatsColor, ppr_raStatsLinear, raStats, stack_updt_blks) <-
606 607
         if ( gopt Opt_RegsGraph dflags
           || gopt Opt_RegsIterative dflags )
608 609 610 611 612 613
          then do
                -- the regs usable for allocation
                let (alloc_regs :: UniqFM (UniqSet RealReg))
                        = foldr (\r -> plusUFM_C unionUniqSets
                                        $ unitUFM (targetClassOfRealReg platform r) (unitUniqSet r))
                                emptyUFM
614
                        $ allocatableRegs ncgImpl
615 616

                -- do the graph coloring register allocation
617
                let ((alloced, maybe_more_stack, regAllocStats), usAlloc)
618
                        = {-# SCC "RegAlloc-color" #-}
619 620
                          initUs usLive
                          $ Color.regAlloc
621
                                config
622
                                alloc_regs
623
                                (mkUniqSet [0 .. maxSpillSlots ncgImpl])
624
                                (maxSpillSlots ncgImpl)
625
                                withLiveness
626 627 628 629 630 631 632 633 634 635 636
                                livenessCfg

                let ((alloced', stack_updt_blks), usAlloc')
                        = initUs usAlloc $
                                case maybe_more_stack of
                                Nothing     -> return (alloced, [])
                                Just amount -> do
                                    (alloced',stack_updt_blks) <- unzip <$>
                                                (mapM ((ncgAllocMoreStack ncgImpl) amount) alloced)
                                    return (alloced', concat stack_updt_blks )

637 638 639 640

                -- dump out what happened during register allocation
                dumpIfSet_dyn dflags
                        Opt_D_dump_asm_regalloc "Registers allocated"
Sylvain Henry's avatar
Sylvain Henry committed
641
                        FormatCMM
642
                        (vcat $ map (pprNatCmmDecl ncgImpl) alloced)
643 644 645

                dumpIfSet_dyn dflags
                        Opt_D_dump_asm_regalloc_stages "Build/spill stages"
Sylvain Henry's avatar
Sylvain Henry committed
646
                        FormatText
647 648 649
                        (vcat   $ map (\(stage, stats)
                                        -> text "# --------------------------"
                                        $$ text "#  cmm " <> int count <> text " Stage " <> int stage
Ian Lynagh's avatar
Ian Lynagh committed
650
                                        $$ ppr stats)
651 652 653
                                $ zip [0..] regAllocStats)

                let mPprStats =
654
                        if dopt Opt_D_dump_asm_stats dflags
655 656 657 658 659
                         then Just regAllocStats else Nothing

                -- force evaluation of the Maybe to avoid space leak
                mPprStats `seq` return ()

660
                return  ( alloced', usAlloc'
661
                        , mPprStats
662
                        , Nothing
663
                        , [], stack_updt_blks)
664 665 666

          else do
                -- do linear register allocation
667 668
                let reg_alloc proc = do
                       (alloced, maybe_more_stack, ra_stats) <-
669
                               Linear.regAlloc config proc
670
                       case maybe_more_stack of
671
                         Nothing -> return ( alloced, ra_stats, [] )
672
                         Just amount -> do
673 674 675
                           (alloced',stack_updt_blks) <-
                               ncgAllocMoreStack ncgImpl amount alloced
                           return (alloced', ra_stats, stack_updt_blks )
676

677
                let ((alloced, regAllocStats, stack_updt_blks), usAlloc)
678
                        = {-# SCC "RegAlloc-linear" #-}
679
                          initUs usLive
680
                          $ liftM unzip3
681
                          $ mapM reg_alloc withLiveness
682 683 684

                dumpIfSet_dyn dflags
                        Opt_D_dump_asm_regalloc "Registers allocated"
Sylvain Henry's avatar
Sylvain Henry committed
685
                        FormatCMM
686
                        (vcat $ map (pprNatCmmDecl ncgImpl) alloced)
687 688

                let mPprStats =
689
                        if dopt Opt_D_dump_asm_stats dflags
690 691 692 693 694 695 696
                         then Just (catMaybes regAllocStats) else Nothing

                -- force evaluation of the Maybe to avoid space leak
                mPprStats `seq` return ()

                return  ( alloced, usAlloc
                        , Nothing
697 698 699 700 701 702 703 704
                        , mPprStats, (catMaybes regAllocStats)
                        , concat stack_updt_blks )

        -- Fixupblocks the register allocator inserted (from, regMoves, to)
        let cfgRegAllocUpdates :: [(BlockId,BlockId,BlockId)]
            cfgRegAllocUpdates = (concatMap Linear.ra_fixupList raStats)

        let cfgWithFixupBlks =
705
                (\cfg -> addNodesBetween dflags cfg cfgRegAllocUpdates) <$> livenessCfg
706 707 708

        -- Insert stack update blocks
        let postRegCFG =
709
                pure (foldl' (\m (from,to) -> addImmediateSuccessor dflags from to m ))
710 711
                     <*> cfgWithFixupBlks
                     <*> pure stack_updt_blks
712

713
        ---- generate jump tables
714 715
        let tabled      =
                {-# SCC "generateJumpTables" #-}
716
                generateJumpTables ncgImpl alloced
717

718
        when (not $ null nativeCfgWeights) $ dumpIfSet_dyn dflags
719
                Opt_D_dump_cfg_weights "CFG Update information"
Sylvain Henry's avatar
Sylvain Henry committed
720
                FormatText
721 722 723
                ( text "stack:" <+> ppr stack_updt_blks $$
                  text "linearAlloc:" <+> ppr cfgRegAllocUpdates )

724
        ---- shortcut branches
725
        let (shorted, postShortCFG)     =
726
                {-# SCC "shortcutBranches" #-}
727 728
                shortcutBranches dflags ncgImpl tabled postRegCFG

729 730 731
        let optimizedCFG :: Maybe CFG
            optimizedCFG =
                optimizeCFG (cfgWeightInfo dflags) cmm <$!> postShortCFG
732

733
        maybeDumpCfg dflags optimizedCFG "CFG Weights - Final" proc_name
734 735 736 737 738

        --TODO: Partially check validity of the cfg.
        let getBlks (CmmProc _info _lbl _live (ListGraph blocks)) = blocks
            getBlks _ = []

739
        when ( backendMaintainsCfg platform &&
740 741 742
                (gopt Opt_DoAsmLinting dflags || debugIsOn )) $ do
                let blocks = concatMap getBlks shorted
                let labels = setFromList $ fmap blockId blocks :: LabelSet
743 744
                let cfg = fromJust optimizedCFG
                return $! seq (sanityCheckCfg cfg labels $
745
                                text "cfg not in lockstep") ()
746

747
        ---- sequence blocks
748 749 750
        let sequenced :: [NatCmmDecl statics instr]
            sequenced =
                checkLayout shorted $
751
                {-# SCC "sequenceBlocks" #-}
752 753 754 755 756 757 758 759 760 761
                map (BlockLayout.sequenceTop
                        dflags
                        ncgImpl optimizedCFG)
                    shorted

        let branchOpt :: [NatCmmDecl statics instr]
            branchOpt =
                {-# SCC "invertCondBranches" #-}
                map invert sequenced
              where
762
                invertConds :: LabelMap RawCmmStatics -> [NatBasicBlock instr]
763 764
                            -> [NatBasicBlock instr]
                invertConds = invertCondBranches ncgImpl optimizedCFG
765 766 767
                invert top@CmmData {} = top
                invert (CmmProc info lbl live (ListGraph blocks)) =
                    CmmProc info lbl live (ListGraph $ invertConds info blocks)
768

769
        ---- expansion of SPARC synthetic instrs
770 771
        let expanded =
                {-# SCC "sparc_expand" #-}
772 773
                ncgExpandTop ncgImpl branchOpt
                --ncgExpandTop ncgImpl sequenced
774

775 776
        dumpIfSet_dyn dflags
                Opt_D_dump_asm_expanded "Synthetic instructions expanded"
Sylvain Henry's avatar
Sylvain Henry committed
777
                FormatCMM
778
                (vcat $ map (pprNatCmmDecl ncgImpl) expanded)
779

780 781 782 783 784 785 786 787 788
        -- generate unwinding information from cmm
        let unwinds :: BlockMap [UnwindPoint]
            unwinds =
                {-# SCC "unwindingInfo" #-}
                foldl' addUnwind mapEmpty expanded
              where
                addUnwind acc proc =
                    acc `mapUnion` computeUnwinding dflags ncgImpl proc

789
        return  ( usAlloc
790
                , fileIds'
791 792 793
                , expanded
                , lastMinuteImports ++ imports
                , ppr_raStatsColor
794 795
                , ppr_raStatsLinear
                , unwinds )
796

797 798 799 800 801 802 803
maybeDumpCfg :: DynFlags -> Maybe CFG -> String -> SDoc -> IO ()
maybeDumpCfg _dflags Nothing _ _ = return ()
maybeDumpCfg dflags (Just cfg) msg proc_name
        | null cfg = return ()
        | otherwise
        = dumpIfSet_dyn
                dflags Opt_D_dump_cfg_weights msg
Sylvain Henry's avatar
Sylvain Henry committed
804
                FormatText
805 806
                (proc_name <> char ':' $$ pprEdgeWeights cfg)

807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824
-- | Make sure all blocks we want the layout algorithm to place have been placed.
checkLayout :: [NatCmmDecl statics instr] -> [NatCmmDecl statics instr]
            -> [NatCmmDecl statics instr]
checkLayout procsUnsequenced procsSequenced =
        ASSERT2(setNull diff,
                ppr "Block sequencing dropped blocks:" <> ppr diff)
        procsSequenced
  where
        blocks1 = foldl' (setUnion) setEmpty $
                        map getBlockIds procsUnsequenced :: LabelSet
        blocks2 = foldl' (setUnion) setEmpty $
                        map getBlockIds procsSequenced
        diff = setDifference blocks1 blocks2

        getBlockIds (CmmData _ _) = setEmpty
        getBlockIds (CmmProc _ _ _ (ListGraph blocks)) =
                setFromList $ map blockId blocks

825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841
-- | Compute unwinding tables for the blocks of a procedure
computeUnwinding :: Instruction instr
                 => DynFlags -> NcgImpl statics instr jumpDest
                 -> NatCmmDecl statics instr
                    -- ^ the native code generated for the procedure
                 -> LabelMap [UnwindPoint]
                    -- ^ unwinding tables for all points of all blocks of the
                    -- procedure
computeUnwinding dflags _ _
  | debugLevel dflags == 0         = mapEmpty
computeUnwinding _ _ (CmmData _ _) = mapEmpty
computeUnwinding _ ncgImpl (CmmProc _ _ _ (ListGraph blks)) =
    -- In general we would need to push unwinding information down the
    -- block-level call-graph to ensure that we fully account for all
    -- relevant register writes within a procedure.
    --
    -- However, the only unwinding information that we care about in GHC is for
842
    -- Sp. The fact that GHC.Cmm.LayoutStack already ensures that we have unwind
843 844 845 846
    -- information at the beginning of every block means that there is no need
    -- to perform this sort of push-down.
    mapFromList [ (blk_lbl, extractUnwindPoints ncgImpl instrs)
                | BasicBlock blk_lbl instrs <- blks ]
847

848
-- | Build a doc for all the imports.
849
--
850
makeImportsDoc :: DynFlags -> [CLabel] -> SDoc
851
makeImportsDoc dflags imports
852
 = dyld_stubs imports
853
            $$
854 855 856
            -- On recent versions of Darwin, the linker supports
            -- dead-stripping of code and data on a per-symbol basis.
            -- There's a hack to make this work in PprMach.pprNatCmmDecl.
ian@well-typed.com's avatar
ian@well-typed.com committed
857
            (if platformHasSubsectionsViaSymbols platform
858
             then text ".subsections_via_symbols"
859
             else Outputable.empty)
860
            $$
861 862 863 864 865 866
                -- On recent GNU ELF systems one can mark an object file
                -- as not requiring an executable stack. If all objects
                -- linked into a program have this note then the program
                -- will not use an executable stack, which is good for
                -- security. GHC generated code does not need an executable
                -- stack so add the note in:
ian@well-typed.com's avatar
ian@well-typed.com committed
867
            (if platformHasGnuNonexecStack platform
868
             then text ".section .note.GNU-stack,\"\"," <> sectionType platform "progbits"
869
             else Outputable.empty)
870
            $$
Gabor Greif's avatar
Gabor Greif committed
871
                -- And just because every other compiler does, let's stick in
872
                -- an identifier directive: .ident "GHC x.y.z"
ian@well-typed.com's avatar
ian@well-typed.com committed
873
            (if platformHasIdentDirective platform
874 875
             then let compilerIdent = text "GHC" <+> text cProjectVersion
                   in text ".ident" <+> doubleQuotes compilerIdent
876
             else Outputable.empty)
877

878
 where
879 880
        config   = initConfig dflags
        platform = ncgPlatform config
ian@well-typed.com's avatar
ian@well-typed.com committed
881

882 883
        -- Generate "symbol stubs" for all external symbols that might
        -- come from a dynamic library.
884 885
        dyld_stubs :: [CLabel] -> SDoc
{-      dyld_stubs imps = vcat $ map pprDyldSymbolStub $
886 887 888 889
                                    map head $ group $ sort imps-}
        -- (Hack) sometimes two Labels pretty-print the same, but have
        -- different uniques; so we compare their text versions...
        dyld_stubs imps
890
                | needImportedSymbols config
891
                = vcat $
892 893
                        (pprGotDeclaration config :) $
                        map ( pprImportedSymbol dflags config . fst . head) $
894 895 896 897 898
                        groupBy (\(_,a) (_,b) -> a == b) $
                        sortBy (\(_,a) (_,b) -> compare a b) $
                        map doPpr $
                        imps
                | otherwise
899
                = Outputable.empty
900

901 902 903
        doPpr lbl = (lbl, renderWithStyle
                              (initSDocContext dflags astyle)
                              (pprCLabel dflags lbl))
904
        astyle = mkCodeStyle AsmStyle
905

906 907 908 909 910 911
-- -----------------------------------------------------------------------------
-- Generate jump tables

-- Analyzes all native code and generates data sections for all jump
-- table instructions.
generateJumpTables
912 913 914
        :: NcgImpl statics instr jumpDest
        -> [NatCmmDecl statics instr] -> [NatCmmDecl statics instr]
generateJumpTables ncgImpl xs = concatMap f xs
915
    where f p@(CmmProc _ _ _ (ListGraph xs)) = p : concatMap g xs
916
          f p = [p]
917
          g (BasicBlock _ xs) = catMaybes (map (generateJumpTableForInstr ncgImpl) xs)
918

919 920 921
-- -----------------------------------------------------------------------------
-- Shortcut branches

922
shortcutBranches
923
        :: forall statics instr jumpDest. (Outputable jumpDest) => DynFlags
924
        -> NcgImpl statics instr jumpDest
925
        -> [NatCmmDecl statics instr]
926 927
        -> Maybe CFG
        -> ([NatCmmDecl statics instr],Maybe CFG)
928

929
shortcutBranches dflags ncgImpl tops weights
930
  | gopt Opt_AsmShortcutting dflags
931
  = ( map (apply_mapping ncgImpl mapping) tops'
932
    , shortcutWeightMap mappingBid <$!> weights )
933
  | otherwise
934
  = (tops, weights)
935
  where
936
    (tops', mappings) = mapAndUnzip (build_mapping ncgImpl) tops
937 938
    mapping = mapUnions mappings :: LabelMap jumpDest
    mappingBid = fmap (getJumpDestBlockId ncgImpl) mapping
939