AsmCodeGen.hs 50 KB
Newer Older
1 2 3
-- -----------------------------------------------------------------------------
--
-- (c) The University of Glasgow 1993-2004
4
--
5 6 7
-- This is the top-level module in the native code generator.
--
-- -----------------------------------------------------------------------------
8

9 10
{-# LANGUAGE BangPatterns, CPP, GADTs, ScopedTypeVariables, PatternSynonyms,
    DeriveFunctor #-}
11 12 13 14

#if !defined(GHC_LOADED_INTO_GHCI)
{-# LANGUAGE UnboxedTuples #-}
#endif
15

16 17
{-# OPTIONS_GHC -Wno-incomplete-record-updates #-}

18 19 20 21 22 23 24 25 26 27 28 29
module AsmCodeGen (
                    -- * Module entry point
                    nativeCodeGen

                    -- * Test-only exports: see trac #12744
                    -- used by testGraphNoSpills, which needs to access
                    -- the register allocator intermediate data structures
                    -- cmmNativeGen emits
                  , cmmNativeGen
                  , NcgImpl(..)
                  , x86NcgImpl
                  ) where
30

31
#include "HsVersions.h"
32

33 34
import GhcPrelude

35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51
import qualified X86.CodeGen
import qualified X86.Regs
import qualified X86.Instr
import qualified X86.Ppr

import qualified SPARC.CodeGen
import qualified SPARC.Regs
import qualified SPARC.Instr
import qualified SPARC.Ppr
import qualified SPARC.ShortcutJump
import qualified SPARC.CodeGen.Expand

import qualified PPC.CodeGen
import qualified PPC.Regs
import qualified PPC.RegInfo
import qualified PPC.Instr
import qualified PPC.Ppr
52 53

import RegAlloc.Liveness
54
import qualified RegAlloc.Linear.Main           as Linear
55

56 57 58 59
import qualified GraphColor                     as Color
import qualified RegAlloc.Graph.Main            as Color
import qualified RegAlloc.Graph.Stats           as Color
import qualified RegAlloc.Graph.TrivColorable   as Color
60

61
import AsmUtils
62
import TargetReg
John Ericson's avatar
John Ericson committed
63
import GHC.Platform
64
import BlockLayout
65
import Config
66 67 68 69
import Instruction
import PIC
import Reg
import NCGMonad
70
import CFG
Peter Wortmann's avatar
Peter Wortmann committed
71
import Dwarf
72
import GHC.Cmm.DebugBlock
73

74
import GHC.Cmm.BlockId
75
import GHC.StgToCmm.CgUtils ( fixStgRegisters )
76 77 78 79 80 81 82 83
import GHC.Cmm
import GHC.Cmm.Utils
import GHC.Cmm.Dataflow.Collections
import GHC.Cmm.Dataflow.Label
import GHC.Cmm.Dataflow.Block
import GHC.Cmm.Opt           ( cmmMachOpFold )
import GHC.Cmm.Ppr
import GHC.Cmm.CLabel
84

85
import UniqFM
86
import UniqSupply
87 88
import DynFlags
import Util
89

90
import BasicTypes       ( Alignment )
91
import qualified Pretty
92
import BufWrite
93
import Outputable
94
import FastString
95
import UniqSet
96
import ErrUtils
97
import Module
98 99
import Stream (Stream)
import qualified Stream
100

101 102
-- DEBUGGING ONLY
--import OrdList
103

104
import Data.List
105
import Data.Maybe
106
import Data.Ord         ( comparing )
107
import Control.Exception
108
import Control.Monad
109
import System.IO
110

111 112 113
{-
The native-code generator has machine-independent and
machine-dependent modules.
114

115 116 117 118
This module ("AsmCodeGen") is the top-level machine-independent
module.  Before entering machine-dependent land, we do some
machine-independent optimisations (defined below) on the
'CmmStmts's.
119

120 121 122 123 124 125 126 127 128
We convert to the machine-specific 'Instr' datatype with
'cmmCodeGen', assuming an infinite supply of registers.  We then use
a machine-independent register allocator ('regAlloc') to rejoin
reality.  Obviously, 'regAlloc' has machine-specific helper
functions (see about "RegAllocInfo" below).

Finally, we order the basic blocks of the function so as to minimise
the number of jumps between blocks, by utilising fallthrough wherever
possible.
129 130

The machine-dependent bits break down as follows:
131 132

  * ["MachRegs"]  Everything about the target platform's machine
133 134 135
    registers (and immediate operands, and addresses, which tend to
    intermingle/interact with registers).

136
  * ["MachInstrs"]  Includes the 'Instr' datatype (possibly should
137
    have a module of its own), plus a miscellany of other things
138
    (e.g., 'targetDoubleSize', 'smStablePtrTable', ...)
139

140
  * ["MachCodeGen"]  is where 'Cmm' stuff turns into
141
    machine instructions.
142

143
  * ["PprMach"] 'pprInstr' turns an 'Instr' into text (well, really
144
    a 'SDoc').
145

146 147
  * ["RegAllocInfo"] In the register allocator, we manipulate
    'MRegsState's, which are 'BitSet's, one bit per machine register.
148 149
    When we want to say something about a specific machine register
    (e.g., ``it gets clobbered by this instruction''), we set/unset
150
    its bit.  Obviously, we do this 'BitSet' thing for efficiency
151 152
    reasons.

153
    The 'RegAllocInfo' module collects together the machine-specific
154 155
    info needed to do register allocation.

156 157
   * ["RegisterAlloc"] The (machine-independent) register allocator.
-}
158

159
--------------------
160 161 162
nativeCodeGen :: forall a . DynFlags -> Module -> ModLocation -> Handle -> UniqSupply
              -> Stream IO RawCmmGroup a
              -> IO a
163
nativeCodeGen dflags this_mod modLoc h us cmms
164
 = let platform = targetPlatform dflags
165 166
       nCG' :: ( Outputable statics, Outputable instr
               , Outputable jumpDest, Instruction instr)
167
            => NcgImpl statics instr jumpDest -> IO a
168
       nCG' ncgImpl = nativeCodeGen' dflags this_mod modLoc ncgImpl h us cmms
169
   in case platformArch platform of
170 171 172
      ArchX86       -> nCG' (x86NcgImpl    dflags)
      ArchX86_64    -> nCG' (x86_64NcgImpl dflags)
      ArchPPC       -> nCG' (ppcNcgImpl    dflags)
173
      ArchS390X     -> panic "nativeCodeGen: No NCG for S390X"
174
      ArchSPARC     -> nCG' (sparcNcgImpl  dflags)
175
      ArchSPARC64   -> panic "nativeCodeGen: No NCG for SPARC64"
176 177 178 179 180 181 182 183
      ArchARM {}    -> panic "nativeCodeGen: No NCG for ARM"
      ArchARM64     -> panic "nativeCodeGen: No NCG for ARM64"
      ArchPPC_64 _  -> nCG' (ppcNcgImpl    dflags)
      ArchAlpha     -> panic "nativeCodeGen: No NCG for Alpha"
      ArchMipseb    -> panic "nativeCodeGen: No NCG for mipseb"
      ArchMipsel    -> panic "nativeCodeGen: No NCG for mipsel"
      ArchUnknown   -> panic "nativeCodeGen: No NCG for unknown arch"
      ArchJavaScript-> panic "nativeCodeGen: No NCG for JavaScript"
184

185 186
x86NcgImpl :: DynFlags -> NcgImpl (Alignment, CmmStatics)
                                  X86.Instr.Instr X86.Instr.JumpDest
187
x86NcgImpl dflags
188
 = (x86_64NcgImpl dflags)
189

190 191
x86_64NcgImpl :: DynFlags -> NcgImpl (Alignment, CmmStatics)
                                  X86.Instr.Instr X86.Instr.JumpDest
192 193 194 195 196 197 198 199 200 201 202 203 204
x86_64NcgImpl dflags
 = NcgImpl {
        cmmTopCodeGen             = X86.CodeGen.cmmTopCodeGen
       ,generateJumpTableForInstr = X86.CodeGen.generateJumpTableForInstr dflags
       ,getJumpDestBlockId        = X86.Instr.getJumpDestBlockId
       ,canShortcut               = X86.Instr.canShortcut
       ,shortcutStatics           = X86.Instr.shortcutStatics
       ,shortcutJump              = X86.Instr.shortcutJump
       ,pprNatCmmDecl             = X86.Ppr.pprNatCmmDecl
       ,maxSpillSlots             = X86.Instr.maxSpillSlots dflags
       ,allocatableRegs           = X86.Regs.allocatableRegs platform
       ,ncgAllocMoreStack         = X86.Instr.allocMoreStack platform
       ,ncgExpandTop              = id
205
       ,ncgMakeFarBranches        = const id
206
       ,extractUnwindPoints       = X86.CodeGen.extractUnwindPoints
207
       ,invertCondBranches        = X86.CodeGen.invertCondBranches
208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224
   }
    where platform = targetPlatform dflags

ppcNcgImpl :: DynFlags -> NcgImpl CmmStatics PPC.Instr.Instr PPC.RegInfo.JumpDest
ppcNcgImpl dflags
 = NcgImpl {
        cmmTopCodeGen             = PPC.CodeGen.cmmTopCodeGen
       ,generateJumpTableForInstr = PPC.CodeGen.generateJumpTableForInstr dflags
       ,getJumpDestBlockId        = PPC.RegInfo.getJumpDestBlockId
       ,canShortcut               = PPC.RegInfo.canShortcut
       ,shortcutStatics           = PPC.RegInfo.shortcutStatics
       ,shortcutJump              = PPC.RegInfo.shortcutJump
       ,pprNatCmmDecl             = PPC.Ppr.pprNatCmmDecl
       ,maxSpillSlots             = PPC.Instr.maxSpillSlots dflags
       ,allocatableRegs           = PPC.Regs.allocatableRegs platform
       ,ncgAllocMoreStack         = PPC.Instr.allocMoreStack platform
       ,ncgExpandTop              = id
225
       ,ncgMakeFarBranches        = PPC.Instr.makeFarBranches
226
       ,extractUnwindPoints       = const []
227
       ,invertCondBranches        = \_ _ -> id
228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244
   }
    where platform = targetPlatform dflags

sparcNcgImpl :: DynFlags -> NcgImpl CmmStatics SPARC.Instr.Instr SPARC.ShortcutJump.JumpDest
sparcNcgImpl dflags
 = NcgImpl {
        cmmTopCodeGen             = SPARC.CodeGen.cmmTopCodeGen
       ,generateJumpTableForInstr = SPARC.CodeGen.generateJumpTableForInstr dflags
       ,getJumpDestBlockId        = SPARC.ShortcutJump.getJumpDestBlockId
       ,canShortcut               = SPARC.ShortcutJump.canShortcut
       ,shortcutStatics           = SPARC.ShortcutJump.shortcutStatics
       ,shortcutJump              = SPARC.ShortcutJump.shortcutJump
       ,pprNatCmmDecl             = SPARC.Ppr.pprNatCmmDecl
       ,maxSpillSlots             = SPARC.Instr.maxSpillSlots dflags
       ,allocatableRegs           = SPARC.Regs.allocatableRegs
       ,ncgAllocMoreStack         = noAllocMoreStack
       ,ncgExpandTop              = map SPARC.CodeGen.Expand.expandTop
245
       ,ncgMakeFarBranches        = const id
246
       ,extractUnwindPoints       = const []
247
       ,invertCondBranches        = \_ _ -> id
248
   }
249 250 251 252 253 254 255

--
-- Allocating more stack space for spilling is currently only
-- supported for the linear register allocator on x86/x86_64, the rest
-- default to the panic below.  To support allocating extra stack on
-- more platforms provide a definition of ncgAllocMoreStack.
--
256 257
noAllocMoreStack :: Int -> NatCmmDecl statics instr
                 -> UniqSM (NatCmmDecl statics instr, [(BlockId,BlockId)])
258 259 260 261 262 263 264 265 266
noAllocMoreStack amount _
  = panic $   "Register allocator: out of stack slots (need " ++ show amount ++ ")\n"
        ++  "   If you are trying to compile SHA1.hs from the crypto library then this\n"
        ++  "   is a known limitation in the linear allocator.\n"
        ++  "\n"
        ++  "   Try enabling the graph colouring allocator with -fregs-graph instead."
        ++  "   You can still file a bug report if you like.\n"


267 268 269 270 271 272 273 274 275 276 277 278 279
-- | Data accumulated during code generation. Mostly about statistics,
-- but also collects debug data for DWARF generation.
data NativeGenAcc statics instr
  = NGS { ngs_imports     :: ![[CLabel]]
        , ngs_natives     :: ![[NatCmmDecl statics instr]]
             -- ^ Native code generated, for statistics. This might
             -- hold a lot of data, so it is important to clear this
             -- field as early as possible if it isn't actually
             -- required.
        , ngs_colorStats  :: ![[Color.RegAllocStats statics instr]]
        , ngs_linearStats :: ![[Linear.RegAllocStats]]
        , ngs_labels      :: ![Label]
        , ngs_debug       :: ![DebugBlock]
280
        , ngs_dwarfFiles  :: !DwarfFiles
281 282 283
        , ngs_unwinds     :: !(LabelMap [UnwindPoint])
             -- ^ see Note [Unwinding information in the NCG]
             -- and Note [What is this unwinding business?] in Debug.
284
        }
285

286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310
{-
Note [Unwinding information in the NCG]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Unwind information is a type of metadata which allows a debugging tool
to reconstruct the values of machine registers at the time a procedure was
entered. For the most part, the production of unwind information is handled by
the Cmm stage, where it is represented by CmmUnwind nodes.

Unfortunately, the Cmm stage doesn't know everything necessary to produce
accurate unwinding information. For instance, the x86-64 calling convention
requires that the stack pointer be aligned to 16 bytes, which in turn means that
GHC must sometimes add padding to $sp prior to performing a foreign call. When
this happens unwind information must be updated accordingly.
For this reason, we make the NCG backends responsible for producing
unwinding tables (with the extractUnwindPoints function in NcgImpl).

We accumulate the produced unwind tables over CmmGroups in the ngs_unwinds
field of NativeGenAcc. This is a label map which contains an entry for each
procedure, containing a list of unwinding points (e.g. a label and an associated
unwinding table).

See also Note [What is this unwinding business?] in Debug.
-}

311 312
nativeCodeGen' :: (Outputable statics, Outputable instr,Outputable jumpDest,
                   Instruction instr)
313
               => DynFlags
314
               -> Module -> ModLocation
315
               -> NcgImpl statics instr jumpDest
316
               -> Handle
317
               -> UniqSupply
318 319
               -> Stream IO RawCmmGroup a
               -> IO a
320
nativeCodeGen' dflags this_mod modLoc ncgImpl h us cmms
321
 = do
322 323 324
        -- BufHandle is a performance hack.  We could hide it inside
        -- Pretty if it weren't for the fact that we do lots of little
        -- printDocs here (in order to do codegen in constant space).
325
        bufh <- newBufHandle h
326
        let ngs0 = NGS [] [] [] [] [] [] emptyUFM mapEmpty
327
        (ngs, us', a) <- cmmNativeGenStream dflags this_mod modLoc ncgImpl bufh us
328
                                         cmms ngs0
329 330
        _ <- finishNativeGen dflags modLoc bufh us' ngs
        return a
331 332

finishNativeGen :: Instruction instr
333
                => DynFlags
Peter Wortmann's avatar
Peter Wortmann committed
334
                -> ModLocation
335
                -> BufHandle
Peter Wortmann's avatar
Peter Wortmann committed
336
                -> UniqSupply
337
                -> NativeGenAcc statics instr
Peter Wortmann's avatar
Peter Wortmann committed
338 339
                -> IO UniqSupply
finishNativeGen dflags modLoc bufh@(BufHandle _ _ h) us ngs
340
 = withTimingSilent dflags (text "NCG") (`seq` ()) $ do
Peter Wortmann's avatar
Peter Wortmann committed
341
        -- Write debug data and finish
Ben Gamari's avatar
Ben Gamari committed
342
        let emitDw = debugLevel dflags > 0
Peter Wortmann's avatar
Peter Wortmann committed
343 344 345 346
        us' <- if not emitDw then return us else do
          (dwarf, us') <- dwarfGen dflags modLoc us (ngs_debug ngs)
          emitNativeCode dflags bufh dwarf
          return us'
347
        bFlush bufh
348

349
        -- dump global NCG stats for graph coloring allocator
350
        let stats = concat (ngs_colorStats ngs)
351
        unless (null stats) $ do
352

Peter Wortmann's avatar
Peter Wortmann committed
353 354
          -- build the global register conflict graph
          let graphGlobal
355
                  = foldl' Color.union Color.initGraph
Peter Wortmann's avatar
Peter Wortmann committed
356 357
                  $ [ Color.raGraph stat
                          | stat@Color.RegAllocStatsStart{} <- stats]
358

Peter Wortmann's avatar
Peter Wortmann committed
359
          dump_stats (Color.pprStats stats graphGlobal)
360

Peter Wortmann's avatar
Peter Wortmann committed
361 362 363
          let platform = targetPlatform dflags
          dumpIfSet_dyn dflags
                  Opt_D_dump_asm_conflicts "Register conflict graph"
Sylvain Henry's avatar
Sylvain Henry committed
364
                  FormatText
Peter Wortmann's avatar
Peter Wortmann committed
365 366 367 368 369 370
                  $ Color.dotGraph
                          (targetRegDotColor platform)
                          (Color.trivColorable platform
                                  (targetVirtualRegSqueeze platform)
                                  (targetRealRegSqueeze platform))
                  $ graphGlobal
371 372 373


        -- dump global NCG stats for linear allocator
374
        let linearStats = concat (ngs_linearStats ngs)
375
        unless (null linearStats) $
376
          dump_stats (Linear.pprStats (concat (ngs_natives ngs)) linearStats)
377 378

        -- write out the imports
379
        printSDocLn Pretty.LeftMode dflags h (mkCodeStyle AsmStyle)
380
                $ makeImportsDoc dflags (concat (ngs_imports ngs))
Peter Wortmann's avatar
Peter Wortmann committed
381
        return us'
382
  where
Sylvain Henry's avatar
Sylvain Henry committed
383 384 385
    dump_stats = dumpAction dflags (mkDumpStyle dflags alwaysQualify)
                   (dumpOptionsFromFlag Opt_D_dump_asm_stats) "NCG stats"
                   FormatText
386

387 388
cmmNativeGenStream :: (Outputable statics, Outputable instr
                      ,Outputable jumpDest, Instruction instr)
389
              => DynFlags
390
              -> Module -> ModLocation
391 392
              -> NcgImpl statics instr jumpDest
              -> BufHandle
393
              -> UniqSupply
394
              -> Stream IO RawCmmGroup a
395
              -> NativeGenAcc statics instr
396
              -> IO (NativeGenAcc statics instr, UniqSupply, a)
397

398
cmmNativeGenStream dflags this_mod modLoc ncgImpl h us cmm_stream ngs
399 400
 = do r <- Stream.runStream cmm_stream
      case r of
401
        Left a ->
402 403 404 405 406
          return (ngs { ngs_imports = reverse $ ngs_imports ngs
                      , ngs_natives = reverse $ ngs_natives ngs
                      , ngs_colorStats = reverse $ ngs_colorStats ngs
                      , ngs_linearStats = reverse $ ngs_linearStats ngs
                      },
407 408
                  us,
                  a)
409
        Right (cmms, cmm_stream') -> do
410
          (us', ngs'') <-
411
            withTimingSilent
412
                dflags
413
                ncglabel (\(a, b) -> a `seq` b `seq` ()) $ do
414 415 416 417 418 419 420 421 422 423 424 425 426
              -- Generate debug information
              let debugFlag = debugLevel dflags > 0
                  !ndbgs | debugFlag = cmmDebugGen modLoc cmms
                         | otherwise = []
                  dbgMap = debugToMap ndbgs

              -- Generate native code
              (ngs',us') <- cmmNativeGens dflags this_mod modLoc ncgImpl h
                                               dbgMap us cmms ngs 0

              -- Link native code information into debug blocks
              -- See Note [What is this unwinding business?] in Debug.
              let !ldbgs = cmmDebugLink (ngs_labels ngs') (ngs_unwinds ngs') ndbgs
427
              unless (null ldbgs) $
Sylvain Henry's avatar
Sylvain Henry committed
428
                dumpIfSet_dyn dflags Opt_D_dump_debug "Debug Infos" FormatText
429
                  (vcat $ map ppr ldbgs)
430 431 432 433

              -- Accumulate debug information for emission in finishNativeGen.
              let ngs'' = ngs' { ngs_debug = ngs_debug ngs' ++ ldbgs, ngs_labels = [] }
              return (us', ngs'')
Ben Gamari's avatar
Ben Gamari committed
434 435

          cmmNativeGenStream dflags this_mod modLoc ncgImpl h us'
436
              cmm_stream' ngs''
437

438 439
    where ncglabel = text "NCG"

440 441
-- | Do native code generation on all these cmms.
--
442
cmmNativeGens :: forall statics instr jumpDest.
443 444
                 (Outputable statics, Outputable instr
                 ,Outputable jumpDest, Instruction instr)
445
              => DynFlags
446
              -> Module -> ModLocation
447 448
              -> NcgImpl statics instr jumpDest
              -> BufHandle
449
              -> LabelMap DebugBlock
dterei's avatar
dterei committed
450
              -> UniqSupply
Simon Peyton Jones's avatar
Simon Peyton Jones committed
451
              -> [RawCmmDecl]
452 453 454
              -> NativeGenAcc statics instr
              -> Int
              -> IO (NativeGenAcc statics instr, UniqSupply)
455

456 457
cmmNativeGens dflags this_mod modLoc ncgImpl h dbgMap = go
  where
458 459
    go :: UniqSupply -> [RawCmmDecl]
       -> NativeGenAcc statics instr -> Int
460
       -> IO (NativeGenAcc statics instr, UniqSupply)
461

462 463 464 465
    go us [] ngs !_ =
        return (ngs, us)

    go us (cmm : cmms) ngs count = do
466
        let fileIds = ngs_dwarfFiles ngs
467
        (us', fileIds', native, imports, colorStats, linearStats, unwinds)
468 469 470 471
          <- {-# SCC "cmmNativeGen" #-}
             cmmNativeGen dflags this_mod modLoc ncgImpl us fileIds dbgMap
                          cmm count

472 473 474
        -- Generate .file directives for every new file that has been
        -- used. Note that it is important that we generate these in
        -- ascending order, as Clang's 3.6 assembler complains.
475 476 477
        let newFileIds = sortBy (comparing snd) $
                         nonDetEltsUFM $ fileIds' `minusUFM` fileIds
            -- See Note [Unique Determinism and code generation]
478
            pprDecl (f,n) = text "\t.file " <> ppr n <+>
479
                            pprFilePathString (unpackFS f)
480

481
        emitNativeCode dflags h $ vcat $
482
          map pprDecl newFileIds ++
483
          map (pprNatCmmDecl ncgImpl) native
484

Gabor Greif's avatar
typo  
Gabor Greif committed
485
        -- force evaluation all this stuff to avoid space leaks
486
        {-# SCC "seqString" #-} evaluate $ seqList (showSDoc dflags $ vcat $ map ppr imports) ()
487

488
        let !labels' = if debugLevel dflags > 0
489 490 491
                       then cmmDebugLabels isMetaInstr native else []
            !natives' = if dopt Opt_D_dump_asm_stats dflags
                        then native : ngs_natives ngs else []
492

493 494 495 496 497 498
            mCon = maybe id (:)
            ngs' = ngs{ ngs_imports     = imports : ngs_imports ngs
                      , ngs_natives     = natives'
                      , ngs_colorStats  = colorStats `mCon` ngs_colorStats ngs
                      , ngs_linearStats = linearStats `mCon` ngs_linearStats ngs
                      , ngs_labels      = ngs_labels ngs ++ labels'
499
                      , ngs_dwarfFiles  = fileIds'
500
                      , ngs_unwinds     = ngs_unwinds ngs `mapUnion` unwinds
501
                      }
502
        go us' cmms ngs' (count + 1)
503

504

505 506 507
emitNativeCode :: DynFlags -> BufHandle -> SDoc -> IO ()
emitNativeCode dflags h sdoc = do

508 509
        {-# SCC "pprNativeCode" #-} bufLeftRenderSDoc dflags h
                                      (mkCodeStyle AsmStyle) sdoc
510 511 512

        -- dump native code
        dumpIfSet_dyn dflags
Sylvain Henry's avatar
Sylvain Henry committed
513
                Opt_D_dump_asm "Asm code" FormatASM
514 515
                sdoc

516
-- | Complete native code generation phase for a single top-level chunk of Cmm.
517 518
--      Dumping the output of each stage along the way.
--      Global conflict graph and NGC stats
519
cmmNativeGen
520 521
    :: forall statics instr jumpDest. (Instruction instr,
        Outputable statics, Outputable instr, Outputable jumpDest)
522
    => DynFlags
523
    -> Module -> ModLocation
524
    -> NcgImpl statics instr jumpDest
525
        -> UniqSupply
526 527
        -> DwarfFiles
        -> LabelMap DebugBlock
528 529 530
        -> RawCmmDecl                                   -- ^ the cmm to generate code for
        -> Int                                          -- ^ sequence number of this top thing
        -> IO   ( UniqSupply
531
                , DwarfFiles
532 533 534
                , [NatCmmDecl statics instr]                -- native code
                , [CLabel]                                  -- things imported by this cmm
                , Maybe [Color.RegAllocStats statics instr] -- stats for the coloring register allocator
535 536 537
                , Maybe [Linear.RegAllocStats]              -- stats for the linear register allocators
                , LabelMap [UnwindPoint]                    -- unwinding information for blocks
                )
538

539
cmmNativeGen dflags this_mod modLoc ncgImpl us fileIds dbgMap cmm count
540
 = do
541
        let platform = targetPlatform dflags
542

543 544 545 546
        let proc_name = case cmm of
                (CmmProc _ entry_label _ _) -> ppr entry_label
                _                           -> text "DataChunk"

547 548 549
        -- rewrite assignments to global regs
        let fixed_cmm =
                {-# SCC "fixStgRegisters" #-}
550
                fixStgRegisters dflags cmm
551

552 553 554
        -- cmm to cmm optimisations
        let (opt_cmm, imports) =
                {-# SCC "cmmToCmm" #-}
555
                cmmToCmm dflags this_mod fixed_cmm
556

557
        dumpIfSet_dyn dflags
Sylvain Henry's avatar
Sylvain Henry committed
558
                Opt_D_dump_opt_cmm "Optimised Cmm" FormatCMM
559
                (pprCmmGroup [opt_cmm])
560

561 562 563
        let cmmCfg = {-# SCC "getCFG" #-}
                     getCfgProc (cfgWeightInfo dflags) opt_cmm

564
        -- generate native code from cmm
565
        let ((native, lastMinuteImports, fileIds', nativeCfgWeights), usGen) =
566
                {-# SCC "genMachCode" #-}
567 568
                initUs us $ genMachCode dflags this_mod modLoc
                                        (cmmTopCodeGen ncgImpl)
569 570
                                        fileIds dbgMap opt_cmm cmmCfg

571
        dumpIfSet_dyn dflags
Sylvain Henry's avatar
Sylvain Henry committed
572
                Opt_D_dump_asm_native "Native code" FormatASM
573
                (vcat $ map (pprNatCmmDecl ncgImpl) native)
574

575
        maybeDumpCfg dflags (Just nativeCfgWeights) "CFG Weights - Native" proc_name
576

577
        -- tag instructions with register liveness information
578 579
        -- also drops dead code. We don't keep the cfg in sync on
        -- some backends, so don't use it there.
580 581 582
        let livenessCfg = if (backendMaintainsCfg dflags)
                                then Just nativeCfgWeights
                                else Nothing
583 584 585
        let (withLiveness, usLive) =
                {-# SCC "regLiveness" #-}
                initUs usGen
586
                        $ mapM (cmmTopLiveness livenessCfg platform) native
587 588 589

        dumpIfSet_dyn dflags
                Opt_D_dump_asm_liveness "Liveness annotations added"
Sylvain Henry's avatar
Sylvain Henry committed
590
                FormatCMM
Ian Lynagh's avatar
Ian Lynagh committed
591
                (vcat $ map ppr withLiveness)
592 593

        -- allocate registers
594
        (alloced, usAlloc, ppr_raStatsColor, ppr_raStatsLinear, raStats, stack_updt_blks) <-
595 596
         if ( gopt Opt_RegsGraph dflags
           || gopt Opt_RegsIterative dflags )
597 598 599 600 601 602
          then do
                -- the regs usable for allocation
                let (alloc_regs :: UniqFM (UniqSet RealReg))
                        = foldr (\r -> plusUFM_C unionUniqSets
                                        $ unitUFM (targetClassOfRealReg platform r) (unitUniqSet r))
                                emptyUFM
603
                        $ allocatableRegs ncgImpl
604 605

                -- do the graph coloring register allocation
606
                let ((alloced, maybe_more_stack, regAllocStats), usAlloc)
607
                        = {-# SCC "RegAlloc-color" #-}
608 609 610 611
                          initUs usLive
                          $ Color.regAlloc
                                dflags
                                alloc_regs
612
                                (mkUniqSet [0 .. maxSpillSlots ncgImpl])
613
                                (maxSpillSlots ncgImpl)
614
                                withLiveness
615 616 617 618 619 620 621 622 623 624 625
                                livenessCfg

                let ((alloced', stack_updt_blks), usAlloc')
                        = initUs usAlloc $
                                case maybe_more_stack of
                                Nothing     -> return (alloced, [])
                                Just amount -> do
                                    (alloced',stack_updt_blks) <- unzip <$>
                                                (mapM ((ncgAllocMoreStack ncgImpl) amount) alloced)
                                    return (alloced', concat stack_updt_blks )

626 627 628 629

                -- dump out what happened during register allocation
                dumpIfSet_dyn dflags
                        Opt_D_dump_asm_regalloc "Registers allocated"
Sylvain Henry's avatar
Sylvain Henry committed
630
                        FormatCMM
631
                        (vcat $ map (pprNatCmmDecl ncgImpl) alloced)
632 633 634

                dumpIfSet_dyn dflags
                        Opt_D_dump_asm_regalloc_stages "Build/spill stages"
Sylvain Henry's avatar
Sylvain Henry committed
635
                        FormatText
636 637 638
                        (vcat   $ map (\(stage, stats)
                                        -> text "# --------------------------"
                                        $$ text "#  cmm " <> int count <> text " Stage " <> int stage
Ian Lynagh's avatar
Ian Lynagh committed
639
                                        $$ ppr stats)
640 641 642
                                $ zip [0..] regAllocStats)

                let mPprStats =
643
                        if dopt Opt_D_dump_asm_stats dflags
644 645 646 647 648
                         then Just regAllocStats else Nothing

                -- force evaluation of the Maybe to avoid space leak
                mPprStats `seq` return ()

649
                return  ( alloced', usAlloc'
650
                        , mPprStats
651
                        , Nothing
652
                        , [], stack_updt_blks)
653 654 655

          else do
                -- do linear register allocation
656 657 658 659
                let reg_alloc proc = do
                       (alloced, maybe_more_stack, ra_stats) <-
                               Linear.regAlloc dflags proc
                       case maybe_more_stack of
660
                         Nothing -> return ( alloced, ra_stats, [] )
661
                         Just amount -> do
662 663 664
                           (alloced',stack_updt_blks) <-
                               ncgAllocMoreStack ncgImpl amount alloced
                           return (alloced', ra_stats, stack_updt_blks )
665

666
                let ((alloced, regAllocStats, stack_updt_blks), usAlloc)
667
                        = {-# SCC "RegAlloc-linear" #-}
668
                          initUs usLive
669
                          $ liftM unzip3
670
                          $ mapM reg_alloc withLiveness
671 672 673

                dumpIfSet_dyn dflags
                        Opt_D_dump_asm_regalloc "Registers allocated"
Sylvain Henry's avatar
Sylvain Henry committed
674
                        FormatCMM
675
                        (vcat $ map (pprNatCmmDecl ncgImpl) alloced)
676 677

                let mPprStats =
678
                        if dopt Opt_D_dump_asm_stats dflags
679 680 681 682 683 684 685
                         then Just (catMaybes regAllocStats) else Nothing

                -- force evaluation of the Maybe to avoid space leak
                mPprStats `seq` return ()

                return  ( alloced, usAlloc
                        , Nothing
686 687 688 689 690 691 692 693
                        , mPprStats, (catMaybes regAllocStats)
                        , concat stack_updt_blks )

        -- Fixupblocks the register allocator inserted (from, regMoves, to)
        let cfgRegAllocUpdates :: [(BlockId,BlockId,BlockId)]
            cfgRegAllocUpdates = (concatMap Linear.ra_fixupList raStats)

        let cfgWithFixupBlks =
694
                (\cfg -> addNodesBetween cfg cfgRegAllocUpdates) <$> livenessCfg
695 696 697

        -- Insert stack update blocks
        let postRegCFG =
698 699 700
                pure (foldl' (\m (from,to) -> addImmediateSuccessor from to m ))
                     <*> cfgWithFixupBlks
                     <*> pure stack_updt_blks
701

702
        ---- generate jump tables
703 704
        let tabled      =
                {-# SCC "generateJumpTables" #-}
705
                generateJumpTables ncgImpl alloced
706

707
        when (not $ null nativeCfgWeights) $ dumpIfSet_dyn dflags
708
                Opt_D_dump_cfg_weights "CFG Update information"
Sylvain Henry's avatar
Sylvain Henry committed
709
                FormatText
710 711 712
                ( text "stack:" <+> ppr stack_updt_blks $$
                  text "linearAlloc:" <+> ppr cfgRegAllocUpdates )

713
        ---- shortcut branches
714
        let (shorted, postShortCFG)     =
715
                {-# SCC "shortcutBranches" #-}
716 717
                shortcutBranches dflags ncgImpl tabled postRegCFG

718 719 720
        let optimizedCFG :: Maybe CFG
            optimizedCFG =
                optimizeCFG (cfgWeightInfo dflags) cmm <$!> postShortCFG
721

722
        maybeDumpCfg dflags optimizedCFG "CFG Weights - Final" proc_name
723 724 725 726 727 728 729 730 731

        --TODO: Partially check validity of the cfg.
        let getBlks (CmmProc _info _lbl _live (ListGraph blocks)) = blocks
            getBlks _ = []

        when ( backendMaintainsCfg dflags &&
                (gopt Opt_DoAsmLinting dflags || debugIsOn )) $ do
                let blocks = concatMap getBlks shorted
                let labels = setFromList $ fmap blockId blocks :: LabelSet
732 733
                let cfg = fromJust optimizedCFG
                return $! seq (sanityCheckCfg cfg labels $
734
                                text "cfg not in lockstep") ()
735

736
        ---- sequence blocks
737 738 739
        let sequenced :: [NatCmmDecl statics instr]
            sequenced =
                checkLayout shorted $
740
                {-# SCC "sequenceBlocks" #-}
741 742 743 744 745 746 747 748 749 750
                map (BlockLayout.sequenceTop
                        dflags
                        ncgImpl optimizedCFG)
                    shorted

        let branchOpt :: [NatCmmDecl statics instr]
            branchOpt =
                {-# SCC "invertCondBranches" #-}
                map invert sequenced
              where
751 752 753
                invertConds :: LabelMap CmmStatics -> [NatBasicBlock instr]
                            -> [NatBasicBlock instr]
                invertConds = invertCondBranches ncgImpl optimizedCFG
754 755 756
                invert top@CmmData {} = top
                invert (CmmProc info lbl live (ListGraph blocks)) =
                    CmmProc info lbl live (ListGraph $ invertConds info blocks)
757

758
        ---- expansion of SPARC synthetic instrs
759 760
        let expanded =
                {-# SCC "sparc_expand" #-}
761 762
                ncgExpandTop ncgImpl branchOpt
                --ncgExpandTop ncgImpl sequenced
763

764 765
        dumpIfSet_dyn dflags
                Opt_D_dump_asm_expanded "Synthetic instructions expanded"
Sylvain Henry's avatar
Sylvain Henry committed
766
                FormatCMM
767
                (vcat $ map (pprNatCmmDecl ncgImpl) expanded)
768

769 770 771 772 773 774 775 776 777
        -- generate unwinding information from cmm
        let unwinds :: BlockMap [UnwindPoint]
            unwinds =
                {-# SCC "unwindingInfo" #-}
                foldl' addUnwind mapEmpty expanded
              where
                addUnwind acc proc =
                    acc `mapUnion` computeUnwinding dflags ncgImpl proc

778
        return  ( usAlloc
779
                , fileIds'
780 781 782
                , expanded
                , lastMinuteImports ++ imports
                , ppr_raStatsColor
783 784
                , ppr_raStatsLinear
                , unwinds )
785

786 787 788 789 790 791 792
maybeDumpCfg :: DynFlags -> Maybe CFG -> String -> SDoc -> IO ()
maybeDumpCfg _dflags Nothing _ _ = return ()
maybeDumpCfg dflags (Just cfg) msg proc_name
        | null cfg = return ()
        | otherwise
        = dumpIfSet_dyn
                dflags Opt_D_dump_cfg_weights msg
Sylvain Henry's avatar
Sylvain Henry committed
793
                FormatText
794 795
                (proc_name <> char ':' $$ pprEdgeWeights cfg)

796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813
-- | Make sure all blocks we want the layout algorithm to place have been placed.
checkLayout :: [NatCmmDecl statics instr] -> [NatCmmDecl statics instr]
            -> [NatCmmDecl statics instr]
checkLayout procsUnsequenced procsSequenced =
        ASSERT2(setNull diff,
                ppr "Block sequencing dropped blocks:" <> ppr diff)
        procsSequenced
  where
        blocks1 = foldl' (setUnion) setEmpty $
                        map getBlockIds procsUnsequenced :: LabelSet
        blocks2 = foldl' (setUnion) setEmpty $
                        map getBlockIds procsSequenced
        diff = setDifference blocks1 blocks2

        getBlockIds (CmmData _ _) = setEmpty
        getBlockIds (CmmProc _ _ _ (ListGraph blocks)) =
                setFromList $ map blockId blocks

814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830
-- | Compute unwinding tables for the blocks of a procedure
computeUnwinding :: Instruction instr
                 => DynFlags -> NcgImpl statics instr jumpDest
                 -> NatCmmDecl statics instr
                    -- ^ the native code generated for the procedure
                 -> LabelMap [UnwindPoint]
                    -- ^ unwinding tables for all points of all blocks of the
                    -- procedure
computeUnwinding dflags _ _
  | debugLevel dflags == 0         = mapEmpty
computeUnwinding _ _ (CmmData _ _) = mapEmpty
computeUnwinding _ ncgImpl (CmmProc _ _ _ (ListGraph blks)) =
    -- In general we would need to push unwinding information down the
    -- block-level call-graph to ensure that we fully account for all
    -- relevant register writes within a procedure.
    --
    -- However, the only unwinding information that we care about in GHC is for
831
    -- Sp. The fact that GHC.Cmm.LayoutStack already ensures that we have unwind
832 833 834 835
    -- information at the beginning of every block means that there is no need
    -- to perform this sort of push-down.
    mapFromList [ (blk_lbl, extractUnwindPoints ncgImpl instrs)
                | BasicBlock blk_lbl instrs <- blks ]
836

837
-- | Build a doc for all the imports.
838
--
839
makeImportsDoc :: DynFlags -> [CLabel] -> SDoc
840
makeImportsDoc dflags imports
841
 = dyld_stubs imports
842
            $$
843 844 845
            -- On recent versions of Darwin, the linker supports
            -- dead-stripping of code and data on a per-symbol basis.
            -- There's a hack to make this work in PprMach.pprNatCmmDecl.
ian@well-typed.com's avatar
ian@well-typed.com committed
846
            (if platformHasSubsectionsViaSymbols platform
847
             then text ".subsections_via_symbols"
848
             else Outputable.empty)
849
            $$
850 851 852 853 854 855
                -- On recent GNU ELF systems one can mark an object file
                -- as not requiring an executable stack. If all objects
                -- linked into a program have this note then the program
                -- will not use an executable stack, which is good for
                -- security. GHC generated code does not need an executable
                -- stack so add the note in:
ian@well-typed.com's avatar
ian@well-typed.com committed
856
            (if platformHasGnuNonexecStack platform
857
             then text ".section .note.GNU-stack,\"\"," <> sectionType "progbits"
858
             else Outputable.empty)
859
            $$
Gabor Greif's avatar
Gabor Greif committed
860
                -- And just because every other compiler does, let's stick in
861
                -- an identifier directive: .ident "GHC x.y.z"
ian@well-typed.com's avatar
ian@well-typed.com committed
862
            (if platformHasIdentDirective platform
863 864
             then let compilerIdent = text "GHC" <+> text cProjectVersion
                   in text ".ident" <+> doubleQuotes compilerIdent
865
             else Outputable.empty)
866

867
 where
ian@well-typed.com's avatar
ian@well-typed.com committed
868 869 870 871
        platform = targetPlatform dflags
        arch = platformArch platform
        os   = platformOS   platform

872 873
        -- Generate "symbol stubs" for all external symbols that might
        -- come from a dynamic library.
874 875
        dyld_stubs :: [CLabel] -> SDoc
{-      dyld_stubs imps = vcat $ map pprDyldSymbolStub $
876 877 878 879
                                    map head $ group $ sort imps-}
        -- (Hack) sometimes two Labels pretty-print the same, but have
        -- different uniques; so we compare their text versions...
        dyld_stubs imps
Ian Lynagh's avatar
Ian Lynagh committed
880
                | needImportedSymbols dflags arch os
881
                = vcat $
Ian Lynagh's avatar
Ian Lynagh committed
882 883
                        (pprGotDeclaration dflags arch os :) $
                        map ( pprImportedSymbol dflags platform . fst . head) $
884 885 886 887 888
                        groupBy (\(_,a) (_,b) -> a == b) $
                        sortBy (\(_,a) (_,b) -> compare a b) $
                        map doPpr $
                        imps
                | otherwise
889
                = Outputable.empty
890

891
        doPpr lbl = (lbl, renderWithStyle dflags (pprCLabel dflags lbl) astyle)
892
        astyle = mkCodeStyle AsmStyle
893

894 895 896 897 898 899
-- -----------------------------------------------------------------------------
-- Generate jump tables

-- Analyzes all native code and generates data sections for all jump
-- table instructions.
generateJumpTables
900 901 902
        :: NcgImpl statics instr jumpDest
        -> [NatCmmDecl statics instr] -> [NatCmmDecl statics instr]
generateJumpTables ncgImpl xs = concatMap f xs
903
    where f p@(CmmProc _ _ _ (ListGraph xs)) = p : concatMap g xs
904
          f p = [p]
905
          g (BasicBlock _ xs) = catMaybes (map (generateJumpTableForInstr ncgImpl) xs)
906

907 908 909
-- -----------------------------------------------------------------------------
-- Shortcut branches

910
shortcutBranches
911
        :: forall statics instr jumpDest. (Outputable jumpDest) => DynFlags
912
        -> NcgImpl statics instr jumpDest
913
        -> [NatCmmDecl statics instr]
914 915
        -> Maybe CFG
        -> ([NatCmmDecl statics instr],Maybe CFG)
916

917
shortcutBranches dflags ncgImpl tops weights
918
  | gopt Opt_AsmShortcutting dflags
919
  = ( map (apply_mapping ncgImpl mapping) tops'
920
    , shortcutWeightMap mappingBid <$!> weights )
921
  | otherwise
922
  = (tops, weights)
923
  where
924
    (tops', mappings) = mapAndUnzip (build_mapping ncgImpl) tops
925 926
    mapping = mapUnions mappings :: LabelMap jumpDest
    mappingBid = fmap (getJumpDestBlockId ncgImpl) mapping
927

928 929
build_mapping :: forall instr t d statics jumpDest.
                 NcgImpl statics instr jumpDest
930
              -> GenCmmDecl d (LabelMap t) (ListGraph instr)
931 932 933
              -> (GenCmmDecl d (LabelMap t) (ListGraph instr)
                 ,LabelMap jumpDest)
build_mapping _ top@(CmmData _ _) = (top, mapEmpty)
934
build_mapping _ (CmmProc info lbl live (ListGraph []))
935
  = (CmmProc info lbl live (ListGraph []), mapEmpty)
936 937
build_mapping ncgImpl (CmmProc info lbl live (ListGraph (head:blocks)))
  = (CmmProc info lbl live (ListGraph (head:others)), mapping)
938 939 940 941 942
        -- drop the shorted blocks, but don't ever drop the first one,
        -- because it is pointed to by a global label.
  where
    -- find all the blocks that just consist of a jump that can be
    -- shorted.
943
    -- Don't completely eliminate loops here -- that can leave a dangling jump!
944
    shortcut_blocks :: [(BlockId, jumpDest)]
945
    (_, shortcut_blocks, others) =
946
        foldl' split (setEmpty :: LabelSet, [], []) blocks
947
    split (s, shortcut_blocks, others) b@(BasicBlock id [insn])
948 949 950 951
        | Just jd <- canShortcut ncgImpl insn
        , Just dest <- getJumpDestBlockId ncgImpl jd
        , not (has_info id)
        , (setMember dest s) || dest == id -- loop checks
952 953
        = (s, shortcut_blocks, b : others)
    split (s, shortcut_blocks, others) (BasicBlock id [insn])
954 955
        | Just dest <- canShortcut ncgImpl insn
        , not (has_info id)
956
        = (setInsert id s, (id,dest) : shortcut_blocks, others)
957 958
    split (s, shortcut_blocks, others) other = (s, shortcut_blocks, other : others)

959 960
    -- do not eliminate blocks that have an info table