AsmCodeGen.hs 45.5 KB
Newer Older
1 2 3
-- -----------------------------------------------------------------------------
--
-- (c) The University of Glasgow 1993-2004
4
--
5 6 7
-- This is the top-level module in the native code generator.
--
-- -----------------------------------------------------------------------------
8

9 10
{-# LANGUAGE BangPatterns, CPP, GADTs, ScopedTypeVariables, UnboxedTuples #-}

11
module AsmCodeGen ( nativeCodeGen ) where
12

13
#include "HsVersions.h"
14
#include "nativeGen/NCG.h"
15

16

17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33
import qualified X86.CodeGen
import qualified X86.Regs
import qualified X86.Instr
import qualified X86.Ppr

import qualified SPARC.CodeGen
import qualified SPARC.Regs
import qualified SPARC.Instr
import qualified SPARC.Ppr
import qualified SPARC.ShortcutJump
import qualified SPARC.CodeGen.Expand

import qualified PPC.CodeGen
import qualified PPC.Regs
import qualified PPC.RegInfo
import qualified PPC.Instr
import qualified PPC.Ppr
34 35

import RegAlloc.Liveness
36
import qualified RegAlloc.Linear.Main           as Linear
37

38 39 40 41
import qualified GraphColor                     as Color
import qualified RegAlloc.Graph.Main            as Color
import qualified RegAlloc.Graph.Stats           as Color
import qualified RegAlloc.Graph.TrivColorable   as Color
42

43
import TargetReg
44
import Platform
45
import Config
46 47 48 49
import Instruction
import PIC
import Reg
import NCGMonad
Peter Wortmann's avatar
Peter Wortmann committed
50
import Dwarf
51
import Debug
52

53
import BlockId
54
import CgUtils          ( fixStgRegisters )
55 56 57
import Cmm
import CmmUtils
import Hoopl
Simon Marlow's avatar
Simon Marlow committed
58
import CmmOpt           ( cmmMachOpFold )
59
import PprCmm
60
import CLabel
61

62
import UniqFM
63
import UniqSupply
64 65
import DynFlags
import Util
66
import Unique
67

68
import BasicTypes       ( Alignment )
69
import Digraph
70
import qualified Pretty
71
import BufWrite
72
import Outputable
73
import FastString
74
import UniqSet
75
import ErrUtils
76
import Module
77 78
import Stream (Stream)
import qualified Stream
79

80 81
-- DEBUGGING ONLY
--import OrdList
82

83
import Data.List
84
import Data.Maybe
85
import Data.Ord         ( comparing )
86
import Control.Exception
87
#if __GLASGOW_HASKELL__ < 709
Austin Seipp's avatar
Austin Seipp committed
88
import Control.Applicative (Applicative(..))
89
#endif
90
import Control.Monad
91
import System.IO
92

93 94 95
{-
The native-code generator has machine-independent and
machine-dependent modules.
96

97 98 99 100
This module ("AsmCodeGen") is the top-level machine-independent
module.  Before entering machine-dependent land, we do some
machine-independent optimisations (defined below) on the
'CmmStmts's.
101

102 103 104 105 106 107 108 109 110
We convert to the machine-specific 'Instr' datatype with
'cmmCodeGen', assuming an infinite supply of registers.  We then use
a machine-independent register allocator ('regAlloc') to rejoin
reality.  Obviously, 'regAlloc' has machine-specific helper
functions (see about "RegAllocInfo" below).

Finally, we order the basic blocks of the function so as to minimise
the number of jumps between blocks, by utilising fallthrough wherever
possible.
111 112

The machine-dependent bits break down as follows:
113 114

  * ["MachRegs"]  Everything about the target platform's machine
115 116 117
    registers (and immediate operands, and addresses, which tend to
    intermingle/interact with registers).

118
  * ["MachInstrs"]  Includes the 'Instr' datatype (possibly should
119
    have a module of its own), plus a miscellany of other things
120
    (e.g., 'targetDoubleSize', 'smStablePtrTable', ...)
121

122
  * ["MachCodeGen"]  is where 'Cmm' stuff turns into
123
    machine instructions.
124

125
  * ["PprMach"] 'pprInstr' turns an 'Instr' into text (well, really
126
    a 'SDoc').
127

128 129
  * ["RegAllocInfo"] In the register allocator, we manipulate
    'MRegsState's, which are 'BitSet's, one bit per machine register.
130 131
    When we want to say something about a specific machine register
    (e.g., ``it gets clobbered by this instruction''), we set/unset
132
    its bit.  Obviously, we do this 'BitSet' thing for efficiency
133 134
    reasons.

135
    The 'RegAllocInfo' module collects together the machine-specific
136 137
    info needed to do register allocation.

138 139
   * ["RegisterAlloc"] The (machine-independent) register allocator.
-}
140

141 142
-- -----------------------------------------------------------------------------
-- Top-level of the native codegen
143

144
data NcgImpl statics instr jumpDest = NcgImpl {
Simon Peyton Jones's avatar
Simon Peyton Jones committed
145
    cmmTopCodeGen             :: RawCmmDecl -> NatM [NatCmmDecl statics instr],
146
    generateJumpTableForInstr :: instr -> Maybe (NatCmmDecl statics instr),
147 148
    getJumpDestBlockId        :: jumpDest -> Maybe BlockId,
    canShortcut               :: instr -> Maybe jumpDest,
149
    shortcutStatics           :: (BlockId -> Maybe jumpDest) -> statics -> statics,
150
    shortcutJump              :: (BlockId -> Maybe jumpDest) -> instr -> instr,
151
    pprNatCmmDecl             :: NatCmmDecl statics instr -> SDoc,
152 153
    maxSpillSlots             :: Int,
    allocatableRegs           :: [RealReg],
Simon Peyton Jones's avatar
Simon Peyton Jones committed
154 155
    ncg_x86fp_kludge          :: [NatCmmDecl statics instr] -> [NatCmmDecl statics instr],
    ncgExpandTop              :: [NatCmmDecl statics instr] -> [NatCmmDecl statics instr],
156
    ncgAllocMoreStack         :: Int -> NatCmmDecl statics instr -> UniqSM (NatCmmDecl statics instr),
157
    ncgMakeFarBranches        :: BlockEnv CmmStatics -> [NatBasicBlock instr] -> [NatBasicBlock instr]
158 159
    }

160
--------------------
161
nativeCodeGen :: DynFlags -> Module -> ModLocation -> Handle -> UniqSupply
162
              -> Stream IO RawCmmGroup ()
163
              -> IO UniqSupply
164
nativeCodeGen dflags this_mod modLoc h us cmms
165
 = let platform = targetPlatform dflags
166 167
       nCG' :: (Outputable statics, Outputable instr, Instruction instr)
            => NcgImpl statics instr jumpDest -> IO UniqSupply
168
       nCG' ncgImpl = nativeCodeGen' dflags this_mod modLoc ncgImpl h us cmms
169
   in case platformArch platform of
170 171 172 173 174 175 176 177 178 179 180 181
      ArchX86       -> nCG' (x86NcgImpl    dflags)
      ArchX86_64    -> nCG' (x86_64NcgImpl dflags)
      ArchPPC       -> nCG' (ppcNcgImpl    dflags)
      ArchSPARC     -> nCG' (sparcNcgImpl  dflags)
      ArchARM {}    -> panic "nativeCodeGen: No NCG for ARM"
      ArchARM64     -> panic "nativeCodeGen: No NCG for ARM64"
      ArchPPC_64 _  -> nCG' (ppcNcgImpl    dflags)
      ArchAlpha     -> panic "nativeCodeGen: No NCG for Alpha"
      ArchMipseb    -> panic "nativeCodeGen: No NCG for mipseb"
      ArchMipsel    -> panic "nativeCodeGen: No NCG for mipsel"
      ArchUnknown   -> panic "nativeCodeGen: No NCG for unknown arch"
      ArchJavaScript-> panic "nativeCodeGen: No NCG for JavaScript"
182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201

x86NcgImpl :: DynFlags -> NcgImpl (Alignment, CmmStatics) X86.Instr.Instr X86.Instr.JumpDest
x86NcgImpl dflags
 = (x86_64NcgImpl dflags) { ncg_x86fp_kludge = map x86fp_kludge }

x86_64NcgImpl :: DynFlags -> NcgImpl (Alignment, CmmStatics) X86.Instr.Instr X86.Instr.JumpDest
x86_64NcgImpl dflags
 = NcgImpl {
        cmmTopCodeGen             = X86.CodeGen.cmmTopCodeGen
       ,generateJumpTableForInstr = X86.CodeGen.generateJumpTableForInstr dflags
       ,getJumpDestBlockId        = X86.Instr.getJumpDestBlockId
       ,canShortcut               = X86.Instr.canShortcut
       ,shortcutStatics           = X86.Instr.shortcutStatics
       ,shortcutJump              = X86.Instr.shortcutJump
       ,pprNatCmmDecl             = X86.Ppr.pprNatCmmDecl
       ,maxSpillSlots             = X86.Instr.maxSpillSlots dflags
       ,allocatableRegs           = X86.Regs.allocatableRegs platform
       ,ncg_x86fp_kludge          = id
       ,ncgAllocMoreStack         = X86.Instr.allocMoreStack platform
       ,ncgExpandTop              = id
202
       ,ncgMakeFarBranches        = const id
203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220
   }
    where platform = targetPlatform dflags

ppcNcgImpl :: DynFlags -> NcgImpl CmmStatics PPC.Instr.Instr PPC.RegInfo.JumpDest
ppcNcgImpl dflags
 = NcgImpl {
        cmmTopCodeGen             = PPC.CodeGen.cmmTopCodeGen
       ,generateJumpTableForInstr = PPC.CodeGen.generateJumpTableForInstr dflags
       ,getJumpDestBlockId        = PPC.RegInfo.getJumpDestBlockId
       ,canShortcut               = PPC.RegInfo.canShortcut
       ,shortcutStatics           = PPC.RegInfo.shortcutStatics
       ,shortcutJump              = PPC.RegInfo.shortcutJump
       ,pprNatCmmDecl             = PPC.Ppr.pprNatCmmDecl
       ,maxSpillSlots             = PPC.Instr.maxSpillSlots dflags
       ,allocatableRegs           = PPC.Regs.allocatableRegs platform
       ,ncg_x86fp_kludge          = id
       ,ncgAllocMoreStack         = PPC.Instr.allocMoreStack platform
       ,ncgExpandTop              = id
221
       ,ncgMakeFarBranches        = PPC.Instr.makeFarBranches
222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239
   }
    where platform = targetPlatform dflags

sparcNcgImpl :: DynFlags -> NcgImpl CmmStatics SPARC.Instr.Instr SPARC.ShortcutJump.JumpDest
sparcNcgImpl dflags
 = NcgImpl {
        cmmTopCodeGen             = SPARC.CodeGen.cmmTopCodeGen
       ,generateJumpTableForInstr = SPARC.CodeGen.generateJumpTableForInstr dflags
       ,getJumpDestBlockId        = SPARC.ShortcutJump.getJumpDestBlockId
       ,canShortcut               = SPARC.ShortcutJump.canShortcut
       ,shortcutStatics           = SPARC.ShortcutJump.shortcutStatics
       ,shortcutJump              = SPARC.ShortcutJump.shortcutJump
       ,pprNatCmmDecl             = SPARC.Ppr.pprNatCmmDecl
       ,maxSpillSlots             = SPARC.Instr.maxSpillSlots dflags
       ,allocatableRegs           = SPARC.Regs.allocatableRegs
       ,ncg_x86fp_kludge          = id
       ,ncgAllocMoreStack         = noAllocMoreStack
       ,ncgExpandTop              = map SPARC.CodeGen.Expand.expandTop
240
       ,ncgMakeFarBranches        = const id
241
   }
242 243 244 245 246 247 248

--
-- Allocating more stack space for spilling is currently only
-- supported for the linear register allocator on x86/x86_64, the rest
-- default to the panic below.  To support allocating extra stack on
-- more platforms provide a definition of ncgAllocMoreStack.
--
249
noAllocMoreStack :: Int -> NatCmmDecl statics instr -> UniqSM (NatCmmDecl statics instr)
250 251 252 253 254 255 256 257 258
noAllocMoreStack amount _
  = panic $   "Register allocator: out of stack slots (need " ++ show amount ++ ")\n"
        ++  "   If you are trying to compile SHA1.hs from the crypto library then this\n"
        ++  "   is a known limitation in the linear allocator.\n"
        ++  "\n"
        ++  "   Try enabling the graph colouring allocator with -fregs-graph instead."
        ++  "   You can still file a bug report if you like.\n"


259 260 261 262 263 264 265 266 267 268 269 270 271
-- | Data accumulated during code generation. Mostly about statistics,
-- but also collects debug data for DWARF generation.
data NativeGenAcc statics instr
  = NGS { ngs_imports     :: ![[CLabel]]
        , ngs_natives     :: ![[NatCmmDecl statics instr]]
             -- ^ Native code generated, for statistics. This might
             -- hold a lot of data, so it is important to clear this
             -- field as early as possible if it isn't actually
             -- required.
        , ngs_colorStats  :: ![[Color.RegAllocStats statics instr]]
        , ngs_linearStats :: ![[Linear.RegAllocStats]]
        , ngs_labels      :: ![Label]
        , ngs_debug       :: ![DebugBlock]
272
        , ngs_dwarfFiles  :: !DwarfFiles
273
        }
274

Ian Lynagh's avatar
Ian Lynagh committed
275
nativeCodeGen' :: (Outputable statics, Outputable instr, Instruction instr)
276
               => DynFlags
277
               -> Module -> ModLocation
278
               -> NcgImpl statics instr jumpDest
279
               -> Handle
280 281 282
               -> UniqSupply
               -> Stream IO RawCmmGroup ()
               -> IO UniqSupply
283
nativeCodeGen' dflags this_mod modLoc ncgImpl h us cmms
284
 = do
285 286 287
        -- BufHandle is a performance hack.  We could hide it inside
        -- Pretty if it weren't for the fact that we do lots of little
        -- printDocs here (in order to do codegen in constant space).
288
        bufh <- newBufHandle h
289
        let ngs0 = NGS [] [] [] [] [] [] emptyUFM
290
        (ngs, us') <- cmmNativeGenStream dflags this_mod modLoc ncgImpl bufh us
291
                                         cmms ngs0
Peter Wortmann's avatar
Peter Wortmann committed
292
        finishNativeGen dflags modLoc bufh us' ngs
293 294

finishNativeGen :: Instruction instr
295
                => DynFlags
Peter Wortmann's avatar
Peter Wortmann committed
296
                -> ModLocation
297
                -> BufHandle
Peter Wortmann's avatar
Peter Wortmann committed
298
                -> UniqSupply
299
                -> NativeGenAcc statics instr
Peter Wortmann's avatar
Peter Wortmann committed
300 301
                -> IO UniqSupply
finishNativeGen dflags modLoc bufh@(BufHandle _ _ h) us ngs
302
 = do
Peter Wortmann's avatar
Peter Wortmann committed
303 304 305 306 307 308
        -- Write debug data and finish
        let emitDw = gopt Opt_Debug dflags && not (gopt Opt_SplitObjs dflags)
        us' <- if not emitDw then return us else do
          (dwarf, us') <- dwarfGen dflags modLoc us (ngs_debug ngs)
          emitNativeCode dflags bufh dwarf
          return us'
309
        bFlush bufh
310

311
        -- dump global NCG stats for graph coloring allocator
312 313 314
        let stats = concat (ngs_colorStats ngs)
        when (not (null stats)) $ do

Peter Wortmann's avatar
Peter Wortmann committed
315 316 317 318 319
          -- build the global register conflict graph
          let graphGlobal
                  = foldl Color.union Color.initGraph
                  $ [ Color.raGraph stat
                          | stat@Color.RegAllocStatsStart{} <- stats]
320

Peter Wortmann's avatar
Peter Wortmann committed
321
          dump_stats (Color.pprStats stats graphGlobal)
322

Peter Wortmann's avatar
Peter Wortmann committed
323 324 325 326 327 328 329 330 331
          let platform = targetPlatform dflags
          dumpIfSet_dyn dflags
                  Opt_D_dump_asm_conflicts "Register conflict graph"
                  $ Color.dotGraph
                          (targetRegDotColor platform)
                          (Color.trivColorable platform
                                  (targetVirtualRegSqueeze platform)
                                  (targetRealRegSqueeze platform))
                  $ graphGlobal
332 333 334


        -- dump global NCG stats for linear allocator
335 336 337
        let linearStats = concat (ngs_linearStats ngs)
        when (not (null linearStats)) $
          dump_stats (Linear.pprStats (concat (ngs_natives ngs)) linearStats)
338 339

        -- write out the imports
Ian Lynagh's avatar
Ian Lynagh committed
340
        Pretty.printDoc Pretty.LeftMode (pprCols dflags) h
Ian Lynagh's avatar
Ian Lynagh committed
341
                $ withPprStyleDoc dflags (mkCodeStyle AsmStyle)
342
                $ makeImportsDoc dflags (concat (ngs_imports ngs))
Peter Wortmann's avatar
Peter Wortmann committed
343
        return us'
344 345
  where
    dump_stats = dumpSDoc dflags alwaysQualify Opt_D_dump_asm_stats "NCG stats"
346

Simon Marlow's avatar
Simon Marlow committed
347
cmmNativeGenStream :: (Outputable statics, Outputable instr, Instruction instr)
348
              => DynFlags
349
              -> Module -> ModLocation
350 351
              -> NcgImpl statics instr jumpDest
              -> BufHandle
352 353
              -> UniqSupply
              -> Stream IO RawCmmGroup ()
354 355
              -> NativeGenAcc statics instr
              -> IO (NativeGenAcc statics instr, UniqSupply)
356

357
cmmNativeGenStream dflags this_mod modLoc ncgImpl h us cmm_stream ngs
358 359
 = do r <- Stream.runStream cmm_stream
      case r of
360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388
        Left () ->
          return (ngs { ngs_imports = reverse $ ngs_imports ngs
                      , ngs_natives = reverse $ ngs_natives ngs
                      , ngs_colorStats = reverse $ ngs_colorStats ngs
                      , ngs_linearStats = reverse $ ngs_linearStats ngs
                      },
                  us)
        Right (cmms, cmm_stream') -> do

          -- Generate debug information
          let debugFlag = gopt Opt_Debug dflags
              !ndbgs | debugFlag = cmmDebugGen modLoc cmms
                     | otherwise = []
              dbgMap = debugToMap ndbgs

          -- Insert split marker, generate native code
          let splitFlag = gopt Opt_SplitObjs dflags
              split_marker = CmmProc mapEmpty mkSplitMarkerLabel [] $
                             ofBlockList (panic "split_marker_entry") []
              cmms' | splitFlag  = split_marker : cmms
                    | otherwise  = cmms
          (ngs',us') <- cmmNativeGens dflags this_mod modLoc ncgImpl h dbgMap us
                                      cmms' ngs 0

          -- Link native code information into debug blocks
          let !ldbgs = cmmDebugLink (ngs_labels ngs') ndbgs
          dumpIfSet_dyn dflags Opt_D_dump_debug "Debug Infos"
            (vcat $ map ppr ldbgs)

Peter Wortmann's avatar
Peter Wortmann committed
389 390 391 392 393 394 395 396 397 398 399 400 401 402 403
          -- Emit & clear DWARF information when generating split
          -- object files, as we need it to land in the same object file
          (ngs'', us'') <-
            if debugFlag && splitFlag
            then do (dwarf, us'') <- dwarfGen dflags modLoc us ldbgs
                    emitNativeCode dflags h dwarf
                    return (ngs' { ngs_debug = []
                                 , ngs_dwarfFiles = emptyUFM
                                 , ngs_labels = [] },
                            us'')
            else return (ngs' { ngs_debug  = ngs_debug ngs' ++ ldbgs
                              , ngs_labels = [] },
                         us')

          cmmNativeGenStream dflags this_mod modLoc ncgImpl h us''
404
              cmm_stream' ngs''
405

406 407
-- | Do native code generation on all these cmms.
--
Ian Lynagh's avatar
Ian Lynagh committed
408
cmmNativeGens :: (Outputable statics, Outputable instr, Instruction instr)
409
              => DynFlags
410
              -> Module -> ModLocation
411 412
              -> NcgImpl statics instr jumpDest
              -> BufHandle
413
              -> LabelMap DebugBlock
dterei's avatar
dterei committed
414
              -> UniqSupply
Simon Peyton Jones's avatar
Simon Peyton Jones committed
415
              -> [RawCmmDecl]
416 417 418
              -> NativeGenAcc statics instr
              -> Int
              -> IO (NativeGenAcc statics instr, UniqSupply)
419

420
cmmNativeGens _ _ _ _ _ _ us [] ngs !_
421
        = return (ngs, us)
422

423 424
cmmNativeGens dflags this_mod modLoc ncgImpl h dbgMap us
              (cmm : cmms) ngs count
425
 = do
426 427 428 429 430 431
        let fileIds = ngs_dwarfFiles ngs
        (us', fileIds', native, imports, colorStats, linearStats)
          <- {-# SCC "cmmNativeGen" #-}
             cmmNativeGen dflags this_mod modLoc ncgImpl us fileIds dbgMap
                          cmm count

432 433 434 435
        -- Generate .file directives for every new file that has been
        -- used. Note that it is important that we generate these in
        -- ascending order, as Clang's 3.6 assembler complains.
        let newFileIds = sortBy (comparing snd) $ eltsUFM $ fileIds' `minusUFM` fileIds
436 437
            pprDecl (f,n) = ptext (sLit "\t.file ") <> ppr n <+>
                            doubleQuotes (ftext f)
438

439
        emitNativeCode dflags h $ vcat $
440
          map pprDecl newFileIds ++
441
          map (pprNatCmmDecl ncgImpl) native
442

Gabor Greif's avatar
typo  
Gabor Greif committed
443
        -- force evaluation all this stuff to avoid space leaks
444
        {-# SCC "seqString" #-} evaluate $ seqString (showSDoc dflags $ vcat $ map ppr imports)
445

446 447 448 449 450 451 452 453 454 455
        let !labels' = if gopt Opt_Debug dflags
                       then cmmDebugLabels isMetaInstr native else []
            !natives' = if dopt Opt_D_dump_asm_stats dflags
                        then native : ngs_natives ngs else []
            mCon = maybe id (:)
            ngs' = ngs{ ngs_imports     = imports : ngs_imports ngs
                      , ngs_natives     = natives'
                      , ngs_colorStats  = colorStats `mCon` ngs_colorStats ngs
                      , ngs_linearStats = linearStats `mCon` ngs_linearStats ngs
                      , ngs_labels      = ngs_labels ngs ++ labels'
456
                      , ngs_dwarfFiles  = fileIds'
457
                      }
458 459
        cmmNativeGens dflags this_mod modLoc ncgImpl h dbgMap us'
                      cmms ngs' (count + 1)
460

461
 where  seqString []            = ()
462
        seqString (x:xs)        = x `seq` seqString xs
463 464


465 466 467 468 469 470 471 472 473 474 475
emitNativeCode :: DynFlags -> BufHandle -> SDoc -> IO ()
emitNativeCode dflags h sdoc = do

        {-# SCC "pprNativeCode" #-} Pretty.bufLeftRender h
                $ withPprStyleDoc dflags (mkCodeStyle AsmStyle) sdoc

        -- dump native code
        dumpIfSet_dyn dflags
                Opt_D_dump_asm "Asm code"
                sdoc

476
-- | Complete native code generation phase for a single top-level chunk of Cmm.
477 478
--      Dumping the output of each stage along the way.
--      Global conflict graph and NGC stats
479
cmmNativeGen
Ian Lynagh's avatar
Ian Lynagh committed
480
        :: (Outputable statics, Outputable instr, Instruction instr)
481
    => DynFlags
482
    -> Module -> ModLocation
483
    -> NcgImpl statics instr jumpDest
484
        -> UniqSupply
485 486
        -> DwarfFiles
        -> LabelMap DebugBlock
487 488 489
        -> RawCmmDecl                                   -- ^ the cmm to generate code for
        -> Int                                          -- ^ sequence number of this top thing
        -> IO   ( UniqSupply
490
                , DwarfFiles
491 492 493 494
                , [NatCmmDecl statics instr]                -- native code
                , [CLabel]                                  -- things imported by this cmm
                , Maybe [Color.RegAllocStats statics instr] -- stats for the coloring register allocator
                , Maybe [Linear.RegAllocStats])             -- stats for the linear register allocators
495

496
cmmNativeGen dflags this_mod modLoc ncgImpl us fileIds dbgMap cmm count
497
 = do
498
        let platform = targetPlatform dflags
499

500 501 502
        -- rewrite assignments to global regs
        let fixed_cmm =
                {-# SCC "fixStgRegisters" #-}
503
                fixStgRegisters dflags cmm
504

505 506 507
        -- cmm to cmm optimisations
        let (opt_cmm, imports) =
                {-# SCC "cmmToCmm" #-}
508
                cmmToCmm dflags this_mod fixed_cmm
509

510 511
        dumpIfSet_dyn dflags
                Opt_D_dump_opt_cmm "Optimised Cmm"
512
                (pprCmmGroup [opt_cmm])
513

514
        -- generate native code from cmm
515
        let ((native, lastMinuteImports, fileIds'), usGen) =
516
                {-# SCC "genMachCode" #-}
517 518 519
                initUs us $ genMachCode dflags this_mod modLoc
                                        (cmmTopCodeGen ncgImpl)
                                        fileIds dbgMap opt_cmm
520 521 522

        dumpIfSet_dyn dflags
                Opt_D_dump_asm_native "Native code"
523
                (vcat $ map (pprNatCmmDecl ncgImpl) native)
524 525 526 527 528

        -- tag instructions with register liveness information
        let (withLiveness, usLive) =
                {-# SCC "regLiveness" #-}
                initUs usGen
529
                        $ mapM (regLiveness platform)
530 531 532 533
                        $ map natCmmTopToLive native

        dumpIfSet_dyn dflags
                Opt_D_dump_asm_liveness "Liveness annotations added"
Ian Lynagh's avatar
Ian Lynagh committed
534
                (vcat $ map ppr withLiveness)
535 536 537

        -- allocate registers
        (alloced, usAlloc, ppr_raStatsColor, ppr_raStatsLinear) <-
538 539 540 541
         if False
           -- Disabled, see #7679, #8657
           --  ( gopt Opt_RegsGraph dflags
           --  || gopt Opt_RegsIterative dflags)
542 543 544 545 546 547
          then do
                -- the regs usable for allocation
                let (alloc_regs :: UniqFM (UniqSet RealReg))
                        = foldr (\r -> plusUFM_C unionUniqSets
                                        $ unitUFM (targetClassOfRealReg platform r) (unitUniqSet r))
                                emptyUFM
548
                        $ allocatableRegs ncgImpl
549 550 551 552 553 554 555 556

                -- do the graph coloring register allocation
                let ((alloced, regAllocStats), usAlloc)
                        = {-# SCC "RegAlloc" #-}
                          initUs usLive
                          $ Color.regAlloc
                                dflags
                                alloc_regs
557
                                (mkUniqSet [0 .. maxSpillSlots ncgImpl])
558 559 560 561 562
                                withLiveness

                -- dump out what happened during register allocation
                dumpIfSet_dyn dflags
                        Opt_D_dump_asm_regalloc "Registers allocated"
563
                        (vcat $ map (pprNatCmmDecl ncgImpl) alloced)
564 565 566 567 568 569

                dumpIfSet_dyn dflags
                        Opt_D_dump_asm_regalloc_stages "Build/spill stages"
                        (vcat   $ map (\(stage, stats)
                                        -> text "# --------------------------"
                                        $$ text "#  cmm " <> int count <> text " Stage " <> int stage
Ian Lynagh's avatar
Ian Lynagh committed
570
                                        $$ ppr stats)
571 572 573
                                $ zip [0..] regAllocStats)

                let mPprStats =
574
                        if dopt Opt_D_dump_asm_stats dflags
575 576 577 578 579 580 581 582 583 584 585
                         then Just regAllocStats else Nothing

                -- force evaluation of the Maybe to avoid space leak
                mPprStats `seq` return ()

                return  ( alloced, usAlloc
                        , mPprStats
                        , Nothing)

          else do
                -- do linear register allocation
586 587 588 589 590
                let reg_alloc proc = do
                       (alloced, maybe_more_stack, ra_stats) <-
                               Linear.regAlloc dflags proc
                       case maybe_more_stack of
                         Nothing -> return ( alloced, ra_stats )
591 592 593
                         Just amount -> do
                           alloced' <- ncgAllocMoreStack ncgImpl amount alloced
                           return (alloced', ra_stats )
594

595 596 597 598
                let ((alloced, regAllocStats), usAlloc)
                        = {-# SCC "RegAlloc" #-}
                          initUs usLive
                          $ liftM unzip
599
                          $ mapM reg_alloc withLiveness
600 601 602

                dumpIfSet_dyn dflags
                        Opt_D_dump_asm_regalloc "Registers allocated"
603
                        (vcat $ map (pprNatCmmDecl ncgImpl) alloced)
604 605

                let mPprStats =
606
                        if dopt Opt_D_dump_asm_stats dflags
607 608 609 610 611 612 613 614
                         then Just (catMaybes regAllocStats) else Nothing

                -- force evaluation of the Maybe to avoid space leak
                mPprStats `seq` return ()

                return  ( alloced, usAlloc
                        , Nothing
                        , mPprStats)
615

616 617 618 619 620 621 622
        ---- x86fp_kludge.  This pass inserts ffree instructions to clear
        ---- the FPU stack on x86.  The x86 ABI requires that the FPU stack
        ---- is clear, and library functions can return odd results if it
        ---- isn't.
        ----
        ---- NB. must happen before shortcutBranches, because that
        ---- generates JXX_GBLs which we can't fix up in x86fp_kludge.
623
        let kludged = {-# SCC "x86fp_kludge" #-} ncg_x86fp_kludge ncgImpl alloced
624 625

        ---- generate jump tables
626 627
        let tabled      =
                {-# SCC "generateJumpTables" #-}
628
                generateJumpTables ncgImpl kludged
629

630 631 632 633
        ---- shortcut branches
        let shorted     =
                {-# SCC "shortcutBranches" #-}
                shortcutBranches dflags ncgImpl tabled
634

635 636 637 638
        ---- sequence blocks
        let sequenced   =
                {-# SCC "sequenceBlocks" #-}
                map (sequenceTop ncgImpl) shorted
639

640
        ---- expansion of SPARC synthetic instrs
641 642
        let expanded =
                {-# SCC "sparc_expand" #-}
643
                ncgExpandTop ncgImpl sequenced
644

645 646
        dumpIfSet_dyn dflags
                Opt_D_dump_asm_expanded "Synthetic instructions expanded"
647
                (vcat $ map (pprNatCmmDecl ncgImpl) expanded)
648

649
        return  ( usAlloc
650
                , fileIds'
651 652 653 654
                , expanded
                , lastMinuteImports ++ imports
                , ppr_raStatsColor
                , ppr_raStatsLinear)
655

656

Simon Peyton Jones's avatar
Simon Peyton Jones committed
657
x86fp_kludge :: NatCmmDecl (Alignment, CmmStatics) X86.Instr.Instr -> NatCmmDecl (Alignment, CmmStatics) X86.Instr.Instr
658
x86fp_kludge top@(CmmData _ _) = top
659 660
x86fp_kludge (CmmProc info lbl live (ListGraph code)) =
        CmmProc info lbl live (ListGraph $ X86.Instr.i386_insert_ffrees code)
661

662

663
-- | Build a doc for all the imports.
664
--
665
makeImportsDoc :: DynFlags -> [CLabel] -> SDoc
666
makeImportsDoc dflags imports
667
 = dyld_stubs imports
668
            $$
669 670 671
            -- On recent versions of Darwin, the linker supports
            -- dead-stripping of code and data on a per-symbol basis.
            -- There's a hack to make this work in PprMach.pprNatCmmDecl.
ian@well-typed.com's avatar
ian@well-typed.com committed
672
            (if platformHasSubsectionsViaSymbols platform
673
             then text ".subsections_via_symbols"
674
             else Outputable.empty)
675
            $$
676 677 678 679 680 681
                -- On recent GNU ELF systems one can mark an object file
                -- as not requiring an executable stack. If all objects
                -- linked into a program have this note then the program
                -- will not use an executable stack, which is good for
                -- security. GHC generated code does not need an executable
                -- stack so add the note in:
ian@well-typed.com's avatar
ian@well-typed.com committed
682
            (if platformHasGnuNonexecStack platform
683
             then text ".section .note.GNU-stack,\"\",@progbits"
684
             else Outputable.empty)
685
            $$
Gabor Greif's avatar
Gabor Greif committed
686
                -- And just because every other compiler does, let's stick in
687
                -- an identifier directive: .ident "GHC x.y.z"
ian@well-typed.com's avatar
ian@well-typed.com committed
688
            (if platformHasIdentDirective platform
689 690
             then let compilerIdent = text "GHC" <+> text cProjectVersion
                   in text ".ident" <+> doubleQuotes compilerIdent
691
             else Outputable.empty)
692

693
 where
ian@well-typed.com's avatar
ian@well-typed.com committed
694 695 696 697
        platform = targetPlatform dflags
        arch = platformArch platform
        os   = platformOS   platform

698 699
        -- Generate "symbol stubs" for all external symbols that might
        -- come from a dynamic library.
700 701
        dyld_stubs :: [CLabel] -> SDoc
{-      dyld_stubs imps = vcat $ map pprDyldSymbolStub $
702 703 704 705
                                    map head $ group $ sort imps-}
        -- (Hack) sometimes two Labels pretty-print the same, but have
        -- different uniques; so we compare their text versions...
        dyld_stubs imps
Ian Lynagh's avatar
Ian Lynagh committed
706
                | needImportedSymbols dflags arch os
707
                = vcat $
Ian Lynagh's avatar
Ian Lynagh committed
708 709
                        (pprGotDeclaration dflags arch os :) $
                        map ( pprImportedSymbol dflags platform . fst . head) $
710 711 712 713 714
                        groupBy (\(_,a) (_,b) -> a == b) $
                        sortBy (\(_,a) (_,b) -> compare a b) $
                        map doPpr $
                        imps
                | otherwise
715
                = Outputable.empty
716

Ian Lynagh's avatar
Ian Lynagh committed
717
        doPpr lbl = (lbl, renderWithStyle dflags (pprCLabel platform lbl) astyle)
718
        astyle = mkCodeStyle AsmStyle
719 720


721 722 723 724 725 726 727 728 729
-- -----------------------------------------------------------------------------
-- Sequencing the basic blocks

-- Cmm BasicBlocks are self-contained entities: they always end in a
-- jump, either non-local or to another basic block in the same proc.
-- In this phase, we attempt to place the basic blocks in a sequence
-- such that as many of the local jumps as possible turn into
-- fallthroughs.

730 731
sequenceTop
        :: Instruction instr
Simon Peyton Jones's avatar
Simon Peyton Jones committed
732
    => NcgImpl statics instr jumpDest -> NatCmmDecl statics instr -> NatCmmDecl statics instr
733

734
sequenceTop _       top@(CmmData _ _) = top
735
sequenceTop ncgImpl (CmmProc info lbl live (ListGraph blocks)) =
736
  CmmProc info lbl live (ListGraph $ ncgMakeFarBranches ncgImpl info $ sequenceBlocks info blocks)
737 738 739 740 741 742 743 744

-- The algorithm is very simple (and stupid): we make a graph out of
-- the blocks where there is an edge from one block to another iff the
-- first block ends by jumping to the second.  Then we topologically
-- sort this graph.  Then traverse the list: for each block, we first
-- output the block, then if it has an out edge, we move the
-- destination of the out edge to the front of the list, and continue.

745
-- FYI, the classic layout for basic blocks uses postorder DFS; this
746
-- algorithm is implemented in Hoopl.
747

748 749
sequenceBlocks
        :: Instruction instr
750 751
        => BlockEnv i
        -> [NatBasicBlock instr]
752
        -> [NatBasicBlock instr]
753

754 755 756
sequenceBlocks _ [] = []
sequenceBlocks infos (entry:blocks) =
  seqBlocks infos (mkNode entry : reverse (flattenSCCs (sccBlocks blocks)))
757 758
  -- the first block is the entry point ==> it must remain at the start.

759

760 761 762 763
sccBlocks
        :: Instruction instr
        => [NatBasicBlock instr]
        -> [SCC ( NatBasicBlock instr
764 765
                , BlockId
                , [BlockId])]
766

767
sccBlocks blocks = stronglyConnCompFromEdgedVerticesR (map mkNode blocks)
768

769 770
-- we're only interested in the last instruction of
-- the block, and only if it has a single destination.
771 772
getOutEdges
        :: Instruction instr
773
        => [instr] -> [BlockId]
774

775 776
getOutEdges instrs
        = case jumpDestsOfInstr (last instrs) of
777
                [one] -> [one]
778
                _many -> []
779

dterei's avatar
dterei committed
780 781
mkNode :: (Instruction t)
       => GenBasicBlock t
782 783 784 785 786
       -> (GenBasicBlock t, BlockId, [BlockId])
mkNode block@(BasicBlock id instrs) = (block, id, getOutEdges instrs)

seqBlocks :: BlockEnv i -> [(GenBasicBlock t1, BlockId, [BlockId])]
                        -> [GenBasicBlock t1]
787
seqBlocks infos blocks = placeNext pullable0 todo0
788
  where
789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821
    -- pullable: Blocks that are not yet placed
    -- todo:     Original order of blocks, to be followed if we have no good
    --           reason not to;
    --           may include blocks that have already been placed, but then
    --           these are not in pullable
    pullable0 = listToUFM [ (i,(b,n)) | (b,i,n) <- blocks ]
    todo0     = [i | (_,i,_) <- blocks ]

    placeNext _ [] = []
    placeNext pullable (i:rest)
        | Just (block, pullable') <- lookupDeleteUFM pullable i
        = place pullable' rest block
        | otherwise
        -- We already placed this block, so ignore
        = placeNext pullable rest

    place pullable todo (block,[])
                          = block : placeNext pullable todo
    place pullable todo (block@(BasicBlock id instrs),[next])
        | mapMember next infos
        = block : placeNext pullable todo
        | Just (nextBlock, pullable') <- lookupDeleteUFM pullable next
        = BasicBlock id (init instrs) : place pullable' todo nextBlock
        | otherwise
        = block : placeNext pullable todo
    place _ _ (_,tooManyNextNodes)
        = pprPanic "seqBlocks" (ppr tooManyNextNodes)


lookupDeleteUFM :: Uniquable key => UniqFM elt -> key -> Maybe (elt, UniqFM elt)
lookupDeleteUFM m k = do -- Maybe monad
    v <- lookupUFM m k
    return (v, delFromUFM m k)
822

823 824 825 826 827 828
-- -----------------------------------------------------------------------------
-- Generate jump tables

-- Analyzes all native code and generates data sections for all jump
-- table instructions.
generateJumpTables
829 830 831
        :: NcgImpl statics instr jumpDest
        -> [NatCmmDecl statics instr] -> [NatCmmDecl statics instr]
generateJumpTables ncgImpl xs = concatMap f xs
832
    where f p@(CmmProc _ _ _ (ListGraph xs)) = p : concatMap g xs
833
          f p = [p]
834
          g (BasicBlock _ xs) = catMaybes (map (generateJumpTableForInstr ncgImpl) xs)
835

836 837 838
-- -----------------------------------------------------------------------------
-- Shortcut branches

839
shortcutBranches
840
        :: DynFlags
841
    -> NcgImpl statics instr jumpDest
842 843
        -> [NatCmmDecl statics instr]
        -> [NatCmmDecl statics instr]
844

845
shortcutBranches dflags ncgImpl tops
846
  | optLevel dflags < 1 = tops    -- only with -O or higher
847
  | otherwise           = map (apply_mapping ncgImpl mapping) tops'
848
  where
849
    (tops', mappings) = mapAndUnzip (build_mapping ncgImpl) tops
850 851
    mapping = foldr plusUFM emptyUFM mappings

852
build_mapping :: NcgImpl statics instr jumpDest
853 854
              -> GenCmmDecl d (BlockEnv t) (ListGraph instr)
              -> (GenCmmDecl d (BlockEnv t) (ListGraph instr), UniqFM jumpDest)
855
build_mapping _ top@(CmmData _ _) = (top, emptyUFM)
856 857 858 859
build_mapping _ (CmmProc info lbl live (ListGraph []))
  = (CmmProc info lbl live (ListGraph []), emptyUFM)
build_mapping ncgImpl (CmmProc info lbl live (ListGraph (head:blocks)))
  = (CmmProc info lbl live (ListGraph (head:others)), mapping)
860 861 862 863 864
        -- drop the shorted blocks, but don't ever drop the first one,
        -- because it is pointed to by a global label.
  where
    -- find all the blocks that just consist of a jump that can be
    -- shorted.
865 866 867
    -- Don't completely eliminate loops here -- that can leave a dangling jump!
    (_, shortcut_blocks, others) = foldl split (emptyBlockSet, [], []) blocks
    split (s, shortcut_blocks, others) b@(BasicBlock id [insn])
868 869
        | Just jd <- canShortcut ncgImpl insn,
          Just dest <- getJumpDestBlockId ncgImpl jd,
870
          not (has_info id),
871
          (setMember dest s) || dest == id -- loop checks
872 873
        = (s, shortcut_blocks, b : others)
    split (s, shortcut_blocks, others) (BasicBlock id [insn])
874 875
        | Just dest <- canShortcut ncgImpl insn,
          not (has_info id)
876
        = (setInsert id s, (id,dest) : shortcut_blocks, others)
877 878
    split (s, shortcut_blocks, others) other = (s, shortcut_blocks, other : others)

879