AsmCodeGen.lhs 41.1 KB
Newer Older
1
2
3
-- -----------------------------------------------------------------------------
--
-- (c) The University of Glasgow 1993-2004
4
--
5
6
7
-- This is the top-level module in the native code generator.
--
-- -----------------------------------------------------------------------------
8
9

\begin{code}
10
11
{-# LANGUAGE BangPatterns, CPP, GADTs, ScopedTypeVariables, UnboxedTuples #-}

12
module AsmCodeGen ( nativeCodeGen ) where
13

14
#include "HsVersions.h"
15
#include "nativeGen/NCG.h"
16

17

18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
import qualified X86.CodeGen
import qualified X86.Regs
import qualified X86.Instr
import qualified X86.Ppr

import qualified SPARC.CodeGen
import qualified SPARC.Regs
import qualified SPARC.Instr
import qualified SPARC.Ppr
import qualified SPARC.ShortcutJump
import qualified SPARC.CodeGen.Expand

import qualified PPC.CodeGen
import qualified PPC.Regs
import qualified PPC.RegInfo
import qualified PPC.Instr
import qualified PPC.Ppr
35
36

import RegAlloc.Liveness
37
import qualified RegAlloc.Linear.Main           as Linear
38

39
40
41
42
import qualified GraphColor                     as Color
import qualified RegAlloc.Graph.Main            as Color
import qualified RegAlloc.Graph.Stats           as Color
import qualified RegAlloc.Graph.TrivColorable   as Color
43

44
import TargetReg
45
import Platform
46
import Config
47
48
49
50
import Instruction
import PIC
import Reg
import NCGMonad
51

52
import BlockId
53
import CgUtils          ( fixStgRegisters )
54
55
56
import Cmm
import CmmUtils
import Hoopl
Simon Marlow's avatar
Simon Marlow committed
57
import CmmOpt           ( cmmMachOpFold )
58
import PprCmm
59
import CLabel
60

61
import UniqFM
62
import UniqSupply
63
64
import DynFlags
import Util
65

66
import BasicTypes       ( Alignment )
67
import Digraph
68
import qualified Pretty
69
import BufWrite
70
import Outputable
71
import FastString
72
import UniqSet
73
import ErrUtils
74
import Module
75
76
import Stream (Stream)
import qualified Stream
77

78
79
-- DEBUGGING ONLY
--import OrdList
80

81
import Data.List
82
import Data.Maybe
83
import Control.Exception
Austin Seipp's avatar
Austin Seipp committed
84
import Control.Applicative (Applicative(..))
85
import Control.Monad
86
import System.IO
87

88
89
90
{-
The native-code generator has machine-independent and
machine-dependent modules.
91

92
93
94
95
This module ("AsmCodeGen") is the top-level machine-independent
module.  Before entering machine-dependent land, we do some
machine-independent optimisations (defined below) on the
'CmmStmts's.
96

97
98
99
100
101
102
103
104
105
We convert to the machine-specific 'Instr' datatype with
'cmmCodeGen', assuming an infinite supply of registers.  We then use
a machine-independent register allocator ('regAlloc') to rejoin
reality.  Obviously, 'regAlloc' has machine-specific helper
functions (see about "RegAllocInfo" below).

Finally, we order the basic blocks of the function so as to minimise
the number of jumps between blocks, by utilising fallthrough wherever
possible.
106
107

The machine-dependent bits break down as follows:
108
109

  * ["MachRegs"]  Everything about the target platform's machine
110
111
112
    registers (and immediate operands, and addresses, which tend to
    intermingle/interact with registers).

113
  * ["MachInstrs"]  Includes the 'Instr' datatype (possibly should
114
    have a module of its own), plus a miscellany of other things
115
    (e.g., 'targetDoubleSize', 'smStablePtrTable', ...)
116

117
  * ["MachCodeGen"]  is where 'Cmm' stuff turns into
118
    machine instructions.
119

120
  * ["PprMach"] 'pprInstr' turns an 'Instr' into text (well, really
121
    a 'SDoc').
122

123
124
  * ["RegAllocInfo"] In the register allocator, we manipulate
    'MRegsState's, which are 'BitSet's, one bit per machine register.
125
126
    When we want to say something about a specific machine register
    (e.g., ``it gets clobbered by this instruction''), we set/unset
127
    its bit.  Obviously, we do this 'BitSet' thing for efficiency
128
129
    reasons.

130
    The 'RegAllocInfo' module collects together the machine-specific
131
132
    info needed to do register allocation.

133
134
   * ["RegisterAlloc"] The (machine-independent) register allocator.
-}
135

136
137
-- -----------------------------------------------------------------------------
-- Top-level of the native codegen
138

139
data NcgImpl statics instr jumpDest = NcgImpl {
Simon Peyton Jones's avatar
Simon Peyton Jones committed
140
    cmmTopCodeGen             :: RawCmmDecl -> NatM [NatCmmDecl statics instr],
141
    generateJumpTableForInstr :: instr -> Maybe (NatCmmDecl statics instr),
142
143
    getJumpDestBlockId        :: jumpDest -> Maybe BlockId,
    canShortcut               :: instr -> Maybe jumpDest,
144
    shortcutStatics           :: (BlockId -> Maybe jumpDest) -> statics -> statics,
145
    shortcutJump              :: (BlockId -> Maybe jumpDest) -> instr -> instr,
146
    pprNatCmmDecl             :: NatCmmDecl statics instr -> SDoc,
147
148
    maxSpillSlots             :: Int,
    allocatableRegs           :: [RealReg],
Simon Peyton Jones's avatar
Simon Peyton Jones committed
149
150
    ncg_x86fp_kludge          :: [NatCmmDecl statics instr] -> [NatCmmDecl statics instr],
    ncgExpandTop              :: [NatCmmDecl statics instr] -> [NatCmmDecl statics instr],
151
    ncgAllocMoreStack         :: Int -> NatCmmDecl statics instr -> UniqSM (NatCmmDecl statics instr),
152
    ncgMakeFarBranches        :: BlockEnv CmmStatics -> [NatBasicBlock instr] -> [NatBasicBlock instr]
153
154
    }

155
--------------------
156
nativeCodeGen :: DynFlags -> Module -> Handle -> UniqSupply
157
              -> Stream IO RawCmmGroup ()
158
              -> IO UniqSupply
159
nativeCodeGen dflags this_mod h us cmms
160
 = let platform = targetPlatform dflags
161
162
       nCG' :: (Outputable statics, Outputable instr, Instruction instr)
            => NcgImpl statics instr jumpDest -> IO UniqSupply
163
       nCG' ncgImpl = nativeCodeGen' dflags this_mod ncgImpl h us cmms
164
   in case platformArch platform of
165
166
167
168
169
      ArchX86     -> nCG' (x86NcgImpl    dflags)
      ArchX86_64  -> nCG' (x86_64NcgImpl dflags)
      ArchPPC     -> nCG' (ppcNcgImpl    dflags)
      ArchSPARC   -> nCG' (sparcNcgImpl  dflags)
      ArchARM {}  -> panic "nativeCodeGen: No NCG for ARM"
Colin Watson's avatar
Colin Watson committed
170
      ArchARM64   -> panic "nativeCodeGen: No NCG for ARM64"
171
172
173
174
175
      ArchPPC_64  -> panic "nativeCodeGen: No NCG for PPC 64"
      ArchAlpha   -> panic "nativeCodeGen: No NCG for Alpha"
      ArchMipseb  -> panic "nativeCodeGen: No NCG for mipseb"
      ArchMipsel  -> panic "nativeCodeGen: No NCG for mipsel"
      ArchUnknown -> panic "nativeCodeGen: No NCG for unknown arch"
thoughtpolice's avatar
thoughtpolice committed
176
      ArchJavaScript -> panic "nativeCodeGen: No NCG for JavaScript"
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196

x86NcgImpl :: DynFlags -> NcgImpl (Alignment, CmmStatics) X86.Instr.Instr X86.Instr.JumpDest
x86NcgImpl dflags
 = (x86_64NcgImpl dflags) { ncg_x86fp_kludge = map x86fp_kludge }

x86_64NcgImpl :: DynFlags -> NcgImpl (Alignment, CmmStatics) X86.Instr.Instr X86.Instr.JumpDest
x86_64NcgImpl dflags
 = NcgImpl {
        cmmTopCodeGen             = X86.CodeGen.cmmTopCodeGen
       ,generateJumpTableForInstr = X86.CodeGen.generateJumpTableForInstr dflags
       ,getJumpDestBlockId        = X86.Instr.getJumpDestBlockId
       ,canShortcut               = X86.Instr.canShortcut
       ,shortcutStatics           = X86.Instr.shortcutStatics
       ,shortcutJump              = X86.Instr.shortcutJump
       ,pprNatCmmDecl             = X86.Ppr.pprNatCmmDecl
       ,maxSpillSlots             = X86.Instr.maxSpillSlots dflags
       ,allocatableRegs           = X86.Regs.allocatableRegs platform
       ,ncg_x86fp_kludge          = id
       ,ncgAllocMoreStack         = X86.Instr.allocMoreStack platform
       ,ncgExpandTop              = id
197
       ,ncgMakeFarBranches        = const id
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
   }
    where platform = targetPlatform dflags

ppcNcgImpl :: DynFlags -> NcgImpl CmmStatics PPC.Instr.Instr PPC.RegInfo.JumpDest
ppcNcgImpl dflags
 = NcgImpl {
        cmmTopCodeGen             = PPC.CodeGen.cmmTopCodeGen
       ,generateJumpTableForInstr = PPC.CodeGen.generateJumpTableForInstr dflags
       ,getJumpDestBlockId        = PPC.RegInfo.getJumpDestBlockId
       ,canShortcut               = PPC.RegInfo.canShortcut
       ,shortcutStatics           = PPC.RegInfo.shortcutStatics
       ,shortcutJump              = PPC.RegInfo.shortcutJump
       ,pprNatCmmDecl             = PPC.Ppr.pprNatCmmDecl
       ,maxSpillSlots             = PPC.Instr.maxSpillSlots dflags
       ,allocatableRegs           = PPC.Regs.allocatableRegs platform
       ,ncg_x86fp_kludge          = id
       ,ncgAllocMoreStack         = PPC.Instr.allocMoreStack platform
       ,ncgExpandTop              = id
216
       ,ncgMakeFarBranches        = PPC.Instr.makeFarBranches
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
   }
    where platform = targetPlatform dflags

sparcNcgImpl :: DynFlags -> NcgImpl CmmStatics SPARC.Instr.Instr SPARC.ShortcutJump.JumpDest
sparcNcgImpl dflags
 = NcgImpl {
        cmmTopCodeGen             = SPARC.CodeGen.cmmTopCodeGen
       ,generateJumpTableForInstr = SPARC.CodeGen.generateJumpTableForInstr dflags
       ,getJumpDestBlockId        = SPARC.ShortcutJump.getJumpDestBlockId
       ,canShortcut               = SPARC.ShortcutJump.canShortcut
       ,shortcutStatics           = SPARC.ShortcutJump.shortcutStatics
       ,shortcutJump              = SPARC.ShortcutJump.shortcutJump
       ,pprNatCmmDecl             = SPARC.Ppr.pprNatCmmDecl
       ,maxSpillSlots             = SPARC.Instr.maxSpillSlots dflags
       ,allocatableRegs           = SPARC.Regs.allocatableRegs
       ,ncg_x86fp_kludge          = id
       ,ncgAllocMoreStack         = noAllocMoreStack
       ,ncgExpandTop              = map SPARC.CodeGen.Expand.expandTop
235
       ,ncgMakeFarBranches        = const id
236
   }
237
238
239
240
241
242
243

--
-- Allocating more stack space for spilling is currently only
-- supported for the linear register allocator on x86/x86_64, the rest
-- default to the panic below.  To support allocating extra stack on
-- more platforms provide a definition of ncgAllocMoreStack.
--
244
noAllocMoreStack :: Int -> NatCmmDecl statics instr -> UniqSM (NatCmmDecl statics instr)
245
246
247
248
249
250
251
252
253
noAllocMoreStack amount _
  = panic $   "Register allocator: out of stack slots (need " ++ show amount ++ ")\n"
        ++  "   If you are trying to compile SHA1.hs from the crypto library then this\n"
        ++  "   is a known limitation in the linear allocator.\n"
        ++  "\n"
        ++  "   Try enabling the graph colouring allocator with -fregs-graph instead."
        ++  "   You can still file a bug report if you like.\n"


254
255
256
257
258
259
type NativeGenAcc statics instr
        = ([[CLabel]],
           [([NatCmmDecl statics instr],
             Maybe [Color.RegAllocStats statics instr],
             Maybe [Linear.RegAllocStats])])

Ian Lynagh's avatar
Ian Lynagh committed
260
nativeCodeGen' :: (Outputable statics, Outputable instr, Instruction instr)
261
               => DynFlags
262
               -> Module
263
               -> NcgImpl statics instr jumpDest
264
               -> Handle
265
266
267
               -> UniqSupply
               -> Stream IO RawCmmGroup ()
               -> IO UniqSupply
268
nativeCodeGen' dflags this_mod ncgImpl h us cmms
269
 = do
270
        let split_cmms  = Stream.map add_split cmms
271
272
273
        -- BufHandle is a performance hack.  We could hide it inside
        -- Pretty if it weren't for the fact that we do lots of little
        -- printDocs here (in order to do codegen in constant space).
274
        bufh <- newBufHandle h
275
        (ngs, us') <- cmmNativeGenStream dflags this_mod ncgImpl bufh us split_cmms ([], [])
276
        finishNativeGen dflags ncgImpl bufh ngs
277
278
279
280
281
282
283
284
285
286
287
288

        return us'

 where  add_split tops
                | gopt Opt_SplitObjs dflags = split_marker : tops
                | otherwise                 = tops

        split_marker = CmmProc mapEmpty mkSplitMarkerLabel []
                               (ofBlockList (panic "split_marker_entry") [])


finishNativeGen :: Instruction instr
289
290
291
292
                => DynFlags
                -> NcgImpl statics instr jumpDest
                -> BufHandle
                -> NativeGenAcc statics instr
293
                -> IO ()
294
finishNativeGen dflags ncgImpl bufh@(BufHandle _ _ h) (imports, prof)
295
 = do
296
        bFlush bufh
297

298
        let platform = targetPlatform dflags
299
300
301
302
303
304
        let (native, colorStats, linearStats)
                = unzip3 prof

        -- dump native code
        dumpIfSet_dyn dflags
                Opt_D_dump_asm "Asm code"
305
                (vcat $ map (pprNatCmmDecl ncgImpl) $ concat native)
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336

        -- dump global NCG stats for graph coloring allocator
        (case concat $ catMaybes colorStats of
          []    -> return ()
          stats -> do
                -- build the global register conflict graph
                let graphGlobal
                        = foldl Color.union Color.initGraph
                        $ [ Color.raGraph stat
                                | stat@Color.RegAllocStatsStart{} <- stats]

                dumpSDoc dflags Opt_D_dump_asm_stats "NCG stats"
                        $ Color.pprStats stats graphGlobal

                dumpIfSet_dyn dflags
                        Opt_D_dump_asm_conflicts "Register conflict graph"
                        $ Color.dotGraph
                                (targetRegDotColor platform)
                                (Color.trivColorable platform
                                        (targetVirtualRegSqueeze platform)
                                        (targetRealRegSqueeze platform))
                        $ graphGlobal)


        -- dump global NCG stats for linear allocator
        (case concat $ catMaybes linearStats of
                []      -> return ()
                stats   -> dumpSDoc dflags Opt_D_dump_asm_stats "NCG stats"
                                $ Linear.pprStats (concat native) stats)

        -- write out the imports
Ian Lynagh's avatar
Ian Lynagh committed
337
        Pretty.printDoc Pretty.LeftMode (pprCols dflags) h
Ian Lynagh's avatar
Ian Lynagh committed
338
                $ withPprStyleDoc dflags (mkCodeStyle AsmStyle)
339
340
                $ makeImportsDoc dflags (concat imports)

Simon Marlow's avatar
Simon Marlow committed
341
cmmNativeGenStream :: (Outputable statics, Outputable instr, Instruction instr)
342
              => DynFlags
343
              -> Module
344
345
              -> NcgImpl statics instr jumpDest
              -> BufHandle
346
347
              -> UniqSupply
              -> Stream IO RawCmmGroup ()
348
349
              -> NativeGenAcc statics instr
              -> IO (NativeGenAcc statics instr, UniqSupply)
350

351
cmmNativeGenStream dflags this_mod ncgImpl h us cmm_stream ngs@(impAcc, profAcc)
352
353
 = do r <- Stream.runStream cmm_stream
      case r of
354
          Left () ->
355
              return ((reverse impAcc, reverse profAcc) , us)
356
          Right (cmms, cmm_stream') -> do
357
358
              (ngs',us') <- cmmNativeGens dflags this_mod ncgImpl h us cmms ngs 0
              cmmNativeGenStream dflags this_mod ncgImpl h us' cmm_stream' ngs'
359

360
361
-- | Do native code generation on all these cmms.
--
Ian Lynagh's avatar
Ian Lynagh committed
362
cmmNativeGens :: (Outputable statics, Outputable instr, Instruction instr)
363
              => DynFlags
364
              -> Module
365
366
              -> NcgImpl statics instr jumpDest
              -> BufHandle
dterei's avatar
dterei committed
367
              -> UniqSupply
Simon Peyton Jones's avatar
Simon Peyton Jones committed
368
              -> [RawCmmDecl]
369
370
371
              -> NativeGenAcc statics instr
              -> Int
              -> IO (NativeGenAcc statics instr, UniqSupply)
372

373
cmmNativeGens _ _ _ _ us [] ngs _
374
        = return (ngs, us)
375

376
cmmNativeGens dflags this_mod ncgImpl h us (cmm : cmms) (impAcc, profAcc) count
377
 = do
378
        (us', native, imports, colorStats, linearStats)
379
                <- {-# SCC "cmmNativeGen" #-} cmmNativeGen dflags this_mod ncgImpl us cmm count
380

Simon Marlow's avatar
Simon Marlow committed
381
        {-# SCC "pprNativeCode" #-} Pretty.bufLeftRender h
Ian Lynagh's avatar
Ian Lynagh committed
382
                $ withPprStyleDoc dflags (mkCodeStyle AsmStyle)
383
                $ vcat $ map (pprNatCmmDecl ncgImpl) native
384

385
        let !lsPprNative =
386
387
                if  dopt Opt_D_dump_asm       dflags
                 || dopt Opt_D_dump_asm_stats dflags
388
389
                        then native
                        else []
390

391
        let !count' = count + 1
392

Gabor Greif's avatar
typo    
Gabor Greif committed
393
        -- force evaluation all this stuff to avoid space leaks
394
        {-# SCC "seqString" #-} evaluate $ seqString (showSDoc dflags $ vcat $ map ppr imports)
395

396
        cmmNativeGens dflags this_mod ncgImpl h
397
398
            us' cmms ((imports : impAcc),
                      ((lsPprNative, colorStats, linearStats) : profAcc))
399
                     count'
400

401
 where  seqString []            = ()
402
        seqString (x:xs)        = x `seq` seqString xs
403
404


405
-- | Complete native code generation phase for a single top-level chunk of Cmm.
406
407
--      Dumping the output of each stage along the way.
--      Global conflict graph and NGC stats
408
cmmNativeGen
Ian Lynagh's avatar
Ian Lynagh committed
409
        :: (Outputable statics, Outputable instr, Instruction instr)
410
    => DynFlags
411
    -> Module
412
    -> NcgImpl statics instr jumpDest
413
414
415
416
417
418
419
420
        -> UniqSupply
        -> RawCmmDecl                                   -- ^ the cmm to generate code for
        -> Int                                          -- ^ sequence number of this top thing
        -> IO   ( UniqSupply
                , [NatCmmDecl statics instr]                -- native code
                , [CLabel]                                  -- things imported by this cmm
                , Maybe [Color.RegAllocStats statics instr] -- stats for the coloring register allocator
                , Maybe [Linear.RegAllocStats])             -- stats for the linear register allocators
421

422
cmmNativeGen dflags this_mod ncgImpl us cmm count
423
 = do
424
        let platform = targetPlatform dflags
425

426
427
428
        -- rewrite assignments to global regs
        let fixed_cmm =
                {-# SCC "fixStgRegisters" #-}
429
                fixStgRegisters dflags cmm
430

431
432
433
        -- cmm to cmm optimisations
        let (opt_cmm, imports) =
                {-# SCC "cmmToCmm" #-}
434
                cmmToCmm dflags this_mod fixed_cmm
435

436
437
        dumpIfSet_dyn dflags
                Opt_D_dump_opt_cmm "Optimised Cmm"
438
                (pprCmmGroup [opt_cmm])
439

440
441
442
        -- generate native code from cmm
        let ((native, lastMinuteImports), usGen) =
                {-# SCC "genMachCode" #-}
443
                initUs us $ genMachCode dflags this_mod (cmmTopCodeGen ncgImpl) opt_cmm
444
445
446

        dumpIfSet_dyn dflags
                Opt_D_dump_asm_native "Native code"
447
                (vcat $ map (pprNatCmmDecl ncgImpl) native)
448
449
450
451
452

        -- tag instructions with register liveness information
        let (withLiveness, usLive) =
                {-# SCC "regLiveness" #-}
                initUs usGen
453
                        $ mapM (regLiveness platform)
454
455
456
457
                        $ map natCmmTopToLive native

        dumpIfSet_dyn dflags
                Opt_D_dump_asm_liveness "Liveness annotations added"
Ian Lynagh's avatar
Ian Lynagh committed
458
                (vcat $ map ppr withLiveness)
459
460
461

        -- allocate registers
        (alloced, usAlloc, ppr_raStatsColor, ppr_raStatsLinear) <-
462
463
464
465
         if False
           -- Disabled, see #7679, #8657
           --  ( gopt Opt_RegsGraph dflags
           --  || gopt Opt_RegsIterative dflags)
466
467
468
469
470
471
          then do
                -- the regs usable for allocation
                let (alloc_regs :: UniqFM (UniqSet RealReg))
                        = foldr (\r -> plusUFM_C unionUniqSets
                                        $ unitUFM (targetClassOfRealReg platform r) (unitUniqSet r))
                                emptyUFM
472
                        $ allocatableRegs ncgImpl
473
474
475
476
477
478
479
480

                -- do the graph coloring register allocation
                let ((alloced, regAllocStats), usAlloc)
                        = {-# SCC "RegAlloc" #-}
                          initUs usLive
                          $ Color.regAlloc
                                dflags
                                alloc_regs
481
                                (mkUniqSet [0 .. maxSpillSlots ncgImpl])
482
483
484
485
486
                                withLiveness

                -- dump out what happened during register allocation
                dumpIfSet_dyn dflags
                        Opt_D_dump_asm_regalloc "Registers allocated"
487
                        (vcat $ map (pprNatCmmDecl ncgImpl) alloced)
488
489
490
491
492
493

                dumpIfSet_dyn dflags
                        Opt_D_dump_asm_regalloc_stages "Build/spill stages"
                        (vcat   $ map (\(stage, stats)
                                        -> text "# --------------------------"
                                        $$ text "#  cmm " <> int count <> text " Stage " <> int stage
Ian Lynagh's avatar
Ian Lynagh committed
494
                                        $$ ppr stats)
495
496
497
                                $ zip [0..] regAllocStats)

                let mPprStats =
498
                        if dopt Opt_D_dump_asm_stats dflags
499
500
501
502
503
504
505
506
507
508
509
                         then Just regAllocStats else Nothing

                -- force evaluation of the Maybe to avoid space leak
                mPprStats `seq` return ()

                return  ( alloced, usAlloc
                        , mPprStats
                        , Nothing)

          else do
                -- do linear register allocation
510
511
512
513
514
                let reg_alloc proc = do
                       (alloced, maybe_more_stack, ra_stats) <-
                               Linear.regAlloc dflags proc
                       case maybe_more_stack of
                         Nothing -> return ( alloced, ra_stats )
515
516
517
                         Just amount -> do
                           alloced' <- ncgAllocMoreStack ncgImpl amount alloced
                           return (alloced', ra_stats )
518

519
520
521
522
                let ((alloced, regAllocStats), usAlloc)
                        = {-# SCC "RegAlloc" #-}
                          initUs usLive
                          $ liftM unzip
523
                          $ mapM reg_alloc withLiveness
524
525
526

                dumpIfSet_dyn dflags
                        Opt_D_dump_asm_regalloc "Registers allocated"
527
                        (vcat $ map (pprNatCmmDecl ncgImpl) alloced)
528
529

                let mPprStats =
530
                        if dopt Opt_D_dump_asm_stats dflags
531
532
533
534
535
536
537
538
                         then Just (catMaybes regAllocStats) else Nothing

                -- force evaluation of the Maybe to avoid space leak
                mPprStats `seq` return ()

                return  ( alloced, usAlloc
                        , Nothing
                        , mPprStats)
539

540
541
542
543
544
545
546
        ---- x86fp_kludge.  This pass inserts ffree instructions to clear
        ---- the FPU stack on x86.  The x86 ABI requires that the FPU stack
        ---- is clear, and library functions can return odd results if it
        ---- isn't.
        ----
        ---- NB. must happen before shortcutBranches, because that
        ---- generates JXX_GBLs which we can't fix up in x86fp_kludge.
547
        let kludged = {-# SCC "x86fp_kludge" #-} ncg_x86fp_kludge ncgImpl alloced
548
549

        ---- generate jump tables
550
551
        let tabled      =
                {-# SCC "generateJumpTables" #-}
552
                generateJumpTables ncgImpl kludged
553

554
555
556
557
        ---- shortcut branches
        let shorted     =
                {-# SCC "shortcutBranches" #-}
                shortcutBranches dflags ncgImpl tabled
558

559
560
561
562
        ---- sequence blocks
        let sequenced   =
                {-# SCC "sequenceBlocks" #-}
                map (sequenceTop ncgImpl) shorted
563

564
        ---- expansion of SPARC synthetic instrs
565
566
        let expanded =
                {-# SCC "sparc_expand" #-}
567
                ncgExpandTop ncgImpl sequenced
568

569
570
        dumpIfSet_dyn dflags
                Opt_D_dump_asm_expanded "Synthetic instructions expanded"
571
                (vcat $ map (pprNatCmmDecl ncgImpl) expanded)
572

573
574
575
576
577
        return  ( usAlloc
                , expanded
                , lastMinuteImports ++ imports
                , ppr_raStatsColor
                , ppr_raStatsLinear)
578

579

Simon Peyton Jones's avatar
Simon Peyton Jones committed
580
x86fp_kludge :: NatCmmDecl (Alignment, CmmStatics) X86.Instr.Instr -> NatCmmDecl (Alignment, CmmStatics) X86.Instr.Instr
581
x86fp_kludge top@(CmmData _ _) = top
582
583
x86fp_kludge (CmmProc info lbl live (ListGraph code)) =
        CmmProc info lbl live (ListGraph $ X86.Instr.i386_insert_ffrees code)
584

585

586
-- | Build a doc for all the imports.
587
--
588
makeImportsDoc :: DynFlags -> [CLabel] -> SDoc
589
makeImportsDoc dflags imports
590
 = dyld_stubs imports
591
            $$
592
593
594
            -- On recent versions of Darwin, the linker supports
            -- dead-stripping of code and data on a per-symbol basis.
            -- There's a hack to make this work in PprMach.pprNatCmmDecl.
ian@well-typed.com's avatar
ian@well-typed.com committed
595
            (if platformHasSubsectionsViaSymbols platform
596
597
598
             then text ".subsections_via_symbols"
             else empty)
            $$
599
600
601
602
603
604
                -- On recent GNU ELF systems one can mark an object file
                -- as not requiring an executable stack. If all objects
                -- linked into a program have this note then the program
                -- will not use an executable stack, which is good for
                -- security. GHC generated code does not need an executable
                -- stack so add the note in:
ian@well-typed.com's avatar
ian@well-typed.com committed
605
            (if platformHasGnuNonexecStack platform
606
607
608
             then text ".section .note.GNU-stack,\"\",@progbits"
             else empty)
            $$
Gabor Greif's avatar
Gabor Greif committed
609
                -- And just because every other compiler does, let's stick in
610
                -- an identifier directive: .ident "GHC x.y.z"
ian@well-typed.com's avatar
ian@well-typed.com committed
611
            (if platformHasIdentDirective platform
612
613
614
             then let compilerIdent = text "GHC" <+> text cProjectVersion
                   in text ".ident" <+> doubleQuotes compilerIdent
             else empty)
615

616
 where
ian@well-typed.com's avatar
ian@well-typed.com committed
617
618
619
620
        platform = targetPlatform dflags
        arch = platformArch platform
        os   = platformOS   platform

621
622
        -- Generate "symbol stubs" for all external symbols that might
        -- come from a dynamic library.
623
624
        dyld_stubs :: [CLabel] -> SDoc
{-      dyld_stubs imps = vcat $ map pprDyldSymbolStub $
625
626
627
628
                                    map head $ group $ sort imps-}
        -- (Hack) sometimes two Labels pretty-print the same, but have
        -- different uniques; so we compare their text versions...
        dyld_stubs imps
Ian Lynagh's avatar
Ian Lynagh committed
629
                | needImportedSymbols dflags arch os
630
                = vcat $
Ian Lynagh's avatar
Ian Lynagh committed
631
632
                        (pprGotDeclaration dflags arch os :) $
                        map ( pprImportedSymbol dflags platform . fst . head) $
633
634
635
636
637
                        groupBy (\(_,a) (_,b) -> a == b) $
                        sortBy (\(_,a) (_,b) -> compare a b) $
                        map doPpr $
                        imps
                | otherwise
638
                = empty
639

Ian Lynagh's avatar
Ian Lynagh committed
640
        doPpr lbl = (lbl, renderWithStyle dflags (pprCLabel platform lbl) astyle)
641
        astyle = mkCodeStyle AsmStyle
642
643


644
645
646
647
648
649
650
651
652
-- -----------------------------------------------------------------------------
-- Sequencing the basic blocks

-- Cmm BasicBlocks are self-contained entities: they always end in a
-- jump, either non-local or to another basic block in the same proc.
-- In this phase, we attempt to place the basic blocks in a sequence
-- such that as many of the local jumps as possible turn into
-- fallthroughs.

653
654
sequenceTop
        :: Instruction instr
Simon Peyton Jones's avatar
Simon Peyton Jones committed
655
    => NcgImpl statics instr jumpDest -> NatCmmDecl statics instr -> NatCmmDecl statics instr
656

657
sequenceTop _       top@(CmmData _ _) = top
658
sequenceTop ncgImpl (CmmProc info lbl live (ListGraph blocks)) =
659
  CmmProc info lbl live (ListGraph $ ncgMakeFarBranches ncgImpl info $ sequenceBlocks info blocks)
660
661
662
663
664
665
666
667

-- The algorithm is very simple (and stupid): we make a graph out of
-- the blocks where there is an edge from one block to another iff the
-- first block ends by jumping to the second.  Then we topologically
-- sort this graph.  Then traverse the list: for each block, we first
-- output the block, then if it has an out edge, we move the
-- destination of the out edge to the front of the list, and continue.

668
-- FYI, the classic layout for basic blocks uses postorder DFS; this
669
-- algorithm is implemented in Hoopl.
670

671
672
sequenceBlocks
        :: Instruction instr
673
674
        => BlockEnv i
        -> [NatBasicBlock instr]
675
        -> [NatBasicBlock instr]
676

677
678
679
sequenceBlocks _ [] = []
sequenceBlocks infos (entry:blocks) =
  seqBlocks infos (mkNode entry : reverse (flattenSCCs (sccBlocks blocks)))
680
681
  -- the first block is the entry point ==> it must remain at the start.

682

683
684
685
686
sccBlocks
        :: Instruction instr
        => [NatBasicBlock instr]
        -> [SCC ( NatBasicBlock instr
687
688
                , BlockId
                , [BlockId])]
689

690
sccBlocks blocks = stronglyConnCompFromEdgedVerticesR (map mkNode blocks)
691

692
693
-- we're only interested in the last instruction of
-- the block, and only if it has a single destination.
694
695
getOutEdges
        :: Instruction instr
696
        => [instr] -> [BlockId]
697

698
699
getOutEdges instrs
        = case jumpDestsOfInstr (last instrs) of
700
                [one] -> [one]
701
                _many -> []
702

dterei's avatar
dterei committed
703
704
mkNode :: (Instruction t)
       => GenBasicBlock t
705
706
707
708
709
710
711
712
713
714
715
       -> (GenBasicBlock t, BlockId, [BlockId])
mkNode block@(BasicBlock id instrs) = (block, id, getOutEdges instrs)

seqBlocks :: BlockEnv i -> [(GenBasicBlock t1, BlockId, [BlockId])]
                        -> [GenBasicBlock t1]
seqBlocks _ [] = []
seqBlocks infos ((block,_,[]) : rest)
  = block : seqBlocks infos rest
seqBlocks infos ((block@(BasicBlock id instrs),_,[next]) : rest)
  | can_fallthrough = BasicBlock id (init instrs) : seqBlocks infos rest'
  | otherwise       = block : seqBlocks infos rest'
716
  where
717
718
        can_fallthrough = not (mapMember next infos) && can_reorder
        (can_reorder, rest') = reorder next [] rest
719
720
          -- TODO: we should do a better job for cycles; try to maximise the
          -- fallthroughs within a loop.
721
seqBlocks _ _ = panic "AsmCodegen:seqBlocks"
722

dterei's avatar
dterei committed
723
724
reorder :: (Eq a) => a -> [(t, a, t1)] -> [(t, a, t1)] -> (Bool, [(t, a, t1)])
reorder  _ accum [] = (False, reverse accum)
725
726
727
728
reorder id accum (b@(block,id',out) : rest)
  | id == id'  = (True, (block,id,out) : reverse accum ++ rest)
  | otherwise  = reorder id (b:accum) rest

729

730
731
732
733
734
735
-- -----------------------------------------------------------------------------
-- Generate jump tables

-- Analyzes all native code and generates data sections for all jump
-- table instructions.
generateJumpTables
736
737
738
        :: NcgImpl statics instr jumpDest
        -> [NatCmmDecl statics instr] -> [NatCmmDecl statics instr]
generateJumpTables ncgImpl xs = concatMap f xs
739
    where f p@(CmmProc _ _ _ (ListGraph xs)) = p : concatMap g xs
740
          f p = [p]
741
          g (BasicBlock _ xs) = catMaybes (map (generateJumpTableForInstr ncgImpl) xs)
742

743
744
745
-- -----------------------------------------------------------------------------
-- Shortcut branches

746
shortcutBranches
747
        :: DynFlags
748
    -> NcgImpl statics instr jumpDest
749
750
        -> [NatCmmDecl statics instr]
        -> [NatCmmDecl statics instr]
751

752
shortcutBranches dflags ncgImpl tops
753
  | optLevel dflags < 1 = tops    -- only with -O or higher
754
  | otherwise           = map (apply_mapping ncgImpl mapping) tops'
755
  where
756
    (tops', mappings) = mapAndUnzip (build_mapping ncgImpl) tops
757
758
    mapping = foldr plusUFM emptyUFM mappings

759
build_mapping :: NcgImpl statics instr jumpDest
760
761
              -> GenCmmDecl d (BlockEnv t) (ListGraph instr)
              -> (GenCmmDecl d (BlockEnv t) (ListGraph instr), UniqFM jumpDest)
762
build_mapping _ top@(CmmData _ _) = (top, emptyUFM)
763
764
765
766
build_mapping _ (CmmProc info lbl live (ListGraph []))
  = (CmmProc info lbl live (ListGraph []), emptyUFM)
build_mapping ncgImpl (CmmProc info lbl live (ListGraph (head:blocks)))
  = (CmmProc info lbl live (ListGraph (head:others)), mapping)
767
768
769
770
771
        -- drop the shorted blocks, but don't ever drop the first one,
        -- because it is pointed to by a global label.
  where
    -- find all the blocks that just consist of a jump that can be
    -- shorted.
772
773
774
    -- Don't completely eliminate loops here -- that can leave a dangling jump!
    (_, shortcut_blocks, others) = foldl split (emptyBlockSet, [], []) blocks
    split (s, shortcut_blocks, others) b@(BasicBlock id [insn])
775
776
        | Just jd <- canShortcut ncgImpl insn,
          Just dest <- getJumpDestBlockId ncgImpl jd,
777
          not (has_info id),
778
          (setMember dest s) || dest == id -- loop checks
779
780
        = (s, shortcut_blocks, b : others)
    split (s, shortcut_blocks, others) (BasicBlock id [insn])
781
782
        | Just dest <- canShortcut ncgImpl insn,
          not (has_info id)
783
        = (setInsert id s, (id,dest) : shortcut_blocks, others)
784
785
    split (s, shortcut_blocks, others) other = (s, shortcut_blocks, other : others)

786
787
    -- do not eliminate blocks that have an info table
    has_info l = mapMember l info
788
789
790
791

    -- build a mapping from BlockId to JumpDest for shorting branches
    mapping = foldl add emptyUFM shortcut_blocks
    add ufm (id,dest) = addToUFM ufm id dest
792

793
apply_mapping :: NcgImpl statics instr jumpDest
794
              -> UniqFM jumpDest
Simon Peyton Jones's avatar
Simon Peyton Jones committed
795
796
              -> GenCmmDecl statics h (ListGraph instr)
              -> GenCmmDecl statics h (ListGraph instr)
797
apply_mapping ncgImpl ufm (CmmData sec statics)
798
  = CmmData sec (shortcutStatics ncgImpl (lookupUFM ufm) statics)
799
800
apply_mapping ncgImpl ufm (CmmProc info lbl live (ListGraph blocks))
  = CmmProc info lbl live (ListGraph $ map short_bb blocks)
801
802
  where
    short_bb (BasicBlock id insns) = BasicBlock id $! map short_insn insns
803
    short_insn i = shortcutJump ncgImpl (lookupUFM ufm) i
804
805
806
                 -- shortcutJump should apply the mapping repeatedly,
                 -- just in case we can short multiple branches.

807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
-- -----------------------------------------------------------------------------
-- Instruction selection

-- Native code instruction selection for a chunk of stix code.  For
-- this part of the computation, we switch from the UniqSM monad to
-- the NatM monad.  The latter carries not only a Unique, but also an
-- Int denoting the current C stack pointer offset in the generated
-- code; this is needed for creating correct spill offsets on
-- architectures which don't offer, or for which it would be
-- prohibitively expensive to employ, a frame pointer register.  Viz,
-- x86.

-- The offset is measured in bytes, and indicates the difference
-- between the current (simulated) C stack-ptr and the value it was at
-- the beginning of the block.  For stacks which grow down, this value
-- should be either zero or negative.

-- Switching between the two monads whilst carrying along the same
-- Unique supply breaks abstraction.  Is that bad?

827
828
genMachCode
        :: DynFlags
829
        -> Module
Simon Peyton Jones's avatar
Simon Peyton Jones committed
830
        -> (RawCmmDecl -> NatM [NatCmmDecl statics instr])
831
832
833
834
        -> RawCmmDecl
        -> UniqSM
                ( [NatCmmDecl statics instr]
                , [CLabel])
835

836
genMachCode dflags this_mod cmmTopCodeGen cmm_top
837
  = do  { initial_us <- getUs
838
        ; let initial_st           = mkNatM_State initial_us 0 dflags this_mod
839
840
841
842
              (new_tops, final_st) = initNat initial_st (cmmTopCodeGen cmm_top)
              final_delta          = natm_delta final_st
              final_imports        = natm_imports final_st
        ; if   final_delta == 0
843
844
845
          then return (new_tops, final_imports)
          else pprPanic "genMachCode: nonzero final delta" (int final_delta)
    }
846

847
848
849
850
851
852
853
-- -----------------------------------------------------------------------------
-- Generic Cmm optimiser

{-
Here we do:

  (a) Constant folding
854
  (c) Position independent code and dynamic linking
855
856
857
        (i)  introduce the appropriate indirections
             and position independent refs
        (ii) compile a list of imported symbols
858
  (d) Some arch-specific optimizations
859

Simon Marlow's avatar
Simon Marlow committed
860
(a) will be moving to the new Hoopl pipeline, however, (c) and
861
862
863
864
(d) are only needed by the native backend and will continue to live
here.

Ideas for other things we could do (put these in Hoopl please!):
865
866

  - shortcut jumps-to-jumps
867
868
869
  - simple CSE: if an expr is assigned to a temp, then replace later occs of
    that expr with the temp, until the expr is no longer valid (can push through
    temp assignments, and certain assigns to mem...)
870
871
-}

872
873
874
875
876
877
cmmToCmm :: DynFlags -> Module -> RawCmmDecl -> (RawCmmDecl, [CLabel])
cmmToCmm _ _ top@(CmmData _ _) = (top, [])
cmmToCmm dflags this_mod (CmmProc info lbl live graph)
    = runCmmOpt dflags this_mod $
      do blocks' <- mapM cmmBlockConFold (toBlockList graph)
         return $ CmmProc info lbl live (ofBlockList (g_entry graph) blocks')
878

879
newtype CmmOptM a = CmmOptM (DynFlags -> Module -> [CLabel] -> (# a, [CLabel] #))
880

Austin Seipp's avatar
Austin Seipp committed
881
882
883
884
885
886
887
instance Functor CmmOptM where
    fmap = liftM

instance Applicative CmmOptM where
    pure = return
    (<*>) = ap

888
instance Monad CmmOptM where
889
  return x = CmmOptM $ \_ _ imports -> (# x, imports #)
890
  (CmmOptM f) >>= g =
891
892
    CmmOptM $ \dflags this_mod imports ->
                case f dflags this_mod imports of
893
894
                  (# x, imports' #) ->
                    case g x of
895
                      CmmOptM g' -> g' dflags this_mod imports'
896

897
898
instance CmmMakeDynamicReferenceM CmmOptM where
    addImport = addImportCmmOpt
899
    getThisModule = CmmOptM $ \_ this_mod imports -> (# this_mod, imports #)
900

901
addImportCmmOpt :: CLabel -> CmmOptM ()
902
addImportCmmOpt lbl = CmmOptM $ \_ _ imports -> (# (), lbl:imports #)
903

904
instance HasDynFlags CmmOptM where
905
    getDynFlags = CmmOptM $ \dflags _ imports -> (# dflags, imports #)
906

907
908
runCmmOpt :: DynFlags -> Module -> CmmOptM a -> (a, [CLabel])
runCmmOpt dflags this_mod (CmmOptM f) = case f dflags this_mod [] of
909
910
                        (# result, imports #) -> (result, imports)

911
912
913
914
cmmBlockConFold :: CmmBlock -> CmmOptM CmmBlock
cmmBlockConFold block = do
  let (entry, middle, last) = blockSplit block
      stmts = blockToList middle
915
  stmts' <- mapM cmmStmtConFold stmts
916
917
  last' <- cmmStmtConFold last
  return $ blockJoin entry (blockFromList stmts') last'
918

919
920
921
922
923
924
-- This does three optimizations, but they're very quick to check, so we don't
-- bother turning them off even when the Hoopl code is active.  Since
-- this is on the old Cmm representation, we can't reuse the code either:
--  * reg = reg      --> nop
--  * if 0 then jump --> nop
--  * if 1 then jump --> jump
Ian Lynagh's avatar
Ian Lynagh committed
925
-- We might be tempted to skip this step entirely of not Opt_PIC, but
926
927
-- there is some PowerPC code for the non-PIC case, which would also
-- have to be separated.
928
cmmStmtConFold :: CmmNode e x -> CmmOptM (CmmNode e x)
929
930
931
cmmStmtConFold stmt
   = case stmt of
        CmmAssign reg src
932
           -> do src' <- cmmExprConFold DataReference src
933
                 return $ case src' of
934
                   CmmReg reg' | reg == reg' -> CmmComment (fsLit "nop")
935
                   new_src -> CmmAssign reg new_src
936
937

        CmmStore addr src
938
939
           -> do addr' <- cmmExprConFold DataReference addr
                 src'  <- cmmExprConFold DataReference src
940
                 return $ CmmStore addr' src'
941

942
        CmmCall { cml_target = addr }
943
           -> do addr' <- cmmExprConFold JumpReference addr
944
                 return $ stmt { cml_target = addr' }
945

946
        CmmUnsafeForeignCall target regs args
947
           -> do target' <- case target of
948
                              ForeignTarget e conv -> do
949
                                e' <- cmmExprConFold CallReference e
950
951
952
953
954
955
956
                                return $ ForeignTarget e' conv
                              PrimTarget _ ->
                                return target
                 args' <- mapM (cmmExprConFold DataReference) args
                 return $ CmmUnsafeForeignCall target' regs args'

        CmmCondBranch test true false
957
           -> do test' <- cmmExprConFold DataReference test
958
                 return $ case test' of
959
960
961
                   CmmLit (CmmInt 0 _) -> CmmBranch false
                   CmmLit (CmmInt _ _) -> CmmBranch true
                   _other -> CmmCondBranch test' true false
962

963
964
965
        CmmSwitch expr ids
           -> do expr' <- cmmExprConFold DataReference expr
                 return $ CmmSwitch expr' ids
966
967

        other
968
           -> return other
969

dterei's avatar
dterei committed
970
cmmExprConFold :: ReferenceKind -> CmmExpr -> CmmOptM CmmExpr
971
cmmExprConFold referenceKind expr = do
972
    dflags <- getDynFlags
973
974
975
976

    -- With -O1 and greater, the cmmSink pass does constant-folding, so
    -- we don't need to do it again here.
    let expr' = if optLevel dflags >= 1
977
                    then expr
978
                    else cmmExprCon dflags expr
979

980
    cmmExprNative referenceKind expr'
981

982
983
984
985
cmmExprCon :: DynFlags -> CmmExpr -> CmmExpr
cmmExprCon dflags (CmmLoad addr rep) = CmmLoad (cmmExprCon dflags addr) rep
cmmExprCon dflags (CmmMachOp mop args)
    = cmmMachOpFold dflags mop (map (cmmExprCon dflags) args)
Ian Lynagh's avatar
Ian Lynagh committed
986
cmmExprCon _ other = other
987
988
989
990
991

-- handles both PIC and non-PIC cases... a very strange mixture
-- of things to do.
cmmExprNative :: ReferenceKind -> CmmExpr -> CmmOptM CmmExpr
cmmExprNative referenceKind expr = do
992
     dflags <- getDynFlags
Ian Lynagh's avatar
Ian Lynagh committed
993
994
     let platform = targetPlatform dflags
         arch = platformArch platform
995
     case expr of
996
        CmmLoad addr rep
997
           -> do addr' <- cmmExprNative DataReference addr
998
                 return $ CmmLoad addr' rep
999
1000

        CmmMachOp mop args
1001
1002
           -> do args' <- mapM (cmmExprNative DataReference) args
                 return $ CmmMachOp mop args'
1003

1004
1005
1006
1007
1008
1009
        CmmLit (CmmBlock id)
           -> cmmExprNative referenceKind (CmmLit (CmmLabel (infoTblLbl id)))
           -- we must convert block Ids to CLabels here, because we
           -- might have to do the PIC transformation.  Hence we must
           -- not modify BlockIds beyond this point.

1010
        CmmLit (CmmLabel lbl)
1011
           -> do
1012
                cmmMakeDynamicReference dflags referenceKind lbl
1013
        CmmLit (CmmLabelOff lbl off)
1014
           -> do
1015
                 dynRef <- cmmMakeDynamicReference dflags referenceKind lbl
1016
                 -- need to optimize here, since it's late
1017
                 return $ cmmMachOpFol