Pipeline.hs 13.2 KB
Newer Older
1 2
{-# LANGUAGE BangPatterns #-}

3
module GHC.Cmm.Pipeline (
4 5 6 7
  -- | Converts C-- with an implicit stack and native C-- calls into
  -- optimized, CPS converted and native-call-less C--.  The latter
  -- C-- can be used to generate assembly.
  cmmPipeline
8
) where
9

10
import GHC.Prelude
11

12 13 14 15 16 17 18 19 20 21
import GHC.Cmm
import GHC.Cmm.Lint
import GHC.Cmm.Info.Build
import GHC.Cmm.CommonBlockElim
import GHC.Cmm.Switch.Implement
import GHC.Cmm.ProcPoint
import GHC.Cmm.ContFlowOpt
import GHC.Cmm.LayoutStack
import GHC.Cmm.Sink
import GHC.Cmm.Dataflow.Collections
22

Sylvain Henry's avatar
Sylvain Henry committed
23
import GHC.Types.Unique.Supply
Sylvain Henry's avatar
Sylvain Henry committed
24
import GHC.Driver.Session
Sylvain Henry's avatar
Sylvain Henry committed
25
import GHC.Driver.Backend
26
import GHC.Utils.Error
Sylvain Henry's avatar
Sylvain Henry committed
27
import GHC.Driver.Env
Ian Lynagh's avatar
Ian Lynagh committed
28
import Control.Monad
29
import GHC.Utils.Outputable
John Ericson's avatar
John Ericson committed
30
import GHC.Platform
31
import Data.Either (partitionEithers)
Simon Marlow's avatar
Simon Marlow committed
32

33
-----------------------------------------------------------------------------
34
-- | Top level driver for C-- pipeline
35
-----------------------------------------------------------------------------
36

37 38 39 40 41
-- Run the whole Cmm pipeline over one group of declarations:
--
--   1. every declaration is pushed through 'cpsTop';
--   2. the results are partitioned into procedures and data;
--   3. 'doSRTs' is run over both, threading the 'ModuleSRTInfo' through;
--   4. the final Cmm is dumped under @Opt_D_dump_cmm_cps@.
--
-- The whole action is wrapped in 'withTimingSilent' under the label
-- "Cmm pipeline", with @forceRes@ passed in to evaluate the result.
cmmPipeline
 :: HscEnv -- Compilation env including
           -- dynamic flags: -dcmm-lint -ddump-cmm-cps
 -> ModuleSRTInfo        -- Info about SRTs generated so far
 -> CmmGroup             -- Input C-- with Procedures
 -> IO (ModuleSRTInfo, CmmGroupSRTs) -- Output CPS transformed C--

cmmPipeline hsc_env srtInfo prog = withTimingSilent dflags (text "Cmm pipeline") forceRes $
  do tops <- {-# SCC "tops" #-} mapM (cpsTop dflags) prog

     -- 'cpsTop' yields Left for procedures and Right for data.
     let (procs, data_) = partitionEithers tops
     (srtInfo', cmms) <- {-# SCC "doSRTs" #-} doSRTs dflags srtInfo procs data_
     dumpWith dflags Opt_D_dump_cmm_cps "Post CPS Cmm" FormatCMM (pdoc platform cmms)

     return (srtInfo', cmms)

  where -- Evaluate the SRT info and every output declaration to WHNF.
        forceRes (info, group) =
          info `seq` foldr (\decl r -> decl `seq` r) () group

        dflags   = hsc_dflags hsc_env
        platform = targetPlatform dflags
60

61 62 63
-- | Run the per-declaration part of the Cmm pipeline.
--
-- * A data declaration is returned unchanged as @Right@, paired with the
--   result of 'cafAnalData' on its statics.
--
-- * A procedure is taken through, in order: a first round of control-flow
--   optimisation, common block elimination (if @Opt_CmmElimCommonBlocks@),
--   switch implementation, proc-point computation, stack layout (if the
--   procedure's @do_layout@ is set), sinking (if @Opt_CmmSink@), CAF
--   analysis, either splitting at proc points or attaching continuation
--   info tables, stack-map population, a second round of control-flow
--   optimisation (if @optLevel >= 1@) and unreachable-block removal.
--   The result is @Left@ of the CAF environment and the resulting list of
--   declarations.
--
-- Intermediate results are dumped after each stage under the
-- corresponding @Opt_D_dump_cmm_*@ flag.
cpsTop :: DynFlags -> CmmDecl -> IO (Either (CAFEnv, [CmmDecl]) (CAFSet, CmmDecl))
cpsTop dflags p@(CmmData _ statics) = return (Right (cafAnalData (targetPlatform dflags) statics, p))
cpsTop dflags proc =
    do
       ----------- Control-flow optimisations ----------------------------------

       -- The first round of control-flow optimisation speeds up the
       -- later passes by removing lots of empty blocks, so we do it
       -- even when optimisation isn't turned on.
       --
       CmmProc h l v g <- {-# SCC "cmmCfgOpts(1)" #-}
            return $ cmmCfgOptsProc splitting_proc_points proc
       dump Opt_D_dump_cmm_cfg "Post control-flow optimisations" g

       let !TopInfo {stack_info=StackInfo { arg_space = entry_off
                                          , do_layout = do_layout }} = h

       ----------- Eliminate common blocks -------------------------------------
       g <- {-# SCC "elimCommonBlocks" #-}
            condPass Opt_CmmElimCommonBlocks elimCommonBlocks g
                          Opt_D_dump_cmm_cbe "Post common block elimination"

       -- Any work storing block Labels must be performed _after_
       -- elimCommonBlocks

       ----------- Implement switches ------------------------------------------
       g <- {-# SCC "createSwitchPlans" #-}
            runUniqSM $ cmmImplementSwitchPlans (backend dflags) platform g
       dump Opt_D_dump_cmm_switch "Post switch plan" g

       ----------- Proc points -------------------------------------------------
       let
         call_pps :: ProcPointSet -- LabelMap
         call_pps = {-# SCC "callProcPoints" #-} callProcPoints g
       proc_points <-
          if splitting_proc_points
             then do
               pp <- {-# SCC "minimalProcPointSet" #-} runUniqSM $
                  minimalProcPointSet platform call_pps g
               dumpWith dflags Opt_D_dump_cmm_proc "Proc points"
                     FormatCMM (pdoc platform l $$ ppr pp $$ pdoc platform g)
               return pp
             else
               return call_pps

       ----------- Layout the stack and manifest Sp ----------------------------
       (g, stackmaps) <-
            {-# SCC "layoutStack" #-}
            if do_layout
               then runUniqSM $ cmmLayoutStack dflags proc_points entry_off g
               else return (g, mapEmpty)
       dump Opt_D_dump_cmm_sp "Layout Stack" g

       ----------- Sink and inline assignments  --------------------------------
       g <- {-# SCC "sink" #-} -- See Note [Sinking after stack layout]
            condPass Opt_CmmSink (cmmSink platform) g
                     Opt_D_dump_cmm_sink "Sink assignments"

       ------------- CAF analysis ----------------------------------------------
       let cafEnv = {-# SCC "cafAnal" #-} cafAnal platform call_pps l g
       dumpWith dflags Opt_D_dump_cmm_caf "CAFEnv" FormatText (pdoc platform cafEnv)

       g <- if splitting_proc_points
            then do
               ------------- Split into separate procedures -----------------------
               let pp_map = {-# SCC "procPointAnalysis" #-}
                            procPointAnalysis proc_points g
               dumpWith dflags Opt_D_dump_cmm_procmap "procpoint map"
                  FormatCMM (ppr pp_map)
               g <- {-# SCC "splitAtProcPoints" #-} runUniqSM $
                    splitAtProcPoints platform l call_pps proc_points pp_map
                                      (CmmProc h l v g)
               dumps Opt_D_dump_cmm_split "Post splitting" g
               return g
             else do
               -- attach info tables to return points
               return [attachContInfoTables call_pps (CmmProc h l v g)]

       ------------- Populate info tables with stack info -----------------
       g <- {-# SCC "setInfoTableStackMap" #-}
            return $ map (setInfoTableStackMap platform stackmaps) g
       dumps Opt_D_dump_cmm_info "after setInfoTableStackMap" g

       ----------- Control-flow optimisations -----------------------------
       g <- {-# SCC "cmmCfgOpts(2)" #-}
            return $ if optLevel dflags >= 1
                     then map (cmmCfgOptsProc splitting_proc_points) g
                     else g
       g <- return (map removeUnreachableBlocksProc g)
            -- See Note [unreachable blocks]
       dumps Opt_D_dump_cmm_cfg "Post control-flow optimisations" g

       return (Left (cafEnv, g))

  where platform = targetPlatform dflags

        -- Lint (when enabled) and dump a single graph.
        dump = dumpGraph dflags

        -- Dump every declaration of a list under the same flag and title.
        dumps flag name
           = mapM_ (dumpWith dflags flag name FormatCMM . pdoc platform)

        -- Apply a pure pass only when its optimisation flag is set, dumping
        -- the result; otherwise hand the graph back untouched.
        condPass flag pass g dumpflag dumpname =
            if gopt flag dflags
               then do
                    let g' = pass g
                    dump dumpflag dumpname g'
                    return g'
               else return g

        -- we don't need to split proc points for the NCG, unless
        -- tablesNextToCode is off.  The latter is because we have no
        -- label to put on info tables for basic blocks that are not
        -- the entry point.
        splitting_proc_points = backend dflags /= NCG
                             || not (platformTablesNextToCode platform)
                             || -- Note [inconsistent-pic-reg]
                                usingInconsistentPicReg
        usingInconsistentPicReg
           = case (platformArch platform, platformOS platform, positionIndependent dflags)
             of   (ArchX86, OSDarwin, pic) -> pic
                  _                        -> False
181

Jan Stolarek's avatar
Jan Stolarek committed
182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304
-- Note [Sinking after stack layout]
-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
--
-- In the past we considered running sinking pass also before stack
-- layout, but after making some measurements we realized that:
--
--   a) running sinking only before stack layout produces slower
--      code than running sinking only after stack layout
--
--   b) running sinking both before and after stack layout produces
--      code that has the same performance as when running sinking
--      only after stack layout.
--
-- In other words sinking before stack layout doesn't buy us anything.
--
-- An interesting question is "why is it better to run sinking after
-- stack layout"? It seems that the major reason is the stores and loads
-- generated by stack layout. Consider this code before stack layout:
--
--  c1E:
--      _c1C::P64 = R3;
--      _c1B::P64 = R2;
--      _c1A::P64 = R1;
--      I64[(young<c1D> + 8)] = c1D;
--      call stg_gc_noregs() returns to c1D, args: 8, res: 8, upd: 8;
--  c1D:
--      R3 = _c1C::P64;
--      R2 = _c1B::P64;
--      R1 = _c1A::P64;
--      call (P64[(old + 8)])(R3, R2, R1) args: 8, res: 0, upd: 8;
--
-- Stack layout pass will save all local variables live across a call
-- (_c1C, _c1B and _c1A in this example) on the stack just before
-- making a call and reload them from the stack after returning from a
-- call:
--
--  c1E:
--      _c1C::P64 = R3;
--      _c1B::P64 = R2;
--      _c1A::P64 = R1;
--      I64[Sp - 32] = c1D;
--      P64[Sp - 24] = _c1A::P64;
--      P64[Sp - 16] = _c1B::P64;
--      P64[Sp - 8] = _c1C::P64;
--      Sp = Sp - 32;
--      call stg_gc_noregs() returns to c1D, args: 8, res: 8, upd: 8;
--  c1D:
--      _c1A::P64 = P64[Sp + 8];
--      _c1B::P64 = P64[Sp + 16];
--      _c1C::P64 = P64[Sp + 24];
--      R3 = _c1C::P64;
--      R2 = _c1B::P64;
--      R1 = _c1A::P64;
--      Sp = Sp + 32;
--      call (P64[Sp])(R3, R2, R1) args: 8, res: 0, upd: 8;
--
-- If we don't run sinking pass after stack layout we are basically
-- left with such code. However, running sinking on this code can lead
-- to significant improvements:
--
--  c1E:
--      I64[Sp - 32] = c1D;
--      P64[Sp - 24] = R1;
--      P64[Sp - 16] = R2;
--      P64[Sp - 8] = R3;
--      Sp = Sp - 32;
--      call stg_gc_noregs() returns to c1D, args: 8, res: 8, upd: 8;
--  c1D:
--      R3 = P64[Sp + 24];
--      R2 = P64[Sp + 16];
--      R1 = P64[Sp + 8];
--      Sp = Sp + 32;
--      call (P64[Sp])(R3, R2, R1) args: 8, res: 0, upd: 8;
--
-- Now we only have 9 assignments instead of 15.
--
-- There is one case when running sinking before stack layout could
-- be beneficial. Consider this:
--
--   L1:
--      x = y
--      call f() returns L2
--   L2: ...x...y...
--
-- Since both x and y are live across a call to f, they will be stored
-- on the stack during stack layout and restored after the call:
--
--   L1:
--      x = y
--      P64[Sp - 24] = L2
--      P64[Sp - 16] = x
--      P64[Sp - 8]  = y
--      Sp = Sp - 24
--      call f() returns L2
--   L2:
--      y = P64[Sp + 16]
--      x = P64[Sp + 8]
--      Sp = Sp + 24
--      ...x...y...
--
-- However, if we run sinking before stack layout we would propagate x
-- to its usage place (both x and y must be local registers for this to
-- be possible - global registers cannot be floated past a call):
--
--   L1:
--      x = y
--      call f() returns L2
--   L2: ...y...y...
--
-- Thus making x dead at the call to f(). If we ran stack layout now
-- we would generate fewer stores and loads:
--
--   L1:
--      x = y
--      P64[Sp - 16] = L2
--      P64[Sp - 8]  = y
--      Sp = Sp - 16
--      call f() returns L2
--   L2:
--      y = P64[Sp + 8]
--      Sp = Sp + 16
--      ...y...y...
--
305
-- But since we don't see any benefits from running sinking before stack
Jan Stolarek's avatar
Jan Stolarek committed
306 307 308
-- layout, this situation probably doesn't arise too often in practice.
--

ian@well-typed.com's avatar
ian@well-typed.com committed
309
{- Note [inconsistent-pic-reg]
310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336

On x86/Darwin, PIC is implemented by inserting a sequence like

    call 1f
 1: popl %reg

at the proc entry point, and then referring to labels as offsets from
%reg.  If we don't split proc points, then we could have many entry
points in a proc that would need this sequence, and each entry point
would then get a different value for %reg.  If there are any join
points, then at the join point we don't have a consistent value for
%reg, so we don't know how to refer to labels.

Hence, on x86/Darwin, we have to split proc points, and then each proc
point will get its own PIC initialisation sequence.

This isn't an issue on x86/ELF, where the sequence is

    call 1f
 1: popl %reg
    addl $_GLOBAL_OFFSET_TABLE_+(.-1b), %reg

so %reg always has a consistent value: the address of
_GLOBAL_OFFSET_TABLE_, regardless of which entry point we arrived via.

-}

337
{- Note [unreachable blocks]
338

339
The control-flow optimiser sometimes leaves unreachable blocks behind
Simon Marlow's avatar
Simon Marlow committed
340 341 342
containing junk code.  These aren't necessarily a problem, but
removing them is good because it might save time in the native code
generator later.
343 344

-}
345

346 347 348 349
-- | Run a 'UniqSM' computation in 'IO'.  A fresh unique supply is obtained
-- from @mkSplitUniqSupply \'u\'@ and the computation is run against it with
-- 'initUs_'.
runUniqSM :: UniqSM a -> IO a
runUniqSM m = fmap (\supply -> initUs_ supply m) (mkSplitUniqSupply 'u')
350

351

352
-- | Lint (when requested) and then dump a single Cmm graph.
--
-- If @Opt_DoCmmLinting@ is set the graph is checked with 'cmmLintGraph';
-- a lint failure is reported through 'fatalErrorMsg' and followed by
-- @ghcExit dflags 1@.  After that the graph is handed to 'dumpWith' in
-- 'FormatCMM' under the given dump flag and title.
dumpGraph :: DynFlags -> DumpFlag -> String -> CmmGraph -> IO ()
dumpGraph dflags flag name g = do
  when (gopt Opt_DoCmmLinting dflags) $ do_lint g
  dumpWith dflags flag name FormatCMM (pdoc platform g)
 where
  platform = targetPlatform dflags

  -- 'cmmLintGraph' returns @Just err@ on failure and @Nothing@ on success.
  do_lint g = case cmmLintGraph platform g of
                 Just err -> do { fatalErrorMsg dflags err
                                ; ghcExit dflags 1
                                }
                 Nothing  -> return ()
Simon Marlow's avatar
Simon Marlow committed
363

Sylvain Henry's avatar
Sylvain Henry committed
364 365 366
-- | Dump a document for one Cmm pipeline stage.
--
-- Three things happen, in this order:
--
--   1. the document is dumped via 'dumpIfSet_dyn' under the stage's own flag;
--   2. if that flag is not set but @Opt_D_dump_cmm_verbose@ is, the document
--      is passed to 'dumpAction' with options derived from the stage's flag;
--   3. the document is dumped via 'dumpIfSet_dyn' under
--      @Opt_D_dump_cmm_verbose_by_proc@.
dumpWith :: DynFlags -> DumpFlag -> String -> DumpFormat -> SDoc -> IO ()
dumpWith dflags flag txt fmt sdoc = do
  dumpIfSet_dyn dflags flag txt fmt sdoc
  unless (dopt flag dflags) $
    -- If `-ddump-cmm-verbose -ddump-to-file` is specified,
    -- dump each Cmm pipeline stage output to a separate file.  #16930
    when (dopt Opt_D_dump_cmm_verbose dflags)
      $ dumpAction dflags (mkDumpStyle alwaysQualify)
                   (dumpOptionsFromFlag flag) txt fmt sdoc
  dumpIfSet_dyn dflags Opt_D_dump_cmm_verbose_by_proc txt fmt sdoc