Commit c846618a authored by Ömer Sinan Ağacan's avatar Ömer Sinan Ağacan

Do CafInfo/SRT analysis in Cmm

This patch removes all CafInfo predictions and various hacks to preserve
predicted CafInfos from the compiler and assigns final CafInfos to
interface Ids after code generation. SRT analysis is extended to support
static data, and Cmm generator is modified to allow generating
static_link fields after SRT analysis.

This also fixes `-fcatch-bottoms`, which introduces error calls in case
expressions in CorePrep, which runs *after* CoreTidy (which is where we
decide on CafInfos) and turns previously non-CAFFY things into CAFFY.

Fixes #17648
Fixes #9718

Evaluation
==========

NoFib
-----

Boot with: `make boot mode=fast`
Run: `make mode=fast EXTRA_RUNTEST_OPTS="-cachegrind" NoFibRuns=1`

--------------------------------------------------------------------------------
        Program           Size    Allocs    Instrs     Reads    Writes
--------------------------------------------------------------------------------
             CS          -0.0%      0.0%     -0.0%     -0.0%     -0.0%
            CSD          -0.0%      0.0%     -0.0%     -0.0%     -0.0%
             FS          -0.0%      0.0%     -0.0%     -0.0%     -0.0%
              S          -0.0%      0.0%     -0.0%     -0.0%     -0.0%
             VS          -0.0%      0.0%     -0.0%     -0.0%     -0.0%
            VSD          -0.0%      0.0%     -0.0%     -0.0%     -0.5%
            VSM          -0.0%      0.0%     -0.0%     -0.0%     -0.0%
           anna          -0.1%      0.0%     -0.0%     -0.0%     -0.0%
           ansi          -0.0%      0.0%     -0.0%     -0.0%     -0.0%
           atom          -0.0%      0.0%     -0.0%     -0.0%     -0.0%
         awards          -0.0%      0.0%     -0.0%     -0.0%     -0.0%
         banner          -0.0%      0.0%     -0.0%     -0.0%     -0.0%
     bernouilli          -0.0%      0.0%     -0.0%     -0.0%     -0.0%
   binary-trees          -0.0%      0.0%     -0.0%     -0.0%     -0.0%
          boyer          -0.0%      0.0%     -0.0%     -0.0%     -0.0%
         boyer2          -0.0%      0.0%     -0.0%     -0.0%     -0.0%
           bspt          -0.0%      0.0%     -0.0%     -0.0%     -0.0%
      cacheprof          -0.0%      0.0%     -0.0%     -0.0%     -0.0%
       calendar          -0.0%      0.0%     -0.0%     -0.0%     -0.0%
       cichelli          -0.0%      0.0%     -0.0%     -0.0%     -0.0%
        circsim          -0.0%      0.0%     -0.0%     -0.0%     -0.0%
       clausify          -0.0%      0.0%     -0.0%     -0.0%     -0.0%
  comp_lab_zift          -0.0%      0.0%     -0.0%     -0.0%     -0.0%
       compress          -0.0%      0.0%     -0.0%     -0.0%     -0.0%
      compress2          -0.0%      0.0%     -0.0%     -0.0%     -0.0%
    constraints          -0.0%      0.0%     -0.0%     -0.0%     -0.0%
   cryptarithm1          -0.0%      0.0%     -0.0%     -0.0%     -0.0%
   cryptarithm2          -0.0%      0.0%     -0.0%     -0.0%     -0.0%
            cse          -0.0%      0.0%     -0.0%     -0.0%     -0.0%
   digits-of-e1          -0.0%      0.0%     -0.0%     -0.0%     -0.0%
   digits-of-e2          -0.0%      0.0%     -0.0%     -0.0%     -0.0%
         dom-lt          -0.0%      0.0%     -0.0%     -0.0%     -0.0%
          eliza          -0.0%      0.0%     -0.0%     -0.0%     -0.0%
          event          -0.0%      0.0%     -0.0%     -0.0%     -0.0%
    exact-reals          -0.0%      0.0%     -0.0%     -0.0%     -0.0%
         exp3_8          -0.0%      0.0%     -0.0%     -0.0%     -0.0%
         expert          -0.0%      0.0%     -0.0%     -0.0%     -0.0%
 fannkuch-redux          -0.0%      0.0%     -0.0%     -0.0%     -0.0%
          fasta          -0.0%      0.0%     -0.0%     -0.0%     -0.0%
            fem          -0.0%      0.0%     -0.0%     -0.0%     -0.0%
            fft          -0.0%      0.0%     -0.0%     -0.0%     -0.0%
           fft2          -0.0%      0.0%     -0.0%     -0.0%     -0.0%
       fibheaps          -0.0%      0.0%     -0.0%     -0.0%     -0.0%
           fish          -0.0%      0.0%     -0.0%     -0.0%     -0.0%
          fluid          -0.1%      0.0%     -0.0%     -0.0%     -0.0%
         fulsom          -0.0%      0.0%     -0.0%     -0.0%     -0.0%
         gamteb          -0.0%      0.0%     -0.0%     -0.0%     -0.0%
            gcd          -0.0%      0.0%     -0.0%     -0.0%     -0.0%
    gen_regexps          -0.0%      0.0%     -0.0%     -0.0%     -0.0%
         genfft          -0.0%      0.0%     -0.0%     -0.0%     -0.0%
             gg          -0.0%      0.0%     -0.0%     -0.0%     -0.0%
           grep          -0.0%      0.0%     -0.0%     -0.0%     -0.0%
         hidden          -0.0%      0.0%     -0.0%     -0.0%     -0.0%
            hpg          -0.1%      0.0%     -0.0%     -0.0%     -0.0%
            ida          -0.0%      0.0%     -0.0%     -0.0%     -0.0%
          infer          -0.0%      0.0%     -0.0%     -0.0%     -0.0%
        integer          -0.0%      0.0%     -0.0%     -0.0%     -0.0%
      integrate          -0.0%      0.0%     -0.0%     -0.0%     -0.0%
   k-nucleotide          -0.0%      0.0%     -0.0%     -0.0%     -0.0%
          kahan          -0.0%      0.0%     -0.0%     -0.0%     -0.0%
        knights          -0.0%      0.0%     -0.0%     -0.0%     -0.0%
         lambda          -0.0%      0.0%     -0.0%     -0.0%     -0.0%
     last-piece          -0.0%      0.0%     -0.0%     -0.0%     -0.0%
           lcss          -0.0%      0.0%     -0.0%     -0.0%     -0.0%
           life          -0.0%      0.0%     -0.0%     -0.0%     -0.0%
           lift          -0.0%      0.0%     -0.0%     -0.0%     -0.0%
         linear          -0.1%      0.0%     -0.0%     -0.0%     -0.0%
      listcompr          -0.0%      0.0%     -0.0%     -0.0%     -0.0%
       listcopy          -0.0%      0.0%     -0.0%     -0.0%     -0.0%
       maillist          -0.0%      0.0%     -0.0%     -0.0%     -0.0%
         mandel          -0.0%      0.0%     -0.0%     -0.0%     -0.0%
        mandel2          -0.0%      0.0%     -0.0%     -0.0%     -0.0%
           mate          -0.0%      0.0%     -0.0%     -0.0%     -0.0%
        minimax          -0.0%      0.0%     -0.0%     -0.0%     -0.0%
        mkhprog          -0.0%      0.0%     -0.0%     -0.0%     -0.0%
     multiplier          -0.0%      0.0%     -0.0%     -0.0%     -0.0%
         n-body          -0.0%      0.0%     -0.0%     -0.0%     -0.0%
       nucleic2          -0.0%      0.0%     -0.0%     -0.0%     -0.0%
           para          -0.0%      0.0%     -0.0%     -0.0%     -0.0%
      paraffins          -0.0%      0.0%     -0.0%     -0.0%     -0.0%
         parser          -0.1%      0.0%     -0.0%     -0.0%     -0.0%
        parstof          -0.1%      0.0%     -0.0%     -0.0%     -0.0%
            pic          -0.0%      0.0%     -0.0%     -0.0%     -0.0%
       pidigits          -0.0%      0.0%     -0.0%     -0.0%     -0.0%
          power          -0.0%      0.0%     -0.0%     -0.0%     -0.0%
         pretty          -0.0%      0.0%     -0.3%     -0.4%     -0.4%
         primes          -0.0%      0.0%     -0.0%     -0.0%     -0.0%
      primetest          -0.0%      0.0%     -0.0%     -0.0%     -0.0%
         prolog          -0.0%      0.0%     -0.0%     -0.0%     -0.0%
         puzzle          -0.0%      0.0%     -0.0%     -0.0%     -0.0%
         queens          -0.0%      0.0%     -0.0%     -0.0%     -0.0%
        reptile          -0.0%      0.0%     -0.0%     -0.0%     -0.0%
reverse-complem          -0.0%      0.0%     -0.0%     -0.0%     -0.0%
        rewrite          -0.0%      0.0%     -0.0%     -0.0%     -0.0%
           rfib          -0.0%      0.0%     -0.0%     -0.0%     -0.0%
            rsa          -0.0%      0.0%     -0.0%     -0.0%     -0.0%
            scc          -0.0%      0.0%     -0.3%     -0.5%     -0.4%
          sched          -0.0%      0.0%     -0.0%     -0.0%     -0.0%
            scs          -0.0%      0.0%     -0.0%     -0.0%     -0.0%
         simple          -0.1%      0.0%     -0.0%     -0.0%     -0.0%
          solid          -0.0%      0.0%     -0.0%     -0.0%     -0.0%
        sorting          -0.0%      0.0%     -0.0%     -0.0%     -0.0%
  spectral-norm          -0.0%      0.0%     -0.0%     -0.0%     -0.0%
         sphere          -0.0%      0.0%     -0.0%     -0.0%     -0.0%
         symalg          -0.0%      0.0%     -0.0%     -0.0%     -0.0%
            tak          -0.0%      0.0%     -0.0%     -0.0%     -0.0%
      transform          -0.0%      0.0%     -0.0%     -0.0%     -0.0%
       treejoin          -0.0%      0.0%     -0.0%     -0.0%     -0.0%
      typecheck          -0.0%      0.0%     -0.0%     -0.0%     -0.0%
        veritas          -0.0%      0.0%     -0.0%     -0.0%     -0.0%
           wang          -0.0%      0.0%     -0.0%     -0.0%     -0.0%
      wave4main          -0.0%      0.0%     -0.0%     -0.0%     -0.0%
   wheel-sieve1          -0.0%      0.0%     -0.0%     -0.0%     -0.0%
   wheel-sieve2          -0.0%      0.0%     -0.0%     -0.0%     -0.0%
           x2n1          -0.0%      0.0%     -0.0%     -0.0%     -0.0%
--------------------------------------------------------------------------------
            Min          -0.1%      0.0%     -0.3%     -0.5%     -0.5%
            Max          -0.0%      0.0%     -0.0%     -0.0%     -0.0%
 Geometric Mean          -0.0%     -0.0%     -0.0%     -0.0%     -0.0%

--------------------------------------------------------------------------------
        Program           Size    Allocs    Instrs     Reads    Writes
--------------------------------------------------------------------------------
        circsim          -0.1%      0.0%     -0.0%     -0.0%     -0.0%
    constraints          -0.0%      0.0%     -0.0%     -0.0%     -0.0%
       fibheaps          -0.0%      0.0%     -0.0%     -0.0%     -0.0%
       gc_bench          -0.0%      0.0%     -0.0%     -0.0%     -0.0%
           hash          -0.0%      0.0%     -0.0%     -0.0%     -0.0%
           lcss          -0.0%      0.0%     -0.0%     -0.0%     -0.0%
          power          -0.0%      0.0%     -0.0%     -0.0%     -0.0%
     spellcheck          -0.0%      0.0%     -0.0%     -0.0%     -0.0%
--------------------------------------------------------------------------------
            Min          -0.1%      0.0%     -0.0%     -0.0%     -0.0%
            Max          -0.0%      0.0%     -0.0%     -0.0%     -0.0%
 Geometric Mean          -0.0%     +0.0%     -0.0%     -0.0%     -0.0%

Manual inspection of programs in testsuite/tests/programs
---------------------------------------------------------

I built these programs with a bunch of dump flags and `-O` and compared
STG, Cmm, and Asm dumps and file sizes.

(Below the numbers in parenthesis show number of modules in the program)

These programs have identical compiler (same .hi and .o sizes, STG, and
Cmm and Asm dumps):

- Queens (1), andre_monad (1), cholewo-eval (2), cvh_unboxing (3),
  andy_cherry (7), fun_insts (1), hs-boot (4), fast2haskell (2),
  jl_defaults (1), jq_readsPrec (1), jules_xref (1), jtod_circint (4),
  jules_xref2 (1), lennart_range (1), lex (1), life_space_leak (1),
  bargon-mangler-bug (7), record_upd (1), rittri (1), sanders_array (1),
  strict_anns (1), thurston-module-arith (2), okeefe_neural (1),
  joao-circular (6), 10queens (1)

Programs with different compiler outputs:

- jl_defaults (1): For some reason GHC HEAD marks a lot of top-level
  `[Int]` closures as CAFFY for no reason. With this patch we no longer
  make them CAFFY and generate less SRT entries. For some reason Main.o
  is slightly larger with this patch (1.3%) and the executable sizes are
  the same. (I'd expect both to be smaller)

- launchbury (1): Same as jl_defaults: top-level `[Int]` closures marked
  as CAFFY for no reason. Similarly `Main.o` is 1.4% larger but the
  executable sizes are the same.

- galois_raytrace (13): Differences are in the Parse module. There are a
  lot, but some of the changes are caused by the fact that for some
  reason (I think a bug) GHC HEAD marks the dictionary for `Functor
  Identity` as CAFFY. Parse.o is 0.4% larger, the executable size is the
  same.

- north_array: We now generate less SRT entries because some of array
  primops used in this program like `NewArrayOp` get eliminated during
  Stg-to-Cmm and turn some CAFFY things into non-CAFFY. Main.o gets 24%
  larger (9224 bytes from 9000 bytes), executable sizes are the same.

- seward-space-leak: Difference in this program is better shown by this
  smaller example:

      module Lib where

      data CDS
        = Case [CDS] [(Int, CDS)]
        | Call CDS CDS

      instance Eq CDS where
        Case sels1 rets1 == Case sels2 rets2 =
            sels1 == sels2 && rets1 == rets2
        Call a1 b1 == Call a2 b2 =
            a1 == a2 && b1 == b2
        _ == _ =
            False

   In this program GHC HEAD builds a new SRT for the recursive group of
   `(==)`, `(/=)` and the dictionary closure. Then `/=` points to `==`
   in its SRT field, and `==` uses the SRT object as its SRT. With this
   patch we use the closure for `/=` as the SRT and add `==` there. Then
   `/=` gets an empty SRT field and `==` points to `/=` in its SRT
   field.

   This change looks fine to me.

   Main.o gets 0.07% larger, executable sizes are identical.

head.hackage
------------

head.hackage's CI script builds 428 packages from Hackage using this
patch with no failures.

Compiler performance
--------------------

The compiler perf tests report that the compiler allocates slightly more
(worst case observed so far is 4%). However most programs in the test
suite are small, single file programs. To benchmark compiler performance
on something more realistic I build Cabal (the library, 236 modules)
with different optimisation levels. For the "max residency" row I run
GHC with `+RTS -s -A100k -i0 -h` for more accurate numbers. Other rows
are generated with just `-s`. (This is because `-i0` causes running GC
much more frequently and as a result "bytes copied" gets inflated by
more than 25x in some cases)

* -O0

|                 | GHC HEAD       | This MR        | Diff   |
| --------------- | -------------- | -------------- | ------ |
| Bytes allocated | 54,413,350,872 | 54,701,099,464 | +0.52% |
| Bytes copied    |  4,926,037,184 |  4,990,638,760 | +1.31% |
| Max residency   |    421,225,624 |    424,324,264 | +0.73% |

* -O1

|                 | GHC HEAD        | This MR         | Diff   |
| --------------- | --------------- | --------------- | ------ |
| Bytes allocated | 245,849,209,992 | 246,562,088,672 | +0.28% |
| Bytes copied    |  26,943,452,560 |  27,089,972,296 | +0.54% |
| Max residency   |     982,643,440 |     991,663,432 | +0.91% |

* -O2

|                 | GHC HEAD        | This MR         | Diff   |
| --------------- | --------------- | --------------- | ------ |
| Bytes allocated | 291,044,511,408 | 291,863,910,912 | +0.28% |
| Bytes copied    |  37,044,237,616 |  36,121,690,472 | -2.49% |
| Max residency   |   1,071,600,328 |   1,086,396,256 | +1.38% |

Extra compiler allocations
--------------------------

Runtime allocations of programs are as reported above (NoFib section).

The compiler now allocates more than before. Main source of allocation
in this patch compared to base commit is the new SRT algorithm
(GHC.Cmm.Info.Build). Below is some of the extra work we do with this
patch, numbers generated by profiled stage 2 compiler when building a
pathological case (the test 'ManyConstructors') with '-O2':

- We now sort the final STG for a module, which means traversing the
  entire program, generating free variable set for each top-level
  binding, doing SCC analysis, and re-ordering the program. In
  ManyConstructors this step allocates 97,889,952 bytes.

- We now do SRT analysis on static data, which in a program like
  ManyConstructors causes analysing 10,000 bindings that we would
  previously just skip. This step allocates 70,898,352 bytes.

- We now maintain an SRT map for the entire module as we compile Cmm
  groups:

      data ModuleSRTInfo = ModuleSRTInfo
        { ...
        , moduleSRTMap :: SRTMap
        }

   (SRTMap is just a strict Map from the 'containers' library)

   This map gets an entry for most bindings in a module (exceptions are
   THUNKs and CAFFY static functions). For ManyConstructors this map
   gets 50015 entries.

- Once we're done with code generation we generate a NameSet from SRTMap
  for the non-CAFFY names in the current module. This set gets the same
  number of entries as the SRTMap.

- Finally we update CafInfos in ModDetails for the non-CAFFY Ids, using
  the NameSet generated in the previous step. This usually does the
  least amount of allocation among the work listed here.

Only place with this patch where we do less work in the CAF analysis in
the tidying pass (CoreTidy). However that doesn't save us much, as the
pass still needs to traverse the whole program and update IdInfos for
other reasons. Only thing we don't here do is the `hasCafRefs` pass over
the RHS of bindings, which is a stateless pass that returns a boolean
value, so it doesn't allocate much.

(Metric changes blow are all increased allocations)

Metric changes
--------------

Metric Increase:
    ManyAlternatives
    ManyConstructors
    T13035
    T14683
    T1969
    T9961
parent 01b15b83
......@@ -3,12 +3,11 @@
module GHC.Cmm (
-- * Cmm top-level datatypes
CmmProgram, CmmGroup, GenCmmGroup,
CmmDecl, GenCmmDecl(..),
CmmProgram, CmmGroup, CmmGroupSRTs, RawCmmGroup, GenCmmGroup,
CmmDecl, CmmDeclSRTs, GenCmmDecl(..),
CmmGraph, GenCmmGraph(..),
CmmBlock,
RawCmmDecl, RawCmmGroup,
Section(..), SectionType(..), CmmStatics(..), CmmStatic(..),
CmmBlock, RawCmmDecl,
Section(..), SectionType(..), CmmStatics(..), RawCmmStatics(..), CmmStatic(..),
isSecConstant,
-- ** Blocks containing lists
......@@ -56,8 +55,12 @@ import Data.ByteString (ByteString)
type CmmProgram = [CmmGroup]
type GenCmmGroup d h g = [GenCmmDecl d h g]
type CmmGroup = GenCmmGroup CmmStatics CmmTopInfo CmmGraph
type RawCmmGroup = GenCmmGroup CmmStatics (LabelMap CmmStatics) CmmGraph
-- | Cmm group before SRT generation
type CmmGroup = GenCmmGroup CmmStatics CmmTopInfo CmmGraph
-- | Cmm group with SRTs
type CmmGroupSRTs = GenCmmGroup RawCmmStatics CmmTopInfo CmmGraph
-- | "Raw" cmm group (TODO (osa): not sure what that means)
type RawCmmGroup = GenCmmGroup RawCmmStatics (LabelMap RawCmmStatics) CmmGraph
-----------------------------------------------------------------------------
-- CmmDecl, GenCmmDecl
......@@ -89,12 +92,13 @@ data GenCmmDecl d h g
Section
d
type CmmDecl = GenCmmDecl CmmStatics CmmTopInfo CmmGraph
type CmmDecl = GenCmmDecl CmmStatics CmmTopInfo CmmGraph
type CmmDeclSRTs = GenCmmDecl RawCmmStatics CmmTopInfo CmmGraph
type RawCmmDecl
= GenCmmDecl
CmmStatics
(LabelMap CmmStatics)
RawCmmStatics
(LabelMap RawCmmStatics)
CmmGraph
-----------------------------------------------------------------------------
......@@ -199,8 +203,20 @@ data CmmStatic
| CmmString ByteString
-- string of 8-bit values only, not zero terminated.
-- Static data before SRT generation
data CmmStatics
= Statics
= CmmStatics
CLabel -- Label of statics
CmmInfoTable
CostCentreStack
[CmmLit] -- Payload
| CmmStaticsRaw
CLabel -- Label of statics
[CmmStatic] -- The static data itself
-- Static data, after SRTs are generated
data RawCmmStatics
= RawCmmStatics
CLabel -- Label of statics
[CmmStatic] -- The static data itself
......
......@@ -106,7 +106,8 @@ module GHC.Cmm.CLabel (
pprCLabel,
isInfoTableLabel,
isConInfoTableLabel
isConInfoTableLabel,
isIdLabel
) where
#include "HsVersions.h"
......@@ -262,6 +263,10 @@ data CLabel
deriving Eq
isIdLabel :: CLabel -> Bool
isIdLabel IdLabel{} = True
isIdLabel _ = False
-- This is laborious, but necessary. We can't derive Ord because
-- Unique doesn't have an Ord instance. Note nonDetCmpUnique in the
-- implementation. See Note [No Ord for Unique]
......
......@@ -161,7 +161,7 @@ cmmDebugGen modLoc decls = map (blocksForScope Nothing) topScopes
= DebugBlock { dblProcedure = g_entry graph
, dblLabel = label
, dblCLabel = case info of
Just (Statics infoLbl _) -> infoLbl
Just (RawCmmStatics infoLbl _) -> infoLbl
Nothing
| g_entry graph == label -> entryLbl
| otherwise -> blockLbl label
......
......@@ -2,7 +2,6 @@
module GHC.Cmm.Info (
mkEmptyContInfoTable,
cmmToRawCmm,
mkInfoTable,
srtEscape,
-- info table accessors
......@@ -67,11 +66,11 @@ mkEmptyContInfoTable info_lbl
, cit_srt = Nothing
, cit_clo = Nothing }
cmmToRawCmm :: DynFlags -> Stream IO CmmGroup a
cmmToRawCmm :: DynFlags -> Stream IO CmmGroupSRTs a
-> IO (Stream IO RawCmmGroup a)
cmmToRawCmm dflags cmms
= do { uniqs <- mkSplitUniqSupply 'i'
; let do_one :: UniqSupply -> [CmmDecl] -> IO (UniqSupply, [RawCmmDecl])
; let do_one :: UniqSupply -> [CmmDeclSRTs] -> IO (UniqSupply, [RawCmmDecl])
do_one uniqs cmm =
-- NB. strictness fixes a space leak. DO NOT REMOVE.
withTimingSilent dflags (text "Cmm -> Raw Cmm")
......@@ -117,9 +116,8 @@ cmmToRawCmm dflags cmms
--
-- * The SRT slot is only there if there is SRT info to record
mkInfoTable :: DynFlags -> CmmDecl -> UniqSM [RawCmmDecl]
mkInfoTable _ (CmmData sec dat)
= return [CmmData sec dat]
mkInfoTable :: DynFlags -> CmmDeclSRTs -> UniqSM [RawCmmDecl]
mkInfoTable _ (CmmData sec dat) = return [CmmData sec dat]
mkInfoTable dflags proc@(CmmProc infos entry_lbl live blocks)
--
......@@ -169,7 +167,7 @@ mkInfoTable dflags proc@(CmmProc infos entry_lbl live blocks)
rel_std_info = map (makeRelativeRefTo dflags info_lbl) std_info
rel_extra_bits = map (makeRelativeRefTo dflags info_lbl) extra_bits
--
return (top_decls, (lbl, Statics info_lbl $ map CmmStaticLit $
return (top_decls, (lbl, RawCmmStatics info_lbl $ map CmmStaticLit $
reverse rel_extra_bits ++ rel_std_info))
-----------------------------------------------------
......@@ -423,7 +421,7 @@ mkProfLits _ (ProfilingInfo td cd)
; (cd_lit, cd_decl) <- newStringLit cd
; return ((td_lit,cd_lit), [td_decl,cd_decl]) }
newStringLit :: ByteString -> UniqSM (CmmLit, GenCmmDecl CmmStatics info stmt)
newStringLit :: ByteString -> UniqSM (CmmLit, GenCmmDecl RawCmmStatics info stmt)
newStringLit bytes
= do { uniq <- getUniqueM
; return (mkByteStringCLit (mkStringLitLabel uniq) bytes) }
......
This diff is collapsed.
......@@ -394,7 +394,7 @@ cmmdata :: { CmmParse () }
: 'section' STRING '{' data_label statics '}'
{ do lbl <- $4;
ss <- sequence $5;
code (emitDecl (CmmData (Section (section $2) lbl) (Statics lbl $ concat ss))) }
code (emitDecl (CmmData (Section (section $2) lbl) (CmmStaticsRaw lbl (concat ss)))) }
data_label :: { CmmParse CLabel }
: NAME ':'
......@@ -1175,7 +1175,7 @@ staticClosure :: UnitId -> FastString -> FastString -> [CmmLit] -> CmmParse ()
staticClosure pkg cl_label info payload
= do dflags <- getDynFlags
let lits = mkStaticClosure dflags (mkCmmInfoLabel pkg info) dontCareCCS payload [] [] []
code $ emitDataLits (mkCmmDataLabel pkg cl_label) lits
code $ emitRawDataLits (mkCmmDataLabel pkg cl_label) lits
foreignCall
:: String
......
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE TupleSections #-}
module GHC.Cmm.Pipeline (
-- | Converts C-- with an implicit stack and native C-- calls into
......@@ -27,6 +29,7 @@ import HscTypes
import Control.Monad
import Outputable
import GHC.Platform
import Data.Either (partitionEithers)
-----------------------------------------------------------------------------
-- | Top level driver for C-- pipeline
......@@ -37,14 +40,15 @@ cmmPipeline
-- dynamic flags: -dcmm-lint -ddump-cmm-cps
-> ModuleSRTInfo -- Info about SRTs generated so far
-> CmmGroup -- Input C-- with Procedures
-> IO (ModuleSRTInfo, CmmGroup) -- Output CPS transformed C--
-> IO (ModuleSRTInfo, CmmGroupSRTs) -- Output CPS transformed C--
cmmPipeline hsc_env srtInfo prog = withTimingSilent dflags (text "Cmm pipeline") forceRes $
do let dflags = hsc_dflags hsc_env
tops <- {-# SCC "tops" #-} mapM (cpsTop hsc_env) prog
(srtInfo, cmms) <- {-# SCC "doSRTs" #-} doSRTs dflags srtInfo tops
let (procs, data_) = partitionEithers tops
(srtInfo, cmms) <- {-# SCC "doSRTs" #-} doSRTs dflags srtInfo procs data_
dumpWith dflags Opt_D_dump_cmm_cps "Post CPS Cmm" FormatCMM (ppr cmms)
return (srtInfo, cmms)
......@@ -54,8 +58,8 @@ cmmPipeline hsc_env srtInfo prog = withTimingSilent dflags (text "Cmm pipeline")
dflags = hsc_dflags hsc_env
cpsTop :: HscEnv -> CmmDecl -> IO (CAFEnv, [CmmDecl])
cpsTop _ p@(CmmData {}) = return (mapEmpty, [p])
cpsTop :: HscEnv -> CmmDecl -> IO (Either (CAFEnv, [CmmDecl]) (CAFSet, CmmDecl))
cpsTop _ p@(CmmData _ statics) = return (Right (cafAnalData statics, p))
cpsTop hsc_env proc =
do
----------- Control-flow optimisations ----------------------------------
......@@ -85,7 +89,9 @@ cpsTop hsc_env proc =
dump Opt_D_dump_cmm_switch "Post switch plan" g
----------- Proc points -------------------------------------------------
let call_pps = {-# SCC "callProcPoints" #-} callProcPoints g
let
call_pps :: ProcPointSet -- LabelMap
call_pps = {-# SCC "callProcPoints" #-} callProcPoints g
proc_points <-
if splitting_proc_points
then do
......@@ -144,7 +150,7 @@ cpsTop hsc_env proc =
-- See Note [unreachable blocks]
dumps Opt_D_dump_cmm_cfg "Post control-flow optimisations" g
return (cafEnv, g)
return (Left (cafEnv, g))
where dflags = hsc_dflags hsc_env
platform = targetPlatform dflags
......
......@@ -54,13 +54,13 @@ import qualified Data.ByteString as BS
pprCmms :: (Outputable info, Outputable g)
=> [GenCmmGroup CmmStatics info g] -> SDoc
=> [GenCmmGroup RawCmmStatics info g] -> SDoc
pprCmms cmms = pprCode CStyle (vcat (intersperse separator $ map ppr cmms))
where
separator = space $$ text "-------------------" $$ space
writeCmms :: (Outputable info, Outputable g)
=> DynFlags -> Handle -> [GenCmmGroup CmmStatics info g] -> IO ()
=> DynFlags -> Handle -> [GenCmmGroup RawCmmStatics info g] -> IO ()
writeCmms dflags handle cmms = printForC dflags handle (pprCmms cmms)
-----------------------------------------------------------------------------
......@@ -72,6 +72,9 @@ instance (Outputable d, Outputable info, Outputable i)
instance Outputable CmmStatics where
ppr = pprStatics
instance Outputable RawCmmStatics where
ppr = pprRawStatics
instance Outputable CmmStatic where
ppr = pprStatic
......@@ -136,8 +139,14 @@ instance Outputable ForeignHint where
-- Strings are printed as C strings, and we print them as I8[],
-- following C--
--
pprStatics :: CmmStatics -> SDoc
pprStatics (Statics lbl ds) = vcat ((ppr lbl <> colon) : map ppr ds)
pprStatics (CmmStatics lbl itbl ccs payload) =
ppr lbl <> colon <+> ppr itbl <+> ppr ccs <+> ppr payload
pprStatics (CmmStaticsRaw lbl ds) = pprRawStatics (RawCmmStatics lbl ds)
pprRawStatics :: RawCmmStatics -> SDoc
pprRawStatics (RawCmmStatics lbl ds) = vcat ((ppr lbl <> colon) : map ppr ds)
pprStatic :: CmmStatic -> SDoc
pprStatic s = case s of
......
......@@ -192,22 +192,22 @@ mkWordCLit :: DynFlags -> Integer -> CmmLit
mkWordCLit dflags wd = CmmInt wd (wordWidth dflags)
mkByteStringCLit
:: CLabel -> ByteString -> (CmmLit, GenCmmDecl CmmStatics info stmt)
:: CLabel -> ByteString -> (CmmLit, GenCmmDecl RawCmmStatics info stmt)
-- We have to make a top-level decl for the string,
-- and return a literal pointing to it
mkByteStringCLit lbl bytes
= (CmmLabel lbl, CmmData (Section sec lbl) $ Statics lbl [CmmString bytes])
= (CmmLabel lbl, CmmData (Section sec lbl) $ RawCmmStatics lbl [CmmString bytes])
where
-- This can not happen for String literals (as there \NUL is replaced by
-- C0 80). However, it can happen with Addr# literals.
sec = if 0 `BS.elem` bytes then ReadOnlyData else CString
mkDataLits :: Section -> CLabel -> [CmmLit] -> GenCmmDecl CmmStatics info stmt
mkDataLits :: Section -> CLabel -> [CmmLit] -> GenCmmDecl RawCmmStatics info stmt
-- Build a data-segment data block
mkDataLits section lbl lits
= CmmData section (Statics lbl $ map CmmStaticLit lits)
= CmmData section (RawCmmStatics lbl $ map CmmStaticLit lits)
mkRODataLits :: CLabel -> [CmmLit] -> GenCmmDecl CmmStatics info stmt
mkRODataLits :: CLabel -> [CmmLit] -> GenCmmDecl RawCmmStatics info stmt
-- Build a read-only data block
mkRODataLits lbl lits
= mkDataLits section lbl lits
......
......@@ -87,7 +87,7 @@ pprTop (CmmProc infos clbl _in_live_regs graph) =
(case mapLookup (g_entry graph) infos of
Nothing -> empty
Just (Statics info_clbl info_dat) ->
Just (RawCmmStatics info_clbl info_dat) ->
pprDataExterns info_dat $$
pprWordArray info_is_in_rodata info_clbl info_dat) $$
(vcat [
......@@ -110,21 +110,21 @@ pprTop (CmmProc infos clbl _in_live_regs graph) =
-- We only handle (a) arrays of word-sized things and (b) strings.
pprTop (CmmData section (Statics lbl [CmmString str])) =
pprTop (CmmData section (RawCmmStatics lbl [CmmString str])) =
pprExternDecl lbl $$
hcat [
pprLocalness lbl, pprConstness (isSecConstant section), text "char ", ppr lbl,
text "[] = ", pprStringInCStyle str, semi
]
pprTop (CmmData section (Statics lbl [CmmUninitialised size])) =
pprTop (CmmData section (RawCmmStatics lbl [CmmUninitialised size])) =
pprExternDecl lbl $$
hcat [
pprLocalness lbl, pprConstness (isSecConstant section), text "char ", ppr lbl,
brackets (int size), semi
]
pprTop (CmmData section (Statics lbl lits)) =
pprTop (CmmData section (RawCmmStatics lbl lits)) =
pprDataExterns lits $$
pprWordArray (isSecConstant section) lbl lits
......
......@@ -33,8 +33,7 @@ import DataCon
import CostCentre
import VarEnv
import Module
import Name ( isExternalName, nameOccName, nameModule_maybe )
import OccName ( occNameFS )
import Name ( isExternalName, nameModule_maybe )
import BasicTypes ( Arity )
import TysWiredIn ( unboxedUnitDataCon, unitDataConId )
import Literal
......@@ -268,7 +267,6 @@ coreTopBindToStg dflags this_mod env ccs (NonRec id rhs)
bind = StgTopLifted $ StgNonRec id stg_rhs
in
assertConsistentCafInfo dflags id bind (ppr bind)
-- NB: previously the assertion printed 'rhs' and 'bind'
-- as well as 'id', but that led to a black hole
-- where printing the assertion error tripped the
......@@ -296,34 +294,8 @@ coreTopBindToStg dflags this_mod env ccs (Rec pairs)
bind = StgTopLifted $ StgRec (zip binders stg_rhss)
in
assertConsistentCafInfo dflags (head binders) bind (ppr binders)
(env', ccs', bind)
-- | CAF consistency issues will generally result in segfaults and are quite
-- difficult to debug (see #16846). We enable checking of the
-- 'consistentCafInfo' invariant with @-dstg-lint@ to increase the chance that
-- we catch these issues.
assertConsistentCafInfo :: DynFlags -> Id -> StgTopBinding -> SDoc -> a -> a
assertConsistentCafInfo dflags id bind err_doc result
| gopt Opt_DoStgLinting dflags || debugIsOn
, not $ consistentCafInfo id bind = pprPanic "assertConsistentCafInfo" err_doc
| otherwise = result
-- Assertion helper: this checks that the CafInfo on the Id matches
-- what CoreToStg has figured out about the binding's SRT. The
-- CafInfo will be exact in all cases except when CorePrep has
-- floated out a binding, in which case it will be approximate.
consistentCafInfo :: Id -> StgTopBinding -> Bool
consistentCafInfo id bind
= WARN( not (exact || is_sat_thing) , ppr id <+> ppr id_marked_caffy <+> ppr binding_is_caffy )
safe
where
safe = id_marked_caffy || not binding_is_caffy
exact = id_marked_caffy == binding_is_caffy
id_marked_caffy = mayHaveCafRefs (idCafInfo id)
binding_is_caffy = topStgBindHasCafRefs bind
is_sat_thing = occNameFS (nameOccName (idName id)) == fsLit "sat"
coreToTopStgRhs
:: DynFlags
-> CollectedCCs
......
......@@ -30,7 +30,6 @@ import CoreFVs
import CoreMonad ( CoreToDo(..) )
import CoreLint ( endPassIO )
import CoreSyn
import CoreSubst
import MkCore hiding( FloatBind(..) ) -- We use our own FloatBind here
import Type
import Literal
......@@ -54,13 +53,11 @@ import ErrUtils
import DynFlags
import Util
import Outputable
import GHC.Platform
import FastString
import Name ( NamedThing(..), nameSrcSpan )
import SrcLoc ( SrcSpan(..), realSrcLocSpan, mkRealSrcLoc )
import Data.Bits
import MonadUtils ( mapAccumLM )
import Data.List ( mapAccumL )
import Control.Monad
import CostCentre ( CostCentre, ccFromThisModule )
import qualified Data.Set as S
......@@ -266,40 +263,6 @@ where x is demanded, in which case we want to finish with
x* = f a
And then x will actually end up case-bound
Note [CafInfo and floating]
~~~~~~~~~~~~~~~~~~~~~~~~~~~
What happens when we try to float bindings to the top level? At this
point all the CafInfo is supposed to be correct, and we must make certain
that is true of the new top-level bindings. There are two cases
to consider
a) The top-level binding is marked asCafRefs. In that case we are
basically fine. The floated bindings had better all be lazy lets,
so they can float to top level, but they'll all have HasCafRefs
(the default) which is safe.
b) The top-level binding is marked NoCafRefs. This really happens
Example. CoreTidy produces
$fApplicativeSTM [NoCafRefs] = D:Alternative retry# ...blah...
Now CorePrep has to eta-expand to
$fApplicativeSTM = let sat = \xy. retry x y
in D:Alternative sat ...blah...
So what we *want* is
sat [NoCafRefs] = \xy. retry x y
$fApplicativeSTM [NoCafRefs] = D:Alternative sat ...blah...
So, gruesomely, we must set the NoCafRefs flag on the sat bindings,
*and* substitute the modified 'sat' into the old RHS.
It should be the case that 'sat' is itself [NoCafRefs] (a value, no
cafs) else the original top-level binding would not itself have been
marked [NoCafRefs]. The DEBUG check in CoreToStg for
consistentCafInfo will find this.
This is all very gruesome and horrible. It would be better to figure
out CafInfo later, after CorePrep. We'll do that in due course.
Meanwhile this horrible hack works.
Note [Join points and floating]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Join points can float out of other join points but not out of value bindings:
......@@ -503,8 +466,6 @@ cpePair top_lvl is_rec dmd is_unlifted env bndr rhs
; return (floats4, rhs4) }
where
platform = targetPlatform (cpe_dynFlags env)
arity = idArity bndr -- We must match this arity
---------------------
......@@ -520,14 +481,12 @@ cpePair top_lvl is_rec dmd is_unlifted env bndr rhs
| otherwise = dontFloat floats rhs
---------------------
float_top floats rhs -- Urhgh! See Note [CafInfo and floating]
| mayHaveCafRefs (idCafInfo bndr)
, allLazyTop floats
float_top floats rhs
| allLazyTop floats
= return (floats, rhs)
-- So the top-level binding is marked NoCafRefs
| Just (floats', rhs') <- canFloatFromNoCaf platform floats rhs
= return (floats', rhs')
| Just floats <- canFloat floats rhs
= return floats
| otherwise
= dontFloat floats rhs
......@@ -1321,57 +1280,27 @@ deFloatTop (Floats _ floats)
---------------------------------------------------------------------------
canFloatFromNoCaf :: Platform -> Floats -> CpeRhs -> Maybe (Floats, CpeRhs)
-- Note [CafInfo and floating]
canFloatFromNoCaf platform (Floats ok_to_spec fs) rhs
canFloat :: Floats -> CpeRhs -> Maybe (Floats, CpeRhs)
canFloat (Floats ok_to_spec fs) rhs
| OkToSpec <- ok_to_spec -- Worth trying
, Just (subst, fs') <- go (emptySubst, nilOL) (fromOL fs)
= Just (Floats OkToSpec fs', subst_expr subst rhs)
, Just fs' <- go nilOL (fromOL fs)
= Just (Floats OkToSpec fs', rhs)
| otherwise
= Nothing
where
subst_expr = substExpr (text "CorePrep")
go :: OrdList FloatingBind -> [FloatingBind]
-> Maybe (OrdList FloatingBind)
go :: (Subst, OrdList FloatingBind) -> [FloatingBind]
-> Maybe (Subst, OrdList FloatingBind)
go (fbs_out) [] = Just fbs_out
go (subst, fbs_out) [] = Just (subst, fbs_out)
go fbs_out (fb@(FloatLet _) : fbs_in)
= go (fbs_out `snocOL` fb) fbs_in
go (subst, fbs_out) (FloatLet (NonRec b r) : fbs_in)
| rhs_ok r
= go (subst', fbs_out `snocOL` new_fb) fbs_in
where
(subst', b') = set_nocaf_bndr subst b
new_fb = FloatLet (NonRec b' (subst_expr subst r))
go fbs_out (ft@FloatTick{} : fbs_in)
= go (fbs_out `snocOL` ft) fbs_in
go (subst, fbs_out) (FloatLet (Rec prs) : fbs_in)
| all rhs_ok rs
= go (subst', fbs_out `snocOL` new_fb) fbs_in
where
(bs,rs) = unzip prs
(subst', bs') = mapAccumL set_nocaf_bndr subst bs
rs' = map (subst_expr subst') rs
new_fb = FloatLet (Rec (bs' `zip` rs'))
go _ (FloatCase{} : _) = Nothing
go (subst, fbs_out) (ft@FloatTick{} : fbs_in)
= go (subst, fbs_out `snocOL` ft) fbs_in
go _ _ = Nothing -- Encountered a caffy binding
------------
set_nocaf_bndr subst bndr
= (extendIdSubst subst bndr (Var bndr'), bndr')
where
bndr' = bndr `setIdCafInfo` NoCafRefs
------------
rhs_ok :: CoreExpr -> Bool
-- We can only float to top level from a NoCaf thing if
-- the new binding is static. However it can't mention
-- any non-static things or it would *already* be Caffy
rhs_ok = rhsIsStatic platform (\_ -> False)
(\_nt i -> pprPanic "rhsIsStatic" (integer i))
-- Integer or Natural literals should not show up
wantFloatNested :: RecFlag -> Demand -> Bool -> Floats -> CpeRhs -> Bool
wantFloatNested is_rec dmd is_unlifted floats rhs
......
This diff is collapsed.
......@@ -160,17 +160,38 @@ mkPartialIface hsc_env mod_details
-- | Fully instantiate a interface
-- Adds fingerprints and potentially code generator produced information.
mkFullIface :: HscEnv -> PartialModIface -> IO ModIface
mkFullIface hsc_env partial_iface = do
mkFullIface :: HscEnv -> PartialModIface -> Maybe NameSet -> IO ModIface
mkFullIface hsc_env partial_iface mb_non_cafs = do
let decls
| gopt Opt_OmitInterfacePragmas (hsc_dflags hsc_env)
= mi_decls partial_iface
| otherwise
= updateDeclCafInfos (mi_decls partial_iface) mb_non_cafs
full_iface <-
{-# SCC "addFingerprints" #-}
addFingerprints hsc_env partial_iface
addFingerprints hsc_env partial_iface{ mi_decls = decls }
-- Debug printing
dumpIfSet_dyn (hsc_dflags hsc_env) Opt_D_dump_hi "FINAL INTERFACE" FormatText (pprModIface full_iface)
return full_iface
updateDeclCafInfos :: [IfaceDecl] -> Maybe NameSet -> [IfaceDecl]
updateDeclCafInfos decls Nothing = decls
updateDeclCafInfos decls (Just non_cafs) = map update_decl decls
where
update_decl decl
| IfaceId nm ty details id_info <- decl
, elemNameSet nm non_cafs
= IfaceId nm ty details $
case id_info of
NoInfo -> HasInfo [HsNoCafRefs]
HasInfo infos -> HasInfo (HsNoCafRefs : infos)
| otherwise
= decl
-- | Make an interface from the results of typechecking only. Useful
-- for non-optimising compilation, or where we aren't generating any
-- object code at all ('HscNothing').
......@@ -221,7 +242,7 @@ mkIfaceTc hsc_env safe_mode mod_details
doc_hdr' doc_map arg_map
mod_details
mkFullIface hsc_env partial_iface
mkFullIface hsc_env partial_iface Nothing
mkIface_ :: HscEnv -> Module -> HscSource
-> Bool -> Dependencies -> GlobalRdrEnv
......
{-# LANGUAGE CPP #-}
module GHC.Stg.DepAnal (depSortStgPgm) where
import GhcPrelude
import GHC.Stg.Syntax
import Id
import Name (Name)
import NameEnv
import Outputable
import UniqSet (nonDetEltsUniqSet)
import VarSet
import Data.Graph (SCC (..))
--------------------------------------------------------------------------------
-- * Dependency analysis
-- | Set of bound variables
type BVs = VarSet
-- | Set of free variables
type FVs = VarSet
-- | Dependency analysis on STG terms.
--
-- Dependencies of a binding are just free variables in the binding. This
-- includes imported ids and ids in the current module. For recursive groups we
-- just return one set of free variables which is just the union of dependencies
-- of all bindings in the group.
--
-- Implementation: pass bound variables (BVs) to recursive calls, get free
-- variables (FVs) back.
--
annTopBindingsDeps :: [StgTopBinding] -> [(StgTopBinding, FVs)]
annTopBindingsDeps bs = zip bs (map top_bind bs)
where
top_bind :: StgTopBinding -> FVs
top_bind StgTopStringLit{} =
emptyVarSet
top_bind (StgTopLifted bs) =
binding emptyVarSet bs
binding :: BVs -> StgBinding -> FVs
binding bounds (StgNonRec _ r) =
rhs bounds r
binding bounds (StgRec bndrs) =
unionVarSets $
map (bind_non_rec (extendVarSetList bounds (map fst bndrs))) bndrs
bind_non_rec :: BVs -> (Id, StgRhs) -> FVs
bind_non_rec bounds (_, r) =
rhs bounds r
rhs :: BVs -> StgRhs -> FVs
rhs bounds (StgRhsClosure _ _ _ as e) =
expr (extendVarSetList bounds as) e
rhs bounds (StgRhsCon _ _ as) =
args bounds as
var :: BVs -> Var -> FVs