Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • ghc/ghc
  • bgamari/ghc
  • syd/ghc
  • ggreif/ghc
  • watashi/ghc
  • RolandSenn/ghc
  • mpickering/ghc
  • DavidEichmann/ghc
  • carter/ghc
  • harpocrates/ghc
  • ethercrow/ghc
  • mijicd/ghc
  • adamse/ghc
  • alexbiehl/ghc
  • gridaphobe/ghc
  • trofi/ghc
  • supersven/ghc
  • ppk/ghc
  • ulysses4ever/ghc
  • AndreasK/ghc
  • ghuntley/ghc
  • shayne-fletcher-da/ghc
  • fgaz/ghc
  • yav/ghc
  • osa1/ghc
  • mbbx6spp/ghc
  • JulianLeviston/ghc
  • reactormonk/ghc
  • rae/ghc
  • takenobu-hs/ghc
  • michalt/ghc
  • andrewthad/ghc
  • hsyl20/ghc
  • scottgw/ghc
  • sjakobi/ghc
  • angerman/ghc
  • RyanGlScott/ghc
  • hvr/ghc
  • howtonotwin/ghc
  • chessai/ghc
  • m-renaud/ghc
  • brprice/ghc
  • stevehartdata/ghc
  • sighingnow/ghc
  • kgardas/ghc
  • ckoparkar/ghc
  • alp/ghc
  • smaeul/ghc
  • kakkun61/ghc
  • sykloid/ghc
  • newhoggy/ghc
  • toonn/ghc
  • nineonine/ghc
  • Phyx/ghc
  • ezyang/ghc
  • tweag/ghc
  • langston/ghc
  • ndmitchell/ghc
  • rockbmb/ghc
  • artempyanykh/ghc
  • mniip/ghc
  • mynguyenbmc/ghc
  • alexfmpe/ghc
  • crockeea/ghc
  • nh2/ghc
  • vaibhavsagar/ghc
  • phadej/ghc
  • Haskell-mouse/ghc
  • lolotp/ghc
  • spacekitteh/ghc
  • michaelpj/ghc
  • mgsloan/ghc
  • HPCohen/ghc
  • tmobile/ghc
  • radrow/ghc
  • simonmar/ghc
  • _deepfire/ghc
  • Ericson2314/ghc
  • leitao/ghc
  • fumieval/ghc
  • trac-isovector/ghc
  • cblp/ghc
  • xich/ghc
  • ciil/ghc
  • erthalion/ghc
  • xldenis/ghc
  • autotaker/ghc
  • haskell-wasm/ghc
  • kcsongor/ghc
  • agander/ghc
  • Baranowski/ghc
  • trac-dredozubov/ghc
  • 23Skidoo/ghc
  • iustin/ghc
  • ningning/ghc
  • josefs/ghc
  • kabuhr/ghc
  • gallais/ghc
  • dten/ghc
  • expipiplus1/ghc
  • Pluralia/ghc
  • rohanjr/ghc
  • intricate/ghc
  • kirelagin/ghc
  • Javran/ghc
  • DanielG/ghc
  • trac-mizunashi_mana/ghc
  • pparkkin/ghc
  • bollu/ghc
  • ntc2/ghc
  • jaspervdj/ghc
  • JoshMeredith/ghc
  • wz1000/ghc
  • zkourouma/ghc
  • code5hot/ghc
  • jdprice/ghc
  • tdammers/ghc
  • J-mie6/ghc
  • trac-lantti/ghc
  • ch1bo/ghc
  • cgohla/ghc
  • lucamolteni/ghc
  • acairncross/ghc
  • amerocu/ghc
  • chreekat/ghc
  • txsmith/ghc
  • trupill/ghc
  • typetetris/ghc
  • sergv/ghc
  • fryguybob/ghc
  • erikd/ghc
  • trac-roland/ghc
  • setupminimal/ghc
  • Friede80/ghc
  • SkyWriter/ghc
  • xplorld/ghc
  • abrar/ghc
  • obsidiansystems/ghc
  • Icelandjack/ghc
  • adinapoli/ghc
  • trac-matthewbauer/ghc
  • heatsink/ghc
  • dwijnand/ghc
  • Cmdv/ghc
  • alinab/ghc
  • pepeiborra/ghc
  • fommil/ghc
  • luochen1990/ghc
  • rlupton20/ghc
  • applePrincess/ghc
  • lehins/ghc
  • ronmrdechai/ghc
  • leeadam/ghc
  • harendra/ghc
  • mightymosquito1991/ghc
  • trac-gershomb/ghc
  • lucajulian/ghc
  • Rizary/ghc
  • VictorCMiraldo/ghc
  • jamesbrock/ghc
  • andrewdmeier/ghc
  • luke/ghc
  • pranaysashank/ghc
  • cocreature/ghc
  • hithroc/ghc
  • obreitwi/ghc
  • slrtbtfs/ghc
  • kaol/ghc
  • yairchu/ghc
  • Mathemagician98/ghc
  • trac-taylorfausak/ghc
  • leungbk/ghc
  • MichaWiedenmann/ghc
  • chris-martin/ghc
  • TDecki/ghc
  • adithyaov/ghc
  • trac-gelisam/ghc
  • Lysxia/ghc
  • complyue/ghc
  • bwignall/ghc
  • sternmull/ghc
  • sonika/ghc
  • leif/ghc
  • broadwaylamb/ghc
  • myszon/ghc
  • danbroooks/ghc
  • Mechachleopteryx/ghc
  • zardyh/ghc
  • trac-vdukhovni/ghc
  • OmarKhaledAbdo/ghc
  • arrowd/ghc
  • Bodigrim/ghc
  • matheus23/ghc
  • cardenaso11/ghc
  • trac-Athas/ghc
  • mb720/ghc
  • DylanZA/ghc
  • liff/ghc
  • typedrat/ghc
  • trac-claude/ghc
  • jbm/ghc
  • Gertjan423/ghc
  • PHO/ghc
  • JKTKops/ghc
  • kockahonza/ghc
  • msakai/ghc
  • Sir4ur0n/ghc
  • barambani/ghc
  • vishnu.c/ghc
  • dcoutts/ghc
  • trac-runeks/ghc
  • trac-MaxGabriel/ghc
  • lexi.lambda/ghc
  • strake/ghc
  • spavikevik/ghc
  • JakobBruenker/ghc
  • rmanne/ghc
  • gdziadkiewicz/ghc
  • ani/ghc
  • iliastsi/ghc
  • smunix/ghc
  • judah/ghc
  • blackgnezdo/ghc
  • emilypi/ghc
  • trac-bpfoley/ghc
  • muesli4/ghc
  • trac-gkaracha/ghc
  • Kleidukos/ghc
  • nek0/ghc
  • TristanCacqueray/ghc
  • dwulive/ghc
  • mbakke/ghc
  • arybczak/ghc
  • Yang123321/ghc
  • maksbotan/ghc
  • QuietMisdreavus/ghc
  • trac-olshanskydr/ghc
  • emekoi/ghc
  • samuela/ghc
  • josephcsible/ghc
  • dramforever/ghc
  • lpsmith/ghc
  • DenisFrezzato/ghc
  • michivi/ghc
  • jneira/ghc
  • jeffhappily/ghc
  • Ivan-Yudin/ghc
  • nakaji-dayo/ghc
  • gdevanla/ghc
  • galen/ghc
  • fendor/ghc
  • yaitskov/ghc
  • rcythr/ghc
  • awpr/ghc
  • jeremyschlatter/ghc
  • Aver1y/ghc
  • mitchellvitez/ghc
  • merijn/ghc
  • tomjaguarpaw1/ghc
  • trac-NoidedSuper/ghc
  • erewok/ghc
  • trac-junji.hashimoto/ghc
  • adamwespiser/ghc
  • bjaress/ghc
  • jhrcek/ghc
  • leonschoorl/ghc
  • lukasz-golebiewski/ghc
  • sheaf/ghc
  • last-g/ghc
  • carassius1014/ghc
  • eschwartz/ghc
  • dwincort/ghc
  • felixwiemuth/ghc
  • TimWSpence/ghc
  • marcusmonteirodesouza/ghc
  • WJWH/ghc
  • vtols/ghc
  • theobat/ghc
  • BinderDavid/ghc
  • ckoparkar0/ghc
  • alexander-kjeldaas/ghc
  • dme2/ghc
  • philderbeast/ghc
  • aaronallen8455/ghc
  • rayshih/ghc
  • benkard/ghc
  • mpardalos/ghc
  • saidelman/ghc
  • leiftw/ghc
  • ca333/ghc
  • bwroga/ghc
  • nmichael44/ghc
  • trac-crobbins/ghc
  • felixonmars/ghc
  • adityagupta1089/ghc
  • hgsipiere/ghc
  • treeowl/ghc
  • alexpeits/ghc
  • CraigFe/ghc
  • dnlkrgr/ghc
  • kerckhove_ts/ghc
  • cptwunderlich/ghc
  • eiais/ghc
  • hahohihu/ghc
  • sanchayan/ghc
  • lemmih/ghc
  • sehqlr/ghc
  • trac-dbeacham/ghc
  • luite/ghc
  • trac-f-a/ghc
  • vados/ghc
  • luntain/ghc
  • fatho/ghc
  • alexbiehl-gc/ghc
  • dcbdan/ghc
  • tvh/ghc
  • liam-ly/ghc
  • timbobbarnes/ghc
  • GovanifY/ghc
  • shanth2600/ghc
  • gliboc/ghc
  • duog/ghc
  • moxonsghost/ghc
  • zander/ghc
  • masaeedu/ghc
  • georgefst/ghc
  • guibou/ghc
  • nicuveo/ghc
  • mdebruijne/ghc
  • stjordanis/ghc
  • emiflake/ghc
  • wygulmage/ghc
  • frasertweedale/ghc
  • coot/ghc
  • aratamizuki/ghc
  • tsandstr/ghc
  • mrBliss/ghc
  • Anton-Latukha/ghc
  • tadfisher/ghc
  • vapourismo/ghc
  • Sorokin-Anton/ghc
  • basile-henry/ghc
  • trac-mightybyte/ghc
  • AbsoluteNikola/ghc
  • cobrien99/ghc
  • songzh/ghc
  • blamario/ghc
  • aj4ayushjain/ghc
  • trac-utdemir/ghc
  • tangcl/ghc
  • hdgarrood/ghc
  • maerwald/ghc
  • arjun/ghc
  • ratherforky/ghc
  • haskieLambda/ghc
  • EmilGedda/ghc
  • Bogicevic/ghc
  • eddiejessup/ghc
  • kozross/ghc
  • AlistairB/ghc
  • 3Rafal/ghc
  • christiaanb/ghc
  • trac-bit/ghc
  • matsumonkie/ghc
  • trac-parsonsmatt/ghc
  • chisui/ghc
  • jaro/ghc
  • trac-kmiyazato/ghc
  • davidsd/ghc
  • Tritlo/ghc
  • I-B-3/ghc
  • lykahb/ghc
  • AriFordsham/ghc
  • turion1/ghc
  • berberman/ghc
  • christiantakle/ghc
  • zyklotomic/ghc
  • trac-ocramz/ghc
  • CSEdd/ghc
  • doyougnu/ghc
  • mmhat/ghc
  • why-not-try-calmer/ghc
  • plutotulp/ghc
  • kjekac/ghc
  • Manvi07/ghc
  • teo/ghc
  • cactus/ghc
  • CarrieMY/ghc
  • abel/ghc
  • yihming/ghc
  • tsakki/ghc
  • jessicah/ghc
  • oliverbunting/ghc
  • meld/ghc
  • friedbrice/ghc
  • Joald/ghc
  • abarbu/ghc
  • DigitalBrains1/ghc
  • sternenseemann/ghc
  • alexDarcy/ghc
  • hexchain/ghc
  • minimario/ghc
  • zliu41/ghc
  • tommd/ghc
  • jazcarate/ghc
  • peterbecich/ghc
  • alirezaghey/ghc
  • solomon/ghc
  • mikael.urankar/ghc
  • davjam/ghc
  • int-index/ghc
  • MorrowM/ghc
  • nrnrnr/ghc
  • Sonfamm/ghc-test-only
  • afzt1/ghc
  • nguyenhaibinh-tpc/ghc
  • trac-lierdakil/ghc
  • MichaWiedenmann1/ghc
  • jmorag/ghc
  • Ziharrk/ghc
  • trac-MitchellSalad/ghc
  • juampe/ghc
  • jwaldmann/ghc
  • snowleopard/ghc
  • juhp/ghc
  • normalcoder/ghc
  • ksqsf/ghc
  • trac-jberryman/ghc
  • roberth/ghc
  • 1ntEgr8/ghc
  • epworth/ghc
  • MrAdityaAlok/ghc
  • JunmingZhao42/ghc
  • jappeace/ghc
  • trac-Gabriel439/ghc
  • alt-romes/ghc
  • HugoPeters1024/ghc
  • 10ne1/ghc-fork
  • agentultra/ghc
  • Garfield1002/ghc
  • ChickenProp/ghc
  • clyring/ghc
  • MaxHearnden/ghc
  • jumper149/ghc
  • vem/ghc
  • ketzacoatl/ghc
  • Rosuavio/ghc
  • jackohughes/ghc
  • p4l1ly/ghc
  • konsumlamm/ghc
  • shlevy/ghc
  • torsten.schmits/ghc
  • andremarianiello/ghc
  • amesgen/ghc
  • googleson78/ghc
  • InfiniteVerma/ghc
  • uhbif19/ghc
  • yiyunliu/ghc
  • raehik/ghc
  • mrkun/ghc
  • telser/ghc
  • 1Jajen1/ghc
  • slotThe/ghc
  • WinstonHartnett/ghc
  • brandonchinn178/ghc
  • mpilgrem/ghc
  • dreamsmasher/ghc
  • schuelermine/ghc
  • trac-Viwor/ghc
  • undergroundquizscene/ghc
  • evertedsphere/ghc
  • coltenwebb/ghc
  • oberblastmeister/ghc
  • agrue/ghc
  • lf-/ghc
  • zacwood9/ghc
  • steshaw/ghc
  • high-cloud/ghc
  • SkamDart/ghc
  • PiDelport/ghc
  • maoif/ghc
  • RossPaterson/ghc
  • CharlesTaylor7/ghc
  • ribosomerocker/ghc
  • trac-ramirez7/ghc
  • daig/ghc
  • NicolasT/ghc
  • FinleyMcIlwaine/ghc
  • lawtonnichols/ghc
  • jmtd/ghc
  • ozkutuk/ghc
  • wildsebastian/ghc
  • nikshalark/ghc
  • lrzlin/ghc
  • tobias/ghc
  • fw/ghc
  • hawkinsw/ghc
  • type-dance/ghc
  • rui314/ghc
  • ocharles/ghc
  • wavewave/ghc
  • TheKK/ghc
  • nomeata/ghc
  • trac-csabahruska/ghc
  • jonathanjameswatson/ghc
  • L-as/ghc
  • Axman6/ghc
  • barracuda156/ghc
  • trac-jship/ghc
  • jake-87/ghc
  • meooow/ghc
  • rebeccat/ghc
  • hamana55/ghc
  • Enigmage/ghc
  • kokobd/ghc
  • agevelt/ghc
  • gshen42/ghc
  • chrismwendt/ghc
  • MangoIV/ghc
  • teto/ghc
  • Sookr1/ghc
  • trac-thomasjm/ghc
  • barci2/ghc-dev
  • trac-m4dc4p/ghc
  • dixonary/ghc
  • breakerzirconia/ghc
  • alexsio27444/ghc
  • glocq/ghc
  • sourabhxyz/ghc
  • ryantrinkle/ghc
  • Jade/ghc
  • scedfaliako/ghc
  • martijnbastiaan/ghc
  • trac-george.colpitts/ghc
  • ammarbinfaisal/ghc
  • mimi.vx/ghc
  • lortabac/ghc
  • trac-zyla/ghc
  • benbellick/ghc
  • aadaa-fgtaa/ghc
  • jvanbruegge/ghc
  • archbung/ghc
  • gilmi/ghc
  • mfonism/ghc
  • alex-mckenna/ghc
  • Ei30metry/ghc
  • DiegoDiverio/ghc
  • jorgecunhamendes/ghc
  • liesnikov/ghc
  • akrmn/ghc
  • trac-simplifierticks/ghc
  • jacco/ghc
  • rhendric/ghc
  • damhiya/ghc
  • ryndubei/ghc
  • DaveBarton/ghc
  • trac-Profpatsch/ghc
  • GZGavinZhao/ghc
  • ncfavier/ghc
  • jameshaydon/ghc
  • ajccosta/ghc
  • dschrempf/ghc
  • cydparser/ghc
  • LinuxUserGD/ghc
  • elodielander/ghc
  • facundominguez/ghc
  • psilospore/ghc
  • lachrimae/ghc
  • dylan-thinnes/ghc-type-errors-plugin
  • hamishmack/ghc
  • Leary/ghc
  • lzszt/ghc
  • lyokha/ghc
  • trac-glaubitz/ghc
  • Rewbert/ghc
  • andreabedini/ghc
  • Jasagredo/ghc
  • sol/ghc
  • OlegAlexander/ghc
  • trac-sthibaul/ghc
  • avdv/ghc
  • Wendaolee/ghc
  • ur4t/ghc
  • daylily/ghc
  • boltzmannrain/ghc
  • mmzk1526/ghc
  • trac-fizzixnerd/ghc
  • soulomoon/ghc
  • rwmjones/ghc
  • j14i/ghc
  • tracsis/ghc
  • gesh/ghc
  • flip101/ghc
  • eldritch-cookie/ghc
  • LemonjamesD/ghc
  • pgujjula/ghc
  • skeuchel/ghc
  • noteed/ghc
  • gulin.serge/ghc
  • Torrekie/ghc
  • jlwoodwa/ghc
  • ayanamists/ghc
  • husong998/ghc
  • trac-edmundnoble/ghc
  • josephf/ghc
  • contrun/ghc
  • baulig/ghc
  • edsko/ghc
  • mzschr/ghc-issue-24732
  • ulidtko/ghc
  • Arsen/ghc
  • trac-sjoerd_visscher/ghc
  • crumbtoo/ghc
  • L0neGamer/ghc
  • DrewFenwick/ghc
  • benz0li/ghc
  • MaciejWas/ghc
  • jordanrule/ghc
  • trac-qqwy/ghc
  • LiamGoodacre/ghc
  • isomorpheme/ghc
  • trac-danidiaz/ghc
  • Kariim/ghc
  • MTaimoorZaeem/ghc
  • hololeap/ghc
  • ticat-fp/ghc
  • meritamen/ghc
  • criskell/ghc
  • trac-kraai/ghc
  • aergus/ghc
  • jdral/ghc
  • SamB/ghc
  • fp/ghc
  • Tristian/ghc
  • ywgrit/ghc
  • KatsuPatrick/ghc
  • OsePedro/ghc
637 results
Show changes
Commits on Source (5)
  • Andreas Klebinger's avatar
    CodeGen: Make folds User/DefinerOfRegs INLINEABLE. · 51e3bb6d
    Andreas Klebinger authored and Marge Bot's avatar Marge Bot committed
    Reduces allocation for the test case I was looking at by about 1.2%.
    Mostly from avoiding allocation of some folding functions which turn
    into let-no-escape bindings which just reuse their environment instead.
    
    We also force inlining in a few key places in CmmSink which helps a bit
    more.
    51e3bb6d
  • Andreas Klebinger's avatar
    CmmSink: Force inlining of foldRegsDefd · 69ae10c3
    Andreas Klebinger authored and Marge Bot's avatar Marge Bot committed
    Helps avoid allocating the folding function. Improves
    perf for T3294 by about 1%.
    69ae10c3
  • Andreas Klebinger's avatar
    Cmm: Make a few types and utility function slightly stricter. · 6e3da800
    Andreas Klebinger authored and Marge Bot's avatar Marge Bot committed
    About 0.6% reduction in allocations for the code I was looking at.
    
    Not a huge difference but no need to throw away performance.
    6e3da800
  • Andreas Klebinger's avatar
    Cmm.Sink: Optimize retaining of assignments, live sets. · aef44d7f
    Andreas Klebinger authored and Marge Bot's avatar Marge Bot committed
    Sinking requires us to track live local regs after each
    cmm statement. We used to do this via "Set LocalReg".
    
    However we can replace this with a solution based on IntSet
    which is overall more efficient without losing much. The thing
    we lose is width of the variables, which isn't used by the sinking
    pass anyway.
    
    I also reworked how we keep assignments to regs mentioned in
    skipped assignments. I put the details into
    Note [Keeping assignemnts mentioned in skipped RHSs].
    
    The gist of it is instead of keeping track of it via the use count
    which is a `IntMap Int` we now use the live regs set (IntSet) which
    is quite a bit faster.
    
    I think it also matches the semantics a lot better. The skipped
    (not discarded) assignment does in fact keep the regs on it's rhs
    alive so keeping track of this in the live set seems like the clearer
    solution as well.
    
    Improves allocations for T3294 by yet another 1%.
    aef44d7f
  • Andreas Klebinger's avatar
    GHC.Cmm.Opt: Be stricter in results. · 59f2249b
    Andreas Klebinger authored and Marge Bot's avatar Marge Bot committed
    Optimization either returns Nothing if nothing is to be done or
    `Just <cmmExpr>` otherwise. There is no point in being lazy in
    `cmmExpr`. We usually inspect this element so the thunk gets forced
    not long after.
    
    We might eliminate it as dead code once in a blue moon but that's
    not a case worth optimizing for.
    
    Overall the impact of this is rather low. As Cmm.Opt doesn't allocate
    much (compared to the rest of GHC) to begin with.
    59f2249b
......@@ -53,14 +53,14 @@ import GHC.Types.Basic (Alignment, mkAlignment, alignmentOf)
-----------------------------------------------------------------------------
data CmmExpr
= CmmLit CmmLit -- Literal
= CmmLit !CmmLit -- Literal
| CmmLoad !CmmExpr !CmmType -- Read memory location
| CmmReg !CmmReg -- Contents of register
| CmmMachOp MachOp [CmmExpr] -- Machine operation (+, -, *, etc.)
| CmmStackSlot Area {-# UNPACK #-} !Int
-- addressing expression of a stack slot
-- See Note [CmmStackSlot aliasing]
| CmmRegOff !CmmReg Int
| CmmRegOff !CmmReg !Int
-- CmmRegOff reg i
-- ** is shorthand only, meaning **
-- CmmMachOp (MO_Add rep) [x, CmmLit (CmmInt (fromIntegral i) rep)]
......@@ -173,16 +173,16 @@ Now, the assignments of y go away,
-}
data CmmLit
= CmmInt !Integer Width
= CmmInt !Integer !Width
-- Interpretation: the 2's complement representation of the value
-- is truncated to the specified size. This is easier than trying
-- to keep the value within range, because we don't know whether
-- it will be used as a signed or unsigned value (the CmmType doesn't
-- distinguish between signed & unsigned).
| CmmFloat Rational Width
| CmmFloat Rational !Width
| CmmVec [CmmLit] -- Vector literal
| CmmLabel CLabel -- Address of label
| CmmLabelOff CLabel Int -- Address of label + byte offset
| CmmLabelOff CLabel !Int -- Address of label + byte offset
-- Due to limitations in the C backend, the following
-- MUST ONLY be used inside the info table indicated by label2
......@@ -191,7 +191,7 @@ data CmmLit
-- Don't use it at all unless tablesNextToCode.
-- It is also used inside the NCG during when generating
-- position-independent code.
| CmmLabelDiffOff CLabel CLabel Int Width -- label1 - label2 + offset
| CmmLabelDiffOff CLabel CLabel !Int !Width -- label1 - label2 + offset
-- In an expression, the width just has the effect of MO_SS_Conv
-- from wordWidth to the desired width.
--
......@@ -363,6 +363,7 @@ instance DefinerOfRegs LocalReg CmmReg where
foldRegsDefd _ _ z (CmmGlobal _) = z
instance UserOfRegs GlobalReg CmmReg where
{-# INLINEABLE foldRegsUsed #-}
foldRegsUsed _ _ z (CmmLocal _) = z
foldRegsUsed _ f z (CmmGlobal reg) = f z reg
......@@ -379,6 +380,7 @@ instance Ord r => DefinerOfRegs r r where
instance (Ord r, UserOfRegs r CmmReg) => UserOfRegs r CmmExpr where
-- The (Ord r) in the context is necessary here
-- See Note [Recursive superclasses] in GHC.Tc.TyCl.Instance
{-# INLINEABLE foldRegsUsed #-}
foldRegsUsed platform f !z e = expr z e
where expr z (CmmLit _) = z
expr z (CmmLoad addr _) = foldRegsUsed platform f z addr
......
{-# LANGUAGE GADTs #-}
{-# LANGUAGE ScopedTypeVariables #-}
module GHC.Cmm.LRegSet (
LRegSet,
LRegKey,
emptyLRegSet,
nullLRegSet,
insertLRegSet,
elemLRegSet,
deleteFromLRegSet,
sizeLRegSet,
plusLRegSet,
elemsLRegSet
) where
import GHC.Prelude
import GHC.Types.Unique
import GHC.Cmm.Expr
import Data.IntSet as IntSet
-- Compact sets for membership tests of local variables.
type LRegSet = IntSet.IntSet
type LRegKey = Int
emptyLRegSet :: LRegSet
emptyLRegSet = IntSet.empty
nullLRegSet :: LRegSet -> Bool
nullLRegSet = IntSet.null
insertLRegSet :: LocalReg -> LRegSet -> LRegSet
insertLRegSet l = IntSet.insert (getKey (getUnique l))
elemLRegSet :: LocalReg -> LRegSet -> Bool
elemLRegSet l = IntSet.member (getKey (getUnique l))
deleteFromLRegSet :: LRegSet -> LocalReg -> LRegSet
deleteFromLRegSet set reg = IntSet.delete (getKey . getUnique $ reg) set
sizeLRegSet :: IntSet -> Int
sizeLRegSet = IntSet.size
plusLRegSet :: IntSet -> IntSet -> IntSet
plusLRegSet = IntSet.union
elemsLRegSet :: IntSet -> [Int]
elemsLRegSet = IntSet.toList
......@@ -6,9 +6,12 @@
module GHC.Cmm.Liveness
( CmmLocalLive
, cmmLocalLiveness
, cmmLocalLivenessL
, cmmGlobalLiveness
, liveLattice
, liveLatticeL
, gen_kill
, gen_killL
)
where
......@@ -22,11 +25,14 @@ import GHC.Cmm.Dataflow.Block
import GHC.Cmm.Dataflow.Collections
import GHC.Cmm.Dataflow
import GHC.Cmm.Dataflow.Label
import GHC.Cmm.LRegSet
import GHC.Data.Maybe
import GHC.Utils.Outputable
import GHC.Utils.Panic
import GHC.Types.Unique
-----------------------------------------------------------------------------
-- Calculating what variables are live on entry to a basic block
-----------------------------------------------------------------------------
......@@ -92,3 +98,66 @@ xferLive platform (BlockCC eNode middle xNode) fBase =
in mapSingleton (entryLabel eNode) result
{-# SPECIALIZE xferLive :: Platform -> TransferFun (CmmLive LocalReg) #-}
{-# SPECIALIZE xferLive :: Platform -> TransferFun (CmmLive GlobalReg) #-}
-----------------------------------------------------------------------------
-- | Specialization that only retains the keys for local variables.
--
-- Local variablas are mostly glorified Ints, and some parts of the compiler
-- really don't care about anything but the Int part. So we can avoid some
-- overhead by computing a IntSet instead of a Set LocalReg which (unsurprisingly)
-- is quite a bit faster.
-----------------------------------------------------------------------------
type BlockEntryLivenessL = LabelMap LRegSet
-- | The dataflow lattice
liveLatticeL :: DataflowLattice LRegSet
liveLatticeL = DataflowLattice emptyLRegSet add
where
add (OldFact old) (NewFact new) =
let !join = plusLRegSet old new
in changedIf (sizeLRegSet join > sizeLRegSet old) join
cmmLocalLivenessL :: Platform -> CmmGraph -> BlockEntryLivenessL
cmmLocalLivenessL platform graph =
check $ analyzeCmmBwd liveLatticeL (xferLiveL platform) graph mapEmpty
where
entry = g_entry graph
check facts =
noLiveOnEntryL entry (expectJust "check" $ mapLookup entry facts) facts
-- | On entry to the procedure, there had better not be any LocalReg's live-in.
noLiveOnEntryL :: BlockId -> LRegSet -> a -> a
noLiveOnEntryL bid in_fact x =
if nullLRegSet in_fact then x
else pprPanic "LocalReg's live-in to graph" (ppr bid <+> ppr reg_uniques)
where
-- We convert the int's to uniques so that the printing matches that
-- of registers.
reg_uniques = map mkUniqueGrimily $ elemsLRegSet in_fact
gen_killL
:: (DefinerOfRegs LocalReg n, UserOfRegs LocalReg n)
=> Platform -> n -> LRegSet -> LRegSet
gen_killL platform node set =
let !afterKill = foldRegsDefd platform deleteFromLRegSet set node
in foldRegsUsed platform (flip insertLRegSet) afterKill node
{-# INLINE gen_killL #-}
xferLiveL
:: ( UserOfRegs LocalReg (CmmNode O O)
, DefinerOfRegs LocalReg (CmmNode O O)
, UserOfRegs LocalReg (CmmNode O C)
, DefinerOfRegs LocalReg (CmmNode O C)
)
=> Platform -> TransferFun LRegSet
xferLiveL platform (BlockCC eNode middle xNode) fBase =
let joined = gen_killL platform xNode $! joinOutFacts liveLatticeL xNode fBase
!result = foldNodesBwdOO (gen_killL platform) middle joined
in mapSingleton (entryLabel eNode) result
......@@ -318,6 +318,7 @@ foreignTargetHints target
-- Instances of register and slot users / definers
instance UserOfRegs LocalReg (CmmNode e x) where
{-# INLINEABLE foldRegsUsed #-}
foldRegsUsed platform f !z n = case n of
CmmAssign _ expr -> fold f z expr
CmmStore addr rval -> fold f (fold f z addr) rval
......@@ -332,6 +333,7 @@ instance UserOfRegs LocalReg (CmmNode e x) where
fold f z n = foldRegsUsed platform f z n
instance UserOfRegs GlobalReg (CmmNode e x) where
{-# INLINEABLE foldRegsUsed #-}
foldRegsUsed platform f !z n = case n of
CmmAssign _ expr -> fold f z expr
CmmStore addr rval -> fold f (fold f z addr) rval
......@@ -348,10 +350,12 @@ instance UserOfRegs GlobalReg (CmmNode e x) where
instance (Ord r, UserOfRegs r CmmReg) => UserOfRegs r ForeignTarget where
-- The (Ord r) in the context is necessary here
-- See Note [Recursive superclasses] in GHC.Tc.TyCl.Instance
{-# INLINEABLE foldRegsUsed #-}
foldRegsUsed _ _ !z (PrimTarget _) = z
foldRegsUsed platform f !z (ForeignTarget e _) = foldRegsUsed platform f z e
instance DefinerOfRegs LocalReg (CmmNode e x) where
{-# INLINEABLE foldRegsDefd #-}
foldRegsDefd platform f !z n = case n of
CmmAssign lhs _ -> fold f z lhs
CmmUnsafeForeignCall _ fs _ -> fold f z fs
......@@ -362,6 +366,7 @@ instance DefinerOfRegs LocalReg (CmmNode e x) where
fold f z n = foldRegsDefd platform f z n
instance DefinerOfRegs GlobalReg (CmmNode e x) where
{-# INLINEABLE foldRegsDefd #-}
foldRegsDefd platform f !z n = case n of
CmmAssign lhs _ -> fold f z lhs
CmmUnsafeForeignCall tgt _ _ -> fold f z (foreignTargetRegs tgt)
......
......@@ -58,7 +58,7 @@ cmmMachOpFoldM
-> Maybe CmmExpr
cmmMachOpFoldM _ op [CmmLit (CmmInt x rep)]
= Just $ case op of
= Just $! case op of
MO_S_Neg _ -> CmmLit (CmmInt (-x) rep)
MO_Not _ -> CmmLit (CmmInt (complement x) rep)
......@@ -90,13 +90,13 @@ cmmMachOpFoldM platform conv_outer [CmmMachOp conv_inner [x]]
-- but remember to use the signedness from the widening, just in case
-- the final conversion is a widen.
| rep1 < rep2 && rep2 > rep3 ->
Just $ cmmMachOpFold platform (intconv signed1 rep1 rep3) [x]
Just $! cmmMachOpFold platform (intconv signed1 rep1 rep3) [x]
-- Nested widenings: collapse if the signedness is the same
| rep1 < rep2 && rep2 < rep3 && signed1 == signed2 ->
Just $ cmmMachOpFold platform (intconv signed1 rep1 rep3) [x]
Just $! cmmMachOpFold platform (intconv signed1 rep1 rep3) [x]
-- Nested narrowings: collapse
| rep1 > rep2 && rep2 > rep3 ->
Just $ cmmMachOpFold platform (MO_UU_Conv rep1 rep3) [x]
Just $! cmmMachOpFold platform (MO_UU_Conv rep1 rep3) [x]
| otherwise ->
Nothing
where
......@@ -117,34 +117,34 @@ cmmMachOpFoldM platform mop [CmmLit (CmmInt x xrep), CmmLit (CmmInt y _)]
= case mop of
-- for comparisons: don't forget to narrow the arguments before
-- comparing, since they might be out of range.
MO_Eq _ -> Just $ CmmLit (CmmInt (if x_u == y_u then 1 else 0) (wordWidth platform))
MO_Ne _ -> Just $ CmmLit (CmmInt (if x_u /= y_u then 1 else 0) (wordWidth platform))
MO_U_Gt _ -> Just $ CmmLit (CmmInt (if x_u > y_u then 1 else 0) (wordWidth platform))
MO_U_Ge _ -> Just $ CmmLit (CmmInt (if x_u >= y_u then 1 else 0) (wordWidth platform))
MO_U_Lt _ -> Just $ CmmLit (CmmInt (if x_u < y_u then 1 else 0) (wordWidth platform))
MO_U_Le _ -> Just $ CmmLit (CmmInt (if x_u <= y_u then 1 else 0) (wordWidth platform))
MO_S_Gt _ -> Just $ CmmLit (CmmInt (if x_s > y_s then 1 else 0) (wordWidth platform))
MO_S_Ge _ -> Just $ CmmLit (CmmInt (if x_s >= y_s then 1 else 0) (wordWidth platform))
MO_S_Lt _ -> Just $ CmmLit (CmmInt (if x_s < y_s then 1 else 0) (wordWidth platform))
MO_S_Le _ -> Just $ CmmLit (CmmInt (if x_s <= y_s then 1 else 0) (wordWidth platform))
MO_Add r -> Just $ CmmLit (CmmInt (x + y) r)
MO_Sub r -> Just $ CmmLit (CmmInt (x - y) r)
MO_Mul r -> Just $ CmmLit (CmmInt (x * y) r)
MO_U_Quot r | y /= 0 -> Just $ CmmLit (CmmInt (x_u `quot` y_u) r)
MO_U_Rem r | y /= 0 -> Just $ CmmLit (CmmInt (x_u `rem` y_u) r)
MO_S_Quot r | y /= 0 -> Just $ CmmLit (CmmInt (x `quot` y) r)
MO_S_Rem r | y /= 0 -> Just $ CmmLit (CmmInt (x `rem` y) r)
MO_And r -> Just $ CmmLit (CmmInt (x .&. y) r)
MO_Or r -> Just $ CmmLit (CmmInt (x .|. y) r)
MO_Xor r -> Just $ CmmLit (CmmInt (x `xor` y) r)
MO_Shl r -> Just $ CmmLit (CmmInt (x `shiftL` fromIntegral y) r)
MO_U_Shr r -> Just $ CmmLit (CmmInt (x_u `shiftR` fromIntegral y) r)
MO_S_Shr r -> Just $ CmmLit (CmmInt (x `shiftR` fromIntegral y) r)
MO_Eq _ -> Just $! CmmLit (CmmInt (if x_u == y_u then 1 else 0) (wordWidth platform))
MO_Ne _ -> Just $! CmmLit (CmmInt (if x_u /= y_u then 1 else 0) (wordWidth platform))
MO_U_Gt _ -> Just $! CmmLit (CmmInt (if x_u > y_u then 1 else 0) (wordWidth platform))
MO_U_Ge _ -> Just $! CmmLit (CmmInt (if x_u >= y_u then 1 else 0) (wordWidth platform))
MO_U_Lt _ -> Just $! CmmLit (CmmInt (if x_u < y_u then 1 else 0) (wordWidth platform))
MO_U_Le _ -> Just $! CmmLit (CmmInt (if x_u <= y_u then 1 else 0) (wordWidth platform))
MO_S_Gt _ -> Just $! CmmLit (CmmInt (if x_s > y_s then 1 else 0) (wordWidth platform))
MO_S_Ge _ -> Just $! CmmLit (CmmInt (if x_s >= y_s then 1 else 0) (wordWidth platform))
MO_S_Lt _ -> Just $! CmmLit (CmmInt (if x_s < y_s then 1 else 0) (wordWidth platform))
MO_S_Le _ -> Just $! CmmLit (CmmInt (if x_s <= y_s then 1 else 0) (wordWidth platform))
MO_Add r -> Just $! CmmLit (CmmInt (x + y) r)
MO_Sub r -> Just $! CmmLit (CmmInt (x - y) r)
MO_Mul r -> Just $! CmmLit (CmmInt (x * y) r)
MO_U_Quot r | y /= 0 -> Just $! CmmLit (CmmInt (x_u `quot` y_u) r)
MO_U_Rem r | y /= 0 -> Just $! CmmLit (CmmInt (x_u `rem` y_u) r)
MO_S_Quot r | y /= 0 -> Just $! CmmLit (CmmInt (x `quot` y) r)
MO_S_Rem r | y /= 0 -> Just $! CmmLit (CmmInt (x `rem` y) r)
MO_And r -> Just $! CmmLit (CmmInt (x .&. y) r)
MO_Or r -> Just $! CmmLit (CmmInt (x .|. y) r)
MO_Xor r -> Just $! CmmLit (CmmInt (x `xor` y) r)
MO_Shl r -> Just $! CmmLit (CmmInt (x `shiftL` fromIntegral y) r)
MO_U_Shr r -> Just $! CmmLit (CmmInt (x_u `shiftR` fromIntegral y) r)
MO_S_Shr r -> Just $! CmmLit (CmmInt (x `shiftR` fromIntegral y) r)
_ -> Nothing
......@@ -162,7 +162,7 @@ cmmMachOpFoldM platform mop [CmmLit (CmmInt x xrep), CmmLit (CmmInt y _)]
cmmMachOpFoldM platform op [x@(CmmLit _), y]
| not (isLit y) && isCommutableMachOp op
= Just (cmmMachOpFold platform op [y, x])
= Just $! (cmmMachOpFold platform op [y, x])
-- Turn (a+b)+c into a+(b+c) where possible. Because literals are
-- moved to the right, it is more likely that we will find
......@@ -183,7 +183,7 @@ cmmMachOpFoldM platform op [x@(CmmLit _), y]
cmmMachOpFoldM platform mop1 [CmmMachOp mop2 [arg1,arg2], arg3]
| mop2 `associates_with` mop1
&& not (isLit arg1) && not (isPicReg arg1)
= Just (cmmMachOpFold platform mop2 [arg1, cmmMachOpFold platform mop1 [arg2,arg3]])
= Just $! (cmmMachOpFold platform mop2 [arg1, cmmMachOpFold platform mop1 [arg2,arg3]])
where
MO_Add{} `associates_with` MO_Sub{} = True
mop1 `associates_with` mop2 =
......@@ -192,7 +192,7 @@ cmmMachOpFoldM platform mop1 [CmmMachOp mop2 [arg1,arg2], arg3]
-- special case: (a - b) + c ==> a + (c - b)
cmmMachOpFoldM platform mop1@(MO_Add{}) [CmmMachOp mop2@(MO_Sub{}) [arg1,arg2], arg3]
| not (isLit arg1) && not (isPicReg arg1)
= Just (cmmMachOpFold platform mop1 [arg1, cmmMachOpFold platform mop2 [arg3,arg2]])
= Just $! (cmmMachOpFold platform mop1 [arg1, cmmMachOpFold platform mop2 [arg3,arg2]])
-- special case: (PicBaseReg + lit) + N ==> PicBaseReg + (lit+N)
--
......@@ -205,27 +205,27 @@ cmmMachOpFoldM platform mop1@(MO_Add{}) [CmmMachOp mop2@(MO_Sub{}) [arg1,arg2],
cmmMachOpFoldM _ MO_Add{} [ CmmMachOp op@MO_Add{} [pic, CmmLit lit]
, CmmLit (CmmInt n rep) ]
| isPicReg pic
= Just $ CmmMachOp op [pic, CmmLit $ cmmOffsetLit lit off ]
= Just $! CmmMachOp op [pic, CmmLit $ cmmOffsetLit lit off ]
where off = fromIntegral (narrowS rep n)
-- Make a RegOff if we can
cmmMachOpFoldM _ (MO_Add _) [CmmReg reg, CmmLit (CmmInt n rep)]
= Just $ cmmRegOff reg (fromIntegral (narrowS rep n))
= Just $! cmmRegOff reg (fromIntegral (narrowS rep n))
cmmMachOpFoldM _ (MO_Add _) [CmmRegOff reg off, CmmLit (CmmInt n rep)]
= Just $ cmmRegOff reg (off + fromIntegral (narrowS rep n))
= Just $! cmmRegOff reg (off + fromIntegral (narrowS rep n))
cmmMachOpFoldM _ (MO_Sub _) [CmmReg reg, CmmLit (CmmInt n rep)]
= Just $ cmmRegOff reg (- fromIntegral (narrowS rep n))
= Just $! cmmRegOff reg (- fromIntegral (narrowS rep n))
cmmMachOpFoldM _ (MO_Sub _) [CmmRegOff reg off, CmmLit (CmmInt n rep)]
= Just $ cmmRegOff reg (off - fromIntegral (narrowS rep n))
= Just $! cmmRegOff reg (off - fromIntegral (narrowS rep n))
-- Fold label(+/-)offset into a CmmLit where possible
cmmMachOpFoldM _ (MO_Add _) [CmmLit lit, CmmLit (CmmInt i rep)]
= Just $ CmmLit (cmmOffsetLit lit (fromIntegral (narrowU rep i)))
= Just $! CmmLit (cmmOffsetLit lit (fromIntegral (narrowU rep i)))
cmmMachOpFoldM _ (MO_Add _) [CmmLit (CmmInt i rep), CmmLit lit]
= Just $ CmmLit (cmmOffsetLit lit (fromIntegral (narrowU rep i)))
= Just $! CmmLit (cmmOffsetLit lit (fromIntegral (narrowU rep i)))
cmmMachOpFoldM _ (MO_Sub _) [CmmLit lit, CmmLit (CmmInt i rep)]
= Just $ CmmLit (cmmOffsetLit lit (fromIntegral (negate (narrowU rep i))))
= Just $! CmmLit (cmmOffsetLit lit (fromIntegral (negate (narrowU rep i))))
-- Comparison of literal with widened operand: perform the comparison
......@@ -245,7 +245,7 @@ cmmMachOpFoldM platform cmp [CmmMachOp conv [x], CmmLit (CmmInt i _)]
-- and the literal fits in the smaller size:
i == narrow_fn rep i
-- then we can do the comparison at the smaller size
= Just (cmmMachOpFold platform narrow_cmp [x, CmmLit (CmmInt i rep)])
= Just $! (cmmMachOpFold platform narrow_cmp [x, CmmLit (CmmInt i rep)])
where
maybe_conversion (MO_UU_Conv from to)
| to > from
......@@ -320,8 +320,8 @@ cmmMachOpFoldM platform mop [x, (CmmLit (CmmInt 1 rep))]
MO_Mul _ -> Just x
MO_S_Quot _ -> Just x
MO_U_Quot _ -> Just x
MO_S_Rem _ -> Just $ CmmLit (CmmInt 0 rep)
MO_U_Rem _ -> Just $ CmmLit (CmmInt 0 rep)
MO_S_Rem _ -> Just $! CmmLit (CmmInt 0 rep)
MO_U_Rem _ -> Just $! CmmLit (CmmInt 0 rep)
-- Comparisons; trickier
-- See Note [Comparison operators]
......@@ -346,18 +346,18 @@ cmmMachOpFoldM platform mop [x, (CmmLit (CmmInt n _))]
= case mop of
MO_Mul rep
| Just p <- exactLog2 n ->
Just (cmmMachOpFold platform (MO_Shl rep) [x, CmmLit (CmmInt p rep)])
Just $! (cmmMachOpFold platform (MO_Shl rep) [x, CmmLit (CmmInt p rep)])
MO_U_Quot rep
| Just p <- exactLog2 n ->
Just (cmmMachOpFold platform (MO_U_Shr rep) [x, CmmLit (CmmInt p rep)])
Just $! (cmmMachOpFold platform (MO_U_Shr rep) [x, CmmLit (CmmInt p rep)])
MO_U_Rem rep
| Just _ <- exactLog2 n ->
Just (cmmMachOpFold platform (MO_And rep) [x, CmmLit (CmmInt (n - 1) rep)])
Just $! (cmmMachOpFold platform (MO_And rep) [x, CmmLit (CmmInt (n - 1) rep)])
MO_S_Quot rep
| Just p <- exactLog2 n,
CmmReg _ <- x -> -- We duplicate x in signedQuotRemHelper, hence require
-- it is a reg. FIXME: remove this restriction.
Just (cmmMachOpFold platform (MO_S_Shr rep)
Just $! (cmmMachOpFold platform (MO_S_Shr rep)
[signedQuotRemHelper rep p, CmmLit (CmmInt p rep)])
MO_S_Rem rep
| Just p <- exactLog2 n,
......@@ -366,7 +366,7 @@ cmmMachOpFoldM platform mop [x, (CmmLit (CmmInt n _))]
-- We replace (x `rem` 2^p) by (x - (x `quot` 2^p) * 2^p).
-- Moreover, we fuse MO_S_Shr (last operation of MO_S_Quot)
-- and MO_S_Shl (multiplication by 2^p) into a single MO_And operation.
Just (cmmMachOpFold platform (MO_Sub rep)
Just $! (cmmMachOpFold platform (MO_Sub rep)
[x, cmmMachOpFold platform (MO_And rep)
[signedQuotRemHelper rep p, CmmLit (CmmInt (- n) rep)]])
_ -> Nothing
......
{-# LANGUAGE GADTs #-}
{-# LANGUAGE ScopedTypeVariables #-}
module GHC.Cmm.Sink (
cmmSink
) where
......@@ -8,6 +10,7 @@ import GHC.Prelude
import GHC.Cmm
import GHC.Cmm.Opt
import GHC.Cmm.Liveness
import GHC.Cmm.LRegSet
import GHC.Cmm.Utils
import GHC.Cmm.Dataflow.Block
import GHC.Cmm.Dataflow.Label
......@@ -16,29 +19,13 @@ import GHC.Cmm.Dataflow.Graph
import GHC.Platform.Regs
import GHC.Platform
import GHC.Types.Unique
import GHC.Types.Unique.FM
import qualified Data.IntSet as IntSet
import Data.List (partition)
import qualified Data.Set as Set
import Data.Maybe
-- Compact sets for membership tests of local variables.
type LRegSet = IntSet.IntSet
emptyLRegSet :: LRegSet
emptyLRegSet = IntSet.empty
nullLRegSet :: LRegSet -> Bool
nullLRegSet = IntSet.null
insertLRegSet :: LocalReg -> LRegSet -> LRegSet
insertLRegSet l = IntSet.insert (getKey (getUnique l))
elemLRegSet :: LocalReg -> LRegSet -> Bool
elemLRegSet l = IntSet.member (getKey (getUnique l))
import GHC.Exts (inline)
-- -----------------------------------------------------------------------------
-- Sinking and inlining
......@@ -167,8 +154,8 @@ type Assignments = [Assignment]
cmmSink :: Platform -> CmmGraph -> CmmGraph
cmmSink platform graph = ofBlockList (g_entry graph) $ sink mapEmpty $ blocks
where
liveness = cmmLocalLiveness platform graph
getLive l = mapFindWithDefault Set.empty l liveness
liveness = cmmLocalLivenessL platform graph
getLive l = mapFindWithDefault emptyLRegSet l liveness
blocks = revPostorder graph
......@@ -188,8 +175,8 @@ cmmSink platform graph = ofBlockList (g_entry graph) $ sink mapEmpty $ blocks
-- Annotate the middle nodes with the registers live *after*
-- the node. This will help us decide whether we can inline
-- an assignment in the current node or not.
live = Set.unions (map getLive succs)
live_middle = gen_kill platform last live
live = IntSet.unions (map getLive succs)
live_middle = gen_killL platform last live
ann_middles = annotate platform live_middle (blockToList middle)
-- Now sink and inline in this block
......@@ -201,7 +188,7 @@ cmmSink platform graph = ofBlockList (g_entry graph) $ sink mapEmpty $ blocks
-- one predecessor), so identify the join points and the set
-- of registers live in them.
(joins, nonjoins) = partition (`mapMember` join_pts) succs
live_in_joins = Set.unions (map getLive joins)
live_in_joins = IntSet.unions (map getLive joins)
-- We do not want to sink an assignment into multiple branches,
-- so identify the set of registers live in multiple successors.
......@@ -210,26 +197,28 @@ cmmSink platform graph = ofBlockList (g_entry graph) $ sink mapEmpty $ blocks
-- now live in multiple branches.
init_live_sets = map getLive nonjoins
live_in_multi live_sets r =
case filter (Set.member r) live_sets of
case filter (elemLRegSet r) live_sets of
(_one:_two:_) -> True
_ -> False
-- Now, drop any assignments that we will not sink any further.
(dropped_last, assigs'') = dropAssignments platform drop_if init_live_sets assigs'
drop_if :: (LocalReg, CmmExpr, AbsMem)
-> [LRegSet] -> (Bool, [LRegSet])
drop_if a@(r,rhs,_) live_sets = (should_drop, live_sets')
where
should_drop = conflicts platform a final_last
|| not (isTrivial platform rhs) && live_in_multi live_sets r
|| r `Set.member` live_in_joins
|| r `elemLRegSet` live_in_joins
live_sets' | should_drop = live_sets
| otherwise = map upd live_sets
upd set | r `Set.member` set = set `Set.union` live_rhs
upd set | r `elemLRegSet` set = set `IntSet.union` live_rhs
| otherwise = set
live_rhs = foldRegsUsed platform extendRegSet emptyRegSet rhs
live_rhs = foldRegsUsed platform (flip insertLRegSet) emptyLRegSet rhs
final_middle = foldl' blockSnoc middle' dropped_last
......@@ -266,9 +255,9 @@ isTrivial _ _ = False
--
-- annotate each node with the set of registers live *after* the node
--
annotate :: Platform -> LocalRegSet -> [CmmNode O O] -> [(LocalRegSet, CmmNode O O)]
annotate :: Platform -> LRegSet -> [CmmNode O O] -> [(LRegSet, CmmNode O O)]
annotate platform live nodes = snd $ foldr ann (live,[]) nodes
where ann n (live,nodes) = (gen_kill platform n live, (live,n) : nodes)
where ann n (live,nodes) = (gen_killL platform n live, (live,n) : nodes)
--
-- Find the blocks that have multiple successors (join points)
......@@ -285,13 +274,13 @@ findJoinPoints blocks = mapFilter (>1) succ_counts
-- filter the list of assignments to remove any assignments that
-- are not live in a continuation.
--
filterAssignments :: Platform -> LocalRegSet -> Assignments -> Assignments
filterAssignments :: Platform -> LRegSet -> Assignments -> Assignments
filterAssignments platform live assigs = reverse (go assigs [])
where go [] kept = kept
go (a@(r,_,_):as) kept | needed = go as (a:kept)
| otherwise = go as kept
where
needed = r `Set.member` live
needed = r `elemLRegSet` live
|| any (conflicts platform a) (map toNode kept)
-- Note that we must keep assignments that are
-- referred to by other assignments we have
......@@ -312,7 +301,7 @@ filterAssignments platform live assigs = reverse (go assigs [])
--
walk :: Platform
-> [(LocalRegSet, CmmNode O O)] -- nodes of the block, annotated with
-> [(LRegSet, CmmNode O O)] -- nodes of the block, annotated with
-- the set of registers live *after*
-- this node.
......@@ -366,11 +355,11 @@ shouldSink _ _other = Nothing
-- out of inlining, but the inliner will see that r is live
-- after the instruction and choose not to inline r in the rhs.
--
shouldDiscard :: CmmNode e x -> LocalRegSet -> Bool
shouldDiscard :: CmmNode e x -> LRegSet -> Bool
shouldDiscard node live
= case node of
CmmAssign r (CmmReg r') | r == r' -> True
CmmAssign (CmmLocal r) _ -> not (r `Set.member` live)
CmmAssign (CmmLocal r) _ -> not (r `elemLRegSet` live)
_otherwise -> False
......@@ -403,8 +392,9 @@ dropAssignments platform should_drop state assigs
-- inlining opens up opportunities for doing so.
tryToInline
:: Platform
-> LocalRegSet -- set of registers live after this
:: forall x. Platform
-> LRegSet -- set of registers live after this
-- -> LocalRegSet -- set of registers live after this
-- node. We cannot inline anything
-- that is live after the node, unless
-- it is small enough to duplicate.
......@@ -415,35 +405,42 @@ tryToInline
, Assignments -- Remaining assignments
)
tryToInline platform live node assigs = go usages node emptyLRegSet assigs
tryToInline platform liveAfter node assigs =
-- pprTrace "tryToInline assig length:" (ppr $ length assigs) $
go usages liveAfter node emptyLRegSet assigs
where
usages :: UniqFM LocalReg Int -- Maps each LocalReg to a count of how often it is used
usages = foldLocalRegsUsed platform addUsage emptyUFM node
go _usages node _skipped [] = (node, [])
go :: UniqFM LocalReg Int -> LRegSet -> CmmNode O x -> LRegSet -> Assignments
-> (CmmNode O x, Assignments)
go _usages _live node _skipped [] = (node, [])
go usages node skipped (a@(l,rhs,_) : rest)
| cannot_inline = dont_inline
| occurs_none = discard -- Note [discard during inlining]
| occurs_once = inline_and_discard
| isTrivial platform rhs = inline_and_keep
| otherwise = dont_inline
go usages live node skipped (a@(l,rhs,_) : rest)
| cannot_inline = dont_inline
| occurs_none = discard -- Note [discard during inlining]
| occurs_once = inline_and_discard
| isTrivial platform rhs = inline_and_keep
| otherwise = dont_inline
where
inline_and_discard = go usages' inl_node skipped rest
inline_and_discard = go usages' live inl_node skipped rest
where usages' = foldLocalRegsUsed platform addUsage usages rhs
discard = go usages node skipped rest
discard = go usages live node skipped rest
dont_inline = keep node -- don't inline the assignment, keep it
inline_and_keep = keep inl_node -- inline the assignment, keep it
keep :: CmmNode O x -> (CmmNode O x, Assignments)
keep node' = (final_node, a : rest')
where (final_node, rest') = go usages' node' (insertLRegSet l skipped) rest
usages' = foldLocalRegsUsed platform (\m r -> addToUFM m r 2)
usages rhs
-- we must not inline anything that is mentioned in the RHS
-- of a binding that we have already skipped, so we set the
-- usages of the regs on the RHS to 2.
where (final_node, rest') = go usages live' node' (insertLRegSet l skipped) rest
-- Avoid discarding of assignments to vars on the rhs.
-- See Note [Keeping assignemnts mentioned in skipped RHSs]
-- usages' = foldLocalRegsUsed platform (\m r -> addToUFM m r 2)
-- usages rhs
live' = inline foldLocalRegsUsed platform (\m r -> insertLRegSet r m)
live rhs
cannot_inline = skipped `regsUsedIn` rhs -- Note [dependent assignments]
|| l `elemLRegSet` skipped
......@@ -451,7 +448,7 @@ tryToInline platform live node assigs = go usages node emptyLRegSet assigs
-- How often is l used in the current node.
l_usages = lookupUFM usages l
l_live = l `elemRegSet` live
l_live = l `elemLRegSet` live
occurs_once = not l_live && l_usages == Just 1
occurs_none = not l_live && l_usages == Nothing
......@@ -467,6 +464,27 @@ tryToInline platform live node assigs = go usages node emptyLRegSet assigs
inl_exp (CmmMachOp op args) = cmmMachOpFold platform op args
inl_exp other = other
{- Note [Keeping assignemnts mentioned in skipped RHSs]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
If we have to assignments: [z = y, y = e1] and we skip
z we *must* retain the assignment y = e1. This is because
we might inline "z = y" into another node later on so we
must ensure y is still defined at this point.
If we dropped the assignment of "y = e1" then we would end up
referencing a variable which hasn't been mentioned after
inlining.
We use a hack to do this.
We pretend the regs from the rhs are live after the current
node. Since we only discard assignments to variables
which are dead after the current block this prevents discarding of the
assignment. It still allows inlining should e1 be a trivial rhs
however.
-}
{- Note [improveConditional]
......@@ -610,18 +628,34 @@ conflicts platform (r, rhs, addr) node
-- (7) otherwise, no conflict
| otherwise = False
{- Note [Inlining foldRegsDefd]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~
foldRegsDefd is, after optimization, *not* a small function so
it's only marked INLINEABLE, but not INLINE.
However in some specific cases we call it *very* often making it
important to avoid the overhead of allocating the folding function.
So we simply force inlining via the magic inline function.
For T3294 this improves allocation with -O by ~1%.
-}
-- Returns True if node defines any global registers that are used in the
-- Cmm expression
globalRegistersConflict :: Platform -> CmmExpr -> CmmNode e x -> Bool
globalRegistersConflict platform expr node =
foldRegsDefd platform (\b r -> b || regUsedIn platform (CmmGlobal r) expr)
-- See Note [Inlining foldRegsDefd]
inline foldRegsDefd platform (\b r -> b || regUsedIn platform (CmmGlobal r) expr)
False node
-- Returns True if node defines any local registers that are used in the
-- Cmm expression
localRegistersConflict :: Platform -> CmmExpr -> CmmNode e x -> Bool
localRegistersConflict platform expr node =
foldRegsDefd platform (\b r -> b || regUsedIn platform (CmmLocal r) expr)
-- See Note [Inlining foldRegsDefd]
inline foldRegsDefd platform (\b r -> b || regUsedIn platform (CmmLocal r) expr)
False node
-- Note [Sinking and calls]
......
......@@ -264,9 +264,11 @@ cmmOffset platform e byte_off = case e of
CmmStackSlot area off -> CmmStackSlot area (off - byte_off)
-- note stack area offsets increase towards lower addresses
CmmMachOp (MO_Add rep) [expr, CmmLit (CmmInt byte_off1 _rep)]
-> CmmMachOp (MO_Add rep) [expr, CmmLit (CmmInt (byte_off1 + toInteger byte_off) rep)]
_ -> CmmMachOp (MO_Add width) [e, CmmLit (CmmInt (toInteger byte_off) width)]
where width = cmmExprWidth platform e
-> let !lit_off = (byte_off1 + toInteger byte_off)
in CmmMachOp (MO_Add rep) [expr, CmmLit (CmmInt lit_off rep)]
_ -> let !width = cmmExprWidth platform e
in
CmmMachOp (MO_Add width) [e, CmmLit (CmmInt (toInteger byte_off) width)]
-- Smart constructor for CmmRegOff. Same caveats as cmmOffset above.
cmmRegOff :: CmmReg -> Int -> CmmExpr
......
......@@ -115,6 +115,7 @@ import Data.Int
import qualified Data.IntMap as IM
import Data.Set (Set)
import qualified Data.Set as Set
import qualified Data.IntSet as IntSet
import Data.String
import Data.Word
import System.IO ( Handle )
......@@ -863,6 +864,9 @@ instance (Outputable a) => Outputable (NonEmpty a) where
instance (Outputable a) => Outputable (Set a) where
ppr s = braces (fsep (punctuate comma (map ppr (Set.toList s))))
instance Outputable IntSet.IntSet where
ppr s = braces (fsep (punctuate comma (map ppr (IntSet.toList s))))
instance (Outputable a, Outputable b) => Outputable (a, b) where
ppr (x,y) = parens (sep [ppr x <> comma, ppr y])
......
......@@ -205,6 +205,7 @@ Library
GHC.Cmm.Switch
GHC.Cmm.Switch.Implement
GHC.CmmToAsm
GHC.Cmm.LRegSet
GHC.CmmToAsm.BlockLayout
GHC.CmmToAsm.CFG
GHC.CmmToAsm.CFG.Dominators
......