From 65256948f73cc9e6eb4b9b2b25c085ef7a72004e Mon Sep 17 00:00:00 2001 From: Simon Marlow Date: Wed, 7 Mar 2012 15:06:47 +0000 Subject: [PATCH] Improve the case-alternative heap checks The code we were generating for heap-checks in algebraic case alternatives wasn't working well with the common-block eliminator. A small tweak to make the heap-check failure jump back to the same place in all branches lets the common-block eliminator squash more code. --- compiler/codeGen/StgCmmExpr.hs | 71 ++++++++++++++++++++++++---------- compiler/codeGen/StgCmmHeap.hs | 50 ++++++++++++++++++------ 2 files changed, 89 insertions(+), 32 deletions(-) diff --git a/compiler/codeGen/StgCmmExpr.hs b/compiler/codeGen/StgCmmExpr.hs index ccc9e6b9c1..e682af0ced 100644 --- a/compiler/codeGen/StgCmmExpr.hs +++ b/compiler/codeGen/StgCmmExpr.hs @@ -278,8 +278,7 @@ Hence: two basic plans for data GcPlan = GcInAlts -- Put a GC check at the start the case alternatives, [LocalReg] -- which binds these registers - SRT -- using this SRT - | NoGcInAlts -- The scrutinee is a primitive value, or a call to a + | NoGcInAlts -- The scrutinee is a primitive value, or a call to a -- primitive op which does no GC. Absorb the allocation -- of the case alternative(s) into the upstream check @@ -297,7 +296,8 @@ cgCase (StgOpApp (StgPrimOp op) args _) bndr _srt (AlgAlt tycon) alts ; emitAssign (CmmLocal tmp_reg) (tagToClosure tycon tag_expr) } - ; (mb_deflt, branches) <- cgAlgAltRhss NoGcInAlts (NonVoid bndr) alts + ; (mb_deflt, branches) <- cgAlgAltRhss NoGcInAlts Nothing + (NonVoid bndr) alts ; emitSwitch tag_expr branches mb_deflt 0 (tyConFamilySize tycon - 1) } where @@ -400,7 +400,7 @@ cgCase (StgOpApp (StgPrimOp SeqOp) [StgVarArg a, _] _) bndr srt alt_type alts = -- handle seq#, same return convention as vanilla 'a'. cgCase (StgApp a []) bndr srt alt_type alts -cgCase scrut bndr srt alt_type alts +cgCase scrut bndr _srt alt_type alts = -- the general case do { up_hp_usg <- getVirtHp -- Upstream heap usage ; let ret_bndrs = chooseReturnBndrs bndr alt_type alts @@ -410,7 +410,7 @@ cgCase scrut bndr srt alt_type alts | isSingleton alts = False | up_hp_usg > 0 = False | otherwise = True - gc_plan = if gcInAlts then GcInAlts alt_regs srt else NoGcInAlts + gc_plan = if gcInAlts then GcInAlts alt_regs else NoGcInAlts ; mb_cc <- maybeSaveCostCentre simple_scrut ; withSequel (AssignTo alt_regs gcInAlts) (cgExpr scrut) @@ -468,14 +468,14 @@ chooseReturnBndrs _ _ _ = panic "chooseReturnBndrs" cgAlts :: GcPlan -> NonVoid Id -> AltType -> [StgAlt] -> FCode () -- At this point the result of the case are in the binders cgAlts gc_plan _bndr PolyAlt [(_, _, _, rhs)] - = maybeAltHeapCheck gc_plan (cgExpr rhs) + = maybeAltHeapCheck gc_plan Nothing (cgExpr rhs) cgAlts gc_plan _bndr (UbxTupAlt _) [(_, _, _, rhs)] - = maybeAltHeapCheck gc_plan (cgExpr rhs) + = maybeAltHeapCheck gc_plan Nothing (cgExpr rhs) -- Here bndrs are *already* in scope, so don't rebind them cgAlts gc_plan bndr (PrimAlt _) alts - = do { tagged_cmms <- cgAltRhss gc_plan bndr alts + = do { tagged_cmms <- cgAltRhss gc_plan Nothing bndr alts ; let bndr_reg = CmmLocal (idToReg bndr) (DEFAULT,deflt) = head tagged_cmms @@ -487,7 +487,11 @@ cgAlts gc_plan bndr (PrimAlt _) alts ; emitCmmLitSwitch (CmmReg bndr_reg) tagged_cmms' deflt } cgAlts gc_plan bndr (AlgAlt tycon) alts - = do { (mb_deflt, branches) <- cgAlgAltRhss gc_plan bndr alts + = do { retry_lbl <- newLabelC + ; emitLabel retry_lbl -- Note [alg-alt heap checks] + + ; (mb_deflt, branches) <- cgAlgAltRhss gc_plan (Just retry_lbl) + bndr alts ; let fam_sz = tyConFamilySize tycon bndr_reg = CmmLocal (idToReg bndr) @@ -512,12 +516,32 @@ cgAlts _ _ _ _ = panic "cgAlts" -- UbxTupAlt and PolyAlt have only one alternative +-- Note [alg-alt heap check] +-- +-- In an algebraic case with more than one alternative, we will have +-- code like +-- +-- L0: +-- x = R1 +-- goto L1 +-- L1: +-- if (x & 7 >= 2) then goto L2 else goto L3 +-- L2: +-- Hp = Hp + 16 +-- if (Hp > HpLim) then goto L4 +-- ... +-- L4: +-- call gc() returns to L5 +-- L5: +-- x = R1 +-- goto L1 + ------------------- -cgAlgAltRhss :: GcPlan -> NonVoid Id -> [StgAlt] +cgAlgAltRhss :: GcPlan -> Maybe BlockId -> NonVoid Id -> [StgAlt] -> FCode ( Maybe CmmAGraph , [(ConTagZ, CmmAGraph)] ) -cgAlgAltRhss gc_plan bndr alts - = do { tagged_cmms <- cgAltRhss gc_plan bndr alts +cgAlgAltRhss gc_plan retry_lbl bndr alts + = do { tagged_cmms <- cgAltRhss gc_plan retry_lbl bndr alts ; let { mb_deflt = case tagged_cmms of ((DEFAULT,rhs) : _) -> Just rhs @@ -533,22 +557,26 @@ cgAlgAltRhss gc_plan bndr alts ------------------- -cgAltRhss :: GcPlan -> NonVoid Id -> [StgAlt] -> FCode [(AltCon, CmmAGraph)] -cgAltRhss gc_plan bndr alts +cgAltRhss :: GcPlan -> Maybe BlockId -> NonVoid Id -> [StgAlt] + -> FCode [(AltCon, CmmAGraph)] +cgAltRhss gc_plan retry_lbl bndr alts = forkAlts (map cg_alt alts) where base_reg = idToReg bndr cg_alt :: StgAlt -> FCode (AltCon, CmmAGraph) cg_alt (con, bndrs, _uses, rhs) = getCodeR $ - maybeAltHeapCheck gc_plan $ + maybeAltHeapCheck gc_plan retry_lbl $ do { _ <- bindConArgs con base_reg bndrs ; cgExpr rhs ; return con } -maybeAltHeapCheck :: GcPlan -> FCode a -> FCode a -maybeAltHeapCheck NoGcInAlts code = code -maybeAltHeapCheck (GcInAlts regs _) code = altHeapCheck regs code +maybeAltHeapCheck :: GcPlan -> Maybe BlockId -> FCode a -> FCode a +maybeAltHeapCheck NoGcInAlts mlbl code = code +maybeAltHeapCheck (GcInAlts regs) mlbl code = + case mlbl of + Nothing -> altHeapCheck regs code + Just retry_lbl -> altHeapCheckReturnsTo regs retry_lbl code ----------------------------------------------------------------------------- -- Tail calls @@ -667,11 +695,14 @@ emitEnter fun = do ; let (off, copyin) = copyInOflow NativeReturn area res_regs (outArgs, copyout) = copyOutOflow NativeNodeCall Call area [fun] updfr_off (0,[]) - ; let entry = entryCode (closureInfoPtr fun) + -- refer to fun via nodeReg after the copyout, to avoid having + -- both live simultaneously; this sometimes enables fun to be + -- inlined in the RHS of the R1 assignment. + ; let entry = entryCode (closureInfoPtr (CmmReg nodeReg)) the_call = toCall entry (Just lret) updfr_off off outArgs ; emit $ copyout <*> - mkCbranch (cmmIsTagged fun) lret lcall <*> + mkCbranch (cmmIsTagged (CmmReg nodeReg)) lret lcall <*> outOfLine lcall the_call <*> mkLabel lret <*> copyin diff --git a/compiler/codeGen/StgCmmHeap.hs b/compiler/codeGen/StgCmmHeap.hs index 6533414703..37dc467862 100644 --- a/compiler/codeGen/StgCmmHeap.hs +++ b/compiler/codeGen/StgCmmHeap.hs @@ -10,7 +10,7 @@ module StgCmmHeap ( getVirtHp, setVirtHp, setRealHp, getHpRelOffset, hpRel, - entryHeapCheck, altHeapCheck, + entryHeapCheck, altHeapCheck, altHeapCheckReturnsTo, mkVirtHeapOffsets, mkVirtConstrOffsets, mkStaticClosureFields, mkStaticClosure, @@ -20,7 +20,6 @@ module StgCmmHeap ( #include "HsVersions.h" -import CmmType import StgSyn import CLabel import StgCmmLayout @@ -34,6 +33,7 @@ import StgCmmEnv import MkGraph +import Hoopl hiding ((<*>), mkBranch) import SMRep import Cmm import CmmUtils @@ -342,11 +342,12 @@ entryHeapCheck cl_info offset nodeSet arity args code args' = map (CmmReg . CmmLocal) args setN = case nodeSet of - Just n -> mkAssign nodeReg (CmmReg $ CmmLocal n) + Just n -> mkNop -- No need to assign R1, it already + -- points to the closure Nothing -> mkAssign nodeReg $ CmmLit (CmmLabel $ staticClosureLabel platform cl_info) - {- Thunks: Set R1 = node, jump GCEnter1 + {- Thunks: jump GCEnter1 Function (fast): Set R1 = node, jump GCFun Function (slow): Set R1 = node, call generic_gc -} gc_call upd = setN <*> gc_lbl upd @@ -361,7 +362,10 @@ entryHeapCheck cl_info offset nodeSet arity args code - GC calls, but until then this fishy code works -} updfr_sz <- getUpdFrameOff - heapCheck True (gc_call updfr_sz) code + + loop_id <- newLabelC + emitLabel loop_id + heapCheck True (gc_call updfr_sz <*> mkBranch loop_id) code {- -- This code is slightly outdated now and we could easily keep the above @@ -407,17 +411,24 @@ entryHeapCheck cl_info offset nodeSet arity args code -} --------------------------------------------------------------- --- A heap/stack check at in a case alternative +-- ------------------------------------------------------------ +-- A heap/stack check in a case alternative altHeapCheck :: [LocalReg] -> FCode a -> FCode a altHeapCheck regs code + = do loop_id <- newLabelC + emitLabel loop_id + altHeapCheckReturnsTo regs loop_id code + +altHeapCheckReturnsTo :: [LocalReg] -> Label -> FCode a -> FCode a +altHeapCheckReturnsTo regs retry_lbl code = do updfr_sz <- getUpdFrameOff gc_call_code <- gc_call updfr_sz - heapCheck False gc_call_code code + heapCheck False (gc_call_code <*> mkBranch retry_lbl) code where reg_exprs = map (CmmReg . CmmLocal) regs + -- Note [stg_gc arguments] gc_call sp = case rts_label regs of @@ -440,6 +451,23 @@ altHeapCheck regs code rts_label _ = Nothing +-- Note [stg_gc arguments] +-- It might seem that we could avoid passing the arguments to the +-- stg_gc function, because they are already in the right registers. +-- While this is usually the case, it isn't always. Sometimes the +-- code generator has cleverly avoided the eval in a case, e.g. in +-- ffi/should_run/4221.hs we found +-- +-- case a_r1mb of z +-- FunPtr x y -> ... +-- +-- where a_r1mb is bound a top-level constructor, and is known to be +-- evaluated. The codegen just assigns x, y and z, and continues; +-- R1 is never assigned. +-- +-- So we'll have to rely on optimisations to eliminatethese +-- assignments where possible. + -- | The generic GC procedure; no params, no results generic_gc :: CmmExpr @@ -466,9 +494,7 @@ do_checks :: Bool -- Should we check the stack? -> CmmAGraph -- What to do on failure -> FCode () do_checks checkStack alloc do_gc = do - loop_id <- newLabelC gc_id <- newLabelC - emitLabel loop_id hp_check <- if alloc == 0 then return mkNop else do @@ -483,8 +509,8 @@ do_checks checkStack alloc do_gc = do emitOutOfLine gc_id $ mkComment (mkFastString "outOfLine here") <*> - do_gc <*> - mkBranch loop_id + do_gc -- this is expected to jump back somewhere + -- Test for stack pointer exhaustion, then -- bump heap pointer, and test for heap exhaustion -- Note that we don't move the heap pointer unless the -- GitLab