diff --git a/compiler/GHC/StgToCmm/Bind.hs b/compiler/GHC/StgToCmm/Bind.hs index e7c3d3781b5034dedd73dd63fdea028d7532fc0d..d12d73208922966f88b6a4aa7356717bf0ace207 100644 --- a/compiler/GHC/StgToCmm/Bind.hs +++ b/compiler/GHC/StgToCmm/Bind.hs @@ -572,7 +572,13 @@ closureCodeBody top_lvl bndr cl_info cc args@(arg0:_) body fv_details -- Extend reader monad with information that -- self-recursive tail calls can be optimized into local -- jumps. See Note [Self-recursive tail calls] in GHC.StgToCmm.Expr. - ; withSelfLoop (bndr, loop_header_id, arg_regs) $ do + ; let !self_loop_info = MkSelfLoopInfo + { sli_id = bndr + , sli_arity = arity + , sli_header_block = loop_header_id + , sli_registers = arg_regs + } + ; withSelfLoop self_loop_info $ do { -- Main payload ; entryHeapCheck cl_info node' arity arg_regs $ do diff --git a/compiler/GHC/StgToCmm/Closure.hs b/compiler/GHC/StgToCmm/Closure.hs index ad7377da48733e7a41755050c03f1f42964aa02a..93602825fc2d7bcf97a269be87857543fbb4dd9b 100644 --- a/compiler/GHC/StgToCmm/Closure.hs +++ b/compiler/GHC/StgToCmm/Closure.hs @@ -93,7 +93,6 @@ import GHC.Types.RepType import GHC.Types.Basic import GHC.Utils.Outputable import GHC.Utils.Panic -import GHC.Utils.Misc import GHC.Data.Maybe (isNothing) import Data.Coerce (coerce) @@ -535,12 +534,12 @@ instance Outputable CallMethod where getCallMethod :: StgToCmmConfig -> Name -- Function being applied - -> Id -- Function Id used to chech if it can refer to + -> Id -- Function Id used to check if it can refer to -- CAF's and whether the function is tail-calling -- itself -> LambdaFormInfo -- Its info -> RepArity -- Number of available arguments - -> RepArity -- Number of them being void arguments + -- (including void args) -> CgLoc -- Passed in from cgIdApp so that we can -- handle let-no-escape bindings and self-recursive -- tail calls using the same data constructor, @@ -549,19 +548,22 @@ getCallMethod :: StgToCmmConfig -> Maybe SelfLoopInfo -- can we perform a self-recursive tail-call -> CallMethod -getCallMethod cfg _ id _ n_args v_args _cg_loc (Just (self_loop_id, block_id, args)) +getCallMethod cfg _ id _ n_args _cg_loc (Just self_loop) | stgToCmmLoopification cfg - , id == self_loop_id - , args `lengthIs` (n_args - v_args) + , MkSelfLoopInfo + { sli_id = loop_id, sli_arity = arity + , sli_header_block = blk_id, sli_registers = arg_regs + } <- self_loop + , id == loop_id + , n_args == arity -- If these patterns match then we know that: -- * loopification optimisation is turned on -- * function is performing a self-recursive call in a tail position - -- * number of non-void parameters of the function matches functions arity. - -- See Note [Self-recursive tail calls] and Note [Void arguments in - -- self-recursive tail calls] in GHC.StgToCmm.Expr for more details - = JumpToIt block_id args + -- * number of parameters matches the function's arity. + -- See Note [Self-recursive tail calls] in GHC.StgToCmm.Expr for more details + = JumpToIt blk_id arg_regs -getCallMethod cfg name id (LFReEntrant _ arity _ _) n_args _v_args _cg_loc _self_loop_info +getCallMethod cfg name id (LFReEntrant _ arity _ _) n_args _cg_loc _self_loop_info | n_args == 0 -- No args at all && not (profileIsProfiling (stgToCmmProfile cfg)) -- See Note [Evaluating functions with profiling] in rts/Apply.cmm @@ -569,16 +571,16 @@ getCallMethod cfg name id (LFReEntrant _ arity _ _) n_args _v_args _cg_loc _self | n_args < arity = SlowCall -- Not enough args | otherwise = DirectEntry (enterIdLabel (stgToCmmPlatform cfg) name (idCafInfo id)) arity -getCallMethod _ _name _ LFUnlifted n_args _v_args _cg_loc _self_loop_info +getCallMethod _ _name _ LFUnlifted n_args _cg_loc _self_loop_info = assert (n_args == 0) ReturnIt -getCallMethod _ _name _ (LFCon _) n_args _v_args _cg_loc _self_loop_info +getCallMethod _ _name _ (LFCon _) n_args _cg_loc _self_loop_info = assert (n_args == 0) ReturnIt -- n_args=0 because it'd be ill-typed to apply a saturated -- constructor application to anything getCallMethod cfg name id (LFThunk _ _ updatable std_form_info is_fun) - n_args _v_args _cg_loc _self_loop_info + n_args _cg_loc _self_loop_info | Just sig <- idTagSig_maybe id , isTaggedSig sig -- Infered to be already evaluated by Tag Inference @@ -616,7 +618,7 @@ getCallMethod cfg name id (LFThunk _ _ updatable std_form_info is_fun) updatable) 0 -- Imported(Unknown) Ids -getCallMethod cfg name id (LFUnknown might_be_a_function) n_args _v_args _cg_locs _self_loop_info +getCallMethod cfg name id (LFUnknown might_be_a_function) n_args _cg_locs _self_loop_info | n_args == 0 , Just sig <- idTagSig_maybe id , isTaggedSig sig -- Infered to be already evaluated by Tag Inference @@ -633,14 +635,14 @@ getCallMethod cfg name id (LFUnknown might_be_a_function) n_args _v_args _cg_loc EnterIt -- Not a function -- TODO: Redundant with above match? --- getCallMethod _ name _ (LFUnknown False) n_args _v_args _cg_loc _self_loop_info +-- getCallMethod _ name _ (LFUnknown False) n_args _cg_loc _self_loop_info -- = assertPpr (n_args == 0) (ppr name <+> ppr n_args) -- EnterIt -- Not a function -getCallMethod _ _name _ LFLetNoEscape _n_args _v_args (LneLoc blk_id lne_regs) _self_loop_info +getCallMethod _ _name _ LFLetNoEscape _n_args (LneLoc blk_id lne_regs) _self_loop_info = JumpToIt blk_id lne_regs -getCallMethod _ _ _ _ _ _ _ _ = panic "Unknown call method" +getCallMethod _ _ _ _ _ _ _ = panic "Unknown call method" ----------------------------------------------------------------------------- -- Data types for closure information diff --git a/compiler/GHC/StgToCmm/Expr.hs b/compiler/GHC/StgToCmm/Expr.hs index 3e581c9981cdcdafdcbbc31c99899f48f134b60f..28e93a3d839b81426e0f4bd7b6a4838563441d11 100644 --- a/compiler/GHC/StgToCmm/Expr.hs +++ b/compiler/GHC/StgToCmm/Expr.hs @@ -1080,8 +1080,7 @@ cgIdApp fun_id args = do fun = idInfoToAmode fun_info lf_info = cg_lf fun_info n_args = length args - v_args = length $ filter (null . stgArgRep) args - case getCallMethod cfg fun_name fun_id lf_info n_args v_args (cg_loc fun_info) self_loop of + case getCallMethod cfg fun_name fun_id lf_info n_args (cg_loc fun_info) self_loop of -- A value in WHNF, so we can just return it. ReturnIt | isZeroBitTy (idType fun_id) -> emitReturn [] @@ -1176,12 +1175,14 @@ cgIdApp fun_id args = do -- -- Implementation is spread across a couple of places in the code: -- --- * FCode monad stores additional information in its reader environment --- (stgToCmmSelfLoop field). This information tells us which function can --- tail call itself in an optimized way (it is the function currently --- being compiled), what is the label of a loop header (L1 in example above) --- and information about local registers in which we should arguments --- before making a call (this would be a and b in example above). +-- * FCode monad stores additional information in its reader +-- environment (stgToCmmSelfLoop field). This `SelfLoopInfo` +-- record tells us which function can tail call itself in an +-- optimized way (it is the function currently being compiled), +-- its RepArity, what is the label of its loop header (L1 in +-- example above) and information about which local registers +-- should receive arguments when making a call (this would be a +-- and b in the example above). -- -- * Whenever we are compiling a function, we set that information to reflect -- the fact that function currently being compiled can be jumped to, instead @@ -1205,36 +1206,13 @@ cgIdApp fun_id args = do -- of call will be generated. getCallMethod decides to generate a self -- recursive tail call when (a) environment stores information about -- possible self tail-call; (b) that tail call is to a function currently --- being compiled; (c) number of passed non-void arguments is equal to --- function's arity. (d) loopification is turned on via -floopification --- command-line option. +-- being compiled; (c) number of passed arguments is equal to +-- function's unarised arity. (d) loopification is turned on via +-- -floopification command-line option. -- -- * Command line option to turn loopification on and off is implemented in -- DynFlags, then passed to StgToCmmConfig for this phase. --- --- --- Note [Void arguments in self-recursive tail calls] --- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ --- --- State# tokens can get in the way of the loopification optimization as seen in --- #11372. Consider this: --- --- foo :: [a] --- -> (a -> State# s -> (# State s, Bool #)) --- -> State# s --- -> (# State# s, Maybe a #) --- foo [] f s = (# s, Nothing #) --- foo (x:xs) f s = case f x s of --- (# s', b #) -> case b of --- True -> (# s', Just x #) --- False -> foo xs f s' --- --- We would like to compile the call to foo as a local jump instead of a call --- (see Note [Self-recursive tail calls]). However, the generated function has --- an arity of 2 while we apply it to 3 arguments, one of them being of void --- type. Thus, we mustn't count arguments of void type when checking whether --- we can turn a call into a self-recursive jump. --- + emitEnter :: CmmExpr -> FCode ReturnKind emitEnter fun = do diff --git a/compiler/GHC/StgToCmm/Heap.hs b/compiler/GHC/StgToCmm/Heap.hs index 1f9e0e68b1912046f7d14a01538f6c91e354a174..ff3ca734cefa75d6752414c25ba28ae3b639b913 100644 --- a/compiler/GHC/StgToCmm/Heap.hs +++ b/compiler/GHC/StgToCmm/Heap.hs @@ -635,7 +635,7 @@ do_checks mb_stk_hwm checkYield mb_alloc_lit do_gc = do -- See Note [Self-recursive loop header]. self_loop_info <- getSelfLoop case self_loop_info of - Just (_, loop_header_id, _) + Just MkSelfLoopInfo { sli_header_block = loop_header_id } | checkYield && isJust mb_stk_hwm -> emitLabel loop_header_id _otherwise -> return () diff --git a/compiler/GHC/StgToCmm/Monad.hs b/compiler/GHC/StgToCmm/Monad.hs index 614464befe29b5c2f0edf11736416f5bc8d699ff..43d9e58829235de9ab1b8d6446ae2b1ab6b321c9 100644 --- a/compiler/GHC/StgToCmm/Monad.hs +++ b/compiler/GHC/StgToCmm/Monad.hs @@ -42,6 +42,8 @@ module GHC.StgToCmm.Monad ( Sequel(..), ReturnKind(..), withSequel, getSequel, + SelfLoopInfo(..), + setTickyCtrLabel, getTickyCtrLabel, tickScope, getTickScope, @@ -298,7 +300,7 @@ data FCodeState = -- else the RTS will deadlock _and_ also experience a severe -- performance degradation , fcs_sequel :: !Sequel -- ^ What to do at end of basic block - , fcs_selfloop :: Maybe SelfLoopInfo -- ^ Which tail calls can be compiled as local jumps? + , fcs_selfloop :: !(Maybe SelfLoopInfo) -- ^ Which tail calls can be compiled as local jumps? -- See Note [Self-recursive tail calls] in GHC.StgToCmm.Expr , fcs_ticky :: !CLabel -- ^ Destination for ticky counts , fcs_tickscope :: !CmmTickScope -- ^ Tick scope for new blocks & ticks diff --git a/compiler/GHC/StgToCmm/Sequel.hs b/compiler/GHC/StgToCmm/Sequel.hs index e799e79678829b885755d82fd59f7839b8fd86ea..72986f542de6e3741d0105d67871c56f62ea1a7e 100644 --- a/compiler/GHC/StgToCmm/Sequel.hs +++ b/compiler/GHC/StgToCmm/Sequel.hs @@ -12,13 +12,14 @@ module GHC.StgToCmm.Sequel ( Sequel(..) - , SelfLoopInfo + , SelfLoopInfo(..) ) where import GHC.Cmm.BlockId import GHC.Cmm import GHC.Types.Id +import GHC.Types.Basic (RepArity) import GHC.Utils.Outputable import GHC.Prelude @@ -41,5 +42,14 @@ instance Outputable Sequel where ppr Return = text "Return" ppr (AssignTo regs b) = text "AssignTo" <+> ppr regs <+> ppr b -type SelfLoopInfo = (Id, BlockId, [LocalReg]) +data SelfLoopInfo = MkSelfLoopInfo + { sli_id :: !Id + , sli_arity :: !RepArity + -- ^ always equal to 'idFunRepArity' of sli_id, + -- i.e. unarised arity, including void arguments + , sli_registers :: ![LocalReg] + -- ^ Excludes void arguments (LocalReg is never void) + , sli_header_block :: !BlockId + } + -------------------------------------------------------------------------------- diff --git a/testsuite/tests/codeGen/should_run/T24295a.hs b/testsuite/tests/codeGen/should_run/T24295a.hs new file mode 100644 index 0000000000000000000000000000000000000000..2b5777dfad8724ce01ec9e201f3eeadd1143cc5c --- /dev/null +++ b/testsuite/tests/codeGen/should_run/T24295a.hs @@ -0,0 +1,20 @@ +module Main (main) where + +import Data.IORef (newIORef, readIORef, writeIORef) +import Control.Exception (evaluate) +import GHC.Exts (noinline) + +newtype Tricky = TrickyCon { unTrickyCon :: IO Tricky } + +main :: IO () +main = do + ref <- newIORef False + let + tricky :: Tricky + tricky = TrickyCon $ do + putStrLn "tricky call" + v <- readIORef ref + case v of + False -> writeIORef ref True >> evaluate (noinline tricky) + True -> putStrLn "this shouldn't be printed" >> pure tricky + () <$ unTrickyCon tricky diff --git a/testsuite/tests/codeGen/should_run/T24295a.stdout b/testsuite/tests/codeGen/should_run/T24295a.stdout new file mode 100644 index 0000000000000000000000000000000000000000..acad415b3eb1526c87b9962901b948da8a0da2a6 --- /dev/null +++ b/testsuite/tests/codeGen/should_run/T24295a.stdout @@ -0,0 +1 @@ +tricky call diff --git a/testsuite/tests/codeGen/should_run/T24295b.hs b/testsuite/tests/codeGen/should_run/T24295b.hs new file mode 100644 index 0000000000000000000000000000000000000000..ce335c2289881b844326132bf36baf9cee89fdb7 --- /dev/null +++ b/testsuite/tests/codeGen/should_run/T24295b.hs @@ -0,0 +1,28 @@ +{-# LANGUAGE GHC2021, UnboxedTuples #-} +module Main (main) where + +import Control.Exception + +newtype Tricky = TrickyCon { unTrickyCon :: (# #) -> Tricky } + +data StrictBox a = SBox !a !a + +main :: IO () +main = do + let + tricky :: Tricky + {-# OPAQUE tricky #-} + tricky = TrickyCon $ \(# #) -> TrickyCon $ \(# #) -> + error "tricky called with at least two args" + + applyToN :: Int -> Tricky -> Tricky + {-# OPAQUE applyToN #-} + applyToN n a | n == 0 = a + | otherwise = applyToN (n - 1) a `unTrickyCon` (# #) + + val = applyToN 12345 tricky + + v <- try @ErrorCall $ evaluate (SBox val val) + case v of + Left _ -> pure () + Right _ -> putStrLn "unreachable" diff --git a/testsuite/tests/codeGen/should_run/all.T b/testsuite/tests/codeGen/should_run/all.T index 31a2a2c67e70623eaaa6690503c3e3322ee0d476..5d4e36a3ae73ea637a3c9a00c3c57ea5e4b1b09c 100644 --- a/testsuite/tests/codeGen/should_run/all.T +++ b/testsuite/tests/codeGen/should_run/all.T @@ -241,3 +241,5 @@ test('MulMayOflo_full', multi_compile_and_run, ['MulMayOflo', [('MulMayOflo_full.cmm', '')], '']) test('T24264run', normal, compile_and_run, ['']) +test('T24295a', normal, compile_and_run, ['-O -floopification']) +test('T24295b', normal, compile_and_run, ['-O -floopification -fpedantic-bottoms'])