From 8dd8a076058baca45ac52ace25b9c2797d61ef84 Mon Sep 17 00:00:00 2001 From: Cheng Shao <terrorjack@type.dance> Date: Wed, 22 May 2024 14:30:03 +0000 Subject: [PATCH] compiler: avoid saving foreign call target to local when there are no caller-save GlobalRegs This patch makes the STG->Cmm backend avoid saving foreign call target to local when there are no caller-save GlobalRegs. Since 321941a8ebe25192cdeece723e1058f2f47809ea, when we lower a foreign call, we unconditionally save the foreign call target to a temporary local first, then rely on cmmSink to clean it up later, which only happens with -fcmm-sink (implied by -O) and not in unoptimized code. And this is troublesome for the wasm backend NCG, which needs to infer a foreign call target symbol's type signature from the Cmm call site. Previously, the NCG has been emitting incorrect type signatures for unoptimized code, which happens to work with `wasm-ld` most of the time, but this is never future-proof against upstream toolchain updates, and it causes horrible breakages when LTO objects are included in linker input. Hence this patch. --- compiler/GHC/Driver/Config/StgToCmm.hs | 3 + compiler/GHC/StgToCmm/Config.hs | 2 + compiler/GHC/StgToCmm/Foreign.hs | 76 +++++++++++++++++++++++--- 3 files changed, 73 insertions(+), 8 deletions(-) diff --git a/compiler/GHC/Driver/Config/StgToCmm.hs b/compiler/GHC/Driver/Config/StgToCmm.hs index c249f5f0c9f1..5411861a5330 100644 --- a/compiler/GHC/Driver/Config/StgToCmm.hs +++ b/compiler/GHC/Driver/Config/StgToCmm.hs @@ -14,6 +14,7 @@ import GHC.Driver.Backend import GHC.Driver.Session import GHC.Platform import GHC.Platform.Profile +import GHC.Platform.Regs import GHC.Utils.Error import GHC.Unit.Module import GHC.Utils.Outputable @@ -84,6 +85,8 @@ initStgToCmmConfig dflags mod = StgToCmmConfig , stgToCmmAvx2 = isAvx2Enabled dflags , stgToCmmAvx512f = isAvx512fEnabled dflags , stgToCmmTickyAP = gopt Opt_Ticky_AP dflags + -- See Note [Saving foreign call target to local] + , stgToCmmSaveFCallTargetToLocal = any (callerSaves platform) $ activeStgRegs platform } where profile = targetProfile dflags platform = profilePlatform profile bk_end = backend dflags diff --git a/compiler/GHC/StgToCmm/Config.hs b/compiler/GHC/StgToCmm/Config.hs index da1338af61a3..608ec72706d5 100644 --- a/compiler/GHC/StgToCmm/Config.hs +++ b/compiler/GHC/StgToCmm/Config.hs @@ -73,6 +73,8 @@ data StgToCmmConfig = StgToCmmConfig , stgToCmmAllowWordMul2Instr :: !Bool -- ^ Allowed to generate WordMul2 instruction , stgToCmmAllowFMAInstr :: FMASign -> Bool -- ^ Allowed to generate FMA instruction , stgToCmmTickyAP :: !Bool -- ^ Disable use of precomputed standard thunks. + , stgToCmmSaveFCallTargetToLocal :: !Bool -- ^ Save a foreign call target to a Cmm local, see + -- Note [Saving foreign call target to local] for details ------------------------------ SIMD flags ------------------------------------ -- Each of these flags checks vector compatibility with the backend requested -- during compilation. In essence, this means checking for @-fllvm@ which is diff --git a/compiler/GHC/StgToCmm/Foreign.hs b/compiler/GHC/StgToCmm/Foreign.hs index 7d9173d123a5..7464eda718c2 100644 --- a/compiler/GHC/StgToCmm/Foreign.hs +++ b/compiler/GHC/StgToCmm/Foreign.hs @@ -277,23 +277,83 @@ load_target_into_temp (ForeignTarget expr conv) = do load_target_into_temp other_target@(PrimTarget _) = return other_target +-- Note [Saving foreign call target to local] +-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +-- -- What we want to do here is create a new temporary for the foreign -- call argument if it is not safe to use the expression directly, -- because the expression mentions caller-saves GlobalRegs (see -- Note [Register parameter passing]). -- -- However, we can't pattern-match on the expression here, because --- this is used in a loop by GHC.Cmm.Parser, and testing the expression --- results in a black hole. So we always create a temporary, and rely --- on GHC.Cmm.Sink to clean it up later. (Yuck, ToDo). The generated code --- ends up being the same, at least for the RTS .cmm code. +-- this is used in a loop by GHC.Cmm.Parser, and testing the +-- expression results in a black hole. So when there exist +-- caller-saves GlobalRegs, we create a temporary, and rely on +-- GHC.Cmm.Sink to clean it up later. The generated code ends up being +-- the same if -fcmm-sink is enabled (implied by -O). +-- +-- When there doesn't exist caller-save GlobalRegs, keep the original +-- target in place. This matters for the wasm backend, otherwise it +-- cannot infer the target symbol's correct foreign function type in +-- unoptimized Cmm. For instance: +-- +-- foreign import ccall unsafe "foo" c_foo :: IO () +-- +-- Without optimization, previously this would lower to something like: +-- +-- [Test.c_foo_entry() { // [] +-- { [] +-- } +-- {offset +-- cDk: +-- goto cDm; +-- cDm: +-- _cDj::I32 = foo; +-- call "ccall" arg hints: [] result hints: [] (_cDj::I32)(); +-- R1 = GHC.Tuple.()_closure+1; +-- call (I32[P32[Sp]])(R1) args: 4, res: 0, upd: 4; +-- } +-- }, -- +-- The wasm backend only sees "foo" being assigned to a local, but +-- there's no type signature associated with a CLabel! So it has to +-- emit a dummy .functype directive and fingers crossed that wasm-ld +-- tolerates function type mismatch. THis is horrible, not future +-- proof against upstream toolchain upgrades, and already known to +-- break in certain cases (e.g. when LTO objects are involved). +-- +-- Therefore, on wasm as well as other targets that don't risk +-- mentioning caller-saved GlobalRegs in a foreign call target, just +-- keep the original call target in place and don't assign it to a +-- local. So this would now lower to something like: +-- +-- [Test.c_foo_entry() { // [] +-- { [] +-- } +-- {offset +-- cDo: +-- goto cDq; +-- cDq: +-- call "ccall" arg hints: [] result hints: [] foo(); +-- R1 = GHC.Tuple.()_closure+1; +-- call (I32[P32[Sp]])(R1) args: 4, res: 0, upd: 4; +-- } +-- }, +-- +-- Since "foo" appears at call site directly, the wasm backend would +-- now be able to infer its type signature correctly. + maybe_assign_temp :: CmmExpr -> FCode CmmExpr maybe_assign_temp e = do - platform <- getPlatform - reg <- newTemp (cmmExprType platform e) - emitAssign (CmmLocal reg) e - return (CmmReg (CmmLocal reg)) + do_save <- stgToCmmSaveFCallTargetToLocal <$> getStgToCmmConfig + if do_save + then do + platform <- getPlatform + reg <- newTemp (cmmExprType platform e) + emitAssign (CmmLocal reg) e + return (CmmReg (CmmLocal reg)) + else + pure e -- ----------------------------------------------------------------------------- -- Save/restore the thread state in the TSO -- GitLab