From 43fb21b6ae30133d5ed3fab3453305721679e2b7 Mon Sep 17 00:00:00 2001 From: Cheng Shao <terrorjack@type.dance> Date: Wed, 22 May 2024 14:30:03 +0000 Subject: [PATCH] compiler: avoid saving foreign call target to local when there are no caller-save GlobalRegs This patch makes the STG->Cmm backend avoid saving foreign call target to local when there are no caller-save GlobalRegs. Since 321941a8ebe25192cdeece723e1058f2f47809ea, when we lower a foreign call, we unconditionally save the foreign call target to a temporary local first, then rely on cmmSink to clean it up later, which only happens with -fcmm-sink (implied by -O) and not in unoptimized code. And this is troublesome for the wasm backend NCG, which needs to infer a foreign call target symbol's type signature from the Cmm call site. Previously, the NCG has been emitting incorrect type signatures for unoptimized code, which happens to work with `wasm-ld` most of the time, but this is never future-proof against upstream toolchain updates, and it causes horrible breakages when LTO objects are included in linker input. Hence this patch. (cherry picked from commit 8dd8a076058baca45ac52ace25b9c2797d61ef84) (cherry picked from commit 2b2a3db8a92f4a39052b67b38af79c26117f3d53) --- compiler/GHC/Driver/Config/StgToCmm.hs | 3 + compiler/GHC/StgToCmm/Config.hs | 2 + compiler/GHC/StgToCmm/Foreign.hs | 76 +++++++++++++++++++++++--- 3 files changed, 73 insertions(+), 8 deletions(-) diff --git a/compiler/GHC/Driver/Config/StgToCmm.hs b/compiler/GHC/Driver/Config/StgToCmm.hs index 87c72753349..fc17ae7404a 100644 --- a/compiler/GHC/Driver/Config/StgToCmm.hs +++ b/compiler/GHC/Driver/Config/StgToCmm.hs @@ -14,6 +14,7 @@ import GHC.Driver.Backend import GHC.Driver.Session import GHC.Platform import GHC.Platform.Profile +import GHC.Platform.Regs import GHC.Utils.Error import GHC.Unit.Module import GHC.Utils.Outputable @@ -81,6 +82,8 @@ initStgToCmmConfig dflags mod = StgToCmmConfig , stgToCmmAvx2 = isAvx2Enabled dflags , stgToCmmAvx512f = isAvx512fEnabled dflags , stgToCmmTickyAP = gopt Opt_Ticky_AP dflags + -- See Note [Saving foreign call target to local] + , stgToCmmSaveFCallTargetToLocal = any (callerSaves platform) $ activeStgRegs platform } where profile = targetProfile dflags platform = profilePlatform profile bk_end = backend dflags diff --git a/compiler/GHC/StgToCmm/Config.hs b/compiler/GHC/StgToCmm/Config.hs index 1274782725d..6817256d4e5 100644 --- a/compiler/GHC/StgToCmm/Config.hs +++ b/compiler/GHC/StgToCmm/Config.hs @@ -70,6 +70,8 @@ data StgToCmmConfig = StgToCmmConfig , stgToCmmAllowIntMul2Instr :: !Bool -- ^ Allowed to generate IntMul2 instruction , stgToCmmAllowFMAInstr :: FMASign -> Bool -- ^ Allowed to generate FMA instruction , stgToCmmTickyAP :: !Bool -- ^ Disable use of precomputed standard thunks. + , stgToCmmSaveFCallTargetToLocal :: !Bool -- ^ Save a foreign call target to a Cmm local, see + -- Note [Saving foreign call target to local] for details ------------------------------ SIMD flags ------------------------------------ -- Each of these flags checks vector compatibility with the backend requested -- during compilation. In essence, this means checking for @-fllvm@ which is diff --git a/compiler/GHC/StgToCmm/Foreign.hs b/compiler/GHC/StgToCmm/Foreign.hs index 7d9173d123a..7464eda718c 100644 --- a/compiler/GHC/StgToCmm/Foreign.hs +++ b/compiler/GHC/StgToCmm/Foreign.hs @@ -277,23 +277,83 @@ load_target_into_temp (ForeignTarget expr conv) = do load_target_into_temp other_target@(PrimTarget _) = return other_target +-- Note [Saving foreign call target to local] +-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +-- -- What we want to do here is create a new temporary for the foreign -- call argument if it is not safe to use the expression directly, -- because the expression mentions caller-saves GlobalRegs (see -- Note [Register parameter passing]). -- -- However, we can't pattern-match on the expression here, because --- this is used in a loop by GHC.Cmm.Parser, and testing the expression --- results in a black hole. So we always create a temporary, and rely --- on GHC.Cmm.Sink to clean it up later. (Yuck, ToDo). The generated code --- ends up being the same, at least for the RTS .cmm code. +-- this is used in a loop by GHC.Cmm.Parser, and testing the +-- expression results in a black hole. So when there exist +-- caller-saves GlobalRegs, we create a temporary, and rely on +-- GHC.Cmm.Sink to clean it up later. The generated code ends up being +-- the same if -fcmm-sink is enabled (implied by -O). +-- +-- When there doesn't exist caller-save GlobalRegs, keep the original +-- target in place. This matters for the wasm backend, otherwise it +-- cannot infer the target symbol's correct foreign function type in +-- unoptimized Cmm. For instance: +-- +-- foreign import ccall unsafe "foo" c_foo :: IO () +-- +-- Without optimization, previously this would lower to something like: +-- +-- [Test.c_foo_entry() { // [] +-- { [] +-- } +-- {offset +-- cDk: +-- goto cDm; +-- cDm: +-- _cDj::I32 = foo; +-- call "ccall" arg hints: [] result hints: [] (_cDj::I32)(); +-- R1 = GHC.Tuple.()_closure+1; +-- call (I32[P32[Sp]])(R1) args: 4, res: 0, upd: 4; +-- } +-- }, -- +-- The wasm backend only sees "foo" being assigned to a local, but +-- there's no type signature associated with a CLabel! So it has to +-- emit a dummy .functype directive and fingers crossed that wasm-ld +-- tolerates function type mismatch. THis is horrible, not future +-- proof against upstream toolchain upgrades, and already known to +-- break in certain cases (e.g. when LTO objects are involved). +-- +-- Therefore, on wasm as well as other targets that don't risk +-- mentioning caller-saved GlobalRegs in a foreign call target, just +-- keep the original call target in place and don't assign it to a +-- local. So this would now lower to something like: +-- +-- [Test.c_foo_entry() { // [] +-- { [] +-- } +-- {offset +-- cDo: +-- goto cDq; +-- cDq: +-- call "ccall" arg hints: [] result hints: [] foo(); +-- R1 = GHC.Tuple.()_closure+1; +-- call (I32[P32[Sp]])(R1) args: 4, res: 0, upd: 4; +-- } +-- }, +-- +-- Since "foo" appears at call site directly, the wasm backend would +-- now be able to infer its type signature correctly. + maybe_assign_temp :: CmmExpr -> FCode CmmExpr maybe_assign_temp e = do - platform <- getPlatform - reg <- newTemp (cmmExprType platform e) - emitAssign (CmmLocal reg) e - return (CmmReg (CmmLocal reg)) + do_save <- stgToCmmSaveFCallTargetToLocal <$> getStgToCmmConfig + if do_save + then do + platform <- getPlatform + reg <- newTemp (cmmExprType platform e) + emitAssign (CmmLocal reg) e + return (CmmReg (CmmLocal reg)) + else + pure e -- ----------------------------------------------------------------------------- -- Save/restore the thread state in the TSO -- GitLab