From 8dd8a076058baca45ac52ace25b9c2797d61ef84 Mon Sep 17 00:00:00 2001
From: Cheng Shao <terrorjack@type.dance>
Date: Wed, 22 May 2024 14:30:03 +0000
Subject: [PATCH] compiler: avoid saving foreign call target to local when
 there are no caller-save GlobalRegs

This patch makes the STG->Cmm backend avoid saving foreign call target
to local when there are no caller-save GlobalRegs.

Since 321941a8ebe25192cdeece723e1058f2f47809ea, when we lower a
foreign call, we unconditionally save the foreign call target to a
temporary local first, then rely on cmmSink to clean it up later,
which only happens with -fcmm-sink (implied by -O) and not in
unoptimized code.

And this is troublesome for the wasm backend NCG, which needs to infer
a foreign call target symbol's type signature from the Cmm call site.
Previously, the NCG has been emitting incorrect type signatures for
unoptimized code, which happens to work with `wasm-ld` most of the
time, but this is never future-proof against upstream toolchain
updates, and it causes horrible breakages when LTO objects are
included in linker input. Hence this patch.
---
 compiler/GHC/Driver/Config/StgToCmm.hs |  3 +
 compiler/GHC/StgToCmm/Config.hs        |  2 +
 compiler/GHC/StgToCmm/Foreign.hs       | 76 +++++++++++++++++++++++---
 3 files changed, 73 insertions(+), 8 deletions(-)

diff --git a/compiler/GHC/Driver/Config/StgToCmm.hs b/compiler/GHC/Driver/Config/StgToCmm.hs
index c249f5f0c9f1..5411861a5330 100644
--- a/compiler/GHC/Driver/Config/StgToCmm.hs
+++ b/compiler/GHC/Driver/Config/StgToCmm.hs
@@ -14,6 +14,7 @@ import GHC.Driver.Backend
 import GHC.Driver.Session
 import GHC.Platform
 import GHC.Platform.Profile
+import GHC.Platform.Regs
 import GHC.Utils.Error
 import GHC.Unit.Module
 import GHC.Utils.Outputable
@@ -84,6 +85,8 @@ initStgToCmmConfig dflags mod = StgToCmmConfig
   , stgToCmmAvx2          = isAvx2Enabled                  dflags
   , stgToCmmAvx512f       = isAvx512fEnabled               dflags
   , stgToCmmTickyAP       = gopt Opt_Ticky_AP dflags
+  -- See Note [Saving foreign call target to local]
+  , stgToCmmSaveFCallTargetToLocal = any (callerSaves platform) $ activeStgRegs platform
   } where profile  = targetProfile dflags
           platform = profilePlatform profile
           bk_end  = backend dflags
diff --git a/compiler/GHC/StgToCmm/Config.hs b/compiler/GHC/StgToCmm/Config.hs
index da1338af61a3..608ec72706d5 100644
--- a/compiler/GHC/StgToCmm/Config.hs
+++ b/compiler/GHC/StgToCmm/Config.hs
@@ -73,6 +73,8 @@ data StgToCmmConfig = StgToCmmConfig
   , stgToCmmAllowWordMul2Instr        :: !Bool   -- ^ Allowed to generate WordMul2 instruction
   , stgToCmmAllowFMAInstr             :: FMASign -> Bool -- ^ Allowed to generate FMA instruction
   , stgToCmmTickyAP                   :: !Bool   -- ^ Disable use of precomputed standard thunks.
+  , stgToCmmSaveFCallTargetToLocal    :: !Bool   -- ^ Save a foreign call target to a Cmm local, see
+                                                 -- Note [Saving foreign call target to local] for details
   ------------------------------ SIMD flags ------------------------------------
   -- Each of these flags checks vector compatibility with the backend requested
   -- during compilation. In essence, this means checking for @-fllvm@ which is
diff --git a/compiler/GHC/StgToCmm/Foreign.hs b/compiler/GHC/StgToCmm/Foreign.hs
index 7d9173d123a5..7464eda718c2 100644
--- a/compiler/GHC/StgToCmm/Foreign.hs
+++ b/compiler/GHC/StgToCmm/Foreign.hs
@@ -277,23 +277,83 @@ load_target_into_temp (ForeignTarget expr conv) = do
 load_target_into_temp other_target@(PrimTarget _) =
   return other_target
 
+-- Note [Saving foreign call target to local]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+--
 -- What we want to do here is create a new temporary for the foreign
 -- call argument if it is not safe to use the expression directly,
 -- because the expression mentions caller-saves GlobalRegs (see
 -- Note [Register parameter passing]).
 --
 -- However, we can't pattern-match on the expression here, because
--- this is used in a loop by GHC.Cmm.Parser, and testing the expression
--- results in a black hole.  So we always create a temporary, and rely
--- on GHC.Cmm.Sink to clean it up later.  (Yuck, ToDo).  The generated code
--- ends up being the same, at least for the RTS .cmm code.
+-- this is used in a loop by GHC.Cmm.Parser, and testing the
+-- expression results in a black hole.  So when there exist
+-- caller-saves GlobalRegs, we create a temporary, and rely on
+-- GHC.Cmm.Sink to clean it up later. The generated code ends up being
+-- the same if -fcmm-sink is enabled (implied by -O).
+--
+-- When there doesn't exist caller-save GlobalRegs, keep the original
+-- target in place. This matters for the wasm backend, otherwise it
+-- cannot infer the target symbol's correct foreign function type in
+-- unoptimized Cmm. For instance:
+--
+-- foreign import ccall unsafe "foo" c_foo :: IO ()
+--
+-- Without optimization, previously this would lower to something like:
+--
+-- [Test.c_foo_entry() { //  []
+--          { []
+--          }
+--      {offset
+--        cDk:
+--            goto cDm;
+--        cDm:
+--            _cDj::I32 = foo;
+--            call "ccall" arg hints:  []  result hints:  [] (_cDj::I32)();
+--            R1 = GHC.Tuple.()_closure+1;
+--            call (I32[P32[Sp]])(R1) args: 4, res: 0, upd: 4;
+--      }
+--  },
 --
+-- The wasm backend only sees "foo" being assigned to a local, but
+-- there's no type signature associated with a CLabel! So it has to
+-- emit a dummy .functype directive and fingers crossed that wasm-ld
+-- tolerates function type mismatch. THis is horrible, not future
+-- proof against upstream toolchain upgrades, and already known to
+-- break in certain cases (e.g. when LTO objects are involved).
+--
+-- Therefore, on wasm as well as other targets that don't risk
+-- mentioning caller-saved GlobalRegs in a foreign call target, just
+-- keep the original call target in place and don't assign it to a
+-- local. So this would now lower to something like:
+--
+-- [Test.c_foo_entry() { //  []
+--          { []
+--          }
+--      {offset
+--        cDo:
+--            goto cDq;
+--        cDq:
+--            call "ccall" arg hints:  []  result hints:  [] foo();
+--            R1 = GHC.Tuple.()_closure+1;
+--            call (I32[P32[Sp]])(R1) args: 4, res: 0, upd: 4;
+--      }
+--  },
+--
+-- Since "foo" appears at call site directly, the wasm backend would
+-- now be able to infer its type signature correctly.
+
 maybe_assign_temp :: CmmExpr -> FCode CmmExpr
 maybe_assign_temp e = do
-  platform <- getPlatform
-  reg <- newTemp (cmmExprType platform e)
-  emitAssign (CmmLocal reg) e
-  return (CmmReg (CmmLocal reg))
+  do_save <- stgToCmmSaveFCallTargetToLocal <$> getStgToCmmConfig
+  if do_save
+    then do
+      platform <- getPlatform
+      reg <- newTemp (cmmExprType platform e)
+      emitAssign (CmmLocal reg) e
+      return (CmmReg (CmmLocal reg))
+    else
+      pure e
 
 -- -----------------------------------------------------------------------------
 -- Save/restore the thread state in the TSO
-- 
GitLab