From 49f5264bbebfa1c30ecbcdd9590c9bb76ac58d53 Mon Sep 17 00:00:00 2001
From: Andreas Klebinger <klebinger.andreas@gmx.at>
Date: Thu, 19 Jan 2023 15:29:11 +0100
Subject: [PATCH] Properly compute unpacked sizes for
 -funpack-small-strict-fields.

Use rep size rather than rep count to compute the size.

Fixes #22309
---
 compiler/GHC/Core/TyCon.hs                    | 34 ++++++-
 compiler/GHC/Types/Id/Make.hs                 | 25 +++++-
 docs/users_guide/9.10.1-notes.rst             | 10 +++
 docs/users_guide/using-optimisation.rst       |  6 +-
 .../tests/simplCore/should_compile/T22309.hs  | 35 ++++++++
 .../simplCore/should_compile/T22309.stderr    | 88 +++++++++++++++++++
 .../tests/simplCore/should_compile/all.T      |  1 +
 7 files changed, 192 insertions(+), 7 deletions(-)
 create mode 100644 testsuite/tests/simplCore/should_compile/T22309.hs
 create mode 100644 testsuite/tests/simplCore/should_compile/T22309.stderr

diff --git a/compiler/GHC/Core/TyCon.hs b/compiler/GHC/Core/TyCon.hs
index f6c2229b20e4..830873a81f00 100644
--- a/compiler/GHC/Core/TyCon.hs
+++ b/compiler/GHC/Core/TyCon.hs
@@ -128,8 +128,8 @@ module GHC.Core.TyCon(
         PrimRep(..), PrimElemRep(..), Levity(..),
         primElemRepToPrimRep,
         isVoidRep, isGcPtrRep,
-        primRepSizeB,
-        primElemRepSizeB,
+        primRepSizeB, primRepSizeW64_B,
+        primElemRepSizeB, primElemRepSizeW64_B,
         primRepIsFloat,
         primRepsCompatible,
         primRepCompatible,
@@ -1679,9 +1679,39 @@ primRepSizeB platform = \case
    VoidRep          -> 0
    (VecRep len rep) -> len * primElemRepSizeB platform rep
 
+-- | Like primRepSizeB but assumes pointers/words are 8 words wide.
+--
+-- This can be useful to compute the size of a rep as if we were compiling
+-- for a 64bit platform.
+primRepSizeW64_B :: PrimRep -> Int
+primRepSizeW64_B = \case
+   IntRep           -> 8
+   WordRep          -> 8
+   Int8Rep          -> 1
+   Int16Rep         -> 2
+   Int32Rep         -> 4
+   Int64Rep         -> 8
+   Word8Rep         -> 1
+   Word16Rep        -> 2
+   Word32Rep        -> 4
+   Word64Rep        -> 8
+   FloatRep         -> fLOAT_SIZE
+   DoubleRep        -> dOUBLE_SIZE
+   AddrRep          -> 8
+   BoxedRep{}       -> 8
+   VoidRep          -> 0
+   (VecRep len rep) -> len * primElemRepSizeW64_B rep
+
 primElemRepSizeB :: Platform -> PrimElemRep -> Int
 primElemRepSizeB platform = primRepSizeB platform . primElemRepToPrimRep
 
+-- | Like primElemRepSizeB but assumes pointers/words are 8 words wide.
+--
+-- This can be useful to compute the size of a rep as if we were compiling
+-- for a 64bit platform.
+primElemRepSizeW64_B :: PrimElemRep -> Int
+primElemRepSizeW64_B = primRepSizeW64_B . primElemRepToPrimRep
+
 primElemRepToPrimRep :: PrimElemRep -> PrimRep
 primElemRepToPrimRep Int8ElemRep   = Int8Rep
 primElemRepToPrimRep Int16ElemRep  = Int16Rep
diff --git a/compiler/GHC/Types/Id/Make.hs b/compiler/GHC/Types/Id/Make.hs
index e285c0954eae..0ff8a51cd1cf 100644
--- a/compiler/GHC/Types/Id/Make.hs
+++ b/compiler/GHC/Types/Id/Make.hs
@@ -67,7 +67,7 @@ import GHC.Core.DataCon
 
 import GHC.Types.Literal
 import GHC.Types.SourceText
-import GHC.Types.RepType ( countFunRepArgs )
+import GHC.Types.RepType ( countFunRepArgs, typePrimRep )
 import GHC.Types.Name.Set
 import GHC.Types.Name
 import GHC.Types.Name.Env
@@ -1517,16 +1517,29 @@ shouldUnpackArgTy bang_opts prag fam_envs arg_ty
           | otherwise   -- Wrinkle (W4) of Note [Recursive unboxing]
           -> bang_opt_unbox_strict bang_opts
              || (bang_opt_unbox_small bang_opts
-                 && rep_tys `lengthAtMost` 1)  -- See Note [Unpack one-wide fields]
+                 && is_small_rep)  -- See Note [Unpack one-wide fields]
       where
         (rep_tys, _) = dataConArgUnpack arg_ty
 
+        -- Takes in the list of reps used to represent the dataCon after it's unpacked
+        -- and tells us if they can fit into 8 bytes. See Note [Unpack one-wide fields]
+        is_small_rep =
+          let -- Neccesary to look through unboxed tuples.
+              prim_reps = concatMap (typePrimRep . scaledThing . fst) $ rep_tys
+              -- Void types are erased when unpacked so we
+              nv_prim_reps = filter (not . isVoidRep) prim_reps
+              -- And then get the actual size of the unpacked constructor.
+              rep_size = sum $ map primRepSizeW64_B nv_prim_reps
+          in rep_size <= 8
+
     is_sum :: [DataCon] -> Bool
     -- We never unpack sum types automatically
     -- (Product types, we do. Empty types are weeded out by unpackable_type_datacons.)
     is_sum (_:_:_) = True
     is_sum _       = False
 
+
+
 -- Given a type already assumed to have been normalized by topNormaliseType,
 -- unpackable_type_datacons ty = Just datacons
 -- iff ty is of the form
@@ -1585,6 +1598,14 @@ However
 
 Here we can represent T with an Int#.
 
+Special care has to be taken to make sure we don't mistake fields with unboxed
+tuple/sum rep or very large reps. See #22309
+
+For consistency we unpack anything that fits into 8 bytes on a 64-bit platform,
+even when compiling for 32bit platforms. This way unpacking decisions will be the
+same for 32bit and 64bit systems. To do so we use primRepSizeW64_B instead of
+primRepSizeB. See also the tests in test case T22309.
+
 Note [Recursive unboxing]
 ~~~~~~~~~~~~~~~~~~~~~~~~~
 Consider
diff --git a/docs/users_guide/9.10.1-notes.rst b/docs/users_guide/9.10.1-notes.rst
index 969bfca6426a..08b9d740b1b2 100644
--- a/docs/users_guide/9.10.1-notes.rst
+++ b/docs/users_guide/9.10.1-notes.rst
@@ -73,6 +73,16 @@ Compiler
 - Defaulting plugins can now propose solutions to entangled sets of type variables. This allows defaulting
   of multi-parameter type classes. See :ghc-ticket:`23832`.
 
+- The flag `-funbox-small-strict-fields` will now properly recognize unboxed tuples
+  containing multiple elements as large. Constructors like `Foo (# Int64, Int64# )`
+  will no longer be considered small and therefore not unboxed by default under `-O`
+  even when used as strict field. :ghc-ticket:`22309`.
+
+- The flag `-funbox-small-strict-fields` will now always unpack things as if compiling
+  for a 64bit platform. Even when generating code for a 32bit platform.
+  This makes core optimizations more consistent between 32bit and 64bit platforms
+  at the cost of slightly worse 32bit performance in edge cases.
+
 GHCi
 ~~~~
 
diff --git a/docs/users_guide/using-optimisation.rst b/docs/users_guide/using-optimisation.rst
index a0e925ba0952..e619c9a6c1ed 100644
--- a/docs/users_guide/using-optimisation.rst
+++ b/docs/users_guide/using-optimisation.rst
@@ -1533,9 +1533,9 @@ as such you shouldn't need to set any of them explicitly. A flag
     default you can disable it for certain constructor fields using the
     ``NOUNPACK`` pragma (see :ref:`nounpack-pragma`).
 
-    Note that for consistency ``Double``, ``Word64``, and ``Int64``
-    constructor fields are unpacked on 32-bit platforms, even though
-    they are technically larger than a pointer on those platforms.
+    Note that for consistency constructor fields are unpacked on 32-bit platforms
+    as if it we were compiling for a 64-bit target even if fields are larger
+    than a pointer on those platforms.
 
 .. ghc-flag:: -funbox-strict-fields
     :shortdesc: Flatten strict constructor fields
diff --git a/testsuite/tests/simplCore/should_compile/T22309.hs b/testsuite/tests/simplCore/should_compile/T22309.hs
new file mode 100644
index 000000000000..085370b31d3e
--- /dev/null
+++ b/testsuite/tests/simplCore/should_compile/T22309.hs
@@ -0,0 +1,35 @@
+{-# LANGUAGE MagicHash #-}
+{-# LANGUAGE UnboxedTuples #-}
+
+module ShouldCompile where
+
+import GHC.Int
+import GHC.Exts
+
+-- These should unbox into another constructor
+data UA = Mk_A !Int
+data UB = Mk_B !Int64
+data UC = Mk_C !Int32
+data UD = Mk_D !Int32 !Int32
+data UE = Mk_E !(# Int# #)
+data UF = Mk_F !(# Double #)
+
+-- These should not be unpacked into another constructor.
+data NU_A = NU_MkA (# Int64, Int64 #)
+data NU_B = NU_MkB !Int64 !Int64
+
+-- The types we unbox into
+
+-- These should unpack their fields.
+data WU_A = MkW_A !UA
+data WU_B = MkW_B !UB
+data WU_C = MkW_C !UC
+data WU_D = MkW_D !UD
+data WU_E = MkW_E !UE
+data WU_F = MkW_F !UF
+
+-- These should not unpack their fields, as they are multiple words large.
+data WNU_A = MkW_NA !NU_A
+data WNU_B = MkW_NB !NU_B
+
+
diff --git a/testsuite/tests/simplCore/should_compile/T22309.stderr b/testsuite/tests/simplCore/should_compile/T22309.stderr
new file mode 100644
index 000000000000..ac0c768688ac
--- /dev/null
+++ b/testsuite/tests/simplCore/should_compile/T22309.stderr
@@ -0,0 +1,88 @@
+
+==================== Final STG: ====================
+$WMkW_NB :: NU_B %1 -> WNU_B =
+    \r [conrep]
+        case conrep of conrep1 { __DEFAULT -> MkW_NB [conrep1]; };
+
+$WMkW_NA :: NU_A %1 -> WNU_A =
+    \r [conrep]
+        case conrep of conrep1 { __DEFAULT -> MkW_NA [conrep1]; };
+
+$WMkW_F :: UF %1 -> WU_F =
+    \r [conrep] case conrep of { Mk_F us -> MkW_F [us]; };
+
+$WMkW_E :: UE %1 -> WU_E =
+    \r [conrep] case conrep of { Mk_E us -> MkW_E [us]; };
+
+$WMkW_D :: UD %1 -> WU_D =
+    \r [conrep]
+        case conrep of { Mk_D unbx unbx1 -> MkW_D [unbx unbx1]; };
+
+$WMkW_C :: UC %1 -> WU_C =
+    \r [conrep] case conrep of { Mk_C unbx -> MkW_C [unbx]; };
+
+$WMkW_B :: UB %1 -> WU_B =
+    \r [conrep] case conrep of { Mk_B unbx -> MkW_B [unbx]; };
+
+$WMkW_A :: UA %1 -> WU_A =
+    \r [conrep] case conrep of { Mk_A unbx -> MkW_A [unbx]; };
+
+$WNU_MkB :: Int64 %1 -> Int64 %1 -> NU_B =
+    \r [conrep conrep1]
+        case conrep of {
+        I64# unbx ->
+        case conrep1 of { I64# unbx1 -> NU_MkB [unbx unbx1]; };
+        };
+
+$WMk_D :: Int32 %1 -> Int32 %1 -> UD =
+    \r [conrep conrep1]
+        case conrep of {
+        I32# unbx -> case conrep1 of { I32# unbx1 -> Mk_D [unbx unbx1]; };
+        };
+
+$WMk_C :: Int32 %1 -> UC =
+    \r [conrep] case conrep of { I32# unbx -> Mk_C [unbx]; };
+
+$WMk_B :: Int64 %1 -> UB =
+    \r [conrep] case conrep of { I64# unbx -> Mk_B [unbx]; };
+
+$WMk_A :: Int %1 -> UA =
+    \r [conrep] case conrep of { I# unbx -> Mk_A [unbx]; };
+
+MkW_NB :: NU_B %1 -> WNU_B =
+    \r [eta] case eta of eta { __DEFAULT -> MkW_NB [eta]; };
+
+MkW_NA :: NU_A %1 -> WNU_A =
+    \r [eta] case eta of eta { __DEFAULT -> MkW_NA [eta]; };
+
+MkW_F :: (# Double #) %1 -> WU_F = \r [us] MkW_F [us];
+
+MkW_E :: (# Int# #) %1 -> WU_E = \r [us] MkW_E [us];
+
+MkW_D :: Int32# %1 -> Int32# %1 -> WU_D =
+    \r [eta eta] MkW_D [eta eta];
+
+MkW_C :: Int32# %1 -> WU_C = \r [eta] MkW_C [eta];
+
+MkW_B :: Int64# %1 -> WU_B = \r [eta] MkW_B [eta];
+
+MkW_A :: Int# %1 -> WU_A = \r [eta] MkW_A [eta];
+
+NU_MkB :: Int64# %1 -> Int64# %1 -> NU_B =
+    \r [eta eta] NU_MkB [eta eta];
+
+NU_MkA :: (# Int, Int #) %1 -> NU_A = \r [us us] NU_MkA [us us];
+
+Mk_F :: (# Double #) %1 -> UF = \r [us] Mk_F [us];
+
+Mk_E :: (# Int# #) %1 -> UE = \r [us] Mk_E [us];
+
+Mk_D :: Int32# %1 -> Int32# %1 -> UD = \r [eta eta] Mk_D [eta eta];
+
+Mk_C :: Int32# %1 -> UC = \r [eta] Mk_C [eta];
+
+Mk_B :: Int64# %1 -> UB = \r [eta] Mk_B [eta];
+
+Mk_A :: Int# %1 -> UA = \r [eta] Mk_A [eta];
+
+
diff --git a/testsuite/tests/simplCore/should_compile/all.T b/testsuite/tests/simplCore/should_compile/all.T
index 3120a598d61d..b654e56fe1bc 100644
--- a/testsuite/tests/simplCore/should_compile/all.T
+++ b/testsuite/tests/simplCore/should_compile/all.T
@@ -488,6 +488,7 @@ test('T23307', normal, compile, ['-O -ddump-simpl -dno-typeable-binds -dsuppress
 test('T23307a', normal, compile, ['-O -ddump-simpl -dno-typeable-binds -dsuppress-uniques'])
 test('T23307b', normal, compile, ['-O'])
 test('T23307c', normal, compile, ['-O'])
+test('T22309', [grep_errmsg(r'MkW'), only_ways(['optasm']) ], compile, ['-O -ddump-stg-final -dsuppress-uniques -dsuppress-all -dno-typeable-binds -dno-suppress-type-signatures -dsuppress-module-prefixes'])
 test('T23426', normal, compile, ['-O'])
 test('T23491a', [extra_files(['T23491.hs']), grep_errmsg(r'Float out')], multimod_compile, ['T23491', '-ffull-laziness -ddump-full-laziness'])
 test('T23491b', [extra_files(['T23491.hs']), grep_errmsg(r'Float inwards')], multimod_compile, ['T23491', '-ffloat-in -ddump-float-in'])
-- 
GitLab