From 49f5264bbebfa1c30ecbcdd9590c9bb76ac58d53 Mon Sep 17 00:00:00 2001 From: Andreas Klebinger <klebinger.andreas@gmx.at> Date: Thu, 19 Jan 2023 15:29:11 +0100 Subject: [PATCH] Properly compute unpacked sizes for -funpack-small-strict-fields. Use rep size rather than rep count to compute the size. Fixes #22309 --- compiler/GHC/Core/TyCon.hs | 34 ++++++- compiler/GHC/Types/Id/Make.hs | 25 +++++- docs/users_guide/9.10.1-notes.rst | 10 +++ docs/users_guide/using-optimisation.rst | 6 +- .../tests/simplCore/should_compile/T22309.hs | 35 ++++++++ .../simplCore/should_compile/T22309.stderr | 88 +++++++++++++++++++ .../tests/simplCore/should_compile/all.T | 1 + 7 files changed, 192 insertions(+), 7 deletions(-) create mode 100644 testsuite/tests/simplCore/should_compile/T22309.hs create mode 100644 testsuite/tests/simplCore/should_compile/T22309.stderr diff --git a/compiler/GHC/Core/TyCon.hs b/compiler/GHC/Core/TyCon.hs index f6c2229b20e4..830873a81f00 100644 --- a/compiler/GHC/Core/TyCon.hs +++ b/compiler/GHC/Core/TyCon.hs @@ -128,8 +128,8 @@ module GHC.Core.TyCon( PrimRep(..), PrimElemRep(..), Levity(..), primElemRepToPrimRep, isVoidRep, isGcPtrRep, - primRepSizeB, - primElemRepSizeB, + primRepSizeB, primRepSizeW64_B, + primElemRepSizeB, primElemRepSizeW64_B, primRepIsFloat, primRepsCompatible, primRepCompatible, @@ -1679,9 +1679,39 @@ primRepSizeB platform = \case VoidRep -> 0 (VecRep len rep) -> len * primElemRepSizeB platform rep +-- | Like primRepSizeB but assumes pointers/words are 8 words wide. +-- +-- This can be useful to compute the size of a rep as if we were compiling +-- for a 64bit platform. +primRepSizeW64_B :: PrimRep -> Int +primRepSizeW64_B = \case + IntRep -> 8 + WordRep -> 8 + Int8Rep -> 1 + Int16Rep -> 2 + Int32Rep -> 4 + Int64Rep -> 8 + Word8Rep -> 1 + Word16Rep -> 2 + Word32Rep -> 4 + Word64Rep -> 8 + FloatRep -> fLOAT_SIZE + DoubleRep -> dOUBLE_SIZE + AddrRep -> 8 + BoxedRep{} -> 8 + VoidRep -> 0 + (VecRep len rep) -> len * primElemRepSizeW64_B rep + primElemRepSizeB :: Platform -> PrimElemRep -> Int primElemRepSizeB platform = primRepSizeB platform . primElemRepToPrimRep +-- | Like primElemRepSizeB but assumes pointers/words are 8 words wide. +-- +-- This can be useful to compute the size of a rep as if we were compiling +-- for a 64bit platform. +primElemRepSizeW64_B :: PrimElemRep -> Int +primElemRepSizeW64_B = primRepSizeW64_B . primElemRepToPrimRep + primElemRepToPrimRep :: PrimElemRep -> PrimRep primElemRepToPrimRep Int8ElemRep = Int8Rep primElemRepToPrimRep Int16ElemRep = Int16Rep diff --git a/compiler/GHC/Types/Id/Make.hs b/compiler/GHC/Types/Id/Make.hs index e285c0954eae..0ff8a51cd1cf 100644 --- a/compiler/GHC/Types/Id/Make.hs +++ b/compiler/GHC/Types/Id/Make.hs @@ -67,7 +67,7 @@ import GHC.Core.DataCon import GHC.Types.Literal import GHC.Types.SourceText -import GHC.Types.RepType ( countFunRepArgs ) +import GHC.Types.RepType ( countFunRepArgs, typePrimRep ) import GHC.Types.Name.Set import GHC.Types.Name import GHC.Types.Name.Env @@ -1517,16 +1517,29 @@ shouldUnpackArgTy bang_opts prag fam_envs arg_ty | otherwise -- Wrinkle (W4) of Note [Recursive unboxing] -> bang_opt_unbox_strict bang_opts || (bang_opt_unbox_small bang_opts - && rep_tys `lengthAtMost` 1) -- See Note [Unpack one-wide fields] + && is_small_rep) -- See Note [Unpack one-wide fields] where (rep_tys, _) = dataConArgUnpack arg_ty + -- Takes in the list of reps used to represent the dataCon after it's unpacked + -- and tells us if they can fit into 8 bytes. See Note [Unpack one-wide fields] + is_small_rep = + let -- Neccesary to look through unboxed tuples. + prim_reps = concatMap (typePrimRep . scaledThing . fst) $ rep_tys + -- Void types are erased when unpacked so we + nv_prim_reps = filter (not . isVoidRep) prim_reps + -- And then get the actual size of the unpacked constructor. + rep_size = sum $ map primRepSizeW64_B nv_prim_reps + in rep_size <= 8 + is_sum :: [DataCon] -> Bool -- We never unpack sum types automatically -- (Product types, we do. Empty types are weeded out by unpackable_type_datacons.) is_sum (_:_:_) = True is_sum _ = False + + -- Given a type already assumed to have been normalized by topNormaliseType, -- unpackable_type_datacons ty = Just datacons -- iff ty is of the form @@ -1585,6 +1598,14 @@ However Here we can represent T with an Int#. +Special care has to be taken to make sure we don't mistake fields with unboxed +tuple/sum rep or very large reps. See #22309 + +For consistency we unpack anything that fits into 8 bytes on a 64-bit platform, +even when compiling for 32bit platforms. This way unpacking decisions will be the +same for 32bit and 64bit systems. To do so we use primRepSizeW64_B instead of +primRepSizeB. See also the tests in test case T22309. + Note [Recursive unboxing] ~~~~~~~~~~~~~~~~~~~~~~~~~ Consider diff --git a/docs/users_guide/9.10.1-notes.rst b/docs/users_guide/9.10.1-notes.rst index 969bfca6426a..08b9d740b1b2 100644 --- a/docs/users_guide/9.10.1-notes.rst +++ b/docs/users_guide/9.10.1-notes.rst @@ -73,6 +73,16 @@ Compiler - Defaulting plugins can now propose solutions to entangled sets of type variables. This allows defaulting of multi-parameter type classes. See :ghc-ticket:`23832`. +- The flag `-funbox-small-strict-fields` will now properly recognize unboxed tuples + containing multiple elements as large. Constructors like `Foo (# Int64, Int64# )` + will no longer be considered small and therefore not unboxed by default under `-O` + even when used as strict field. :ghc-ticket:`22309`. + +- The flag `-funbox-small-strict-fields` will now always unpack things as if compiling + for a 64bit platform. Even when generating code for a 32bit platform. + This makes core optimizations more consistent between 32bit and 64bit platforms + at the cost of slightly worse 32bit performance in edge cases. + GHCi ~~~~ diff --git a/docs/users_guide/using-optimisation.rst b/docs/users_guide/using-optimisation.rst index a0e925ba0952..e619c9a6c1ed 100644 --- a/docs/users_guide/using-optimisation.rst +++ b/docs/users_guide/using-optimisation.rst @@ -1533,9 +1533,9 @@ as such you shouldn't need to set any of them explicitly. A flag default you can disable it for certain constructor fields using the ``NOUNPACK`` pragma (see :ref:`nounpack-pragma`). - Note that for consistency ``Double``, ``Word64``, and ``Int64`` - constructor fields are unpacked on 32-bit platforms, even though - they are technically larger than a pointer on those platforms. + Note that for consistency constructor fields are unpacked on 32-bit platforms + as if it we were compiling for a 64-bit target even if fields are larger + than a pointer on those platforms. .. ghc-flag:: -funbox-strict-fields :shortdesc: Flatten strict constructor fields diff --git a/testsuite/tests/simplCore/should_compile/T22309.hs b/testsuite/tests/simplCore/should_compile/T22309.hs new file mode 100644 index 000000000000..085370b31d3e --- /dev/null +++ b/testsuite/tests/simplCore/should_compile/T22309.hs @@ -0,0 +1,35 @@ +{-# LANGUAGE MagicHash #-} +{-# LANGUAGE UnboxedTuples #-} + +module ShouldCompile where + +import GHC.Int +import GHC.Exts + +-- These should unbox into another constructor +data UA = Mk_A !Int +data UB = Mk_B !Int64 +data UC = Mk_C !Int32 +data UD = Mk_D !Int32 !Int32 +data UE = Mk_E !(# Int# #) +data UF = Mk_F !(# Double #) + +-- These should not be unpacked into another constructor. +data NU_A = NU_MkA (# Int64, Int64 #) +data NU_B = NU_MkB !Int64 !Int64 + +-- The types we unbox into + +-- These should unpack their fields. +data WU_A = MkW_A !UA +data WU_B = MkW_B !UB +data WU_C = MkW_C !UC +data WU_D = MkW_D !UD +data WU_E = MkW_E !UE +data WU_F = MkW_F !UF + +-- These should not unpack their fields, as they are multiple words large. +data WNU_A = MkW_NA !NU_A +data WNU_B = MkW_NB !NU_B + + diff --git a/testsuite/tests/simplCore/should_compile/T22309.stderr b/testsuite/tests/simplCore/should_compile/T22309.stderr new file mode 100644 index 000000000000..ac0c768688ac --- /dev/null +++ b/testsuite/tests/simplCore/should_compile/T22309.stderr @@ -0,0 +1,88 @@ + +==================== Final STG: ==================== +$WMkW_NB :: NU_B %1 -> WNU_B = + \r [conrep] + case conrep of conrep1 { __DEFAULT -> MkW_NB [conrep1]; }; + +$WMkW_NA :: NU_A %1 -> WNU_A = + \r [conrep] + case conrep of conrep1 { __DEFAULT -> MkW_NA [conrep1]; }; + +$WMkW_F :: UF %1 -> WU_F = + \r [conrep] case conrep of { Mk_F us -> MkW_F [us]; }; + +$WMkW_E :: UE %1 -> WU_E = + \r [conrep] case conrep of { Mk_E us -> MkW_E [us]; }; + +$WMkW_D :: UD %1 -> WU_D = + \r [conrep] + case conrep of { Mk_D unbx unbx1 -> MkW_D [unbx unbx1]; }; + +$WMkW_C :: UC %1 -> WU_C = + \r [conrep] case conrep of { Mk_C unbx -> MkW_C [unbx]; }; + +$WMkW_B :: UB %1 -> WU_B = + \r [conrep] case conrep of { Mk_B unbx -> MkW_B [unbx]; }; + +$WMkW_A :: UA %1 -> WU_A = + \r [conrep] case conrep of { Mk_A unbx -> MkW_A [unbx]; }; + +$WNU_MkB :: Int64 %1 -> Int64 %1 -> NU_B = + \r [conrep conrep1] + case conrep of { + I64# unbx -> + case conrep1 of { I64# unbx1 -> NU_MkB [unbx unbx1]; }; + }; + +$WMk_D :: Int32 %1 -> Int32 %1 -> UD = + \r [conrep conrep1] + case conrep of { + I32# unbx -> case conrep1 of { I32# unbx1 -> Mk_D [unbx unbx1]; }; + }; + +$WMk_C :: Int32 %1 -> UC = + \r [conrep] case conrep of { I32# unbx -> Mk_C [unbx]; }; + +$WMk_B :: Int64 %1 -> UB = + \r [conrep] case conrep of { I64# unbx -> Mk_B [unbx]; }; + +$WMk_A :: Int %1 -> UA = + \r [conrep] case conrep of { I# unbx -> Mk_A [unbx]; }; + +MkW_NB :: NU_B %1 -> WNU_B = + \r [eta] case eta of eta { __DEFAULT -> MkW_NB [eta]; }; + +MkW_NA :: NU_A %1 -> WNU_A = + \r [eta] case eta of eta { __DEFAULT -> MkW_NA [eta]; }; + +MkW_F :: (# Double #) %1 -> WU_F = \r [us] MkW_F [us]; + +MkW_E :: (# Int# #) %1 -> WU_E = \r [us] MkW_E [us]; + +MkW_D :: Int32# %1 -> Int32# %1 -> WU_D = + \r [eta eta] MkW_D [eta eta]; + +MkW_C :: Int32# %1 -> WU_C = \r [eta] MkW_C [eta]; + +MkW_B :: Int64# %1 -> WU_B = \r [eta] MkW_B [eta]; + +MkW_A :: Int# %1 -> WU_A = \r [eta] MkW_A [eta]; + +NU_MkB :: Int64# %1 -> Int64# %1 -> NU_B = + \r [eta eta] NU_MkB [eta eta]; + +NU_MkA :: (# Int, Int #) %1 -> NU_A = \r [us us] NU_MkA [us us]; + +Mk_F :: (# Double #) %1 -> UF = \r [us] Mk_F [us]; + +Mk_E :: (# Int# #) %1 -> UE = \r [us] Mk_E [us]; + +Mk_D :: Int32# %1 -> Int32# %1 -> UD = \r [eta eta] Mk_D [eta eta]; + +Mk_C :: Int32# %1 -> UC = \r [eta] Mk_C [eta]; + +Mk_B :: Int64# %1 -> UB = \r [eta] Mk_B [eta]; + +Mk_A :: Int# %1 -> UA = \r [eta] Mk_A [eta]; + + diff --git a/testsuite/tests/simplCore/should_compile/all.T b/testsuite/tests/simplCore/should_compile/all.T index 3120a598d61d..b654e56fe1bc 100644 --- a/testsuite/tests/simplCore/should_compile/all.T +++ b/testsuite/tests/simplCore/should_compile/all.T @@ -488,6 +488,7 @@ test('T23307', normal, compile, ['-O -ddump-simpl -dno-typeable-binds -dsuppress test('T23307a', normal, compile, ['-O -ddump-simpl -dno-typeable-binds -dsuppress-uniques']) test('T23307b', normal, compile, ['-O']) test('T23307c', normal, compile, ['-O']) +test('T22309', [grep_errmsg(r'MkW'), only_ways(['optasm']) ], compile, ['-O -ddump-stg-final -dsuppress-uniques -dsuppress-all -dno-typeable-binds -dno-suppress-type-signatures -dsuppress-module-prefixes']) test('T23426', normal, compile, ['-O']) test('T23491a', [extra_files(['T23491.hs']), grep_errmsg(r'Float out')], multimod_compile, ['T23491', '-ffull-laziness -ddump-full-laziness']) test('T23491b', [extra_files(['T23491.hs']), grep_errmsg(r'Float inwards')], multimod_compile, ['T23491', '-ffloat-in -ddump-float-in']) -- GitLab