Skip to content

andArityType allocation regression - 9.2.4

From a client's code base we see the following function is eta-expanded in 9.2.3 but not 9.2.4 which leads to failures in their allocations benchmark.

See Encoding.hs

Compile with

ghc-9.2.3 -O2 Encoding.hs
ghc-9.2.4 -O2 Encoding.hs
{-# LANGUAGE BangPatterns, CPP, GeneralizedNewtypeDeriving, MagicHash,
    UnliftedFFITypes #-}
{-# LANGUAGE Trustworthy #-}
{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE ViewPatterns #-}
{-# OPTIONS_GHC -ddump-simpl -ddump-to-file -fno-worker-wrapper #-}

module Encoding
    ( encodeUtf8BuilderEscaped
    ) where


import Data.Text.Internal
import GHC.Word
import Foreign.Ptr (Ptr, minusPtr, plusPtr)
import Foreign.Storable (poke, peekByteOff)
import qualified Data.ByteString.Builder as B
import qualified Data.ByteString.Builder.Internal as B hiding (empty, append)
import qualified Data.ByteString.Builder.Prim as BP
import qualified Data.ByteString.Builder.Prim.Internal as BP
import qualified Data.Text.Array as A
import qualified Data.Text.Internal.Encoding.Utf16 as U16
import Data.Bits
import Data.Char
#if defined(ASSERTS)
import GHC.Stack (HasCallStack)
#endif

{-# INLINE encodeUtf8BuilderEscaped #-}
-- TODO: Extend documentation with references to source code in @blaze-html@
-- or @aeson@ that uses this function.
encodeUtf8BuilderEscaped :: BP.BoundedPrim Word8 -> Text -> B.Builder
encodeUtf8BuilderEscaped be =
    -- manual eta-expansion to ensure inlining works as expected
    \txt -> B.builder (mkBuildstep txt)
  where
    bound = max 4 $ BP.sizeBound be

    mkBuildstep (Text arr off len) !k =
        outerLoop off
      where
        iend = off + len

        outerLoop !i0 !br@(B.BufferRange op0 ope)
          | i0 >= iend       = k br
          | outRemaining > 0 = goPartial (i0 + min outRemaining inpRemaining)
          -- TODO: Use a loop with an integrated bound's check if outRemaining
          -- is smaller than 8, as this will save on divisions.
          | otherwise        = return $ B.bufferFull bound op0 (outerLoop i0)
          where
            outRemaining = (ope `minusPtr` op0) `div` bound
            inpRemaining = iend - i0

            goPartial !iendTmp = go i0 op0
              where
                go !i !op
                  | i < iendTmp = case A.unsafeIndex arr i of
                      w | w <= 0x7F -> do
                            BP.runB be (fromIntegral w) op >>= go (i + 1)
                        | w <= 0x7FF -> do
                            poke8 @Word16 0 $ (w `shiftR` 6) + 0xC0
                            poke8 @Word16 1 $ (w .&. 0x3f) + 0x80
                            go (i + 1) (op `plusPtr` 2)
                        | 0xD800 <= w && w <= 0xDBFF -> do
                            let c = ord $ U16.chr2 w (A.unsafeIndex arr (i+1))
                            poke8 @Int 0 $ (c `shiftR` 18) + 0xF0
                            poke8 @Int 1 $ ((c `shiftR` 12) .&. 0x3F) + 0x80
                            poke8 @Int 2 $ ((c `shiftR` 6) .&. 0x3F) + 0x80
                            poke8 @Int 3 $ (c .&. 0x3F) + 0x80
                            go (i + 2) (op `plusPtr` 4)
                        | otherwise -> do
                            poke8 @Word16 0 $ (w `shiftR` 12) + 0xE0
                            poke8 @Word16 1 $ ((w `shiftR` 6) .&. 0x3F) + 0x80
                            poke8 @Word16 2 $ (w .&. 0x3F) + 0x80
                            go (i + 1) (op `plusPtr` 3)
                  | otherwise =
                      outerLoop i (B.BufferRange op ope)
                  where
                    -- Take care, a is either Word16 or Int above
                    poke8 :: Integral a => Int -> a -> IO ()
                    poke8 j v = poke (op `plusPtr` j) (fromIntegral v :: Word8)

Then look at the difference in core, you will see that in 9.2.3 we eta-expand but not in 9.2.4.

9.2.3 (see eta-expanded, State# token in outerLoop):

encodeUtf8BuilderEscaped [InlPrag=INLINE (sat-args=1)]
  :: BP.BoundedPrim Word8 -> Text -> B.Builder
[GblId,
 Arity=5,
 Str=<MP(L,LCL(C1(C1(P(L,1P(L))))))><1P(L,L,L)><1CL(C1(L))><1P(L,L)><L>,
 Unf=Unf{Src=InlineStable, TopLvl=True, Value=True, ConLike=True,
         WorkFree=True, Expandable=True,
         Guidance=ALWAYS_IF(arity=1,unsat_ok=False,boring_ok=False)
         Tmpl= (\ (be_aGl :: BP.BoundedPrim Word8)
                  (eta_B0 [Occ=Once1!] :: Text)
                  (@r_a1FM)
                  (eta1_B1 [Occ=Once1] :: B.BuildStep r_a1FM)
                  (eta2_B2 [Occ=Once1] :: B.BufferRange)
                  (eta3_B3 [Occ=Once1, OS=OneShot]
                     :: GHC.Prim.State# GHC.Prim.RealWorld) ->
                  let {
                    bound_aGm :: Int
                    [LclId]
                    bound_aGm
                      = case be_aGl of
                        { Data.ByteString.Builder.Prim.Internal.BP dt_a1zq _ [Occ=Dead] ->
                        case GHC.Prim.<=# 4# dt_a1zq of {
                          __DEFAULT -> GHC.Types.I# 4#;
                          1# -> GHC.Types.I# dt_a1zq
                        }
                        } } in
                  case eta_B0 of { Text dt_d1Fy dt1_d1Fz dt2_d1FA [Occ=Once1] ->
                  case eta1_B1 of k_X2 [Occ=OnceL1!] { __DEFAULT ->

9.2.4 (not eta-expanded)

-- RHS size: {terms: 5,165,
              types: 1,469,
              coercions: 569,
              joins: 15/64}
encodeUtf8BuilderEscaped [InlPrag=INLINE (sat-args=1)]
  :: BP.BoundedPrim Word8 -> Text -> B.Builder
[GblId,
 Arity=4,
 Str=<MP(L,LCL(C1(C1(P(L,1P(L))))))><1P(L,L,L)><1L><1P(L,L)>,
 Unf=Unf{Src=InlineStable, TopLvl=True, Value=True, ConLike=True,
         WorkFree=True, Expandable=True,
         Guidance=ALWAYS_IF(arity=1,unsat_ok=False,boring_ok=False)
         Tmpl= (\ (be_aGm :: BP.BoundedPrim Word8)
                  (eta_B0 [Occ=Once1!] :: Text)
                  (@r_a1FN)
                  (eta1_B1 [Occ=Once1] :: B.BuildStep r_a1FN)
                  (eta2_B2 [Occ=Once1] :: B.BufferRange) ->
                  let {
                    bound_aGn :: Int
                    [LclId]
                    bound_aGn
                      = case be_aGm of
                        { Data.ByteString.Builder.Prim.Internal.BP dt_a1zr _ [Occ=Dead] ->
                        case GHC.Prim.<=# 4# dt_a1zr of {
                          __DEFAULT -> GHC.Types.I# 4#;
                          1# -> GHC.Types.I# dt_a1zr
                        }
                        } } in
                  case eta_B0 of { Text dt_d1Fz dt1_d1FA dt2_d1FB [Occ=Once1] ->
                  case eta1_B1 of k_X2 [Occ=OnceL1!] { __DEFAULT ->
                  let {
                    iend_s1Kg :: GHC.Prim.Int#
                    [LclId]
                    iend_s1Kg = GHC.Prim.+# dt1_d1FA dt2_d1FB } in
                  let {
                    iend1_aS0 [Occ=OnceL1] :: Int
                    [LclId, Unf=OtherCon []]
                    iend1_aS0 = GHC.Types.I# iend_s1Kg } in
                  letrec {
                    outerLoop_a1gh [Occ=LoopBreaker]
                      :: Int -> B.BufferRange -> IO (B.BuildSignal r_a1FN)
                    [LclId, Arity=2, Unf=OtherCon []]
To upload designs, you'll need to enable LFS and have an admin enable hashed storage. More information