bytestring benchmarks: 9.8 eta-expansion regression
If you compile these two files.
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE UnboxedTuples #-}
{-# LANGUAGE MagicHash #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeApplications #-}
module Builder(Builder, word8) where
import Data.Word
import GHC.Ptr
import GHC.Word
import GHC.Prim
import GHC.IO
newtype Builder = Builder (forall r. BuildStep r -> BuildStep r)
type BuildStep a = BufferRange -> IO (BuildSignal a)
data BufferRange = BufferRange {-# UNPACK #-} !(Ptr Word8) -- First byte of range
{-# UNPACK #-} !(Ptr Word8) -- First byte /after/ range
-- | 'BuildSignal's abstract signals to the caller of a 'BuildStep'. There are
-- three signals: 'done', 'bufferFull', or 'insertChunks signals
data BuildSignal a =
Done {-# UNPACK #-} !(Ptr Word8) a
--type BuildStep r = () -> r
{-# NOINLINE word8 #-}
word8 :: Word8 -> Builder
word8 x = Builder (\bs br -> IO $ \s -> word1 x bs br s)
word1
:: Word8
-> (forall r.
BuildStep r
-> BufferRange
-> State# RealWorld
-> (# State# RealWorld,
BuildSignal r #))
word1
= \ (x_a3Nz :: Word8)
(eta_a3NB :: BuildStep r_a3NA)
(eta1_a3NC
:: BufferRange)
(eta2_a3ND :: State# RealWorld) ->
case eta1_a3NC of
{ BufferRange (Ptr bx_a3NO) (Ptr bx1_a3NP) ->
case (<#) (minusAddr# bx1_a3NP bx_a3NO) 1# of {
__DEFAULT ->
case x_a3Nz of { W8# x1_i2Az_af5Q ->
case writeWord8OffAddr#
@RealWorld bx_a3NO 0# x1_i2Az_af5Q eta2_a3ND
of
{ s2_i2 ->
(coerce (eta_a3NB
(BufferRange
(Ptr (plusAddr# bx_a3NO 1#)) (Ptr bx1_a3NP)))
s2_i2)
}
};
}
}
-- | The 'Builder' denoting a zero-length sequence of bytes. This function is
-- only exported for use in rewriting rules. Use 'mempty' otherwise.
{-# INLINE[1] empty #-}
empty :: Builder
empty = Builder ($)
-- This eta expansion (hopefully) allows GHC to worker-wrapper the
-- 'BufferRange' in the 'empty' base case of loops (since
-- worker-wrapper requires (TODO: verify this) that all paths match
-- against the wrapped argument.
-- | Concatenate two 'Builder's. This function is only exported for use in rewriting
-- rules. Use 'mappend' otherwise.
{-# INLINE[1] append #-}
append :: Builder -> Builder -> Builder
append (Builder b1) (Builder b2) = Builder $ b1 . b2
instance Semigroup Builder where
{-# INLINE (<>) #-}
(<>) = append
instance Monoid Builder where
{-# INLINE mempty #-}
mempty = empty
{-# INLINE mappend #-}
mappend = (<>)
{-# INLINE mconcat #-}
mconcat = foldr mappend mempty
and
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE PackageImports #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE MagicHash #-}
{-# OPTIONS_GHC -dno-typeable-binds #-}
module Main (main, repro) where
import qualified Builder as B
import Data.Foldable
repro :: [Int] -> B.Builder
repro xs = foldMap (B.word8 . fromIntegral) xs
main :: IO ()
main = print ()
Compile with ghc -O2 Main.hs
Then you observe the core for repro
you can see that in 9.6.2, the definition is nicely eta-expanded but that doesn't happen in 9.8.1-alpha1.
The result is a regression in runtime performance in the bytestring benchmarks.
ghc-9.6.2:
Rec {
-- RHS size: {terms: 28, types: 20, coercions: 10, joins: 0/0}
Main.repro1 [Occ=LoopBreaker]
:: [Int]
-> forall {r}.
Builder.BuildStep r
-> Builder.BufferRange
-> GHC.Prim.State# GHC.Prim.RealWorld
-> (# GHC.Prim.State# GHC.Prim.RealWorld, Builder.BuildSignal r #)
[GblId, Arity=4, Str=<1L><1C(1,C(1,L))><L><L>, Unf=OtherCon []]
Main.repro1
= \ (ds_a1xD :: [Int])
(@r_X3)
(eta_X4 :: Builder.BuildStep r_X3)
(eta1_X5 :: Builder.BufferRange)
(eta2_X6 [OS=OneShot] :: GHC.Prim.State# GHC.Prim.RealWorld) ->
case ds_a1xD of {
[] ->
((eta_X4 eta1_X5)
`cast` (GHC.Types.N:IO[0] <Builder.BuildSignal r_X3>_R
:: IO (Builder.BuildSignal r_X3)
~R# (GHC.Prim.State# GHC.Prim.RealWorld
-> (# GHC.Prim.State# GHC.Prim.RealWorld,
Builder.BuildSignal r_X3 #))))
eta2_X6;
: y_a1xG ys_a1xH ->
case y_a1xG of { GHC.Types.I# i_a20t ->
case eta1_X5 of { Builder.BufferRange ww1_i2II ww2_i2IJ ->
Builder.$wword8
(GHC.Prim.wordToWord8# (GHC.Prim.int2Word# i_a20t))
@r_X3
((Main.repro1 ys_a1xH @r_X3 eta_X4)
`cast` (<Builder.BufferRange>_R
%<Many>_N ->_R Sym (GHC.Types.N:IO[0] <Builder.BuildSignal r_X3>_R)
:: (Builder.BufferRange
-> GHC.Prim.State# GHC.Prim.RealWorld
-> (# GHC.Prim.State# GHC.Prim.RealWorld,
Builder.BuildSignal r_X3 #))
~R# (Builder.BufferRange -> IO (Builder.BuildSignal r_X3))))
ww1_i2II
ww2_i2IJ
eta2_X6
}
}
}
end Rec }
ghc-9.8.1-alpha1
Rec {
-- RHS size: {terms: 28, types: 22, coercions: 7, joins: 0/1}
Main.repro1 [Occ=LoopBreaker]
:: [Int] -> forall r. Builder.BuildStep r -> Builder.BuildStep r
[GblId, Arity=2, Str=<1L><ML>, Unf=OtherCon []]
Main.repro1
= \ (ds_a1yH :: [Int])
(@r_X1)
(eta_X2 :: Builder.BuildStep r_X1) ->
case ds_a1yH of {
[] -> eta_X2;
: y_a1yK ys_a1yL ->
let {
eta1_X3 [Dmd=LC(S,C(1,L))] :: Builder.BuildStep r_X1
[LclId]
eta1_X3 = Main.repro1 ys_a1yL @r_X1 eta_X2 } in
(\ (eta2_B1 :: Builder.BufferRange)
(eta3_B2 [OS=OneShot] :: GHC.Prim.State# GHC.Prim.RealWorld) ->
case y_a1yK of { GHC.Types.I# i_a21w ->
case eta2_B1 of { Builder.BufferRange ww1_i2Jf ww2_i2Jg ->
Builder.$wword8
(GHC.Prim.wordToWord8# (GHC.Prim.int2Word# i_a21w))
@r_X1
eta1_X3
ww1_i2Jf
ww2_i2Jg
eta3_B2
}
})
`cast` (<Builder.BufferRange>_R
%<Many>_N ->_R Sym (GHC.Types.N:IO[0] <Builder.BuildSignal r_X1>_R)
:: (Builder.BufferRange
-> GHC.Prim.State# GHC.Prim.RealWorld
-> (# GHC.Prim.State# GHC.Prim.RealWorld,
Builder.BuildSignal r_X1 #))
~R# (Builder.BufferRange -> IO (Builder.BuildSignal r_X1)))
}
end Rec }