Skip to content

GHC fails to optimize IO

Summary

While manually writing a bottom-up parser for a simple arithmetic language I noticed some redundant allocations in the STG even when compiling with -O2.

Steps to reproduce

Here is the parser that I'm writing. I've written two versions of step0, step1 and step4 where I've manually applied an optimization to the version without the '.

{-# LANGUAGE BlockArguments #-}
{-# LANGUAGE MagicHash #-}
{-# LANGUAGE UnboxedTuples #-}
{-# LANGUAGE UnliftedDatatypes #-}
{-# LANGUAGE UnliftedNewtypes #-}
{-# LANGUAGE BangPatterns #-}

module BottomUp.ByteString (parseFile, step0', step1', step4') where

-- import Expr ( Expr(..), Op(..) )
import Data.Word ( Word64 )
import Data.ByteString (ByteString)
import qualified Data.ByteString.Char8 as BS
import GHC.Exts (compactNew#, compactAdd#, Compact#, UnliftedType)
import GHC.IO (IO (IO), unIO)
import Data.Char (isSpace)
import Data.Kind (Type)

data Expr = Bin Op Expr Expr | Num Word64
data Op = Add | Sub | Mul | Div

type SnocList :: Type -> UnliftedType
data SnocList a = SnocList a :> !a | Lin
infixl :>

newtype Closure = MkC (ByteString -> SnocList Closure -> Expr -> IO (Maybe Expr))

-- 0 -- ( --> ^
-- 0 -- N --> 1
step0 :: Compact# -> ByteString -> SnocList Closure -> IO (Maybe Expr)
step0 c bs xs = IO \s ->
  case BS.uncons bs of
    Just ('(', bs') -> unIO (step0 c bs' (xs :> MkC \bs'' xs'' x -> step1 c bs'' xs'' x)) s
    _ | Just (n, bs') <- BS.readInt bs -> unIO (step1 c bs' xs =<< num c (fromIntegral n)) s
    Just (spc, bs') | isSpace spc -> unIO (step0 c (BS.dropWhile isSpace bs') xs) s
    _ -> unIO (pure Nothing) s

step0' :: Compact# -> ByteString -> SnocList Closure -> IO (Maybe Expr)
step0' c bs xs = 
  case BS.uncons bs of
    Just ('(', bs') -> step0 c bs' (xs :> MkC \bs'' xs'' x -> step1 c bs'' xs'' x)
    _ | Just (n, bs') <- BS.readInt bs -> step1 c bs' xs =<< num c (fromIntegral n)
    Just (spc, bs') | isSpace spc -> step0 c (BS.dropWhile isSpace bs') xs
    _ -> pure Nothing

-- 1 -- )  --> v
-- 1 -- */ --> 2
-- 1 -- +- --> 3
step1 :: Compact# -> ByteString -> SnocList Closure -> Expr -> IO (Maybe Expr)
step1 c bs xs !x = IO \s ->
  case BS.uncons bs of
    Just (')', bs') | xs' :> MkC f <- xs -> unIO (f bs' xs' x) s
    Just ('*', bs') -> unIO (step2 c bs' xs x Mul) s
    Just ('/', bs') -> unIO (step2 c bs' xs x Div) s
    Just ('+', bs') -> unIO (step3 c bs' xs x Mul) s
    Just ('-', bs') -> unIO (step3 c bs' xs x Div) s
    Nothing -> unIO (pure (Just x)) s
    Just (spc, bs') | isSpace spc -> unIO (step0 c (BS.dropWhile isSpace bs') xs) s
    _ -> unIO (pure Nothing) s

step1' :: Compact# -> ByteString -> SnocList Closure -> Expr -> IO (Maybe Expr)
step1' c bs xs !x =
  case BS.uncons bs of
    Just (')', bs') | xs' :> MkC f <- xs -> f bs' xs' x
    Just ('*', bs') -> step2 c bs' xs x Mul
    Just ('/', bs') -> step2 c bs' xs x Div
    Just ('+', bs') -> step3 c bs' xs x Mul
    Just ('-', bs') -> step3 c bs' xs x Div
    Nothing -> pure (Just x)
    Just (spc, bs') | isSpace spc -> step0 c (BS.dropWhile isSpace bs') xs
    _ -> pure Nothing

-- 2 -- ( --> ^
-- 2 -- N --> 1
step2 :: Compact# -> ByteString -> SnocList Closure -> Expr -> Op -> IO (Maybe Expr)
step2 c bs xs !x !o =
  case BS.uncons bs of
    Just ('(', bs') -> step0 c bs' (xs :> MkC \bs'' xs'' y -> step1 c bs'' xs'' =<< bin c o x y)
    _ | Just (n, bs') <- BS.readInt bs -> step1 c bs' xs =<< bin c o x =<< num c (fromIntegral n)
    Just (s, bs') | isSpace s -> step0 c (BS.dropWhile isSpace bs') xs
    _ -> pure Nothing

-- 3 -- ( --> ^
-- 3 -- N --> 4
step3 :: Compact# -> ByteString -> SnocList Closure -> Expr -> Op -> IO (Maybe Expr)
step3 c bs xs !x !o =
  case BS.uncons bs of
    Just ('(', bs') -> step0 c bs' (xs :> MkC \bs'' xs'' y -> step4 c bs'' xs'' x o y)
    _ | Just (n, bs') <- BS.readInt bs -> step4 c bs' xs x o =<< num c (fromIntegral n)
    Just (s, bs') | isSpace s -> step0 c (BS.dropWhile isSpace bs') xs
    _ -> pure Nothing

-- 4 -- )  --> v
-- 4 -- */ --> 5
-- 4 -- +- --> 3
step4' :: Compact# -> ByteString -> SnocList Closure -> Expr -> Op -> Expr -> IO (Maybe Expr)
step4' c bs xs !x !o !y =
  case BS.uncons bs of
    Just (')', bs') | xs' :> MkC f <- xs -> f bs' xs' =<< bin c o x y
    Just ('*', bs') -> step5 c bs' xs x o y Mul
    Just ('/', bs') -> step5 c bs' xs x o y Div
    Just ('+', bs') -> (\x' -> step3 c bs' xs x' Add) =<< bin c o x y
    Just ('-', bs') -> (\x' -> step3 c bs' xs x' Sub) =<< bin c o x y
    Nothing -> Just <$> bin c o x y
    Just (spc, bs') | isSpace spc -> step0 c (BS.dropWhile isSpace bs') xs
    _ -> pure Nothing

step4 :: Compact# -> ByteString -> SnocList Closure -> Expr -> Op -> Expr -> IO (Maybe Expr)
step4 c bs xs !x !o !y = IO \s ->
  case BS.uncons bs of
    Just (')', bs') | xs' :> MkC f <- xs -> unIO (f bs' xs' =<< bin c o x y) s
    Just ('*', bs') -> unIO (step5 c bs' xs x o y Mul) s
    Just ('/', bs') -> unIO (step5 c bs' xs x o y Div) s
    Just ('+', bs') -> unIO ((\x' -> step3 c bs' xs x' Add) =<< bin c o x y) s
    Just ('-', bs') -> unIO ((\x' -> step3 c bs' xs x' Sub) =<< bin c o x y) s
    Nothing -> unIO (Just <$> bin c o x y) s
    Just (spc, bs') | isSpace spc -> unIO (step0 c (BS.dropWhile isSpace bs') xs) s
    _ -> unIO (pure Nothing) s


-- 5 -- ( --> ^
-- 5 -- N --> 4
step5 :: Compact# -> ByteString -> SnocList Closure -> Expr -> Op -> Expr -> Op -> IO (Maybe Expr)
step5 c bs xs !x !o1 !y !o2 =
  case BS.uncons bs of
    Just ('(', bs') -> step0 c bs' (xs :> MkC \bs'' xs'' z -> step4 c bs'' xs'' x o1 =<< bin c o2 y z)
    _ | Just (n, bs') <- BS.readInt bs -> step4 c bs' xs x o1 =<< bin c o2 y =<< num c (fromIntegral n)
    Just (s, bs') | isSpace s -> step0 c (BS.dropWhile isSpace bs') xs
    _ -> pure Nothing

num :: Compact# -> Word64 -> IO Expr
num c !x = IO (compactAdd# c (Num x))

bin :: Compact# -> Op -> Expr -> Expr -> IO Expr
bin c !op !x !y = IO (compactAdd# c (Bin op x y))

parse :: ByteString -> IO (Maybe Expr)
parse t = IO \s ->
  case compactNew# 65536## s of { (# s', c #) ->
  case step0 c t Lin of { IO io -> 
  io s'
  } }

parseFile :: FilePath -> IO (Maybe Expr)
parseFile path = parse =<< BS.readFile path

Expected behavior

The STG produced for the step1' and step4' functions contains unnecessary allocations, e.g. this is in the STG of the step1' function:

$wstep1'
  :: Compact#
     -> Addr#
     -> ForeignPtrContents
     -> Int#
     -> SnocList Closure
     -> Expr
     -> IO (Maybe Expr) =
  [...]
                    let-no-escape {
                      $j_s2Fq :: IO (Maybe Expr) =
                          \r []
                              let {
                                sat_s2FA
                                  :: State# RealWorld -> (# State# RealWorld, Maybe Expr #) =
                                    \r [void_0E]
                                        case $wfirstnonspace bs'_s2Fj 0# bs'1_s2Fk realWorld# of {
                                        Solo# ww4_s2Fu ->
                                        case ==# [ww4_s2Fu bs'1_s2Fk] of {
                                          __DEFAULT ->
                                              case touch# [ww1_s2F9 void#] of {
                                              (##) ->
                                              case -# [bs'1_s2Fk ww4_s2Fu] of sat_s2Fy {
                                              __DEFAULT ->
                                              case plusAddr# [bs'_s2Fj ww4_s2Fu] of sat_s2Fx {
                                              __DEFAULT ->
                                              $wstep0
                                                  c_s2F7 sat_s2Fx ww1_s2F9 sat_s2Fy xs_s2Fb void#;
                                              };
                                              };
                                              };
                                          1# ->
                                              case touch# [ww1_s2F9 void#] of {
                                              (##) ->
                                              $wstep0 c_s2F7 __NULL FinalPtr 0# xs_s2Fb void#;
                                              };
                                        };
                                        };
                              } in  sat_s2FA;
                    } in
  [...]

That should be the same as in the step1 function:

$wstep1_r2ti
  :: Compact#
     -> Addr#
     -> ForeignPtrContents
     -> Int#
     -> SnocList Closure
     -> Expr
     -> State# RealWorld
     -> (# State# RealWorld, Maybe Expr #) =
  [...]
                    let-no-escape {
                      $j_s3y9 :: (# State# RealWorld, Maybe Expr #) =
                          \r []
                              case $wfirstnonspace bs'_s3y2 0# bs'1_s3y3 realWorld# of {
                              Solo# ww4_s3yc ->
                              case ==# [ww4_s3yc bs'1_s3y3] of {
                                __DEFAULT ->
                                    case touch# [ww1_s3xR void#] of {
                                    (##) ->
                                    case -# [bs'1_s3y3 ww4_s3yc] of sat_s3yg {
                                    __DEFAULT ->
                                    case plusAddr# [bs'_s3y2 ww4_s3yc] of sat_s3yf {
                                    __DEFAULT ->
                                    $wstep0 c_s3xP sat_s3yf ww1_s3xR sat_s3yg xs_s3xT void#;
                                    };
                                    };
                                    };
                                1# ->
                                    case touch# [ww1_s3xR void#] of {
                                    (##) -> $wstep0 c_s3xP __NULL FinalPtr 0# xs_s3xT void#;
                                    };
                              };
                              };
                    } in
  [...]

Concretely, the benchmark that I'm using takes 82.8 ms and allocates 292 MB in total without this optimization compared to 76.9 ms and 199 MB total allocation with the optimization.

Environment

  • GHC version used: 9.2.4 and 9.4.2

Optional:

  • Operating System:
  • System Architecture:
Edited by Jaro Reinders
To upload designs, you'll need to enable LFS and have an admin enable hashed storage. More information