Commit b2ff5dde authored by Ömer Sinan Ağacan's avatar Ömer Sinan Ağacan

Fix #15038

We introduce a new Id for unused pointer values in unboxed sums that is
not CAFFY. Because the Id is not CAFFY it doesn't make non-CAFFY
definitions CAFFY, fixing #15038.

To make sure anything referenced by the new id will be retained we get a
stable pointer to in on RTS startup.

Test Plan: Passes validate

Reviewers: simonmar, simonpj, hvr, bgamari, erikd

Reviewed By: simonmar

Subscribers: rwbarton, thomie, carter

GHC Trac Issues: #15038

Differential Revision: https://phabricator.haskell.org/D4680
parent 5fe6aaa3
...@@ -46,7 +46,7 @@ module MkCore ( ...@@ -46,7 +46,7 @@ module MkCore (
rEC_CON_ERROR_ID, rUNTIME_ERROR_ID, rEC_CON_ERROR_ID, rUNTIME_ERROR_ID,
nON_EXHAUSTIVE_GUARDS_ERROR_ID, nO_METHOD_BINDING_ERROR_ID, nON_EXHAUSTIVE_GUARDS_ERROR_ID, nO_METHOD_BINDING_ERROR_ID,
pAT_ERROR_ID, rEC_SEL_ERROR_ID, aBSENT_ERROR_ID, pAT_ERROR_ID, rEC_SEL_ERROR_ID, aBSENT_ERROR_ID,
tYPE_ERROR_ID, tYPE_ERROR_ID, aBSENT_SUM_FIELD_ERROR_ID
) where ) where
#include "HsVersions.h" #include "HsVersions.h"
...@@ -708,9 +708,12 @@ recSelErrorName, runtimeErrorName, absentErrorName :: Name ...@@ -708,9 +708,12 @@ recSelErrorName, runtimeErrorName, absentErrorName :: Name
recConErrorName, patErrorName :: Name recConErrorName, patErrorName :: Name
nonExhaustiveGuardsErrorName, noMethodBindingErrorName :: Name nonExhaustiveGuardsErrorName, noMethodBindingErrorName :: Name
typeErrorName :: Name typeErrorName :: Name
absentSumFieldErrorName :: Name
recSelErrorName = err_nm "recSelError" recSelErrorIdKey rEC_SEL_ERROR_ID recSelErrorName = err_nm "recSelError" recSelErrorIdKey rEC_SEL_ERROR_ID
absentErrorName = err_nm "absentError" absentErrorIdKey aBSENT_ERROR_ID absentErrorName = err_nm "absentError" absentErrorIdKey aBSENT_ERROR_ID
absentSumFieldErrorName = err_nm "absentSumFieldError" absentSumFieldErrorIdKey
aBSENT_SUM_FIELD_ERROR_ID
runtimeErrorName = err_nm "runtimeError" runtimeErrorIdKey rUNTIME_ERROR_ID runtimeErrorName = err_nm "runtimeError" runtimeErrorIdKey rUNTIME_ERROR_ID
recConErrorName = err_nm "recConError" recConErrorIdKey rEC_CON_ERROR_ID recConErrorName = err_nm "recConError" recConErrorIdKey rEC_CON_ERROR_ID
patErrorName = err_nm "patError" patErrorIdKey pAT_ERROR_ID patErrorName = err_nm "patError" patErrorIdKey pAT_ERROR_ID
...@@ -726,7 +729,7 @@ err_nm str uniq id = mkWiredInIdName cONTROL_EXCEPTION_BASE (fsLit str) uniq id ...@@ -726,7 +729,7 @@ err_nm str uniq id = mkWiredInIdName cONTROL_EXCEPTION_BASE (fsLit str) uniq id
rEC_SEL_ERROR_ID, rUNTIME_ERROR_ID, rEC_CON_ERROR_ID :: Id rEC_SEL_ERROR_ID, rUNTIME_ERROR_ID, rEC_CON_ERROR_ID :: Id
pAT_ERROR_ID, nO_METHOD_BINDING_ERROR_ID, nON_EXHAUSTIVE_GUARDS_ERROR_ID :: Id pAT_ERROR_ID, nO_METHOD_BINDING_ERROR_ID, nON_EXHAUSTIVE_GUARDS_ERROR_ID :: Id
tYPE_ERROR_ID, aBSENT_ERROR_ID :: Id tYPE_ERROR_ID, aBSENT_ERROR_ID, aBSENT_SUM_FIELD_ERROR_ID :: Id
rEC_SEL_ERROR_ID = mkRuntimeErrorId recSelErrorName rEC_SEL_ERROR_ID = mkRuntimeErrorId recSelErrorName
rUNTIME_ERROR_ID = mkRuntimeErrorId runtimeErrorName rUNTIME_ERROR_ID = mkRuntimeErrorId runtimeErrorName
rEC_CON_ERROR_ID = mkRuntimeErrorId recConErrorName rEC_CON_ERROR_ID = mkRuntimeErrorId recConErrorName
...@@ -735,6 +738,35 @@ nO_METHOD_BINDING_ERROR_ID = mkRuntimeErrorId noMethodBindingErrorName ...@@ -735,6 +738,35 @@ nO_METHOD_BINDING_ERROR_ID = mkRuntimeErrorId noMethodBindingErrorName
nON_EXHAUSTIVE_GUARDS_ERROR_ID = mkRuntimeErrorId nonExhaustiveGuardsErrorName nON_EXHAUSTIVE_GUARDS_ERROR_ID = mkRuntimeErrorId nonExhaustiveGuardsErrorName
tYPE_ERROR_ID = mkRuntimeErrorId typeErrorName tYPE_ERROR_ID = mkRuntimeErrorId typeErrorName
-- Note [aBSENT_SUM_FIELD_ERROR_ID]
-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-- Absent argument error for unused unboxed sum fields are different than absent
-- error used in dummy worker functions (see `mkAbsentErrorApp`):
--
-- - `absentSumFieldError` can't take arguments because it's used in unarise for
-- unused pointer fields in unboxed sums, and applying an argument would
-- require allocating a thunk.
--
-- - `absentSumFieldError` can't be CAFFY because that would mean making some
-- non-CAFFY definitions that use unboxed sums CAFFY in unarise.
--
-- To make `absentSumFieldError` non-CAFFY we get a stable pointer to it in
-- RtsStartup.c and mark it as non-CAFFY here.
--
-- Getting this wrong causes hard-to-debug runtime issues, see #15038.
--
-- TODO: Remove stable pointer hack after fixing #9718.
-- However, we should still be careful about not making things CAFFY just
-- because they use unboxed sums. Unboxed objects are supposed to be
-- efficient, and none of the other unboxed literals make things CAFFY.
aBSENT_SUM_FIELD_ERROR_ID
= mkVanillaGlobalWithInfo absentSumFieldErrorName
(mkSpecForAllTys [alphaTyVar] (mkTyVarTy alphaTyVar)) -- forall a . a
(vanillaIdInfo `setStrictnessInfo` mkClosedStrictSig [] exnRes
`setArityInfo` 0
`setCafInfo` NoCafRefs) -- #15038
mkRuntimeErrorId :: Name -> Id mkRuntimeErrorId :: Name -> Id
-- Error function -- Error function
-- with type: forall (r:RuntimeRep) (a:TYPE r). Addr# -> a -- with type: forall (r:RuntimeRep) (a:TYPE r). Addr# -> a
......
...@@ -880,9 +880,6 @@ dataQual_RDR mod str = mkOrig mod (mkOccNameFS dataName str) ...@@ -880,9 +880,6 @@ dataQual_RDR mod str = mkOrig mod (mkOccNameFS dataName str)
Many of these Names are not really "built in", but some parts of the Many of these Names are not really "built in", but some parts of the
compiler (notably the deriving mechanism) need to mention their names, compiler (notably the deriving mechanism) need to mention their names,
and it's convenient to write them all down in one place. and it's convenient to write them all down in one place.
--MetaHaskell Extension add the constrs and the lower case case
-- guys as well (perhaps) e.g. see trueDataConName below
-} -}
wildCardName :: Name wildCardName :: Name
...@@ -2084,7 +2081,8 @@ wildCardKey, absentErrorIdKey, augmentIdKey, appendIdKey, ...@@ -2084,7 +2081,8 @@ wildCardKey, absentErrorIdKey, augmentIdKey, appendIdKey,
realWorldPrimIdKey, recConErrorIdKey, realWorldPrimIdKey, recConErrorIdKey,
unpackCStringUtf8IdKey, unpackCStringAppendIdKey, unpackCStringUtf8IdKey, unpackCStringAppendIdKey,
unpackCStringFoldrIdKey, unpackCStringIdKey, unpackCStringFoldrIdKey, unpackCStringIdKey,
typeErrorIdKey, divIntIdKey, modIntIdKey :: Unique typeErrorIdKey, divIntIdKey, modIntIdKey,
absentSumFieldErrorIdKey :: Unique
wildCardKey = mkPreludeMiscIdUnique 0 -- See Note [WildCard binders] wildCardKey = mkPreludeMiscIdUnique 0 -- See Note [WildCard binders]
absentErrorIdKey = mkPreludeMiscIdUnique 1 absentErrorIdKey = mkPreludeMiscIdUnique 1
...@@ -2110,6 +2108,7 @@ voidPrimIdKey = mkPreludeMiscIdUnique 21 ...@@ -2110,6 +2108,7 @@ voidPrimIdKey = mkPreludeMiscIdUnique 21
typeErrorIdKey = mkPreludeMiscIdUnique 22 typeErrorIdKey = mkPreludeMiscIdUnique 22
divIntIdKey = mkPreludeMiscIdUnique 23 divIntIdKey = mkPreludeMiscIdUnique 23
modIntIdKey = mkPreludeMiscIdUnique 24 modIntIdKey = mkPreludeMiscIdUnique 24
absentSumFieldErrorIdKey = mkPreludeMiscIdUnique 9
unsafeCoerceIdKey, concatIdKey, filterIdKey, zipIdKey, bindIOIdKey, unsafeCoerceIdKey, concatIdKey, filterIdKey, zipIdKey, bindIOIdKey,
returnIOIdKey, newStablePtrIdKey, returnIOIdKey, newStablePtrIdKey,
......
...@@ -204,7 +204,7 @@ import DataCon ...@@ -204,7 +204,7 @@ import DataCon
import FastString (FastString, mkFastString) import FastString (FastString, mkFastString)
import Id import Id
import Literal (Literal (..), literalType) import Literal (Literal (..), literalType)
import MkCore (aBSENT_ERROR_ID) import MkCore (aBSENT_SUM_FIELD_ERROR_ID)
import MkId (voidPrimId, voidArgId) import MkId (voidPrimId, voidArgId)
import MonadUtils (mapAccumLM) import MonadUtils (mapAccumLM)
import Outputable import Outputable
...@@ -577,7 +577,8 @@ mkUbxSum dc ty_args args0 ...@@ -577,7 +577,8 @@ mkUbxSum dc ty_args args0
= slotRubbishArg slot : mkTupArgs (arg_idx + 1) slots_left arg_map = slotRubbishArg slot : mkTupArgs (arg_idx + 1) slots_left arg_map
slotRubbishArg :: SlotTy -> StgArg slotRubbishArg :: SlotTy -> StgArg
slotRubbishArg PtrSlot = StgVarArg aBSENT_ERROR_ID slotRubbishArg PtrSlot = StgVarArg aBSENT_SUM_FIELD_ERROR_ID
-- See Note [aBSENT_SUM_FIELD_ERROR_ID] in MkCore
slotRubbishArg WordSlot = StgLitArg (MachWord 0) slotRubbishArg WordSlot = StgLitArg (MachWord 0)
slotRubbishArg Word64Slot = StgLitArg (MachWord64 0) slotRubbishArg Word64Slot = StgLitArg (MachWord64 0)
slotRubbishArg FloatSlot = StgLitArg (MachFloat 0) slotRubbishArg FloatSlot = StgLitArg (MachFloat 0)
......
...@@ -95,7 +95,7 @@ module Control.Exception.Base ( ...@@ -95,7 +95,7 @@ module Control.Exception.Base (
-- * Calls for GHC runtime -- * Calls for GHC runtime
recSelError, recConError, runtimeError, recSelError, recConError, runtimeError,
nonExhaustiveGuardsError, patError, noMethodBindingError, nonExhaustiveGuardsError, patError, noMethodBindingError,
absentError, typeError, absentError, absentSumFieldError, typeError,
nonTermination, nestedAtomically, nonTermination, nestedAtomically,
) where ) where
...@@ -398,3 +398,7 @@ nonTermination = toException NonTermination ...@@ -398,3 +398,7 @@ nonTermination = toException NonTermination
-- GHC's RTS calls this -- GHC's RTS calls this
nestedAtomically :: SomeException nestedAtomically :: SomeException
nestedAtomically = toException NestedAtomically nestedAtomically = toException NestedAtomically
-- Introduced by unarise for unused unboxed sum fields
absentSumFieldError :: a
absentSumFieldError = absentError " in unboxed sum."#
...@@ -45,6 +45,7 @@ PRELUDE_CLOSURE(base_GHCziIOziException_cannotCompactPinned_closure); ...@@ -45,6 +45,7 @@ PRELUDE_CLOSURE(base_GHCziIOziException_cannotCompactPinned_closure);
PRELUDE_CLOSURE(base_GHCziIOziException_cannotCompactMutable_closure); PRELUDE_CLOSURE(base_GHCziIOziException_cannotCompactMutable_closure);
PRELUDE_CLOSURE(base_ControlziExceptionziBase_nonTermination_closure); PRELUDE_CLOSURE(base_ControlziExceptionziBase_nonTermination_closure);
PRELUDE_CLOSURE(base_ControlziExceptionziBase_nestedAtomically_closure); PRELUDE_CLOSURE(base_ControlziExceptionziBase_nestedAtomically_closure);
PRELUDE_CLOSURE(base_ControlziExceptionziBase_absentSumFieldError_closure);
PRELUDE_CLOSURE(base_GHCziEventziThread_blockedOnBadFD_closure); PRELUDE_CLOSURE(base_GHCziEventziThread_blockedOnBadFD_closure);
PRELUDE_CLOSURE(base_GHCziConcziSync_runSparks_closure); PRELUDE_CLOSURE(base_GHCziConcziSync_runSparks_closure);
...@@ -99,6 +100,7 @@ PRELUDE_INFO(base_GHCziStable_StablePtr_con_info); ...@@ -99,6 +100,7 @@ PRELUDE_INFO(base_GHCziStable_StablePtr_con_info);
#define nonTermination_closure DLL_IMPORT_DATA_REF(base_ControlziExceptionziBase_nonTermination_closure) #define nonTermination_closure DLL_IMPORT_DATA_REF(base_ControlziExceptionziBase_nonTermination_closure)
#define nestedAtomically_closure DLL_IMPORT_DATA_REF(base_ControlziExceptionziBase_nestedAtomically_closure) #define nestedAtomically_closure DLL_IMPORT_DATA_REF(base_ControlziExceptionziBase_nestedAtomically_closure)
#define blockedOnBadFD_closure DLL_IMPORT_DATA_REF(base_GHCziEventziThread_blockedOnBadFD_closure) #define blockedOnBadFD_closure DLL_IMPORT_DATA_REF(base_GHCziEventziThread_blockedOnBadFD_closure)
#define absentSumFieldError_closure DLL_IMPORT_DATA_REF(base_ControlziExceptionziBase_absentSumFieldError_closure)
#define Czh_con_info DLL_IMPORT_DATA_REF(ghczmprim_GHCziTypes_Czh_con_info) #define Czh_con_info DLL_IMPORT_DATA_REF(ghczmprim_GHCziTypes_Czh_con_info)
#define Izh_con_info DLL_IMPORT_DATA_REF(ghczmprim_GHCziTypes_Izh_con_info) #define Izh_con_info DLL_IMPORT_DATA_REF(ghczmprim_GHCziTypes_Izh_con_info)
......
...@@ -247,6 +247,10 @@ hs_init_ghc(int *argc, char **argv[], RtsConfig rts_config) ...@@ -247,6 +247,10 @@ hs_init_ghc(int *argc, char **argv[], RtsConfig rts_config)
/* Add some GC roots for things in the base package that the RTS /* Add some GC roots for things in the base package that the RTS
* knows about. We don't know whether these turn out to be CAFs * knows about. We don't know whether these turn out to be CAFs
* or refer to CAFs, but we have to assume that they might. * or refer to CAFs, but we have to assume that they might.
*
* Because these stable pointers will retain any CAF references in
* these closures `Id`s of these can be safely marked as non-CAFFY
* in the compiler.
*/ */
getStablePtr((StgPtr)runIO_closure); getStablePtr((StgPtr)runIO_closure);
getStablePtr((StgPtr)runNonIO_closure); getStablePtr((StgPtr)runNonIO_closure);
...@@ -265,6 +269,9 @@ hs_init_ghc(int *argc, char **argv[], RtsConfig rts_config) ...@@ -265,6 +269,9 @@ hs_init_ghc(int *argc, char **argv[], RtsConfig rts_config)
getStablePtr((StgPtr)cannotCompactPinned_closure); getStablePtr((StgPtr)cannotCompactPinned_closure);
getStablePtr((StgPtr)cannotCompactMutable_closure); getStablePtr((StgPtr)cannotCompactMutable_closure);
getStablePtr((StgPtr)nestedAtomically_closure); getStablePtr((StgPtr)nestedAtomically_closure);
getStablePtr((StgPtr)absentSumFieldError_closure);
// `Id` for this closure is marked as non-CAFFY,
// see Note [aBSENT_SUM_FIELD_ERROR_ID] in MkCore.
getStablePtr((StgPtr)runSparks_closure); getStablePtr((StgPtr)runSparks_closure);
getStablePtr((StgPtr)ensureIOManagerIsRunning_closure); getStablePtr((StgPtr)ensureIOManagerIsRunning_closure);
......
TOP=../../../..
include $(TOP)/mk/boilerplate.mk
include $(TOP)/mk/test.mk
.PHONY: T15038
T15038:
'$(TEST_HC)' $(TEST_HC_OPTS) $(shell find . -iname '*.hs') -O2 -o Main \
-package containers -package ghc-prim -package primitive
./Main
test('T15038',
[reqlib('containers'), reqlib('ghc-prim'), reqlib('primitive')],
run_command,
['$MAKE -s --no-print-directory T15038'])
{-# LANGUAGE DeriveFunctor #-}
module Data.Trie.Naive
( Trie
, singleton
, singletonString
, lookup
, parser
, fromList
, fromListAppend
, fromStringList
) where
import Prelude hiding (lookup)
import Data.Semigroup (Semigroup)
import Data.Word (Word8)
import Data.Map (Map)
import Data.Bifunctor (second)
import Packed.Bytes (Bytes)
import qualified Data.Char
import qualified GHC.OldList as L
import qualified Packed.Bytes.Parser as P
import qualified Packed.Bytes as B
import qualified Data.Semigroup as SG
import qualified Data.Map.Strict as M
data Trie a = Trie (Maybe a) (Map Word8 (Trie a))
deriving (Functor)
instance Semigroup a => Semigroup (Trie a) where
(<>) = append
instance Semigroup a => Monoid (Trie a) where
mempty = Trie Nothing M.empty
mappend = (SG.<>)
append :: Semigroup a => Trie a -> Trie a -> Trie a
append (Trie v1 m1) (Trie v2 m2) = Trie
(SG.getOption (SG.Option v1 SG.<> SG.Option v2))
(M.unionWith append m1 m2)
singleton :: Bytes -> a -> Trie a
singleton k v = B.foldr (\b r -> Trie Nothing (M.singleton b r)) (Trie (Just v) M.empty) k
singletonString :: String -> a -> Trie a
singletonString k v = L.foldr (\c r -> Trie Nothing (M.singleton (c2w c) r)) (Trie (Just v) M.empty) k
lookup :: Bytes -> Trie a -> Maybe a
lookup k t0 = case B.foldr lookupStep (Just t0) k of
Nothing -> Nothing
Just (Trie v _) -> v
lookupStep :: Word8 -> Maybe (Trie a) -> Maybe (Trie a)
lookupStep w Nothing = Nothing
lookupStep w (Just (Trie _ m)) = M.lookup w m
parser :: Trie (P.Parser a) -> P.Parser a
parser (Trie mp m) = case mp of
Just p -> p
Nothing -> do
w <- P.any
case M.lookup w m of
Nothing -> P.failure
Just t -> parser t
fromList :: [(Bytes,a)] -> Trie a
fromList = fmap SG.getFirst . fromListAppend . map (second SG.First)
fromListAppend :: Semigroup a => [(Bytes,a)] -> Trie a
fromListAppend = foldMap (uncurry singleton)
fromStringList :: [(String,a)] -> Trie a
fromStringList = fmap SG.getFirst . fromStringListAppend . map (second SG.First)
fromStringListAppend :: Semigroup a => [(String,a)] -> Trie a
fromStringListAppend = foldMap (uncurry singletonString)
c2w :: Char -> Word8
c2w = fromIntegral . Data.Char.ord
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE MagicHash #-}
{-# LANGUAGE UnboxedTuples #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# OPTIONS_GHC
-fno-warn-unsafe
-fno-warn-implicit-prelude
-fno-warn-missing-import-lists
-O2
#-}
module Packed.Bytes
( Bytes(..)
, pack
, unpack
, length
-- * Folds
, foldr
-- * Unsliced Byte Arrays
, fromByteArray
) where
import Prelude hiding (take,length,replicate,drop,null,concat,foldr)
import Data.Primitive (ByteArray(..))
import Data.Word (Word8)
import Control.Monad.ST (runST, ST)
import qualified Data.Primitive as PM
import qualified GHC.OldList as L
data Bytes = Bytes
{-# UNPACK #-} !ByteArray -- payload
{-# UNPACK #-} !Int -- offset
{-# UNPACK #-} !Int -- length
instance Show Bytes where
show x = "pack " ++ show (unpack x)
pack :: [Word8] -> Bytes
pack bs = let arr = packByteArray bs in Bytes arr 0 (lengthByteArray arr)
unpack :: Bytes -> [Word8]
unpack (Bytes arr off len) = go off
where
go :: Int -> [Word8]
go !ix = if ix < len + off
then PM.indexByteArray arr ix : go (ix + 1)
else []
fromByteArray :: ByteArray -> Bytes
fromByteArray ba = Bytes ba 0 (lengthByteArray ba)
length :: Bytes -> Int
length (Bytes _ _ len) = len
foldr :: (Word8 -> a -> a) -> a -> Bytes -> a
foldr f a0 (Bytes arr off0 len) = go off0 where
!end = off0 + len
go !ix = if ix < end
then f (PM.indexByteArray arr ix) (go (ix + 1))
else a0
packByteArray :: [Word8] -> ByteArray
packByteArray ws0 = runST $ do
marr <- PM.newByteArray (L.length ws0)
let go [] !_ = return ()
go (w : ws) !ix = PM.writeByteArray marr ix w >> go ws (ix + 1)
go ws0 0
PM.unsafeFreezeByteArray marr
unpackByteArray :: ByteArray -> [Word8]
unpackByteArray arr = go 0 where
go :: Int -> [Word8]
go !ix = if ix < lengthByteArray arr
then PM.indexByteArray arr ix : go (ix + 1)
else []
lengthByteArray :: ByteArray -> Int
lengthByteArray = PM.sizeofByteArray
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE GADTs #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE MagicHash #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeInType #-}
{-# LANGUAGE UnboxedSums #-}
{-# LANGUAGE UnboxedTuples #-}
{-# OPTIONS_GHC
-Weverything
-fno-warn-unsafe
-fno-warn-implicit-prelude
-fno-warn-missing-import-lists
-fno-warn-noncanonical-monoid-instances
-O2
#-}
module Packed.Bytes.Parser
( Parser(..)
, Result(..)
, Leftovers(..)
, parseStreamST
, any
, failure
) where
import Control.Applicative
import Data.Primitive (ByteArray(..))
import GHC.Int (Int(I#))
import GHC.ST (ST(..),runST)
import GHC.Types (TYPE)
import GHC.Word (Word8(W8#))
import Packed.Bytes (Bytes(..))
import Packed.Bytes.Stream.ST (ByteStream(..))
import Prelude hiding (any,replicate)
import qualified Data.Primitive as PM
import qualified Control.Monad
import GHC.Exts (Int#,ByteArray#,Word#,State#,(+#),(-#),(>#),indexWord8Array#)
type Bytes# = (# ByteArray#, Int#, Int# #)
type Maybe# (a :: TYPE r) = (# (# #) | a #)
type Leftovers# s = (# Bytes# , ByteStream s #)
type Result# s a = (# Maybe# (Leftovers# s), Maybe# a #)
data Result s a = Result
{ resultLeftovers :: !(Maybe (Leftovers s))
, resultValue :: !(Maybe a)
}
data Leftovers s = Leftovers
{ leftoversChunk :: {-# UNPACK #-} !Bytes
-- ^ The last chunk pulled from the stream
, leftoversStream :: ByteStream s
-- ^ The remaining stream
}
data PureResult a = PureResult
{ pureResultLeftovers :: {-# UNPACK #-} !Bytes
, pureResultValue :: !(Maybe a)
} deriving (Show)
emptyByteArray :: ByteArray
emptyByteArray = runST (PM.newByteArray 0 >>= PM.unsafeFreezeByteArray)
parseStreamST :: ByteStream s -> Parser a -> ST s (Result s a)
parseStreamST stream (Parser f) = ST $ \s0 ->
case f (# | (# (# unboxByteArray emptyByteArray, 0#, 0# #), stream #) #) s0 of
(# s1, r #) -> (# s1, boxResult r #)
boxResult :: Result# s a -> Result s a
boxResult (# leftovers, val #) = case val of
(# (# #) | #) -> Result (boxLeftovers leftovers) Nothing
(# | a #) -> Result (boxLeftovers leftovers) (Just a)
boxLeftovers :: Maybe# (Leftovers# s) -> Maybe (Leftovers s)
boxLeftovers (# (# #) | #) = Nothing
boxLeftovers (# | (# theBytes, stream #) #) = Just (Leftovers (boxBytes theBytes) stream)
instance Functor Parser where
fmap = mapParser
-- Remember to write liftA2 by hand at some point.
instance Applicative Parser where
pure = pureParser
(<*>) = Control.Monad.ap
instance Monad Parser where
return = pure
(>>=) = bindLifted
newtype Parser a = Parser
{ getParser :: forall s.
Maybe# (Leftovers# s)
-> State# s
-> (# State# s, Result# s a #)
}
nextNonEmpty :: ByteStream s -> State# s -> (# State# s, Maybe# (Leftovers# s) #)
nextNonEmpty (ByteStream f) s0 = case f s0 of
(# s1, r #) -> case r of
(# (# #) | #) -> (# s1, (# (# #) | #) #)
(# | (# theBytes@(# _,_,len #), stream #) #) -> case len of
0# -> nextNonEmpty stream s1
_ -> (# s1, (# | (# theBytes, stream #) #) #)
withNonEmpty :: forall s b.
Maybe# (Leftovers# s)
-> State# s
-> (State# s -> (# State# s, Result# s b #))
-> (Word# -> Bytes# -> ByteStream s -> State# s -> (# State# s, Result# s b #))
-- The first argument is a Word8, not a full machine word.
-- The second argument is the complete,non-empty chunk
-- with the head byte still intact.
-> (# State# s, Result# s b #)
withNonEmpty (# (# #) | #) s0 g _ = g s0
withNonEmpty (# | (# bytes0@(# arr0,off0,len0 #), stream0 #) #) s0 g f = case len0 ># 0# of
1# -> f (indexWord8Array# arr0 off0) bytes0 stream0 s0
_ -> case nextNonEmpty stream0 s0 of
(# s1, r #) -> case r of
(# (# #) | #) -> g s1
(# | (# bytes1@(# arr1, off1, _ #), stream1 #) #) ->
f (indexWord8Array# arr1 off1) bytes1 stream1 s1
-- | Consume the next byte from the input.
any :: Parser Word8
any = Parser go where
go :: Maybe# (Leftovers# s) -> State# s -> (# State# s, Result# s Word8 #)
go m s0 = withNonEmpty m s0
(\s -> (# s, (# (# (# #) | #), (# (# #) | #) #) #))
(\theByte theBytes stream s ->
(# s, (# (# | (# unsafeDrop# 1# theBytes, stream #) #), (# | W8# theByte #) #) #)
)
-- TODO: improve this
mapParser :: (a -> b) -> Parser a -> Parser b
mapParser f p = bindLifted p (pureParser . f)
pureParser :: a -> Parser a
pureParser a = Parser $ \leftovers0 s0 ->
(# s0, (# leftovers0, (# | a #) #) #)
bindLifted :: Parser a -> (a -> Parser b) -> Parser b
bindLifted (Parser f) g = Parser $ \leftovers0 s0 -> case f leftovers0 s0 of
(# s1, (# leftovers1, val #) #) -> case val of
(# (# #) | #) -> (# s1, (# leftovers1, (# (# #) | #) #) #)
(# | x #) -> case g x of
Parser k -> k leftovers1 s1
-- This assumes that the Bytes is longer than the index. It also does
-- not eliminate zero-length references to byte arrays.
unsafeDrop# :: Int# -> Bytes# -> Bytes#
unsafeDrop# i (# arr, off, len #) = (# arr, off +# i, len -# i #)
unboxByteArray :: ByteArray -> ByteArray#
unboxByteArray (ByteArray arr) = arr
boxBytes :: Bytes# -> Bytes
boxBytes (# a, b, c #) = Bytes (ByteArray a) (I# b) (I# c)
failure :: Parser a
failure = Parser (\m s -> (# s, (# m, (# (# #) | #) #) #))
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE MagicHash #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE UnboxedTuples #-}
{-# LANGUAGE UnboxedSums #-}
{-# OPTIONS_GHC -O2 #-}
module Packed.Bytes.Stream.ST
( ByteStream(..)
, empty
, unpack
, fromBytes
) where
import Data.Primitive (Array,ByteArray(..))
import Data.Semigroup (Semigroup)
import Data.Word (Word8)
import GHC.Exts (RealWorld,State#,Int#,ByteArray#)
import GHC.Int (Int(I#))
import GHC.ST (ST(..))
import Packed.Bytes (Bytes(..))
import System.IO (Handle)
import qualified Data.Primitive as PM
import qualified Data.Semigroup as SG
import qualified Packed.Bytes as B
type Bytes# = (# ByteArray#, Int#, Int# #)