From 88cb3e1079e88ba10065ce260a96095ae96d58e8 Mon Sep 17 00:00:00 2001 From: Fendor <fendor@posteo.de> Date: Tue, 20 Feb 2024 13:48:18 +0100 Subject: [PATCH] Avoid UArray when indexing is not required `UnlinkedBCO`'s can occur many times in the heap. Each `UnlinkedBCO` references two `UArray`'s but never indexes them. They are only needed to encode the elements into a `ByteArray#`. The three words for the lower bound, upper bound and number of elements are essentially unused, thus we replace `UArray` with a wrapper around `ByteArray#`. This saves us up to three words for each `UnlinkedBCO`. Further, to avoid re-allocating these words for `ResolvedBCO`, we repeat the procedure for `ResolvedBCO` and add custom `Binary` and `Show` instances. For example, agda's repl session has around 360_000 UnlinkedBCO's, so avoiding these three words is already saving us around 8MB residency. --- compiler/GHC/ByteCode/Asm.hs | 6 +- compiler/GHC/ByteCode/Linker.hs | 9 +- compiler/GHC/ByteCode/Types.hs | 9 +- libraries/ghci/GHCi/CreateBCO.hs | 12 +- libraries/ghci/GHCi/ResolvedBCO.hs | 119 ++++++++++++++++-- .../tests/ghci/should_run/BinaryArray.hs | 24 +++- 6 files changed, 148 insertions(+), 31 deletions(-) diff --git a/compiler/GHC/ByteCode/Asm.hs b/compiler/GHC/ByteCode/Asm.hs index c310cd9413d3..e8b64dc4b89a 100644 --- a/compiler/GHC/ByteCode/Asm.hs +++ b/compiler/GHC/ByteCode/Asm.hs @@ -213,8 +213,8 @@ assembleBCO platform (ProtoBCO { protoBCOName = nm (text "bytecode instruction count mismatch") let asm_insns = ssElts final_insns - insns_arr = Array.listArray (0, fromIntegral n_insns - 1) asm_insns - bitmap_arr = mkBitmapArray bsize bitmap + !insns_arr = mkBCOByteArray $ Array.listArray (0 :: Int, fromIntegral n_insns - 1) asm_insns + !bitmap_arr = mkBCOByteArray $ mkBitmapArray bsize bitmap ul_bco = UnlinkedBCO nm arity insns_arr bitmap_arr (fromSizedSeq final_lits) (fromSizedSeq final_ptrs) -- 8 Aug 01: Finalisers aren't safe when attached to non-primitive @@ -224,7 +224,7 @@ assembleBCO platform (ProtoBCO { protoBCOName = nm return ul_bco -mkBitmapArray :: Word -> [StgWord] -> UArray Int Word64 +mkBitmapArray :: Word -> [StgWord] -> UArray Int Word -- Here the return type must be an array of Words, not StgWords, -- because the underlying ByteArray# will end up as a component -- of a BCO object. diff --git a/compiler/GHC/ByteCode/Linker.hs b/compiler/GHC/ByteCode/Linker.hs index 032e8b17e639..346d1f6122f9 100644 --- a/compiler/GHC/ByteCode/Linker.hs +++ b/compiler/GHC/ByteCode/Linker.hs @@ -60,10 +60,13 @@ linkBCO interp le bco_ix (UnlinkedBCO _ arity insns bitmap lits0 ptrs0) = do -- fromIntegral Word -> Word64 should be a no op if Word is Word64 -- otherwise it will result in a cast to longlong on 32bit systems. - lits <- mapM (fmap fromIntegral . lookupLiteral interp le) (elemsFlatBag lits0) + (lits :: [Word]) <- mapM (fmap fromIntegral . lookupLiteral interp le) (elemsFlatBag lits0) ptrs <- mapM (resolvePtr interp le bco_ix) (elemsFlatBag ptrs0) - return (ResolvedBCO isLittleEndian arity insns bitmap - (listArray (0, fromIntegral (sizeFlatBag lits0)-1) lits) + let lits' = listArray (0 :: Int, fromIntegral (sizeFlatBag lits0)-1) lits + return (ResolvedBCO isLittleEndian arity + insns + bitmap + (mkBCOByteArray lits') (addListToSS emptySS ptrs)) lookupLiteral :: Interp -> LinkerEnv -> BCONPtr -> IO Word diff --git a/compiler/GHC/ByteCode/Types.hs b/compiler/GHC/ByteCode/Types.hs index 779986ee29e5..27533eb8b19e 100644 --- a/compiler/GHC/ByteCode/Types.hs +++ b/compiler/GHC/ByteCode/Types.hs @@ -1,6 +1,8 @@ {-# LANGUAGE GeneralizedNewtypeDeriving #-} {-# LANGUAGE RecordWildCards #-} {-# LANGUAGE TypeApplications #-} +{-# LANGUAGE MagicHash #-} +{-# LANGUAGE UnliftedNewtypes #-} -- -- (c) The University of Glasgow 2002-2006 -- @@ -8,6 +10,7 @@ -- | Bytecode assembler types module GHC.ByteCode.Types ( CompiledByteCode(..), seqCompiledByteCode + , BCOByteArray(..), mkBCOByteArray , FFIInfo(..) , RegBitmap(..) , NativeCallType(..), NativeCallInfo(..), voidTupleReturnInfo, voidPrimCallInfo @@ -34,10 +37,10 @@ import GHCi.BreakArray import GHCi.RemoteTypes import GHCi.FFI import Control.DeepSeq +import GHCi.ResolvedBCO ( BCOByteArray(..), mkBCOByteArray ) import Foreign import Data.Array -import Data.Array.Base ( UArray(..) ) import Data.ByteString (ByteString) import Data.IntMap (IntMap) import qualified Data.IntMap as IntMap @@ -153,8 +156,8 @@ data UnlinkedBCO = UnlinkedBCO { unlinkedBCOName :: !Name, unlinkedBCOArity :: {-# UNPACK #-} !Int, - unlinkedBCOInstrs :: !(UArray Int Word16), -- insns - unlinkedBCOBitmap :: !(UArray Int Word64), -- bitmap + unlinkedBCOInstrs :: !(BCOByteArray Word16), -- insns + unlinkedBCOBitmap :: !(BCOByteArray Word), -- bitmap unlinkedBCOLits :: !(FlatBag BCONPtr), -- non-ptrs unlinkedBCOPtrs :: !(FlatBag BCOPtr) -- ptrs } diff --git a/libraries/ghci/GHCi/CreateBCO.hs b/libraries/ghci/GHCi/CreateBCO.hs index 7891fec6a724..fd6798932299 100644 --- a/libraries/ghci/GHCi/CreateBCO.hs +++ b/libraries/ghci/GHCi/CreateBCO.hs @@ -68,9 +68,6 @@ createBCO arr bco return (HValue final_bco) } -toWordArray :: UArray Int Word64 -> UArray Int Word -toWordArray = amap fromIntegral - linkBCO' :: Array Int HValue -> ResolvedBCO -> IO BCO linkBCO' arr ResolvedBCO{..} = do let @@ -80,11 +77,10 @@ linkBCO' arr ResolvedBCO{..} = do !(I# arity#) = resolvedBCOArity !(EmptyArr empty#) = emptyArr -- See Note [BCO empty array] - - barr a = case a of UArray _lo _hi n b -> if n == 0 then empty# else b - insns_barr = barr resolvedBCOInstrs - bitmap_barr = barr (toWordArray resolvedBCOBitmap) - literals_barr = barr (toWordArray resolvedBCOLits) + barr arr# = if I# (sizeofByteArray# arr#) == 0 then empty# else arr# + insns_barr = barr (getBCOByteArray resolvedBCOInstrs) + bitmap_barr = barr (getBCOByteArray resolvedBCOBitmap) + literals_barr = barr (getBCOByteArray resolvedBCOLits) PtrsArr marr <- mkPtrsArray arr n_ptrs ptrs IO $ \s -> diff --git a/libraries/ghci/GHCi/ResolvedBCO.hs b/libraries/ghci/GHCi/ResolvedBCO.hs index 5dda6f605313..8548b9703c26 100644 --- a/libraries/ghci/GHCi/ResolvedBCO.hs +++ b/libraries/ghci/GHCi/ResolvedBCO.hs @@ -1,9 +1,12 @@ {-# LANGUAGE RecordWildCards, DeriveGeneric, GeneralizedNewtypeDeriving, - BangPatterns, CPP #-} + BangPatterns, CPP, MagicHash, FlexibleInstances, FlexibleContexts, + TypeApplications, ScopedTypeVariables, UnboxedTuples #-} module GHCi.ResolvedBCO ( ResolvedBCO(..) , ResolvedBCOPtr(..) , isLittleEndian + , BCOByteArray(..) + , mkBCOByteArray ) where import Prelude -- See note [Why do we import Prelude here?] @@ -11,11 +14,19 @@ import GHC.Data.SizedSeq import GHCi.RemoteTypes import GHCi.BreakArray -import Data.Array.Unboxed import Data.Binary +import Data.Binary.Put (putBuilder) import GHC.Generics -import GHCi.BinaryArray +import Foreign.Ptr +import Data.Array.Byte +import qualified Data.Binary.Get.Internal as Binary +import qualified Data.ByteString.Builder as BB +import qualified Data.ByteString.Builder.Internal as BB +import GHC.Exts +import Data.Array.Base (UArray(..)) + +import GHC.IO #include "MachDeps.h" @@ -32,19 +43,35 @@ isLittleEndian = True -- | A 'ResolvedBCO' is one in which all the 'Name' references have been -- resolved to actual addresses or 'RemoteHValues'. -- --- Note, all arrays are zero-indexed (we assume this when --- serializing/deserializing) data ResolvedBCO = ResolvedBCO { resolvedBCOIsLE :: Bool, resolvedBCOArity :: {-# UNPACK #-} !Int, - resolvedBCOInstrs :: UArray Int Word16, -- insns - resolvedBCOBitmap :: UArray Int Word64, -- bitmap - resolvedBCOLits :: UArray Int Word64, -- non-ptrs + resolvedBCOInstrs :: BCOByteArray Word16, -- insns + resolvedBCOBitmap :: BCOByteArray Word, -- bitmap + resolvedBCOLits :: BCOByteArray Word, -- non-ptrs resolvedBCOPtrs :: (SizedSeq ResolvedBCOPtr) -- ptrs } deriving (Generic, Show) +-- | Wrapper for a 'ByteArray#'. +-- The phantom type tells what elements are stored in the 'ByteArray#'. +-- Creating a 'ByteArray#' can be achieved using 'UArray''s API, +-- where the underlying 'ByteArray#' can be unpacked. +data BCOByteArray a + = BCOByteArray { + getBCOByteArray :: !ByteArray# + } + +mkBCOByteArray :: UArray Int a -> BCOByteArray a +mkBCOByteArray (UArray _ _ _ arr) = BCOByteArray arr + +instance Show (BCOByteArray Word16) where + showsPrec _ _ = showString "BCOByteArray Word16" + +instance Show (BCOByteArray Word) where + showsPrec _ _ = showString "BCOByteArray Word" + -- | The Binary instance for ResolvedBCOs. -- -- Note, that we do encode the endianness, however there is no support for mixed @@ -54,12 +81,16 @@ instance Binary ResolvedBCO where put ResolvedBCO{..} = do put resolvedBCOIsLE put resolvedBCOArity - putArray resolvedBCOInstrs - putArray resolvedBCOBitmap - putArray resolvedBCOLits + put resolvedBCOInstrs + put resolvedBCOBitmap + put resolvedBCOLits put resolvedBCOPtrs - get = ResolvedBCO - <$> get <*> get <*> getArray <*> getArray <*> getArray <*> get + get = ResolvedBCO <$> get <*> get <*> get <*> get <*> get <*> get + +instance Binary (BCOByteArray a) where + put = putBCOByteArray + get = decodeBCOByteArray + data ResolvedBCOPtr = ResolvedBCORef {-# UNPACK #-} !Int @@ -75,3 +106,65 @@ data ResolvedBCOPtr deriving (Generic, Show) instance Binary ResolvedBCOPtr + +-- -------------------------------------------------------- +-- Serialisers for 'BCOByteArray' +-- -------------------------------------------------------- + +putBCOByteArray :: BCOByteArray a -> Put +putBCOByteArray (BCOByteArray bar) = do + put (I# (sizeofByteArray# bar)) + putBuilder $ byteArrayBuilder bar + +decodeBCOByteArray :: Get (BCOByteArray a) +decodeBCOByteArray = do + n <- get + getByteArray n + +byteArrayBuilder :: ByteArray# -> BB.Builder +byteArrayBuilder arr# = BB.builder $ go 0 (I# (sizeofByteArray# arr#)) + where + go :: Int -> Int -> BB.BuildStep a -> BB.BuildStep a + go !inStart !inEnd k (BB.BufferRange outStart outEnd) + -- There is enough room in this output buffer to write all remaining array + -- contents + | inRemaining <= outRemaining = do + copyByteArrayToAddr arr# inStart outStart inRemaining + k (BB.BufferRange (outStart `plusPtr` inRemaining) outEnd) + -- There is only enough space for a fraction of the remaining contents + | otherwise = do + copyByteArrayToAddr arr# inStart outStart outRemaining + let !inStart' = inStart + outRemaining + return $! BB.bufferFull 1 outEnd (go inStart' inEnd k) + where + inRemaining = inEnd - inStart + outRemaining = outEnd `minusPtr` outStart + + copyByteArrayToAddr :: ByteArray# -> Int -> Ptr a -> Int -> IO () + copyByteArrayToAddr src# (I# src_off#) (Ptr dst#) (I# len#) = + IO $ \s -> case copyByteArrayToAddr# src# src_off# dst# len# s of + s' -> (# s', () #) + +getByteArray :: Int -> Get (BCOByteArray a) +getByteArray nbytes@(I# nbytes#) = do + let !(MutableByteArray arr#) = unsafeDupablePerformIO $ + IO $ \s -> case newByteArray# nbytes# s of + (# s', mbar #) -> (# s', MutableByteArray mbar #) + let go 0 _ = return () + go !remaining !off = do + Binary.readNWith n $ \ptr -> + copyAddrToByteArray ptr arr# off n + go (remaining - n) (off + n) + where n = min chunkSize remaining + go nbytes 0 + return $! unsafeDupablePerformIO $ + IO $ \s -> case unsafeFreezeByteArray# arr# s of + (# s', bar #) -> (# s', BCOByteArray bar #) + where + chunkSize = 10*1024 + + copyAddrToByteArray :: Ptr a -> MutableByteArray# RealWorld + -> Int -> Int -> IO () + copyAddrToByteArray (Ptr src#) dst# (I# dst_off#) (I# len#) = + IO $ \s -> case copyAddrToByteArray# src# dst# dst_off# len# s of + s' -> (# s', () #) diff --git a/testsuite/tests/ghci/should_run/BinaryArray.hs b/testsuite/tests/ghci/should_run/BinaryArray.hs index 828588c74805..f9b3eec1ba09 100644 --- a/testsuite/tests/ghci/should_run/BinaryArray.hs +++ b/testsuite/tests/ghci/should_run/BinaryArray.hs @@ -1,11 +1,15 @@ -{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE FlexibleContexts, MagicHash, ScopedTypeVariables #-} import Data.Binary.Get import Data.Binary.Put +import Data.Binary (get, put) +import Data.Array.Byte import Data.Array.Unboxed as AU import Data.Array.IO (IOUArray) import Data.Array.MArray (MArray) import Data.Array as A +import Data.Array.Base as A import GHCi.BinaryArray +import GHCi.ResolvedBCO import GHC.Word roundtripTest :: (IArray UArray a, MArray IOUArray a IO, Eq a) @@ -18,6 +22,17 @@ roundtripTest arr = | otherwise -> putStrLn "failed to round-trip" Left _ -> putStrLn "deserialization failed" +roundtripTestByteArray :: forall a . (IArray UArray a, MArray IOUArray a IO, Eq a) + => UArray Int a -> IO () +roundtripTestByteArray (UArray _ _ _ arr#) = + let val = BCOByteArray arr# :: BCOByteArray a + ser = Data.Binary.Put.runPut $ put val + in case Data.Binary.Get.runGetOrFail (get :: Get (BCOByteArray a)) ser of + Right (_, _, BCOByteArray arr'# ) + | ByteArray arr# == ByteArray arr'# -> return () + | otherwise -> putStrLn "failed to round-trip" + Left _ -> putStrLn "deserialization failed" + main :: IO () main = do roundtripTest (AU.listArray (1,500) [1..] :: UArray Int Int) @@ -27,3 +42,10 @@ main = do roundtripTest (AU.listArray (1,500) [1..] :: UArray Int Word32) roundtripTest (AU.listArray (1,500) [1..] :: UArray Int Word64) roundtripTest (AU.listArray (1,500) ['a'..] :: UArray Int Char) + roundtripTestByteArray (AU.listArray (1,500) [1..] :: UArray Int Int) + roundtripTestByteArray (AU.listArray (1,500) [1..] :: UArray Int Word) + roundtripTestByteArray (AU.listArray (1,500) [1..] :: UArray Int Word8) + roundtripTestByteArray (AU.listArray (1,500) [1..] :: UArray Int Word16) + roundtripTestByteArray (AU.listArray (1,500) [1..] :: UArray Int Word32) + roundtripTestByteArray (AU.listArray (1,500) [1..] :: UArray Int Word64) + roundtripTestByteArray (AU.listArray (1,500) ['a'..] :: UArray Int Char) -- GitLab