diff --git a/compiler/GHC/Iface/Binary.hs b/compiler/GHC/Iface/Binary.hs index 44a60587afb0bda508feb67db8abe2afcbc21578..9abf76ce2c745891957bca1f8f06d879cf711da3 100644 --- a/compiler/GHC/Iface/Binary.hs +++ b/compiler/GHC/Iface/Binary.hs @@ -25,6 +25,8 @@ module GHC.Iface.Binary ( putName, putSymbolTable, BinSymbolTable(..), + initWriteIfaceType, initReadIfaceTypeTable, + putAllTables, ) where import GHC.Prelude @@ -46,14 +48,19 @@ import GHC.Types.SrcLoc import GHC.Platform import GHC.Settings.Constants import GHC.Utils.Fingerprint +import GHC.Iface.Type (IfaceType, getIfaceType, putIfaceType) +import Control.Monad import Data.Array import Data.Array.IO import Data.Array.Unsafe import Data.Char -import Data.Word import Data.IORef -import Control.Monad +import Data.Map.Strict (Map) +import Data.Word +import System.IO.Unsafe +import Data.Typeable (Typeable) + -- --------------------------------------------------------------------------- -- Reading and writing binary interface files @@ -158,24 +165,37 @@ getWithUserData name_cache bh = do -- Reading names has the side effect of adding them into the given NameCache. getTables :: NameCache -> ReadBinHandle -> IO ReadBinHandle getTables name_cache bh = do + bhRef <- newIORef (error "used too soon") + -- It is important this is passed to 'getTable' + -- See Note [Lazy ReaderUserData during IfaceType serialisation] + ud <- unsafeInterleaveIO (readIORef bhRef) + fsReaderTable <- initFastStringReaderTable nameReaderTable <- initNameReaderTable name_cache - - - -- The order of these deserialisation matters! - -- - -- See Note [Order of deduplication tables during iface binary serialisation] for details. - fsTable <- Binary.forwardGet bh (getTable fsReaderTable bh) - let - fsReader = mkReaderFromTable fsReaderTable fsTable - bhFs = addReaderToUserData (mkSomeBinaryReader fsReader) bh - - nameTable <- Binary.forwardGet bh (getTable nameReaderTable bhFs) - let - nameReader = mkReaderFromTable nameReaderTable nameTable - bhName = addReaderToUserData (mkSomeBinaryReader nameReader) bhFs - - pure bhName + ifaceTypeReaderTable <- initReadIfaceTypeTable ud + + let -- For any 'ReaderTable', we decode the table that is found at the location + -- the forward reference points to. + -- After decoding the table, we create a 'BinaryReader' and immediately + -- add it to the 'ReaderUserData' of 'ReadBinHandle'. + decodeReaderTable :: Typeable a => ReaderTable a -> ReadBinHandle -> IO ReadBinHandle + decodeReaderTable tbl bh0 = do + table <- Binary.forwardGet bh (getTable tbl bh0) + let binaryReader = mkReaderFromTable tbl table + pure $ addReaderToUserData binaryReader bh0 + + -- Decode all the tables and populate the 'ReaderUserData'. + bhFinal <- foldM (\bh0 act -> act bh0) bh + -- The order of these deserialisation matters! + -- + -- See Note [Order of deduplication tables during iface binary serialisation] for details. + [ decodeReaderTable fsReaderTable + , decodeReaderTable nameReaderTable + , decodeReaderTable ifaceTypeReaderTable + ] + + writeIORef bhRef (getReaderUserData bhFinal) + pure bhFinal -- | Write an interface file. -- @@ -239,6 +259,7 @@ putWithTables bh' put_payload = do -- Initialise deduplicating tables. (fast_wt, fsWriter) <- initFastStringWriterTable (name_wt, nameWriter) <- initNameWriterTable + (ifaceType_wt, ifaceTypeWriter) <- initWriteIfaceType -- Initialise the 'WriterUserData'. let writerUserData = mkWriterUserData @@ -250,6 +271,7 @@ putWithTables bh' put_payload = do -- -- See Note [Binary UserData] , mkSomeBinaryWriter @BindingName $ mkWriter (\bh name -> putEntry nameWriter bh (getBindingName name)) + , mkSomeBinaryWriter @IfaceType ifaceTypeWriter ] let bh = setWriterUserData bh' writerUserData @@ -257,18 +279,24 @@ putWithTables bh' put_payload = do -- The order of these entries matters! -- -- See Note [Order of deduplication tables during iface binary serialisation] for details. - putAllTables bh [fast_wt, name_wt] $ do + putAllTables bh [fast_wt, name_wt, ifaceType_wt] $ do put_payload bh return (name_count, fs_count, r) - where - putAllTables _ [] act = do - a <- act - pure ([], a) - putAllTables bh (x : xs) act = do - (r, (res, a)) <- forwardPut bh (const $ putTable x bh) $ do - putAllTables bh xs act - pure (r : res, a) + +-- | Write all deduplication tables to disk after serialising the +-- main payload. +-- +-- Writes forward pointers to the deduplication tables before writing the payload +-- to allow deserialisation *before* the payload is read again. +putAllTables :: WriteBinHandle -> [WriterTable] -> IO b -> IO ([Int], b) +putAllTables _ [] act = do + a <- act + pure ([], a) +putAllTables bh (x : xs) act = do + (r, (res, a)) <- forwardPut bh (const $ putTable x bh) $ do + putAllTables bh xs act + pure (r : res, a) -- | Initial ram buffer to allocate for writing interface files initBinMemSize :: Int @@ -445,11 +473,69 @@ Here, a visualisation of the table structure we currently have (ignoring 'Extens -} +{- +Note [Lazy ReaderUserData during IfaceType serialisation] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Serialising recursive data types, such as 'IfaceType', requires some trickery +to inject the deduplication table at the right moment. + +When we serialise a value of 'IfaceType', we might encounter new 'IfaceType' values. +For example, 'IfaceAppTy' has an 'IfaceType' field, which we want to deduplicate as well. +Thus, when we serialise an 'IfaceType', we might add new 'IfaceType's to the 'GenericSymbolTable' +(i.e., the deduplication table). These 'IfaceType's are then subsequently also serialised to disk, +and uncover new 'IfaceType' values, etc... +In other words, when we serialise an 'IfaceType' we write it out using a post-order traversal. +See 'putGenericSymbolTable' for the implementation. + +Now, when we deserialise the deduplication table, reading the first element of the deduplication table +will fail, as deserialisation requires that we read the child elements first. Remember, we wrote them to disk +using a post-order traversal. +To make this work, we therefore use 'lazyGet'' to lazily read the parent 'IfaceType', but delay the actual +deserialisation. We just assume that once you need to force a value, the deduplication table for 'IfaceType' +will be available. + +That's where 'bhRef' comes into play: + +@ + bhRef <- newIORef (error "used too soon") + ud <- unsafeInterleaveIO (readIORef bhRef) + ... + ifaceTypeReaderTable <- initReadIfaceTypeTable ud + ... + writeIORef bhRef (getReaderUserData bhFinal) +@ + +'ud' is the 'ReaderUserData' that will eventually contain the deduplication table for 'IfaceType'. +As deserialisation of the 'IfaceType' needs the deduplication table, we provide a +promise that it will exist in the future (represented by @unsafeInterleaveIO (readIORef bhRef)@). +We pass 'ud' to 'initReadIfaceTypeTable', so the deserialisation will use the promised deduplication table. + +Once we have "read" the deduplication table, it will be available in 'bhFinal', and we fulfill the promise +that the deduplication table for 'IfaceType' exists when forced. +-} -- ----------------------------------------------------------------------------- -- The symbol table -- +initReadIfaceTypeTable :: ReaderUserData -> IO (ReaderTable IfaceType) +initReadIfaceTypeTable ud = do + pure $ + ReaderTable + { getTable = getGenericSymbolTable (\bh -> lazyGet' getIfaceType (setReaderUserData bh ud)) + , mkReaderFromTable = \tbl -> mkReader (getGenericSymtab tbl) + } + +initWriteIfaceType :: IO (WriterTable, BinaryWriter IfaceType) +initWriteIfaceType = do + sym_tab <- initGenericSymbolTable @(Map IfaceType) + pure + ( WriterTable + { putTable = putGenericSymbolTable sym_tab (lazyPut' putIfaceType) + } + , mkWriter $ putGenericSymTab sym_tab + ) + initNameReaderTable :: NameCache -> IO (ReaderTable Name) initNameReaderTable cache = do diff --git a/compiler/GHC/Iface/Ext/Binary.hs b/compiler/GHC/Iface/Ext/Binary.hs index c0cc3b2b30076e7f69160fadb001326316e2b99f..3b117b4d9a977d39a04e8aa11aa8baa8db2b8cc4 100644 --- a/compiler/GHC/Iface/Ext/Binary.hs +++ b/compiler/GHC/Iface/Ext/Binary.hs @@ -38,22 +38,21 @@ import Data.ByteString ( ByteString ) import qualified Data.ByteString as BS import qualified Data.ByteString.Char8 as BSC import Data.Word ( Word8, Word32 ) -import Control.Monad ( replicateM, when, forM_ ) +import Control.Monad ( replicateM, when, forM_, foldM ) import System.Directory ( createDirectoryIfMissing ) import System.FilePath ( takeDirectory ) import GHC.Iface.Ext.Types +import GHC.Iface.Binary (initWriteIfaceType, putAllTables, initReadIfaceTypeTable) +import GHC.Iface.Type (IfaceType) +import System.IO.Unsafe (unsafeInterleaveIO) +import qualified GHC.Utils.Binary as Binary data HieSymbolTable = HieSymbolTable { hie_symtab_next :: !FastMutInt , hie_symtab_map :: !(IORef (UniqFM Name (Int, HieName))) } -data HieDictionary = HieDictionary - { hie_dict_next :: !FastMutInt -- The next index to use - , hie_dict_map :: !(IORef (UniqFM FastString (Int,FastString))) -- indexed by FastString - } - initBinMemSize :: Int initBinMemSize = 1024*1024 @@ -84,58 +83,58 @@ writeHieFile hie_file_path hiefile = do putBinLine bh0 $ BSC.pack $ show hieVersion putBinLine bh0 $ ghcVersion - -- remember where the dictionary pointer will go - dict_p_p <- tellBinWriter bh0 - put_ bh0 dict_p_p + (fs_tbl, fs_w) <- initFastStringWriterTable + (name_tbl, name_w) <- initWriteNameTable + (iface_tbl, iface_w) <- initWriteIfaceType - -- remember where the symbol table pointer will go - symtab_p_p <- tellBinWriter bh0 - put_ bh0 symtab_p_p + let bh = setWriterUserData bh0 $ mkWriterUserData + [ mkSomeBinaryWriter @IfaceType iface_w + , mkSomeBinaryWriter @Name name_w + , mkSomeBinaryWriter @BindingName (simpleBindingNameWriter name_w) + , mkSomeBinaryWriter @FastString fs_w + ] - -- Make some initial state - symtab_next <- newFastMutInt 0 - symtab_map <- newIORef emptyUFM :: IO (IORef (UniqFM Name (Int, HieName))) - let hie_symtab = HieSymbolTable { - hie_symtab_next = symtab_next, - hie_symtab_map = symtab_map } - dict_next_ref <- newFastMutInt 0 - dict_map_ref <- newIORef emptyUFM - let hie_dict = HieDictionary { - hie_dict_next = dict_next_ref, - hie_dict_map = dict_map_ref } - - -- put the main thing - let bh = setWriterUserData bh0 - $ newWriteState (putName hie_symtab) - (putName hie_symtab) - (putFastString hie_dict) - put_ bh hiefile - - -- write the symtab pointer at the front of the file - symtab_p <- tellBinWriter bh - putAt bh symtab_p_p symtab_p - seekBinWriter bh symtab_p - - -- write the symbol table itself - symtab_next' <- readFastMutInt symtab_next - symtab_map' <- readIORef symtab_map - putSymbolTable bh symtab_next' symtab_map' - - -- write the dictionary pointer at the front of the file - dict_p <- tellBinWriter bh - putAt bh dict_p_p dict_p - seekBinWriter bh dict_p - - -- write the dictionary itself - dict_next <- readFastMutInt dict_next_ref - dict_map <- readIORef dict_map_ref - putDictionary bh dict_next dict_map + -- Discard number of written elements + -- Order matters! See Note [Order of deduplication tables during iface binary serialisation] + _ <- putAllTables bh [fs_tbl, name_tbl, iface_tbl] $ do + put_ bh hiefile -- and send the result to the file createDirectoryIfMissing True (takeDirectory hie_file_path) writeBinMem bh hie_file_path return () +initWriteNameTable :: IO (WriterTable, BinaryWriter Name) +initWriteNameTable = do + symtab_next <- newFastMutInt 0 + symtab_map <- newIORef emptyUFM + let bin_symtab = + HieSymbolTable + { hie_symtab_next = symtab_next + , hie_symtab_map = symtab_map + } + + let put_symtab bh = do + name_count <- readFastMutInt symtab_next + symtab_map <- readIORef symtab_map + putSymbolTable bh name_count symtab_map + pure name_count + + return + ( WriterTable + { putTable = put_symtab + } + , mkWriter $ putName bin_symtab + ) + +initReadNameTable :: NameCache -> IO (ReaderTable Name) +initReadNameTable cache = do + return $ + ReaderTable + { getTable = \bh -> getSymbolTable bh cache + , mkReaderFromTable = \tbl -> mkReader (getSymTabName tbl) + } + data HieFileResult = HieFileResult { hie_file_result_version :: Integer @@ -216,50 +215,37 @@ readHieFileHeader file bh0 = do readHieFileContents :: ReadBinHandle -> NameCache -> IO HieFile readHieFileContents bh0 name_cache = do - dict <- get_dictionary bh0 - -- read the symbol table so we are capable of reading the actual data - bh1 <- do - let bh1 = setReaderUserData bh0 - $ newReadState (error "getSymtabName") - (getDictFastString dict) - symtab <- get_symbol_table bh1 - let bh1' = setReaderUserData bh1 - $ newReadState (getSymTabName symtab) - (getDictFastString dict) - return bh1' + bhRef <- newIORef (error "used too soon") + -- It is important this is passed to 'getTable' + -- See Note [Lazy ReaderUserData during IfaceType serialisation] + ud <- unsafeInterleaveIO (readIORef bhRef) + + fsReaderTable <- initFastStringReaderTable + nameReaderTable <- initReadNameTable name_cache + ifaceTypeReaderTable <- initReadIfaceTypeTable ud + -- read the symbol table so we are capable of reading the actual data + bh1 <- + foldM (\bh tblReader -> tblReader bh) bh0 + -- The order of these deserialisation matters! + -- + -- See Note [Order of deduplication tables during iface binary serialisation] for details. + [ get_dictionary fsReaderTable + , get_dictionary nameReaderTable + , get_dictionary ifaceTypeReaderTable + ] + + writeIORef bhRef (getReaderUserData bh1) -- load the actual data get bh1 where - get_dictionary bin_handle = do - dict_p <- get bin_handle - data_p <- tellBinReader bin_handle - seekBinReader bin_handle dict_p - dict <- getDictionary bin_handle - seekBinReader bin_handle data_p - return dict - - get_symbol_table bh1 = do - symtab_p <- get bh1 - data_p' <- tellBinReader bh1 - seekBinReader bh1 symtab_p - symtab <- getSymbolTable bh1 name_cache - seekBinReader bh1 data_p' - return symtab - -putFastString :: HieDictionary -> WriteBinHandle -> FastString -> IO () -putFastString HieDictionary { hie_dict_next = j_r, - hie_dict_map = out_r} bh f - = do - out <- readIORef out_r - let !unique = getUnique f - case lookupUFM_Directly out unique of - Just (j, _) -> put_ bh (fromIntegral j :: Word32) - Nothing -> do - j <- readFastMutInt j_r - put_ bh (fromIntegral j :: Word32) - writeFastMutInt j_r (j + 1) - writeIORef out_r $! addToUFM_Directly out unique (j, f) + get_dictionary tbl bin_handle = do + fsTable <- Binary.forwardGet bin_handle (getTable tbl bin_handle) + let + fsReader = mkReaderFromTable tbl fsTable + bhFs = addReaderToUserData fsReader bin_handle + pure bhFs + putSymbolTable :: WriteBinHandle -> Int -> UniqFM Name (Int,HieName) -> IO () putSymbolTable bh next_off symtab = do diff --git a/compiler/GHC/Iface/Recomp/Binary.hs b/compiler/GHC/Iface/Recomp/Binary.hs index 1f83e578667ad338c6d10bb226eadd89422b4632..3dc55e68296a5a86b65711a83b4da11c3ad30144 100644 --- a/compiler/GHC/Iface/Recomp/Binary.hs +++ b/compiler/GHC/Iface/Recomp/Binary.hs @@ -14,6 +14,7 @@ import GHC.Utils.Fingerprint import GHC.Utils.Binary import GHC.Types.Name import GHC.Utils.Panic.Plain +import GHC.Iface.Type (putIfaceType) fingerprintBinMem :: WriteBinHandle -> IO Fingerprint fingerprintBinMem bh = withBinBuffer bh f @@ -34,8 +35,12 @@ computeFingerprint put_nonbinding_name a = do put_ bh a fingerprintBinMem bh where - set_user_data bh = - setWriterUserData bh $ newWriteState put_nonbinding_name putNameLiterally putFS + set_user_data bh = setWriterUserData bh $ mkWriterUserData + [ mkSomeBinaryWriter $ mkWriter putIfaceType + , mkSomeBinaryWriter $ mkWriter put_nonbinding_name + , mkSomeBinaryWriter $ simpleBindingNameWriter $ mkWriter putNameLiterally + , mkSomeBinaryWriter $ mkWriter putFS + ] -- | Used when we want to fingerprint a structure without depending on the -- fingerprints of external Names that it refers to. diff --git a/compiler/GHC/Iface/Type.hs b/compiler/GHC/Iface/Type.hs index 163bc581f7fe46f2e5e46811f26523da88db1a3a..9605e712cfba9e250f4a1a7293033847a3e1a30b 100644 --- a/compiler/GHC/Iface/Type.hs +++ b/compiler/GHC/Iface/Type.hs @@ -33,6 +33,8 @@ module GHC.Iface.Type ( ifForAllBndrVar, ifForAllBndrName, ifaceBndrName, ifTyConBinderVar, ifTyConBinderName, + -- Binary utilities + putIfaceType, getIfaceType, -- Equality testing isIfaceLiftedTypeKind, @@ -91,10 +93,11 @@ import GHC.Utils.Panic import {-# SOURCE #-} GHC.Tc.Utils.TcType ( isMetaTyVar, isTyConableTyVar ) import Control.DeepSeq +import Data.Proxy import Control.Monad ((<$!>)) import Control.Arrow (first) import qualified Data.Semigroup as Semi -import Data.Maybe( isJust ) +import Data.Maybe (isJust) {- ************************************************************************ @@ -192,6 +195,34 @@ data IfaceType -- in interface file size (in GHC's boot libraries). -- See !3987. deriving (Eq, Ord) + -- See Note [Ord instance of IfaceType] + +{- +Note [Ord instance of IfaceType] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +We need an 'Ord' instance to have a 'Map' keyed by 'IfaceType'. This 'Map' is +required for implementing the deduplication table during interface file +serialisation. +See Note [Deduplication during iface binary serialisation] for the implementation details. + +We experimented with a 'TrieMap' based implementation, but it seems to be +slower than using a straight-forward 'Map IfaceType'. +The experiments loaded the full agda library into a ghci session with the +following scenarios: + +* normal: a plain ghci session. +* cold: a ghci session that uses '-fwrite-if-simplified-core -fforce-recomp', + forcing a cold-cache. +* warm: a subsequent ghci session that uses a warm cache for + '-fwrite-if-simplified-core', e.g. nothing needs to be recompiled. + +The implementation was up to 5% slower in some execution runs. However, on +'lib:Cabal', the performance difference between 'Map IfaceType' and +'TrieMap IfaceType' was negligible. + +We share our implementation of the 'TrieMap' in the ticket #24816, so that +further performance analysis and improvements don't need to start from scratch. +-} type IfaceMult = IfaceType @@ -2194,39 +2225,56 @@ ppr_parend_preds :: [IfacePredType] -> SDoc ppr_parend_preds preds = parens (fsep (punctuate comma (map ppr preds))) instance Binary IfaceType where - put_ _ (IfaceFreeTyVar tv) - = pprPanic "Can't serialise IfaceFreeTyVar" (ppr tv) - -- See Note [Free TyVars and CoVars in IfaceType] + put_ bh tyCon = case findUserDataWriter Proxy bh of + tbl -> putEntry tbl bh tyCon - put_ bh (IfaceForAllTy aa ab) = do - putByte bh 0 - put_ bh aa - put_ bh ab - put_ bh (IfaceTyVar ad) = do - putByte bh 1 - put_ bh ad - put_ bh (IfaceAppTy ae af) = do - putByte bh 2 - put_ bh ae - put_ bh af - put_ bh (IfaceFunTy af aw ag ah) = do - putByte bh 3 - put_ bh af - put_ bh aw - put_ bh ag - put_ bh ah - put_ bh (IfaceTyConApp tc tys) - = do { putByte bh 5; put_ bh tc; put_ bh tys } - put_ bh (IfaceCastTy a b) - = do { putByte bh 6; put_ bh a; put_ bh b } - put_ bh (IfaceCoercionTy a) - = do { putByte bh 7; put_ bh a } - put_ bh (IfaceTupleTy s i tys) - = do { putByte bh 8; put_ bh s; put_ bh i; put_ bh tys } - put_ bh (IfaceLitTy n) - = do { putByte bh 9; put_ bh n } + get bh = case findUserDataReader Proxy bh of + tbl -> getEntry tbl bh - get bh = do + +-- | Serialises an 'IfaceType' to the given 'WriteBinHandle'. +-- +-- Serialising inner 'IfaceType''s uses the 'Binary.put' of 'IfaceType' which may be using +-- a deduplication table. See Note [Deduplication during iface binary serialisation]. +putIfaceType :: WriteBinHandle -> IfaceType -> IO () +putIfaceType _ (IfaceFreeTyVar tv) + = pprPanic "Can't serialise IfaceFreeTyVar" (ppr tv) + -- See Note [Free TyVars and CoVars in IfaceType] + +putIfaceType bh (IfaceForAllTy aa ab) = do + putByte bh 0 + put_ bh aa + put_ bh ab +putIfaceType bh (IfaceTyVar ad) = do + putByte bh 1 + put_ bh ad +putIfaceType bh (IfaceAppTy ae af) = do + putByte bh 2 + put_ bh ae + put_ bh af +putIfaceType bh (IfaceFunTy af aw ag ah) = do + putByte bh 3 + put_ bh af + put_ bh aw + put_ bh ag + put_ bh ah +putIfaceType bh (IfaceTyConApp tc tys) + = do { putByte bh 5; put_ bh tc; put_ bh tys } +putIfaceType bh (IfaceCastTy a b) + = do { putByte bh 6; put_ bh a; put_ bh b } +putIfaceType bh (IfaceCoercionTy a) + = do { putByte bh 7; put_ bh a } +putIfaceType bh (IfaceTupleTy s i tys) + = do { putByte bh 8; put_ bh s; put_ bh i; put_ bh tys } +putIfaceType bh (IfaceLitTy n) + = do { putByte bh 9; put_ bh n } + +-- | Deserialises an 'IfaceType' from the given 'ReadBinHandle'. +-- +-- Reading inner 'IfaceType''s uses the 'Binary.get' of 'IfaceType' which may be using +-- a deduplication table. See Note [Deduplication during iface binary serialisation]. +getIfaceType :: HasCallStack => ReadBinHandle -> IO IfaceType +getIfaceType bh = do h <- getByte bh case h of 0 -> do aa <- get bh diff --git a/compiler/GHC/StgToJS/Object.hs b/compiler/GHC/StgToJS/Object.hs index 210c1a2803418fd0e817516e119d559b4eebcd69..f5319abbfca2478c40a772bd7ea8ec9c57032db3 100644 --- a/compiler/GHC/StgToJS/Object.hs +++ b/compiler/GHC/StgToJS/Object.hs @@ -314,7 +314,7 @@ putObject bh mod_name deps os = do put_ bh (moduleNameString mod_name) (fs_tbl, fs_writer) <- initFastStringWriterTable - let bh_fs = addWriterToUserData (mkSomeBinaryWriter fs_writer) bh + let bh_fs = addWriterToUserData fs_writer bh forwardPut_ bh (const (putTable fs_tbl bh_fs)) $ do put_ bh_fs deps diff --git a/compiler/GHC/Utils/Binary.hs b/compiler/GHC/Utils/Binary.hs index 4b33525a5eafb89218e2618feb24074a4838650f..dd206011d5ab9ff87d2c6d7560ebc7eddc2fd200 100644 --- a/compiler/GHC/Utils/Binary.hs +++ b/compiler/GHC/Utils/Binary.hs @@ -64,6 +64,8 @@ module GHC.Utils.Binary -- * Lazy Binary I/O lazyGet, lazyPut, + lazyGet', + lazyPut', lazyGetMaybe, lazyPutMaybe, @@ -86,10 +88,17 @@ module GHC.Utils.Binary initFastStringReaderTable, initFastStringWriterTable, putDictionary, getDictionary, putFS, FSTable(..), getDictFastString, putDictFastString, + -- * Generic deduplication table + GenericSymbolTable(..), + initGenericSymbolTable, + getGenericSymtab, putGenericSymTab, + getGenericSymbolTable, putGenericSymbolTable, -- * Newtype wrappers BinSpan(..), BinSrcSpan(..), BinLocated(..), -- * Newtypes for types that have canonically more than one valid encoding BindingName(..), + simpleBindingNameWriter, + simpleBindingNameReader, ) where import GHC.Prelude @@ -102,11 +111,11 @@ import GHC.Utils.Panic.Plain import GHC.Types.Unique.FM import GHC.Data.FastMutInt import GHC.Utils.Fingerprint -import GHC.Utils.Misc (HasCallStack) import GHC.Types.SrcLoc import GHC.Types.Unique import qualified GHC.Data.Strict as Strict import GHC.Utils.Outputable( JoinPointHood(..) ) +import GHC.Utils.Misc ( HasCallStack, HasDebugCallStack ) import Control.DeepSeq import Control.Monad ( when, (<$!>), unless, forM_, void ) @@ -132,6 +141,7 @@ import Data.List (unfoldr) import System.IO as IO import System.IO.Unsafe ( unsafeInterleaveIO ) import System.IO.Error ( mkIOError, eofErrorType ) +import Type.Reflection ( Typeable, SomeTypeRep(..) ) import qualified Type.Reflection as Refl import GHC.Real ( Ratio(..) ) import Data.IntMap (IntMap) @@ -142,6 +152,8 @@ import GHC.ForeignPtr ( unsafeWithForeignPtr ) import Unsafe.Coerce (unsafeCoerce) +import GHC.Data.TrieMap + type BinArray = ForeignPtr Word8 #if !MIN_VERSION_base(4,15,0) @@ -230,20 +242,28 @@ setReaderUserData bh us = bh { rbm_userData = us } -- | Add 'SomeBinaryReader' as a known binary decoder. -- If a 'BinaryReader' for the associated type already exists in 'ReaderUserData', -- it is overwritten. -addReaderToUserData :: SomeBinaryReader -> ReadBinHandle -> ReadBinHandle -addReaderToUserData cache@(SomeBinaryReader typRep _) bh = bh +addReaderToUserData :: forall a. Typeable a => BinaryReader a -> ReadBinHandle -> ReadBinHandle +addReaderToUserData reader bh = bh { rbm_userData = (rbm_userData bh) - { ud_reader_data = Map.insert (Refl.SomeTypeRep typRep) cache (ud_reader_data (rbm_userData bh)) + { ud_reader_data = + let + typRep = Refl.typeRep @a + in + Map.insert (SomeTypeRep typRep) (SomeBinaryReader typRep reader) (ud_reader_data (rbm_userData bh)) } } -- | Add 'SomeBinaryWriter' as a known binary encoder. -- If a 'BinaryWriter' for the associated type already exists in 'WriterUserData', -- it is overwritten. -addWriterToUserData :: SomeBinaryWriter -> WriteBinHandle -> WriteBinHandle -addWriterToUserData cache@(SomeBinaryWriter typRep _) bh = bh +addWriterToUserData :: forall a . Typeable a => BinaryWriter a -> WriteBinHandle -> WriteBinHandle +addWriterToUserData writer bh = bh { wbm_userData = (wbm_userData bh) - { ud_writer_data = Map.insert (Refl.SomeTypeRep typRep) cache (ud_writer_data (wbm_userData bh)) + { ud_writer_data = + let + typRep = Refl.typeRep @a + in + Map.insert (SomeTypeRep typRep) (SomeBinaryWriter typRep writer) (ud_writer_data (wbm_userData bh)) } } @@ -1102,24 +1122,32 @@ forwardGet bh get_A = do -- Lazy reading/writing lazyPut :: Binary a => WriteBinHandle -> a -> IO () -lazyPut bh a = do +lazyPut = lazyPut' put_ + +lazyGet :: Binary a => ReadBinHandle -> IO a +lazyGet = lazyGet' get + +lazyPut' :: HasDebugCallStack => (WriteBinHandle -> a -> IO ()) -> WriteBinHandle -> a -> IO () +lazyPut' f bh a = do -- output the obj with a ptr to skip over it: pre_a <- tellBinWriter bh put_ bh pre_a -- save a slot for the ptr - put_ bh a -- dump the object + f bh a -- dump the object q <- tellBinWriter bh -- q = ptr to after object putAt bh pre_a q -- fill in slot before a with ptr to q seekBinWriter bh q -- finally carry on writing at q -lazyGet :: Binary a => ReadBinHandle -> IO a -lazyGet bh = do +lazyGet' :: HasDebugCallStack => (ReadBinHandle -> IO a) -> ReadBinHandle -> IO a +lazyGet' f bh = do p <- get bh -- a BinPtr p_a <- tellBinReader bh a <- unsafeInterleaveIO $ do - -- NB: Use a fresh off_r variable in the child thread, for thread + -- NB: Use a fresh rbm_off_r variable in the child thread, for thread -- safety. off_r <- newFastMutInt 0 - getAt bh { rbm_off_r = off_r } p_a + let bh' = bh { rbm_off_r = off_r } + seekBinReader bh' p_a + f bh' seekBinReader bh p -- skip over the object for now return a @@ -1173,6 +1201,12 @@ lazyGetMaybe bh = do newtype BindingName = BindingName { getBindingName :: Name } deriving ( Eq ) +simpleBindingNameWriter :: BinaryWriter Name -> BinaryWriter BindingName +simpleBindingNameWriter = coerce + +simpleBindingNameReader :: BinaryReader Name -> BinaryReader BindingName +simpleBindingNameReader = coerce + -- | Existential for 'BinaryWriter' with a type witness. data SomeBinaryWriter = forall a . SomeBinaryWriter (Refl.TypeRep a) (BinaryWriter a) @@ -1184,7 +1218,7 @@ data SomeBinaryReader = forall a . SomeBinaryReader (Refl.TypeRep a) (BinaryRead -- See Note [Binary UserData] data WriterUserData = WriterUserData { - ud_writer_data :: Map Refl.SomeTypeRep SomeBinaryWriter + ud_writer_data :: Map SomeTypeRep SomeBinaryWriter -- ^ A mapping from a type witness to the 'Writer' for the associated type. -- This is a 'Map' because microbenchmarks indicated this is more efficient -- than other representations for less than ten elements. @@ -1201,7 +1235,7 @@ data WriterUserData = -- See Note [Binary UserData] data ReaderUserData = ReaderUserData { - ud_reader_data :: Map Refl.SomeTypeRep SomeBinaryReader + ud_reader_data :: Map SomeTypeRep SomeBinaryReader -- ^ A mapping from a type witness to the 'Reader' for the associated type. -- This is a 'Map' because microbenchmarks indicated this is more efficient -- than other representations for less than ten elements. @@ -1215,12 +1249,12 @@ data ReaderUserData = mkWriterUserData :: [SomeBinaryWriter] -> WriterUserData mkWriterUserData caches = noWriterUserData - { ud_writer_data = Map.fromList $ map (\cache@(SomeBinaryWriter typRep _) -> (Refl.SomeTypeRep typRep, cache)) caches + { ud_writer_data = Map.fromList $ map (\cache@(SomeBinaryWriter typRep _) -> (SomeTypeRep typRep, cache)) caches } mkReaderUserData :: [SomeBinaryReader] -> ReaderUserData mkReaderUserData caches = noReaderUserData - { ud_reader_data = Map.fromList $ map (\cache@(SomeBinaryReader typRep _) -> (Refl.SomeTypeRep typRep, cache)) caches + { ud_reader_data = Map.fromList $ map (\cache@(SomeBinaryReader typRep _) -> (SomeTypeRep typRep, cache)) caches } mkSomeBinaryWriter :: forall a . Refl.Typeable a => BinaryWriter a -> SomeBinaryWriter @@ -1346,6 +1380,110 @@ newtype WriterTable = WriterTable -- ^ Serialise a table to disk. Returns the number of written elements. } +-- ---------------------------------------------------------------------------- +-- Common data structures for constructing and maintaining lookup tables for +-- binary serialisation and deserialisation. +-- ---------------------------------------------------------------------------- + +-- | The 'GenericSymbolTable' stores a mapping from already seen elements to an index. +-- If an element wasn't seen before, it is added to the mapping together with a fresh +-- index. +-- +-- 'GenericSymbolTable' is a variant of a 'BinSymbolTable' that is polymorphic in the table implementation. +-- As such it can be used with any container that implements the 'TrieMap' type class. +-- +-- While 'GenericSymbolTable' is similar to the 'BinSymbolTable', it supports storing tree-like +-- structures such as 'Type' and 'IfaceType' more efficiently. +-- +data GenericSymbolTable m = GenericSymbolTable + { gen_symtab_next :: !FastMutInt + -- ^ The next index to use. + , gen_symtab_map :: !(IORef (m Int)) + -- ^ Given a symbol, find the symbol and return its index. + , gen_symtab_to_write :: !(IORef [Key m]) + -- ^ Reversed list of values to write into the buffer. + -- This is an optimisation, as it allows us to write out quickly all + -- newly discovered values that are discovered when serialising 'Key m' + -- to disk. + } + +-- | Initialise a 'GenericSymbolTable', initialising the index to '0'. +initGenericSymbolTable :: TrieMap m => IO (GenericSymbolTable m) +initGenericSymbolTable = do + symtab_next <- newFastMutInt 0 + symtab_map <- newIORef emptyTM + symtab_todo <- newIORef [] + pure $ GenericSymbolTable + { gen_symtab_next = symtab_next + , gen_symtab_map = symtab_map + , gen_symtab_to_write = symtab_todo + } + +-- | Serialise the 'GenericSymbolTable' to disk. +-- +-- Since 'GenericSymbolTable' stores tree-like structures, such as 'IfaceType', +-- serialising an element can add new elements to the mapping. +-- Thus, 'putGenericSymbolTable' first serialises all values, and then checks whether any +-- new elements have been discovered. If so, repeat the loop. +putGenericSymbolTable :: forall m. (TrieMap m) => GenericSymbolTable m -> (WriteBinHandle -> Key m -> IO ()) -> WriteBinHandle -> IO Int +{-# INLINE putGenericSymbolTable #-} +putGenericSymbolTable gen_sym_tab serialiser bh = do + putGenericSymbolTable bh + where + symtab_next = gen_symtab_next gen_sym_tab + symtab_to_write = gen_symtab_to_write gen_sym_tab + putGenericSymbolTable :: WriteBinHandle -> IO Int + putGenericSymbolTable bh = do + let loop = do + vs <- atomicModifyIORef' symtab_to_write (\a -> ([], a)) + case vs of + [] -> readFastMutInt symtab_next + todo -> do + mapM_ (\n -> serialiser bh n) (reverse todo) + loop + snd <$> + (forwardPut bh (const $ readFastMutInt symtab_next >>= put_ bh) $ + loop) + +-- | Read the elements of a 'GenericSymbolTable' from disk into a 'SymbolTable'. +getGenericSymbolTable :: forall a . (ReadBinHandle -> IO a) -> ReadBinHandle -> IO (SymbolTable a) +getGenericSymbolTable deserialiser bh = do + sz <- forwardGet bh (get bh) :: IO Int + mut_arr <- newArray_ (0, sz-1) :: IO (IOArray Int a) + forM_ [0..(sz-1)] $ \i -> do + f <- deserialiser bh + writeArray mut_arr i f + unsafeFreeze mut_arr + +-- | Write an element 'Key m' to the given 'WriteBinHandle'. +-- +-- If the element was seen before, we simply write the index of that element to the +-- 'WriteBinHandle'. If we haven't seen it before, we add the element to +-- the 'GenericSymbolTable', increment the index, and return this new index. +putGenericSymTab :: (TrieMap m) => GenericSymbolTable m -> WriteBinHandle -> Key m -> IO () +{-# INLINE putGenericSymTab #-} +putGenericSymTab GenericSymbolTable{ + gen_symtab_map = symtab_map_ref, + gen_symtab_next = symtab_next, + gen_symtab_to_write = symtab_todo } + bh val = do + symtab_map <- readIORef symtab_map_ref + case lookupTM val symtab_map of + Just off -> put_ bh (fromIntegral off :: Word32) + Nothing -> do + off <- readFastMutInt symtab_next + writeFastMutInt symtab_next (off+1) + writeIORef symtab_map_ref + $! insertTM val off symtab_map + atomicModifyIORef symtab_todo (\todo -> (val : todo, ())) + put_ bh (fromIntegral off :: Word32) + +-- | Read a value from a 'SymbolTable'. +getGenericSymtab :: Binary a => SymbolTable a -> ReadBinHandle -> IO a +getGenericSymtab symtab bh = do + i :: Word32 <- get bh + return $! symtab ! fromIntegral i + --------------------------------------------------------- -- The Dictionary --------------------------------------------------------- diff --git a/utils/haddock b/utils/haddock index a711607e29b925f3d69e27c5fde4ba655c711ff1..c9bc29c6a708483d2abc3d8ec9262510ce87ca61 160000 --- a/utils/haddock +++ b/utils/haddock @@ -1 +1 @@ -Subproject commit a711607e29b925f3d69e27c5fde4ba655c711ff1 +Subproject commit c9bc29c6a708483d2abc3d8ec9262510ce87ca61