From fa03b1fba784669c9e55363b45cc5170d7bc3674 Mon Sep 17 00:00:00 2001 From: Fendor <fendor@posteo.de> Date: Mon, 22 Apr 2024 10:10:37 +0200 Subject: [PATCH] Refactor the Binary serialisation interface The goal is simplifiy adding deduplication tables to `ModIface` interface serialisation. We identify two main points of interest that make this difficult: 1. UserData hardcodes what `Binary` instances can have deduplication tables. Moreover, it heavily uses partial functions. 2. GHC.Iface.Binary hardcodes the deduplication tables for 'Name' and 'FastString', making it difficult to add more deduplication. Instead of having a single `UserData` record with fields for all the types that can have deduplication tables, we allow to provide custom serialisers for any `Typeable`. These are wrapped in existentials and stored in a `Map` indexed by their respective `TypeRep`. The `Binary` instance of the type to deduplicate still needs to explicitly look up the decoder via `findUserDataReader` and `findUserDataWriter`, which is no worse than the status-quo. `Map` was chosen as microbenchmarks indicate it is the fastest for a small number of keys (< 10). To generalise the deduplication table serialisation mechanism, we introduce the types `ReaderTable` and `WriterTable` which provide a simple interface that is sufficient to implement a general purpose deduplication mechanism for `writeBinIface` and `readBinIface`. This allows us to provide a list of deduplication tables for serialisation that can be extended more easily, for example for `IfaceTyCon`, see the issue https://gitlab.haskell.org/ghc/ghc/-/issues/24540 for more motivation. In addition to this refactoring, we split `UserData` into `ReaderUserData` and `WriterUserData`, to avoid partial functions and reduce overall memory usage, as we need fewer mutable variables. Bump haddock submodule to accomodate for `UserData` split. ------------------------- Metric Increase: MultiLayerModulesTH_Make MultiLayerModulesRecomp T21839c ------------------------- --- compiler/GHC/Iface/Binary.hs | 328 +++++++++++++++++++----- compiler/GHC/Iface/Ext/Binary.hs | 18 +- compiler/GHC/Iface/Recomp/Binary.hs | 2 +- compiler/GHC/Iface/Syntax.hs | 7 +- compiler/GHC/Iface/Type.hs | 12 +- compiler/GHC/StgToJS/Object.hs | 15 +- compiler/GHC/Types/Basic.hs | 2 +- compiler/GHC/Types/FieldLabel.hs | 4 +- compiler/GHC/Types/Name.hs | 8 +- compiler/GHC/Utils/Binary.hs | 371 +++++++++++++++++++++------- utils/haddock | 2 +- 11 files changed, 577 insertions(+), 192 deletions(-) diff --git a/compiler/GHC/Iface/Binary.hs b/compiler/GHC/Iface/Binary.hs index a1611fe2637a..f00237f8865a 100644 --- a/compiler/GHC/Iface/Binary.hs +++ b/compiler/GHC/Iface/Binary.hs @@ -29,7 +29,6 @@ module GHC.Iface.Binary ( import GHC.Prelude -import GHC.Tc.Utils.Monad import GHC.Builtin.Utils ( isKnownKeyName, lookupKnownKeyName ) import GHC.Unit import GHC.Unit.Module.ModIface @@ -39,6 +38,7 @@ import GHC.Types.Unique.FM import GHC.Utils.Panic import GHC.Utils.Binary as Binary import GHC.Data.FastMutInt +import GHC.Data.FastString (FastString) import GHC.Types.Unique import GHC.Utils.Outputable import GHC.Types.Name.Cache @@ -121,6 +121,8 @@ readBinIfaceHeader profile _name_cache checkHiWay traceBinIFace hi_path = do pure (src_hash, bh) -- | Read an interface file. +-- +-- See Note [Deduplication during iface binary serialisation] for details. readBinIface :: Profile -> NameCache @@ -156,22 +158,28 @@ getWithUserData name_cache bh = do -- Reading names has the side effect of adding them into the given NameCache. getTables :: NameCache -> BinHandle -> IO BinHandle getTables name_cache bh = do - -- Read the dictionary - -- The next word in the file is a pointer to where the dictionary is - -- (probably at the end of the file) - dict <- Binary.forwardGet bh (getDictionary bh) + fsReaderTable <- initFastStringReaderTable + nameReaderTable <- initNameReaderTable name_cache + - -- Initialise the user-data field of bh - let bh_fs = setUserData bh $ newReadState (error "getSymtabName") - (getDictFastString dict) + -- The order of these deserialisation matters! + -- + -- See Note [Order of deduplication tables during iface binary serialisation] for details. + fsTable <- Binary.forwardGet bh (getTable fsReaderTable bh) + let + fsReader = mkReaderFromTable fsReaderTable fsTable + bhFs = addReaderToUserData (mkSomeBinaryReader fsReader) bh - symtab <- Binary.forwardGet bh_fs (getSymbolTable bh_fs name_cache) + nameTable <- Binary.forwardGet bh (getTable nameReaderTable bhFs) + let + nameReader = mkReaderFromTable nameReaderTable nameTable + bhName = addReaderToUserData (mkSomeBinaryReader nameReader) bhFs - -- It is only now that we know how to get a Name - return $ setUserData bh $ newReadState (getSymtabName name_cache dict symtab) - (getDictFastString dict) + pure bhName --- | Write an interface file +-- | Write an interface file. +-- +-- See Note [Deduplication during iface binary serialisation] for details. writeBinIface :: Profile -> TraceBinIFace -> FilePath -> ModIface -> IO () writeBinIface profile traceBinIface hi_path mod_iface = do bh <- openBinMem initBinMemSize @@ -225,58 +233,262 @@ putWithUserData traceBinIface bh payload = do -- -- It returns (number of names, number of FastStrings, payload write result) -- -putWithTables :: BinHandle -> (BinHandle -> IO b) -> IO (Int,Int,b) -putWithTables bh put_payload = do - -- initialize state for the name table and the FastString table. - symtab_next <- newFastMutInt 0 - symtab_map <- newIORef emptyUFM - let bin_symtab = BinSymbolTable - { bin_symtab_next = symtab_next - , bin_symtab_map = symtab_map - } +-- See Note [Order of deduplication tables during iface binary serialisation] +putWithTables :: BinHandle -> (BinHandle -> IO b) -> IO (Int, Int, b) +putWithTables bh' put_payload = do + -- Initialise deduplicating tables. + (fast_wt, fsWriter) <- initFastStringWriterTable + (name_wt, nameWriter) <- initNameWriterTable + + -- Initialise the 'WriterUserData'. + let writerUserData = mkWriterUserData + [ mkSomeBinaryWriter @FastString fsWriter + , mkSomeBinaryWriter @Name nameWriter + -- We sometimes serialise binding and non-binding names differently, but + -- not during 'ModIface' serialisation. Here, we serialise both to the same + -- deduplication table. + -- + -- See Note [Binary UserData] + , mkSomeBinaryWriter @BindingName $ mkWriter (\bh name -> putEntry nameWriter bh (getBindingName name)) + ] + let bh = setWriterUserData bh' writerUserData + + (fs_count : name_count : _, r) <- + -- The order of these entries matters! + -- + -- See Note [Order of deduplication tables during iface binary serialisation] for details. + putAllTables bh [fast_wt, name_wt] $ do + put_payload bh + + return (name_count, fs_count, r) + where + putAllTables _ [] act = do + a <- act + pure ([], a) + putAllTables bh (x : xs) act = do + (r, (res, a)) <- forwardPut bh (const $ putTable x bh) $ do + putAllTables bh xs act + pure (r : res, a) - (bh_fs, bin_dict, put_dict) <- initFSTable bh - - (fs_count,(name_count,r)) <- forwardPut bh (const put_dict) $ do +-- | Initial ram buffer to allocate for writing interface files +initBinMemSize :: Int +initBinMemSize = 1024 * 1024 - -- NB. write the dictionary after the symbol table, because - -- writing the symbol table may create more dictionary entries. - let put_symtab = do - name_count <- readFastMutInt symtab_next - symtab_map <- readIORef symtab_map - putSymbolTable bh_fs name_count symtab_map - pure name_count +binaryInterfaceMagic :: Platform -> FixedLengthEncoding Word32 +binaryInterfaceMagic platform + | target32Bit platform = FixedLengthEncoding 0x1face + | otherwise = FixedLengthEncoding 0x1face64 - forwardPut bh_fs (const put_symtab) $ do - -- BinHandle with FastString and Name writing support - let ud_fs = getUserData bh_fs - let ud_name = ud_fs - { ud_put_nonbinding_name = putName bin_dict bin_symtab - , ud_put_binding_name = putName bin_dict bin_symtab - } - let bh_name = setUserData bh ud_name +{- +Note [Deduplication during iface binary serialisation] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +When we serialise a 'ModIface', many symbols are redundant. +For example, there can be many duplicated 'FastString's and 'Name's. +To save space, we deduplicate duplicated symbols, such as 'FastString' and 'Name', +by maintaining a table of already seen symbols. - put_payload bh_name +Besides saving a lot of disk space, this additionally enables us to automatically share +these symbols when we read the 'ModIface' from disk, without additional mechanisms such as 'FastStringTable'. - return (name_count, fs_count, r) +The general idea is, when serialising a value of type 'Name', we first have to create a deduplication +table (see 'putWithTables.initNameWriterTable' for example). Then, we create a 'BinaryWriter' function +which we add to the 'WriterUserData'. When this 'BinaryWriter' is used to serialise a value of type 'Name', +it looks up whether we have seen this value before. If so, we write an index to disk. +If we haven't seen the value before, we add it to the deduplication table and produce a new index. +Both the 'ReaderUserData' and 'WriterUserData' can contain many 'BinaryReader's and 'BinaryWriter's +respectively, which can each individually be tweaked to use a deduplication table, or to serialise +the value without deduplication. +After the payload (e.g., the 'ModIface') has been serialised to disk, we serialise the deduplication tables +to disk. This happens in 'putAllTables', where we serialise all tables that we use during 'ModIface' +serialisation. See 'initNameWriterTable' and 'putSymbolTable' for an implementation example. +This uses the 'real' serialisation function, e.g., 'serialiseName'. +However, these tables need to be deserialised before we can read the 'ModIface' from disk. +Thus, we write before the 'ModIface' a forward pointer to the deduplication table, so we can +read this table before deserialising the 'ModIface'. --- | Initial ram buffer to allocate for writing interface files -initBinMemSize :: Int -initBinMemSize = 1024 * 1024 +To add a deduplication table for a type, let us assume 'IfaceTyCon', you need to do the following: -binaryInterfaceMagic :: Platform -> FixedLengthEncoding Word32 -binaryInterfaceMagic platform - | target32Bit platform = FixedLengthEncoding 0x1face - | otherwise = FixedLengthEncoding 0x1face64 +* The 'Binary' instance 'IfaceTyCon' needs to dynamically look up the serialiser function instead of + serialising the value of 'IfaceTyCon'. It needs to look up the serialiser in the 'ReaderUserData' and + 'WriterUserData' respectively. + This allows us to change the serialisation of 'IfaceTyCon' at run-time. + We can still serialise 'IfaceTyCon' to disk directly, or use a deduplication table to reduce the size of + the .hi file. + + For example: + + @ + instance Binary IfaceTyCon where + put_ bh ty = case findUserDataWriter (Proxy @IfaceTyCon) bh of + tbl -> putEntry tbl bh ty + get bh = case findUserDataReader (Proxy @IfaceTyCon) bh of + tbl -> getEntry tbl bh + @ + + We include the signatures of 'findUserDataWriter' and 'findUserDataReader' to make this code example + easier to understand: + + @ + findUserDataReader :: Typeable a => Proxy a -> ReadBinHandle -> BinaryReader a + findUserDataWriter :: Typeable a => Proxy a -> WriteBinHandle -> BinaryWriter a + @ + + where 'BinaryReader' and 'BinaryWriter' correspond to the 'Binary' class methods + 'get' and 'put_' respectively, thus: + + @ + newtype BinaryReader s = BinaryReader { getEntry :: ReadBinHandle -> IO s } + + newtype BinaryWriter s = BinaryWriter { putEntry :: WriteBinHandle -> s -> IO () } + @ + + 'findUserData*' looks up the serialisation function for 'IfaceTyCon', which we then subsequently + use to serialise said 'IfaceTyCon'. If no such serialiser can be found, 'findUserData*' + crashes at run-time. + +* Whenever a value of 'IfaceTyCon' needs to be serialised, there are two serialisation functions involved: + + * The literal serialiser that puts/gets the value to/from disk: + Writes or reads a value of type 'IfaceTyCon' from the 'Write/ReadBinHandle'. + This serialiser is primarily used to write the values stored in the deduplication table. + It is also used to read the values from disk. + + * The deduplicating serialiser: + Replaces the serialised value of 'IfaceTyCon' with an offset that is stored in the + deduplication table. + This serialiser is used while serialising the payload. + + We need to add the deduplicating serialiser to the 'ReaderUserData' and 'WriterUserData' + respectively, so that 'findUserData*' can find them. + + For example, adding a serialiser for writing 'IfaceTyCon's: + + @ + let bh0 :: WriteBinHandle = ... + putIfaceTyCon = ... -- Serialises 'IfaceTyCon' to disk + bh = addWriterToUserData (mkSomeBinaryWriter putIfaceTyCon) bh0 + @ + + Naturally, you have to do something similar for reading values of 'IfaceTyCon'. + + The provided code example implements the previous behaviour: + serialise all values of type 'IfaceTyCon' directly. No deduplication is happening. + + Now, instead of literally putting the value, we can introduce a deduplication table! + Instead of specifying 'putIfaceTyCon', which writes a value of 'IfaceTyCon' directly to disk, + we provide a function that looks up values in a table and provides an index of each value + we have already seen. + If the particular 'IfaceTyCon' we want to serialise isn't already in the de-dup table, + we allocate a new index and extend the table. + + See the definition of 'initNameWriterTable' and 'initNameReaderTable' for example deduplication tables. + +* Storing the deduplication table. + + After the deduplicating the elements in the payload (e.g., 'ModIface'), we now have a deduplication + table full with all the values. + We serialise this table to disk using the real serialiser (e.g., 'putIfaceTyCon'). + + When serialisation is complete, we write out the de-dup table in 'putAllTables', + serialising each 'IfaceTyCon' in the table. Of course, doing so might in turn serialise + another de-dup'd thing (e.g. a FastString), thereby extending its respective de-dup table. + +Note [Order of deduplication tables during iface binary serialisation] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Serialisation of 'ModIface' uses tables to deduplicate symbols that occur often. +See Note [Deduplication during iface binary serialisation]. + +After 'ModIface' has been written to disk, we write the deduplication tables. +Writing a table may add additional entries to *other* deduplication tables, thus +we need to make sure that the symbol table we serialise only depends on +deduplication tables that haven't been written to disk yet. + +For example, assume we maintain deduplication tables for 'FastString' and 'Name'. +The symbol 'Name' depends on 'FastString', so serialising a 'Name' may add a 'FastString' +to the 'FastString' deduplication table. +Thus, 'Name' table needs to be serialised to disk before the 'FastString' table. + +When we read the 'ModIface' from disk, we consequentially need to read the 'FastString' +deduplication table from disk, before we can deserialise the 'Name' deduplication table. +Therefore, before we serialise the tables, we write forward pointers that allow us to jump ahead +to the table we need to deserialise first. +What deduplication tables exist and the order of serialisation is currently statically specified +in 'putWithTables'. 'putWithTables' also takes care of the serialisation of used deduplication tables. +The deserialisation of the deduplication tables happens 'getTables', using 'Binary' utility +functions such as 'forwardGet'. + +Here, a visualisation of the table structure we currently have (ignoring 'ExtensibleFields'): + +┌──────────────┠+│ Headers │ +├──────────────┤ +│ Ptr FS ├────────┠+├──────────────┤ │ +│ Ptr Name ├─────┠│ +├──────────────┤ │ │ +│ │ │ │ +│ ModIface │ │ │ +│ Payload │ │ │ +│ │ │ │ +├──────────────┤ │ │ +│ │ │ │ +│ Name Table │◄────┘ │ +│ │ │ +├──────────────┤ │ +│ │ │ +│ FS Table │◄───────┘ +│ │ +└──────────────┘ + +-} -- ----------------------------------------------------------------------------- -- The symbol table -- + +initNameReaderTable :: NameCache -> IO (ReaderTable Name) +initNameReaderTable cache = do + return $ + ReaderTable + { getTable = \bh -> getSymbolTable bh cache + , mkReaderFromTable = \tbl -> mkReader (getSymtabName tbl) + } + +data BinSymbolTable = BinSymbolTable { + bin_symtab_next :: !FastMutInt, -- The next index to use + bin_symtab_map :: !(IORef (UniqFM Name (Int,Name))) + -- indexed by Name + } + +initNameWriterTable :: IO (WriterTable, BinaryWriter Name) +initNameWriterTable = do + symtab_next <- newFastMutInt 0 + symtab_map <- newIORef emptyUFM + let bin_symtab = + BinSymbolTable + { bin_symtab_next = symtab_next + , bin_symtab_map = symtab_map + } + + let put_symtab bh = do + name_count <- readFastMutInt symtab_next + symtab_map <- readIORef symtab_map + putSymbolTable bh name_count symtab_map + pure name_count + + return + ( WriterTable + { putTable = put_symtab + } + , mkWriter $ putName bin_symtab + ) + + putSymbolTable :: BinHandle -> Int -> UniqFM Name (Int,Name) -> IO () putSymbolTable bh name_count symtab = do put_ bh name_count @@ -286,7 +498,7 @@ putSymbolTable bh name_count symtab = do mapM_ (\n -> serialiseName bh n symtab) names -getSymbolTable :: BinHandle -> NameCache -> IO SymbolTable +getSymbolTable :: BinHandle -> NameCache -> IO (SymbolTable Name) getSymbolTable bh name_cache = do sz <- get bh :: IO Int -- create an array of Names for the symbols and add them to the NameCache @@ -331,8 +543,8 @@ serialiseName bh name _ = do -- See Note [Symbol table representation of names] -putName :: FSTable -> BinSymbolTable -> BinHandle -> Name -> IO () -putName _dict BinSymbolTable{ +putName :: BinSymbolTable -> BinHandle -> Name -> IO () +putName BinSymbolTable{ bin_symtab_map = symtab_map_ref, bin_symtab_next = symtab_next } bh name @@ -356,10 +568,9 @@ putName _dict BinSymbolTable{ put_ bh (fromIntegral off :: Word32) -- See Note [Symbol table representation of names] -getSymtabName :: NameCache - -> Dictionary -> SymbolTable +getSymtabName :: SymbolTable Name -> BinHandle -> IO Name -getSymtabName _name_cache _dict symtab bh = do +getSymtabName symtab bh = do i :: Word32 <- get bh case i .&. 0xC0000000 of 0x00000000 -> return $! symtab ! fromIntegral i @@ -376,10 +587,3 @@ getSymtabName _name_cache _dict symtab bh = do Just n -> n _ -> pprPanic "getSymtabName:unknown name tag" (ppr i) - -data BinSymbolTable = BinSymbolTable { - bin_symtab_next :: !FastMutInt, -- The next index to use - bin_symtab_map :: !(IORef (UniqFM Name (Int,Name))) - -- indexed by Name - } - diff --git a/compiler/GHC/Iface/Ext/Binary.hs b/compiler/GHC/Iface/Ext/Binary.hs index d9d57398b704..f1a1058f4a39 100644 --- a/compiler/GHC/Iface/Ext/Binary.hs +++ b/compiler/GHC/Iface/Ext/Binary.hs @@ -105,9 +105,10 @@ writeHieFile hie_file_path hiefile = do hie_dict_map = dict_map_ref } -- put the main thing - let bh = setUserData bh0 $ newWriteState (putName hie_symtab) - (putName hie_symtab) - (putFastString hie_dict) + let bh = setWriterUserData bh0 + $ newWriteState (putName hie_symtab) + (putName hie_symtab) + (putFastString hie_dict) put_ bh hiefile -- write the symtab pointer at the front of the file @@ -218,10 +219,11 @@ readHieFileContents bh0 name_cache = do dict <- get_dictionary bh0 -- read the symbol table so we are capable of reading the actual data bh1 <- do - let bh1 = setUserData bh0 $ newReadState (error "getSymtabName") - (getDictFastString dict) + let bh1 = setReaderUserData bh0 + $ newReadState (error "getSymtabName") + (getDictFastString dict) symtab <- get_symbol_table bh1 - let bh1' = setUserData bh1 + let bh1' = setReaderUserData bh1 $ newReadState (getSymTabName symtab) (getDictFastString dict) return bh1' @@ -265,7 +267,7 @@ putSymbolTable bh next_off symtab = do let names = A.elems (A.array (0,next_off-1) (nonDetEltsUFM symtab)) mapM_ (putHieName bh) names -getSymbolTable :: BinHandle -> NameCache -> IO SymbolTable +getSymbolTable :: BinHandle -> NameCache -> IO (SymbolTable Name) getSymbolTable bh name_cache = do sz <- get bh mut_arr <- A.newArray_ (0, sz-1) :: IO (A.IOArray Int Name) @@ -275,7 +277,7 @@ getSymbolTable bh name_cache = do A.writeArray mut_arr i name A.unsafeFreeze mut_arr -getSymTabName :: SymbolTable -> BinHandle -> IO Name +getSymTabName :: SymbolTable Name -> BinHandle -> IO Name getSymTabName st bh = do i :: Word32 <- get bh return $ st A.! (fromIntegral i) diff --git a/compiler/GHC/Iface/Recomp/Binary.hs b/compiler/GHC/Iface/Recomp/Binary.hs index 60f0d5fc8650..a3ebb31e3eb5 100644 --- a/compiler/GHC/Iface/Recomp/Binary.hs +++ b/compiler/GHC/Iface/Recomp/Binary.hs @@ -35,7 +35,7 @@ computeFingerprint put_nonbinding_name a = do fingerprintBinMem bh where set_user_data bh = - setUserData bh $ newWriteState put_nonbinding_name putNameLiterally putFS + setWriterUserData bh $ newWriteState put_nonbinding_name putNameLiterally putFS -- | Used when we want to fingerprint a structure without depending on the -- fingerprints of external Names that it refers to. diff --git a/compiler/GHC/Iface/Syntax.hs b/compiler/GHC/Iface/Syntax.hs index 96308f8d72ae..94cec93c2ec9 100644 --- a/compiler/GHC/Iface/Syntax.hs +++ b/compiler/GHC/Iface/Syntax.hs @@ -94,6 +94,7 @@ import GHC.Utils.Misc( dropList, filterByList, notNull, unzipWith, import Control.Monad import System.IO.Unsafe import Control.DeepSeq +import Data.Proxy infixl 3 &&& @@ -123,10 +124,10 @@ getIfaceTopBndr bh = get bh putIfaceTopBndr :: BinHandle -> IfaceTopBndr -> IO () putIfaceTopBndr bh name = - case getUserData bh of - UserData{ ud_put_binding_name = put_binding_name } -> + case findUserDataWriter (Proxy @BindingName) bh of + tbl -> --pprTrace "putIfaceTopBndr" (ppr name) $ - put_binding_name bh name + putEntry tbl bh (BindingName name) data IfaceDecl diff --git a/compiler/GHC/Iface/Type.hs b/compiler/GHC/Iface/Type.hs index acf5794802f3..134f16594661 100644 --- a/compiler/GHC/Iface/Type.hs +++ b/compiler/GHC/Iface/Type.hs @@ -9,7 +9,6 @@ This module defines interface types and binders {-# LANGUAGE MultiWayIf #-} {-# LANGUAGE LambdaCase #-} - module GHC.Iface.Type ( IfExtName, IfLclName, @@ -93,7 +92,7 @@ import {-# SOURCE #-} GHC.Tc.Utils.TcType ( isMetaTyVar, isTyConableTyVar ) import Data.Maybe( isJust ) import qualified Data.Semigroup as Semi import Control.DeepSeq -import Control.Monad ((<$!>)) +import Control.Monad {- ************************************************************************ @@ -2045,11 +2044,12 @@ instance Outputable IfaceCoercion where ppr = pprIfaceCoercion instance Binary IfaceTyCon where - put_ bh (IfaceTyCon n i) = put_ bh n >> put_ bh i + put_ bh (IfaceTyCon n i) = put_ bh n >> put_ bh i - get bh = do n <- get bh - i <- get bh - return (IfaceTyCon n i) + get bh = do + n <- get bh + i <- get bh + return (IfaceTyCon n i) instance Binary IfaceTyConSort where put_ bh IfaceNormalTyCon = putByte bh 0 diff --git a/compiler/GHC/StgToJS/Object.hs b/compiler/GHC/StgToJS/Object.hs index ab05817c9034..5dd298af57a1 100644 --- a/compiler/GHC/StgToJS/Object.hs +++ b/compiler/GHC/StgToJS/Object.hs @@ -66,6 +66,9 @@ import GHC.Prelude import Control.Monad import Data.Array +import qualified Data.ByteString as B +import qualified Data.ByteString.Unsafe as B +import Data.Char (isSpace) import Data.Int import Data.IntSet (IntSet) import qualified Data.IntSet as IS @@ -75,10 +78,7 @@ import Data.Map (Map) import qualified Data.Map as M import Data.Word import Data.Semigroup -import qualified Data.ByteString as B -import qualified Data.ByteString.Unsafe as B -import Data.Char (isSpace) -import System.IO +import System.IO import GHC.Settings.Constants (hiVersion) @@ -313,9 +313,10 @@ putObject bh mod_name deps os = do -- object in an archive. put_ bh (moduleNameString mod_name) - (bh_fs, _bin_dict, put_dict) <- initFSTable bh + (fs_tbl, fs_writer) <- initFastStringWriterTable + let bh_fs = addWriterToUserData (mkSomeBinaryWriter fs_writer) bh - forwardPut_ bh (const put_dict) $ do + forwardPut_ bh (const (putTable fs_tbl bh_fs)) $ do put_ bh_fs deps -- forward put the index @@ -348,7 +349,7 @@ getObjectBody :: BinHandle -> ModuleName -> IO Object getObjectBody bh0 mod_name = do -- Read the string table dict <- forwardGet bh0 (getDictionary bh0) - let bh = setUserData bh0 $ noUserData { ud_get_fs = getDictFastString dict } + let bh = setReaderUserData bh0 $ newReadState (panic "No name allowed") (getDictFastString dict) block_info <- get bh idx <- forwardGet bh (get bh) diff --git a/compiler/GHC/Types/Basic.hs b/compiler/GHC/Types/Basic.hs index 5d174347036f..5bc85c0e4c13 100644 --- a/compiler/GHC/Types/Basic.hs +++ b/compiler/GHC/Types/Basic.hs @@ -1010,7 +1010,7 @@ data TupleSort = BoxedTuple | UnboxedTuple | ConstraintTuple - deriving( Eq, Data ) + deriving( Eq, Data, Ord ) instance Outputable TupleSort where ppr ts = text $ diff --git a/compiler/GHC/Types/FieldLabel.hs b/compiler/GHC/Types/FieldLabel.hs index 9c35a3ee30da..edadf377412c 100644 --- a/compiler/GHC/Types/FieldLabel.hs +++ b/compiler/GHC/Types/FieldLabel.hs @@ -140,9 +140,7 @@ instance Binary Name => Binary FieldLabel where put_ bh (FieldLabel aa ab ac) = do put_ bh aa put_ bh ab - case getUserData bh of - UserData{ ud_put_binding_name = put_binding_name } -> - put_binding_name bh ac + put_ bh ac get bh = do aa <- get bh ab <- get bh diff --git a/compiler/GHC/Types/Name.hs b/compiler/GHC/Types/Name.hs index f1979dcbb9a3..981a9817f5e8 100644 --- a/compiler/GHC/Types/Name.hs +++ b/compiler/GHC/Types/Name.hs @@ -663,12 +663,12 @@ instance Data Name where -- distinction. instance Binary Name where put_ bh name = - case getUserData bh of - UserData{ ud_put_nonbinding_name = put_name } -> put_name bh name + case findUserDataWriter Proxy bh of + tbl -> putEntry tbl bh name get bh = - case getUserData bh of - UserData { ud_get_name = get_name } -> get_name bh + case findUserDataReader Proxy bh of + tbl -> getEntry tbl bh {- ************************************************************************ diff --git a/compiler/GHC/Utils/Binary.hs b/compiler/GHC/Utils/Binary.hs index 2b246f3c66d2..77be07ea593e 100644 --- a/compiler/GHC/Utils/Binary.hs +++ b/compiler/GHC/Utils/Binary.hs @@ -66,15 +66,29 @@ module GHC.Utils.Binary lazyPutMaybe, -- * User data - UserData(..), getUserData, setUserData, - newReadState, newWriteState, noUserData, - + ReaderUserData, getReaderUserData, setReaderUserData, noReaderUserData, + WriterUserData, getWriterUserData, setWriterUserData, noWriterUserData, + mkWriterUserData, mkReaderUserData, + newReadState, newWriteState, + addReaderToUserData, addWriterToUserData, + findUserDataReader, findUserDataWriter, + -- * Binary Readers & Writers + BinaryReader(..), BinaryWriter(..), + mkWriter, mkReader, + SomeBinaryReader, SomeBinaryWriter, + mkSomeBinaryReader, mkSomeBinaryWriter, + -- * Tables + ReaderTable(..), + WriterTable(..), -- * String table ("dictionary") + initFastStringReaderTable, initFastStringWriterTable, putDictionary, getDictionary, putFS, - FSTable, initFSTable, getDictFastString, putDictFastString, + FSTable(..), getDictFastString, putDictFastString, -- * Newtype wrappers - BinSpan(..), BinSrcSpan(..), BinLocated(..) + BinSpan(..), BinSrcSpan(..), BinLocated(..), + -- * Newtypes for types that have canonically more than one valid encoding + BindingName(..), ) where import GHC.Prelude @@ -87,31 +101,37 @@ import GHC.Utils.Panic.Plain import GHC.Types.Unique.FM import GHC.Data.FastMutInt import GHC.Utils.Fingerprint +import GHC.Utils.Misc (HasCallStack) import GHC.Types.SrcLoc import GHC.Types.Unique import qualified GHC.Data.Strict as Strict import GHC.Utils.Outputable( JoinPointHood(..) ) import Control.DeepSeq +import Control.Monad ( when, (<$!>), unless, forM_, void ) import Foreign hiding (shiftL, shiftR, void) import Data.Array import Data.Array.IO import Data.Array.Unsafe import Data.ByteString (ByteString) +import Data.Coerce import qualified Data.ByteString.Internal as BS import qualified Data.ByteString.Unsafe as BS import Data.IORef import Data.Char ( ord, chr ) import Data.List.NonEmpty ( NonEmpty(..)) import qualified Data.List.NonEmpty as NonEmpty +import Data.Map.Strict (Map) +import qualified Data.Map.Strict as Map +import Data.Proxy import Data.Set ( Set ) import qualified Data.Set as Set import Data.Time import Data.List (unfoldr) -import Control.Monad ( when, (<$!>), unless, forM_, void ) import System.IO as IO import System.IO.Unsafe ( unsafeInterleaveIO ) import System.IO.Error ( mkIOError, eofErrorType ) +import qualified Type.Reflection as Refl import GHC.Real ( Ratio(..) ) import Data.IntMap (IntMap) import qualified Data.IntMap as IntMap @@ -119,6 +139,8 @@ import qualified Data.IntMap as IntMap import GHC.ForeignPtr ( unsafeWithForeignPtr ) #endif +import Unsafe.Coerce (unsafeCoerce) + type BinArray = ForeignPtr Word8 #if !MIN_VERSION_base(4,15,0) @@ -155,10 +177,10 @@ dataHandle (BinData size bin) = do ixr <- newFastMutInt 0 szr <- newFastMutInt size binr <- newIORef bin - return (BinMem noUserData ixr szr binr) + return (BinMem noReaderUserData noWriterUserData ixr szr binr) handleData :: BinHandle -> IO BinData -handleData (BinMem _ ixr _ binr) = BinData <$> readFastMutInt ixr <*> readIORef binr +handleData (BinMem _ _ ixr _ binr) = BinData <$> readFastMutInt ixr <*> readIORef binr --------------------------------------------------------------- -- BinHandle @@ -166,7 +188,8 @@ handleData (BinMem _ ixr _ binr) = BinData <$> readFastMutInt ixr <*> readIORef data BinHandle = BinMem { -- binary data stored in an unboxed array - bh_usr :: UserData, -- sigh, need parameterized modules :-) + bh_reader :: ReaderUserData, -- sigh, need parameterized modules :-) + bh_writer :: WriterUserData, -- sigh, need parameterized modules :-) _off_r :: !FastMutInt, -- the current offset _sz_r :: !FastMutInt, -- size of the array (cached) _arr_r :: !(IORef BinArray) -- the array (bounds: (0,size-1)) @@ -174,15 +197,35 @@ data BinHandle -- XXX: should really store a "high water mark" for dumping out -- the binary data to a file. -getUserData :: BinHandle -> UserData -getUserData bh = bh_usr bh +getReaderUserData :: BinHandle -> ReaderUserData +getReaderUserData bh = bh_reader bh -setUserData :: BinHandle -> UserData -> BinHandle -setUserData bh us = bh { bh_usr = us } +getWriterUserData :: BinHandle -> WriterUserData +getWriterUserData bh = bh_writer bh + +setWriterUserData :: BinHandle -> WriterUserData -> BinHandle +setWriterUserData bh us = bh { bh_writer = us } + +setReaderUserData :: BinHandle -> ReaderUserData -> BinHandle +setReaderUserData bh us = bh { bh_reader = us } + +addReaderToUserData :: SomeBinaryReader -> BinHandle -> BinHandle +addReaderToUserData cache@(SomeBinaryReader typRep _) bh = bh + { bh_reader = (bh_reader bh) + { ud_reader_data = Map.insert (Refl.SomeTypeRep typRep) cache (ud_reader_data (bh_reader bh)) + } + } + +addWriterToUserData :: SomeBinaryWriter -> BinHandle -> BinHandle +addWriterToUserData cache@(SomeBinaryWriter typRep _) bh = bh + { bh_writer = (bh_writer bh) + { ud_writer_data = Map.insert (Refl.SomeTypeRep typRep) cache (ud_writer_data (bh_writer bh)) + } + } -- | Get access to the underlying buffer. withBinBuffer :: BinHandle -> (ByteString -> IO a) -> IO a -withBinBuffer (BinMem _ ix_r _ arr_r) action = do +withBinBuffer (BinMem _ _ ix_r _ arr_r) action = do arr <- readIORef arr_r ix <- readFastMutInt ix_r action $ BS.fromForeignPtr arr 0 ix @@ -192,7 +235,7 @@ unsafeUnpackBinBuffer (BS.BS arr len) = do arr_r <- newIORef arr ix_r <- newFastMutInt 0 sz_r <- newFastMutInt len - return (BinMem noUserData ix_r sz_r arr_r) + return (BinMem noReaderUserData noWriterUserData ix_r sz_r arr_r) --------------------------------------------------------------- -- Bin @@ -235,13 +278,13 @@ openBinMem size arr_r <- newIORef arr ix_r <- newFastMutInt 0 sz_r <- newFastMutInt size - return (BinMem noUserData ix_r sz_r arr_r) + return (BinMem noReaderUserData noWriterUserData ix_r sz_r arr_r) tellBin :: BinHandle -> IO (Bin a) -tellBin (BinMem _ r _ _) = do ix <- readFastMutInt r; return (BinPtr ix) +tellBin (BinMem _ _ r _ _) = do ix <- readFastMutInt r; return (BinPtr ix) seekBin :: BinHandle -> Bin a -> IO () -seekBin h@(BinMem _ ix_r sz_r _) (BinPtr !p) = do +seekBin h@(BinMem _ _ ix_r sz_r _) (BinPtr !p) = do sz <- readFastMutInt sz_r if (p > sz) then do expandBin h p; writeFastMutInt ix_r p @@ -252,14 +295,14 @@ seekBin h@(BinMem _ ix_r sz_r _) (BinPtr !p) = do -- This operation may 'panic', if the pointer location is out of bounds of the -- buffer of 'BinHandle'. seekBinNoExpand :: BinHandle -> Bin a -> IO () -seekBinNoExpand (BinMem _ ix_r sz_r _) (BinPtr !p) = do +seekBinNoExpand (BinMem _ _ ix_r sz_r _) (BinPtr !p) = do sz <- readFastMutInt sz_r if (p > sz) then panic "seekBinNoExpand: seek out of range" else writeFastMutInt ix_r p writeBinMem :: BinHandle -> FilePath -> IO () -writeBinMem (BinMem _ ix_r _ arr_r) fn = do +writeBinMem (BinMem _ _ ix_r _ arr_r) fn = do h <- openBinaryFile fn WriteMode arr <- readIORef arr_r ix <- readFastMutInt ix_r @@ -291,11 +334,11 @@ readBinMem_ filesize h = do arr_r <- newIORef arr ix_r <- newFastMutInt 0 sz_r <- newFastMutInt filesize - return (BinMem noUserData ix_r sz_r arr_r) + return (BinMem noReaderUserData noWriterUserData ix_r sz_r arr_r) -- expand the size of the array to include a specified offset expandBin :: BinHandle -> Int -> IO () -expandBin (BinMem _ _ sz_r arr_r) !off = do +expandBin (BinMem _ _ _ sz_r arr_r) !off = do !sz <- readFastMutInt sz_r let !sz' = getSize sz arr <- readIORef arr_r @@ -354,7 +397,7 @@ foldGet' n bh init_b f = go 0 init_b -- After the action has run advance the index to the buffer -- by size bytes. putPrim :: BinHandle -> Int -> (Ptr Word8 -> IO ()) -> IO () -putPrim h@(BinMem _ ix_r sz_r arr_r) size f = do +putPrim h@(BinMem _ _ ix_r sz_r arr_r) size f = do ix <- readFastMutInt ix_r sz <- readFastMutInt sz_r when (ix + size > sz) $ @@ -376,7 +419,7 @@ putPrim h@(BinMem _ ix_r sz_r arr_r) size f = do -- writeFastMutInt ix_r (ix + written) getPrim :: BinHandle -> Int -> (Ptr Word8 -> IO a) -> IO a -getPrim (BinMem _ ix_r sz_r arr_r) size f = do +getPrim (BinMem _ _ ix_r sz_r arr_r) size f = do ix <- readFastMutInt ix_r sz <- readFastMutInt sz_r when (ix + size > sz) $ @@ -1065,7 +1108,9 @@ lazyGetMaybe bh = do -- UserData -- ----------------------------------------------------------------------------- --- | Information we keep around during interface file +-- Note [Binary UserData] +-- ~~~~~~~~~~~~~~~~~~~~~~ +-- Information we keep around during interface file -- serialization/deserialization. Namely we keep the functions for serializing -- and deserializing 'Name's and 'FastString's. We do this because we actually -- use serialization in two distinct settings, @@ -1084,64 +1129,221 @@ lazyGetMaybe bh = do -- non-binding Name is serialized as the fingerprint of the thing they -- represent. See Note [Fingerprinting IfaceDecls] for further discussion. -- -data UserData = - UserData { - -- for *deserialising* only: - ud_get_name :: BinHandle -> IO Name, - ud_get_fs :: BinHandle -> IO FastString, - - -- for *serialising* only: - ud_put_nonbinding_name :: BinHandle -> Name -> IO (), - -- ^ serialize a non-binding 'Name' (e.g. a reference to another - -- binding). - ud_put_binding_name :: BinHandle -> Name -> IO (), - -- ^ serialize a binding 'Name' (e.g. the name of an IfaceDecl) - ud_put_fs :: BinHandle -> FastString -> IO () + +-- | Newtype to serialise binding names differently to non-binding 'Name'. +-- See Note [Binary UserData] +newtype BindingName = BindingName { getBindingName :: Name } + deriving ( Eq ) + +-- | Existential for 'BinaryWriter' with a type witness. +data SomeBinaryWriter = forall a . SomeBinaryWriter (Refl.TypeRep a) (BinaryWriter a) + +-- | Existential for 'BinaryReader' with a type witness. +data SomeBinaryReader = forall a . SomeBinaryReader (Refl.TypeRep a) (BinaryReader a) + +-- | UserData required to serialise symbols for interface files. +-- +-- See Note [Binary UserData] +data WriterUserData = + WriterUserData { + ud_writer_data :: Map Refl.SomeTypeRep SomeBinaryWriter + -- ^ A mapping from a type witness to the 'Writer' for the associated type. + -- This is a 'Map' because microbenchmarks indicated this is more efficient + -- than other representations for less than ten elements. + -- + -- Considered representations: + -- + -- * [(TypeRep, SomeBinaryWriter)] + -- * bytehash (on hackage) + -- * Map TypeRep SomeBinaryWriter } +-- | UserData required to deserialise symbols for interface files. +-- +-- See Note [Binary UserData] +data ReaderUserData = + ReaderUserData { + ud_reader_data :: Map Refl.SomeTypeRep SomeBinaryReader + -- ^ A mapping from a type witness to the 'Reader' for the associated type. + -- This is a 'Map' because microbenchmarks indicated this is more efficient + -- than other representations for less than ten elements. + -- + -- Considered representations: + -- + -- * [(TypeRep, SomeBinaryReader)] + -- * bytehash (on hackage) + -- * Map TypeRep SomeBinaryReader + } + +mkWriterUserData :: [SomeBinaryWriter] -> WriterUserData +mkWriterUserData caches = noWriterUserData + { ud_writer_data = Map.fromList $ map (\cache@(SomeBinaryWriter typRep _) -> (Refl.SomeTypeRep typRep, cache)) caches + } + +mkReaderUserData :: [SomeBinaryReader] -> ReaderUserData +mkReaderUserData caches = noReaderUserData + { ud_reader_data = Map.fromList $ map (\cache@(SomeBinaryReader typRep _) -> (Refl.SomeTypeRep typRep, cache)) caches + } + +mkSomeBinaryWriter :: forall a . Refl.Typeable a => BinaryWriter a -> SomeBinaryWriter +mkSomeBinaryWriter cb = SomeBinaryWriter (Refl.typeRep @a) cb + +mkSomeBinaryReader :: forall a . Refl.Typeable a => BinaryReader a -> SomeBinaryReader +mkSomeBinaryReader cb = SomeBinaryReader (Refl.typeRep @a) cb + +newtype BinaryReader s = BinaryReader + { getEntry :: BinHandle -> IO s + } deriving (Functor) + +newtype BinaryWriter s = BinaryWriter + { putEntry :: BinHandle -> s -> IO () + } + +mkWriter :: (BinHandle -> s -> IO ()) -> BinaryWriter s +mkWriter f = BinaryWriter + { putEntry = f + } + +mkReader :: (BinHandle -> IO s) -> BinaryReader s +mkReader f = BinaryReader + { getEntry = f + } + +-- | Find the 'BinaryReader' for the 'Binary' instance for the type identified by 'Proxy a'. +-- +-- If no 'BinaryReader' for that type can be found, this function will panic at run-time. +findUserDataReader :: forall a . (HasCallStack, Refl.Typeable a) => Proxy a -> BinHandle -> BinaryReader a +findUserDataReader query bh = + case Map.lookup (Refl.someTypeRep query) (ud_reader_data $ getReaderUserData bh) of + Nothing -> panic $ "Failed to find BinaryReader for the key: " ++ show (Refl.someTypeRep query) + Just (SomeBinaryReader _ (reader :: BinaryReader x)) -> + unsafeCoerce @(BinaryReader x) @(BinaryReader a) reader + -- This 'unsafeCoerce' could be written safely like this: + -- + -- @ + -- Just (SomeBinaryReader _ (reader :: BinaryReader x)) -> + -- case testEquality (typeRep @a) tyRep of + -- Just Refl -> coerce @(BinaryReader x) @(BinaryReader a) reader + -- Nothing -> panic $ "Invariant violated" + -- @ + -- + -- But it comes at a slight performance cost and this function is used in + -- binary serialisation hot loops, thus, we prefer the small performance boost over + -- the additional type safety. + +-- | Find the 'BinaryWriter' for the 'Binary' instance for the type identified by 'Proxy a'. +-- +-- If no 'BinaryWriter' for that type can be found, this function will panic at run-time. +findUserDataWriter :: forall a . (HasCallStack, Refl.Typeable a) => Proxy a -> BinHandle -> BinaryWriter a +findUserDataWriter query bh = + case Map.lookup (Refl.someTypeRep query) (ud_writer_data $ getWriterUserData bh) of + Nothing -> panic $ "Failed to find BinaryWriter for the key: " ++ show (Refl.someTypeRep query) + Just (SomeBinaryWriter _ (writer :: BinaryWriter x)) -> + unsafeCoerce @(BinaryWriter x) @(BinaryWriter a) writer + -- This 'unsafeCoerce' could be written safely like this: + -- + -- @ + -- Just (SomeBinaryWriter tyRep (writer :: BinaryWriter x)) -> + -- case testEquality (typeRep @a) tyRep of + -- Just Refl -> coerce @(BinaryWriter x) @(BinaryWriter a) writer + -- Nothing -> panic $ "Invariant violated" + -- @ + -- + -- But it comes at a slight performance cost and this function is used in + -- binary serialisation hot loops, thus, we prefer the small performance boost over + -- the additional type safety. + + +noReaderUserData :: ReaderUserData +noReaderUserData = ReaderUserData + { ud_reader_data = Map.empty + } + +noWriterUserData :: WriterUserData +noWriterUserData = WriterUserData + { ud_writer_data = Map.empty + } + newReadState :: (BinHandle -> IO Name) -- ^ how to deserialize 'Name's -> (BinHandle -> IO FastString) - -> UserData -newReadState get_name get_fs - = UserData { ud_get_name = get_name, - ud_get_fs = get_fs, - ud_put_nonbinding_name = undef "put_nonbinding_name", - ud_put_binding_name = undef "put_binding_name", - ud_put_fs = undef "put_fs" - } + -> ReaderUserData +newReadState get_name get_fs = + mkReaderUserData + [ mkSomeBinaryReader $ mkReader get_name + , mkSomeBinaryReader $ mkReader @BindingName (coerce get_name) + , mkSomeBinaryReader $ mkReader get_fs + ] newWriteState :: (BinHandle -> Name -> IO ()) -- ^ how to serialize non-binding 'Name's -> (BinHandle -> Name -> IO ()) -- ^ how to serialize binding 'Name's -> (BinHandle -> FastString -> IO ()) - -> UserData -newWriteState put_nonbinding_name put_binding_name put_fs - = UserData { ud_get_name = undef "get_name", - ud_get_fs = undef "get_fs", - ud_put_nonbinding_name = put_nonbinding_name, - ud_put_binding_name = put_binding_name, - ud_put_fs = put_fs - } - -noUserData :: UserData -noUserData = UserData - { ud_get_name = undef "get_name" - , ud_get_fs = undef "get_fs" - , ud_put_nonbinding_name = undef "put_nonbinding_name" - , ud_put_binding_name = undef "put_binding_name" - , ud_put_fs = undef "put_fs" + -> WriterUserData +newWriteState put_non_binding_name put_binding_name put_fs = + mkWriterUserData + [ mkSomeBinaryWriter $ mkWriter (\bh name -> put_binding_name bh (getBindingName name)) + , mkSomeBinaryWriter $ mkWriter put_non_binding_name + , mkSomeBinaryWriter $ mkWriter put_fs + ] + +-- ---------------------------------------------------------------------------- +-- Types for lookup and deduplication tables. +-- ---------------------------------------------------------------------------- + +-- | A 'ReaderTable' describes how to deserialise a table from disk, +-- and how to create a 'BinaryReader' that looks up values in the deduplication table. +data ReaderTable a = ReaderTable + { getTable :: BinHandle -> IO (SymbolTable a) + -- ^ Deserialise a list of elements into a 'SymbolTable'. + , mkReaderFromTable :: SymbolTable a -> BinaryReader a + -- ^ Given the table from 'getTable', create a 'BinaryReader' + -- that reads values only from the 'SymbolTable'. } -undef :: String -> a -undef s = panic ("Binary.UserData: no " ++ s) +-- | A 'WriterTable' is an interface any deduplication table can implement to +-- describe how the table can be written to disk. +newtype WriterTable = WriterTable + { putTable :: BinHandle -> IO Int + -- ^ Serialise a table to disk. Returns the number of written elements. + } --------------------------------------------------------- -- The Dictionary --------------------------------------------------------- -type Dictionary = Array Int FastString -- The dictionary - -- Should be 0-indexed +-- | A 'SymbolTable' of 'FastString's. +type Dictionary = SymbolTable FastString + +initFastStringReaderTable :: IO (ReaderTable FastString) +initFastStringReaderTable = do + return $ + ReaderTable + { getTable = getDictionary + , mkReaderFromTable = \tbl -> mkReader (getDictFastString tbl) + } + +initFastStringWriterTable :: IO (WriterTable, BinaryWriter FastString) +initFastStringWriterTable = do + dict_next_ref <- newFastMutInt 0 + dict_map_ref <- newIORef emptyUFM + let bin_dict = + FSTable + { fs_tab_next = dict_next_ref + , fs_tab_map = dict_map_ref + } + let put_dict bh = do + fs_count <- readFastMutInt dict_next_ref + dict_map <- readIORef dict_map_ref + putDictionary bh fs_count dict_map + pure fs_count + + return + ( WriterTable + { putTable = put_dict + } + , mkWriter $ putDictFastString bin_dict + ) putDictionary :: BinHandle -> Int -> UniqFM FastString (Int,FastString) -> IO () putDictionary bh sz dict = do @@ -1164,28 +1366,6 @@ getDictFastString dict bh = do j <- get bh return $! (dict ! fromIntegral (j :: Word32)) - -initFSTable :: BinHandle -> IO (BinHandle, FSTable, IO Int) -initFSTable bh = do - dict_next_ref <- newFastMutInt 0 - dict_map_ref <- newIORef emptyUFM - let bin_dict = FSTable - { fs_tab_next = dict_next_ref - , fs_tab_map = dict_map_ref - } - let put_dict = do - fs_count <- readFastMutInt dict_next_ref - dict_map <- readIORef dict_map_ref - putDictionary bh fs_count dict_map - pure fs_count - - -- BinHandle with FastString writing support - let ud = getUserData bh - let ud_fs = ud { ud_put_fs = putDictFastString bin_dict } - let bh_fs = setUserData bh ud_fs - - return (bh_fs,bin_dict,put_dict) - putDictFastString :: FSTable -> BinHandle -> FastString -> IO () putDictFastString dict bh fs = allocateFastString dict fs >>= put_ bh @@ -1215,10 +1395,9 @@ data FSTable = FSTable { fs_tab_next :: !FastMutInt -- The next index to use -- The Symbol Table --------------------------------------------------------- --- On disk, the symbol table is an array of IfExtName, when --- reading it in we turn it into a SymbolTable. - -type SymbolTable = Array Int Name +-- | Symbols that are read from disk. +-- The 'SymbolTable' index starts on '0'. +type SymbolTable a = Array Int a --------------------------------------------------------- -- Reading and writing FastStrings @@ -1263,12 +1442,12 @@ instance Binary ByteString where instance Binary FastString where put_ bh f = - case getUserData bh of - UserData { ud_put_fs = put_fs } -> put_fs bh f + case findUserDataWriter (Proxy :: Proxy FastString) bh of + tbl -> putEntry tbl bh f get bh = - case getUserData bh of - UserData { ud_get_fs = get_fs } -> get_fs bh + case findUserDataReader (Proxy :: Proxy FastString) bh of + tbl -> getEntry tbl bh deriving instance Binary NonDetFastString deriving instance Binary LexicalFastString diff --git a/utils/haddock b/utils/haddock index 358307f6fa52..278f8b07e027 160000 --- a/utils/haddock +++ b/utils/haddock @@ -1 +1 @@ -Subproject commit 358307f6fa52daa2c2411a4975c87b30932af3dc +Subproject commit 278f8b07e027ce33f11a73d3f055c99a34d3cee9 -- GitLab