Skip to content
Snippets Groups Projects
Commit 5126764b authored by Zejun Wu's avatar Zejun Wu Committed by Ben Gamari
Browse files

Rewrite FastString table in concurrent hashtable

Summary:
Reimplement the global FastString table using a concurrent hashtable with
fixed-size segments and dynamically growing buckets instead of fixed-size
buckets.

This addresses the problem that `mkFastString` was not linear when the
total number of entries was large.

Test Plan:
./validate

```
inplace/bin/ghc-stage2 --interactive -dfaststring-stats < /dev/null
GHCi, version 8.7.20181005: http://www.haskell.org/ghc/  :? for help
Prelude> Leaving GHCi.
FastString stats:
    segments:          256
    buckets:           16384
    entries:           7117
    largest segment:   64
    smallest segment:  64
    longest bucket:    5
    has z-encoding:    0%
```

Also compare the two implementations using

{P187}

The new implementation is on a par with the old version under different
combinations of parameters and performs better when the number of
FastStrings is large.

{P188}

Reviewers: simonmar, bgamari, niteria

Reviewed By: simonmar, bgamari

Subscribers: rwbarton, carter

GHC Trac Issues: #14854

Differential Revision: https://phabricator.haskell.org/D5211
parent 42575701
No related merge requests found
......@@ -36,7 +36,6 @@ module FastString
mkFastStringByteString,
fastZStringToByteString,
unsafeMkByteString,
hashByteString,
-- * FastZString
FastZString,
......@@ -104,6 +103,7 @@ import FastFunctions
import Panic
import Util
import Control.Concurrent.MVar
import Control.DeepSeq
import Control.Monad
import Data.ByteString (ByteString)
......@@ -116,13 +116,12 @@ import GHC.Exts
import System.IO
import System.IO.Unsafe ( unsafePerformIO )
import Data.Data
import Data.IORef ( IORef, newIORef, readIORef, atomicModifyIORef' )
import Data.IORef
import Data.Maybe ( isJust )
import Data.Char
import Data.List ( elemIndex )
import Data.Semigroup as Semi
import GHC.IO ( IO(..), unsafeDupablePerformIO )
import GHC.IO ( IO(..), unIO, unsafeDupablePerformIO )
import Foreign
......@@ -132,9 +131,6 @@ import GHC.Conc.Sync (sharedCAF)
import GHC.Base ( unpackCString#, unpackNBytes# )
#define hASH_TBL_SIZE 4091
#define hASH_TBL_SIZE_UNBOXED 4091#
-- | Project the underlying 'ByteString' out of a 'FastString'.
-- O(1): simply reads the @fs_bs@ field, no copying.
fastStringToByteString :: FastString -> ByteString
fastStringToByteString = fs_bs
......@@ -146,8 +142,8 @@ fastZStringToByteString (FastZString bs) = bs
-- | Pack a 'String' into a 'ByteString' with 'BSC.pack'.
-- \"Unsafe\" because 'BSC.pack' truncates every 'Char' to 8 bits;
-- callers are expected to supply 8-bit text -- NOTE(review): confirm
-- the intended character range at the call sites.
unsafeMkByteString :: String -> ByteString
unsafeMkByteString str = BSC.pack str
hashByteString :: ByteString -> Int
hashByteString bs
-- | Hash the bytes of a 'FastString' using 'hashStr'.
-- Uses 'inlinePerformIO' over 'BS.unsafeUseAsCStringLen'; presumably safe
-- because the ByteString's storage is never mutated -- the pointer is only
-- read within the callback.
hashFastString :: FastString -> Int
hashFastString (FastString _ _ bs _)
    = inlinePerformIO $ BS.unsafeUseAsCStringLen bs $ \(ptr, len) ->
        return $ hashStr (castPtr ptr) len
......@@ -243,21 +239,85 @@ and updates to multiple buckets with low synchronization overhead.
See Note [Updating the FastString table] on how it's updated.
-}
data FastStringTable =
FastStringTable
{-# UNPACK #-} !(IORef Int) -- the unique ID counter shared with all buckets
(MutableArray# RealWorld (IORef [FastString])) -- the array of mutable buckets
string_table :: FastStringTable
{-# NOINLINE string_table #-}
string_table = unsafePerformIO $ do
-- | The global string-interning table: a shared unique-ID counter plus a
-- fixed-size immutable array of segments (each behind an 'IORef' so a
-- segment can be swapped out when it is resized).
data FastStringTable = FastStringTable
  {-# UNPACK #-} !(IORef Int)  -- the unique ID counter shared with all buckets
  (Array# (IORef FastStringTableSegment))  -- concurrent segments
-- | One segment of the global table: a lock serialising writers to this
-- segment, an element count (used to decide when to grow), and a mutable
-- array of buckets, each bucket being a list of 'FastString's.
data FastStringTableSegment = FastStringTableSegment
  {-# UNPACK #-} !(MVar ())  -- the lock for write in each segment
  {-# UNPACK #-} !(IORef Int)  -- the number of elements
  (MutableArray# RealWorld [FastString])  -- buckets in this segment
{-
Following parameters are determined based on:
* Benchmark based on testsuite/tests/utils/should_run/T14854.hs
* Stats of @echo :browse | ghc --interactive -dfaststring-stats >/dev/null@:
on 2018-10-24, we have 13920 entries.
-}
-- | Sizing parameters of the global FastString table; see the benchmark
-- notes above (testsuite T14854 and @-dfaststring-stats@ counts).
segmentBits, numSegments, segmentMask, initialNumBuckets :: Int
segmentBits = 8
-- Derived from 'segmentBits' instead of hard-coded literals so the three
-- values can never fall out of sync if the segment count is retuned.
numSegments = 2 ^ segmentBits     -- 256
segmentMask = numSegments - 1     -- 0xff
-- Starting bucket-array size per segment; doubled on demand.
initialNumBuckets = 64
-- | Pick the segment for a hash value: the low 'segmentBits' bits of the
-- hash index the segment array, so a given string always maps to the same
-- segment.
hashToSegment# :: Int# -> Int#
hashToSegment# hash# = hash# `andI#` segmentMask#
  where
    !(I# segmentMask#) = segmentMask
-- | Pick the bucket within a segment: discard the segment-selecting low
-- bits (logical shift, so the result is non-negative), then reduce modulo
-- the segment's current bucket count.  Recomputed after every resize since
-- the array size changes.
hashToIndex# :: MutableArray# RealWorld [FastString] -> Int# -> Int#
hashToIndex# buckets# hash# =
    (hash# `uncheckedIShiftRL#` segmentBits#) `remInt#` size#
  where
    !(I# segmentBits#) = segmentBits
    size# = sizeofMutableArray# buckets#
-- | Grow a segment once its element count reaches its bucket count
-- (i.e. load factor 1): allocate a bucket array of double the size,
-- rehash every entry into it, and publish the new segment via the IORef.
-- NOTE(review): assumes the caller holds the segment's write lock -- the
-- only call site visible here (insert in mkFastStringWith) does.
maybeResizeSegment :: IORef FastStringTableSegment -> IO FastStringTableSegment
maybeResizeSegment segmentRef = do
  segment@(FastStringTableSegment lock counter old#) <- readIORef segmentRef
  let oldSize# = sizeofMutableArray# old#
      newSize# = oldSize# *# 2#
  (I# n#) <- readIORef counter
  if isTrue# (n# <# newSize#) -- maximum load of 1
  then return segment
  else do
    -- Allocate the doubled bucket array; lock and counter are reused.
    resizedSegment@(FastStringTableSegment _ _ new#) <- IO $ \s1# ->
      case newArray# newSize# [] s1# of
        (# s2#, arr# #) -> (# s2#, FastStringTableSegment lock counter arr# #)
    -- Rehash every string of every old bucket into the new array.
    forM_ [0 .. (I# oldSize#) - 1] $ \(I# i#) -> do
      fsList <- IO $ readArray# old# i#
      forM_ fsList $ \fs -> do
        let -- Shall we store in hash value in FastString instead?
            !(I# hash#) = hashFastString fs
            idx# = hashToIndex# new# hash#
        -- Cons onto the destination bucket, threading the state token.
        IO $ \s1# ->
          case readArray# new# idx# s1# of
            (# s2#, bucket #) -> case writeArray# new# idx# (fs: bucket) s2# of
              s3# -> (# s3#, () #)
    writeIORef segmentRef resizedSegment
    return resizedSegment
{-# NOINLINE stringTable #-}
stringTable :: FastStringTable
stringTable = unsafePerformIO $ do
let !(I# numSegments#) = numSegments
!(I# initialNumBuckets#) = initialNumBuckets
loop a# i# s1#
| isTrue# (i# ==# numSegments#) = s1#
| otherwise = case newMVar () `unIO` s1# of
(# s2#, lock #) -> case newIORef 0 `unIO` s2# of
(# s3#, counter #) -> case newArray# initialNumBuckets# [] s3# of
(# s4#, buckets# #) -> case newIORef
(FastStringTableSegment lock counter buckets#) `unIO` s4# of
(# s5#, segment #) -> case writeArray# a# i# segment s5# of
s6# -> loop a# (i# +# 1#) s6#
uid <- newIORef 603979776 -- ord '$' * 0x01000000
tab <- IO $ \s1# -> case newArray# hASH_TBL_SIZE_UNBOXED (panic "string_table") s1# of
(# s2#, arr# #) ->
(# s2#, FastStringTable uid arr# #)
forM_ [0.. hASH_TBL_SIZE-1] $ \i -> do
bucket <- newIORef []
updTbl tab i bucket
tab <- IO $ \s1# ->
case newArray# numSegments# (panic "string_table") s1# of
(# s2#, arr# #) -> case loop arr# 0# s2# of
s3# -> case unsafeFreezeArray# arr# s3# of
(# s4#, segments# #) -> (# s4#, FastStringTable uid segments# #)
-- use the support wired into the RTS to share this CAF among all images of
-- libHSghc
......@@ -303,27 +363,27 @@ lower-level `sharedCAF` mechanism that relies on Globals.c.
-}
-- | (Old fixed-size table.)  Read the bucket 'IORef' at index @i@.
-- NOTE(review): this belongs to the removed side of the diff; the new
-- segment-based table does not use it.
lookupTbl :: FastStringTable -> Int -> IO (IORef [FastString])
lookupTbl (FastStringTable _ arr#) (I# i#) =
  IO $ \ s# -> readArray# arr# i# s#
-- | (Old fixed-size table.)  Store a bucket 'IORef' at index @i@.
-- NOTE(review): removed-side code; only used by the old initialisation.
updTbl :: FastStringTable -> Int -> IORef [FastString] -> IO ()
updTbl (FastStringTable _uid arr#) (I# i#) ls = do
  (IO $ \ s# -> case writeArray# arr# i# ls s# of { s2# -> (# s2#, () #) })
-- | Build a 'FastString' from a NUL-terminated primitive address, e.g. an
-- @Addr#@ literal; the length is recovered with 'ptrStrLength'.
mkFastString# :: Addr# -> FastString
mkFastString# a# =
  let ptr = Ptr a#
  in mkFastStringBytes ptr (ptrStrLength ptr)
{- Note [Updating the FastString table]
We use a concurrent hashtable which contains multiple segments; each hash
value always maps to the same segment.  Reads are lock-free; a write to a
segment must acquire that segment's lock to avoid race conditions, while
writes to different segments are independent.
The procedure goes like this:
1. Read the relevant bucket and perform a look up of the string.
2. If it exists, return it.
3. Otherwise grab a unique ID, create a new FastString and atomically attempt
to update the relevant bucket with this FastString:
1. Find out which segment to operate on based on the hash value
2. Read the relevant bucket and perform a look up of the string.
3. If it exists, return it.
4. Otherwise grab a unique ID, create a new FastString and atomically attempt
to update the relevant segment with this FastString:
* Resize the segment by doubling the number of buckets when the number of
FastStrings in this segment grows beyond the threshold.
* Double check that the string is not in the bucket. Another thread may have
inserted it while we were creating our string.
* Return the existing FastString if it exists. The one we preemptively
......@@ -331,43 +391,51 @@ The procedure goes like this:
* Otherwise, insert and return the string we created.
-}
{- Note [Double-checking the bucket]
It is not necessary to check the entire bucket the second time. We only have to
check the strings that are new to the bucket since the last time we read it.
-}
-- | Intern @len@ bytes at @ptr@: return the existing 'FastString' if the
-- table already has one, otherwise build a new one with @mk_fs@ (passing it
-- a fresh unique) and insert it.  See Note [Updating the FastString table].
mkFastStringWith :: (Int -> IO FastString) -> Ptr Word8 -> Int -> IO FastString
mkFastStringWith mk_fs !ptr !len = do
  -- NOTE(review): the following block, down to the atomicModifyIORef'
  -- closure, appears to be residue of the OLD bucket-list implementation
  -- interleaved into this listing by the diff; the new segment-based body
  -- starts at the readIORef of segmentRef below.
  let hash = hashStr ptr len
  bucket <- lookupTbl string_table hash
  ls1 <- readIORef bucket
  res <- bucket_match ls1 len ptr
  case res of
    Just v -> return v
    Nothing -> do
      n <- get_uid
      new_fs <- mk_fs n
      atomicModifyIORef' bucket $ \ls2 ->
        -- Note [Double-checking the bucket]
        let delta_ls = case ls1 of
              [] -> ls2
              l:_ -> case l `elemIndex` ls2 of
                Nothing -> panic "mkFastStringWith"
                Just idx -> take idx ls2
        -- NB: Might as well use inlinePerformIO, since the call to
        -- bucket_match doesn't perform any IO that could be floated
        -- out of this closure or erroneously duplicated.
        in case inlinePerformIO (bucket_match delta_ls len ptr) of
             Nothing -> (new_fs:ls2, new_fs)
             Just fs -> (ls2,fs)
  -- New implementation: optimistic, lock-free read of the bucket first ...
  FastStringTableSegment lock _ buckets# <- readIORef segmentRef
  let idx# = hashToIndex# buckets# hash#
  bucket <- IO $ readArray# buckets# idx#
  res <- bucket_match bucket len ptr
  case res of
    Just found -> return found
    Nothing -> do
      n <- get_uid
      new_fs <- mk_fs n
      -- ... and only take the segment's write lock when we must insert.
      withMVar lock $ \_ -> insert new_fs
  where
    -- NOTE(review): old-implementation residue; shadows the binding below.
    !(FastStringTable uid _arr) = string_table
    !(FastStringTable uid segments#) = stringTable
    -- Claim the next unique ID from the shared counter.
    get_uid = atomicModifyIORef' uid $ \n -> (n+1,n)
    !(I# hash#) = hashStr ptr len
    -- The segment for this hash; indexArray# returns an unboxed tuple to
    -- avoid forcing the IORef's contents here.
    (# segmentRef #) = indexArray# segments# (hashToSegment# hash#)
    -- Runs under the segment lock: resize if needed, re-check the bucket
    -- (another thread may have inserted meanwhile), then cons the new entry.
    insert fs = do
      FastStringTableSegment _ counter buckets# <- maybeResizeSegment segmentRef
      let idx# = hashToIndex# buckets# hash#
      bucket <- IO $ readArray# buckets# idx#
      res <- bucket_match bucket len ptr
      case res of
        -- The FastString was added by another thread after previous read and
        -- before we acquired the write lock.
        Just found -> return found
        Nothing -> do
          IO $ \s1# ->
            case writeArray# buckets# idx# (fs: bucket) s1# of
              s2# -> (# s2#, () #)
          modifyIORef' counter succ
          return fs
-- | Linear search of one bucket for a string of @len@ bytes at @ptr@.
-- Cheap length comparison first, then a raw byte comparison via
-- 'cmpStringPrefix' on the pinned buffer.
bucket_match :: [FastString] -> Int -> Ptr Word8 -> IO (Maybe FastString)
bucket_match [] _ _ = return Nothing
bucket_match (v@(FastString _ _ bs _):ls) len ptr
  | len == BS.length bs = do
      b <- BS.unsafeUseAsCString bs $ \buf ->
        cmpStringPrefix ptr (castPtr buf) len
      if b then return (Just v)
           else bucket_match ls len ptr
  | otherwise =
      bucket_match ls len ptr
mkFastStringBytes :: Ptr Word8 -> Int -> FastString
mkFastStringBytes !ptr !len =
-- NB: Might as well use unsafeDupablePerformIO, since mkFastStringWith is
......@@ -416,17 +484,6 @@ mkFastStringByteList str =
mkZFastString :: String -> FastZString
mkZFastString = mkFastZStringString
-- NOTE(review): duplicate of the bucket_match defined earlier -- the diff
-- moved this function and this is the pre-move copy; only one definition
-- should remain in the final file.
-- | Linear search of one bucket for a string of @len@ bytes at @ptr@.
bucket_match :: [FastString] -> Int -> Ptr Word8 -> IO (Maybe FastString)
bucket_match [] _ _ = return Nothing
bucket_match (v@(FastString _ _ bs _):ls) len ptr
  | len == BS.length bs = do
      b <- BS.unsafeUseAsCString bs $ \buf ->
        cmpStringPrefix ptr (castPtr buf) len
      if b then return (Just v)
           else bucket_match ls len ptr
  | otherwise =
      bucket_match ls len ptr
mkNewFastString :: ForeignPtr Word8 -> Ptr Word8 -> Int -> Int
-> IO FastString
mkNewFastString fp ptr len uid = do
......@@ -466,9 +523,9 @@ hashStr (Ptr a#) (I# len#) = loop 0# 0#
where
loop h n | isTrue# (n ==# len#) = I# h
| otherwise = loop h2 (n +# 1#)
where !c = ord# (indexCharOffAddr# a# n)
!h2 = (c +# (h *# 128#)) `remInt#`
hASH_TBL_SIZE#
where
!c = ord# (indexCharOffAddr# a# n)
!h2 = (h *# 16777619#) `xorI#` c
-- -----------------------------------------------------------------------------
-- Operations
......@@ -547,12 +604,16 @@ nilFS = mkFastString ""
-- -----------------------------------------------------------------------------
-- Stats
-- | (Old fixed-size table.)  Snapshot the contents of every bucket.
-- NOTE(review): removed-side code, superseded by the segment-based version.
getFastStringTable :: IO [[FastString]]
getFastStringTable = do
  buckets <- forM [0.. hASH_TBL_SIZE-1] $ \idx -> do
    bucket <- lookupTbl string_table idx
    readIORef bucket
  return buckets
-- | Snapshot the whole table (for @-dfaststring-stats@): one list per
-- segment, each containing one list per bucket.  Reads take no locks, so
-- with concurrent writers the result is only a point-in-time approximation.
getFastStringTable :: IO [[[FastString]]]
getFastStringTable =
  forM [0 .. numSegments - 1] $ \(I# i#) -> do
    let (# segmentRef #) = indexArray# segments# i#
    FastStringTableSegment _ _ buckets# <- readIORef segmentRef
    let bucketSize = I# (sizeofMutableArray# buckets#)
    forM [0 .. bucketSize - 1] $ \(I# j#) ->
      IO $ readArray# buckets# j#
  where
    !(FastStringTable _ segments#) = stringTable
-- -----------------------------------------------------------------------------
-- Outputting 'FastString's
......
......@@ -805,14 +805,21 @@ dumpFinalStats dflags =
dumpFastStringStats :: DynFlags -> IO ()
dumpFastStringStats dflags = do
buckets <- getFastStringTable
let (entries, longest, has_z) = countFS 0 0 0 buckets
msg = text "FastString stats:" $$
nest 4 (vcat [text "size: " <+> int (length buckets),
text "entries: " <+> int entries,
text "longest chain: " <+> int longest,
text "has z-encoding: " <+> (has_z `pcntOf` entries)
])
segments <- getFastStringTable
let buckets = concat segments
bucketsPerSegment = map length segments
entriesPerBucket = map length buckets
entries = sum entriesPerBucket
hasZ = sum $ map (length . filter hasZEncoding) buckets
msg = text "FastString stats:" $$ nest 4 (vcat
[ text "segments: " <+> int (length segments)
, text "buckets: " <+> int (sum bucketsPerSegment)
, text "entries: " <+> int entries
, text "largest segment: " <+> int (maximum bucketsPerSegment)
, text "smallest segment: " <+> int (minimum bucketsPerSegment)
, text "longest bucket: " <+> int (maximum entriesPerBucket)
, text "has z-encoding: " <+> (hasZ `pcntOf` entries)
])
-- we usually get more "has z-encoding" than "z-encoded", because
-- when we z-encode a string it might hash to the exact same string,
-- which is not counted as "z-encoded". Only strings whose
......@@ -822,17 +829,6 @@ dumpFastStringStats dflags = do
where
x `pcntOf` y = int ((x * 100) `quot` y) Outputable.<> char '%'
-- | Fold over a list of buckets, accumulating the total number of entries,
-- the length of the longest bucket, and how many strings have a z-encoding.
countFS :: Int -> Int -> Int -> [[FastString]] -> (Int, Int, Int)
countFS entries longest has_z [] = (entries, longest, has_z)
countFS entries longest has_z (bucket:rest) =
    countFS (entries + size) (max size longest) (has_z + zCount) rest
  where
    size   = length bucket
    zCount = length (filter hasZEncoding bucket)
showPackages, dumpPackages, dumpPackagesSimple :: DynFlags -> IO ()
showPackages dflags = putStrLn (showSDoc dflags (pprPackages dflags))
dumpPackages dflags = putMsg dflags (pprPackages dflags)
......
# Standard GHC testsuite boilerplate; TOP points at the testsuite root.
TOP=../..
include $(TOP)/mk/boilerplate.mk
include $(TOP)/mk/test.mk
# Standard GHC testsuite boilerplate; TOP points at the testsuite root
# (one level deeper than the sibling Makefile).
TOP=../../..
include $(TOP)/mk/boilerplate.mk
include $(TOP)/mk/test.mk
{-# LANGUAGE RecordWildCards #-}
module Main (main) where
import FastString
import Control.Concurrent
import Control.DeepSeq
import Control.Exception
import Control.Monad
import Data.ByteString (ByteString)
import Data.ByteString.Builder
import qualified Data.ByteString.Char8 as Char
import Data.ByteString.Lazy (toStrict)
import Data.List
import Data.Monoid
import qualified Data.Sequence as Seq
import Data.Time
import GHC.Conc
import System.IO
import System.Random
import Text.Printf
-- | Parameters of the T14854 concurrent-interning benchmark.
data Options = Options
  { optThreads :: Int -- ^ the number of threads to run concurrently
  , optRepeat  :: Int -- ^ how many times do we create the same 'FastString'
  , optCount   :: Int -- ^ the total number of different 'FastString's
  , optPrefix  :: Int -- ^ the length of prefix in each 'FastString'
  }
-- | Defaults: 8 threads interning 10000 distinct strings, each created
-- 16 times, with no shared prefix.
defOptions :: Options
defOptions = Options
  { optThreads = 8
  , optRepeat  = 16
  , optCount   = 10000
  , optPrefix  = 0
  }
-- | Run one job per capability concurrently.  Each worker applies @op@ to
-- every ByteString in its job, keeps the maximum result, and hands it back
-- through an MVar.  The overall maximum, minus the first unique
-- (603979775), is forced and returned once every worker has finished.
run :: [[ByteString]] -> (ByteString -> Int) -> IO Int
run jobs op = do
  doneVars <- forM (zip [0 ..] jobs) $ \(cpu, job) -> do
    done <- newEmptyMVar
    forkOn cpu $ do
      best <- evaluate $ force $ maximum $ map op job
      putMVar done best
    return done
  results <- mapM takeMVar doneVars
  evaluate $ force $ maximum results - 603979775
-- | Force a full table snapshot and return the total number of elements
-- across every segment and bucket.
summary :: IO [[[a]]] -> IO Int
summary getTable =
  getTable >>= evaluate . force . length . concat . concat
-- | Run an IO action and report its wall-clock duration in milliseconds
-- on stderr, tagged with @name@; the action's result is passed through.
timeIt :: String -> IO a -> IO a
timeIt name io = do
  start <- getCurrentTime
  result <- io
  end <- getCurrentTime
  hPrintf stderr "%s: %.2fms\n" name
    (realToFrac $ diffUTCTime end start * 1000 :: Double)
  return result
-- | Generate optCount*optRepeat strings, shuffle them, split them
-- round-robin into one job per capability, intern them all concurrently,
-- then check the unique range and that no duplicates reached the table.
main :: IO ()
main = do
  seed <- randomIO
  let Options{..} = defOptions
      -- Shuffle driven by an infinite stream of random indices: repeatedly
      -- split the sequence at a random position and pull out one element.
      -- (Partial match on (i:is) is fine: 'randoms' is infinite.)
      shuffle (i:is) s
        | Seq.null s = []
        | otherwise = m: shuffle is (l <> r)
        where
          (l, m Seq.:< r) = Seq.viewl <$> Seq.splitAt (i `rem` Seq.length s) s
      -- optRepeat copies of ["<prefix>0" .. "<prefix>(optCount-1)"], shuffled.
      inputs =
        shuffle (randoms $ mkStdGen seed) $
        mconcat $ replicate optRepeat $
        Seq.fromFunction optCount $ \i -> toStrict $ toLazyByteString $
          byteString (Char.replicate optPrefix '_') <> intDec i
  -- Deal the inputs round-robin into optThreads-sized columns.
  jobs <- evaluate $ force $ transpose $
    map (take optThreads) $
    takeWhile (not . null) $
    iterate (drop optThreads) inputs
  setNumCapabilities (length jobs)
  -- The maximum unique may be greater than 'optCount'
  u <- timeIt "run" $ run jobs $ uniqueOfFS . mkFastStringByteString
  print $ optCount <= u && u <= min optThreads optRepeat * optCount
  -- But we should never have duplicate 'FastString's in the table
  n <- timeIt "summary" $ summary getFastStringTable
  print $ n == optCount
True
True
# T14854: concurrent FastString interning stress test.  Threaded ways only
# (it exercises multi-capability insertion), GHCi is skipped, the 'random'
# package is required, and timing output on stderr is ignored.
test('T14854',
     [only_ways(threaded_ways),
      omit_ways('ghci'),
      reqlib('random'),
      ignore_stderr],
     compile_and_run,
     ['-package ghc'])
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment