Skip to content
Snippets Groups Projects
Commit 5126764b authored by Zejun Wu's avatar Zejun Wu Committed by Ben Gamari
Browse files

Rewrite FastString table in concurrent hashtable

Summary:
Reimplement the global FastString table using a concurrent hashtable with
fixed-size segments and dynamically growing buckets instead of fixed-size
buckets.

This addresses the problem that `mkFastString` was not linear when the
total number of entries was large.

Test Plan:
./validate

```
inplace/bin/ghc-stage2 --interactive -dfaststring-stats < /dev/null
GHCi, version 8.7.20181005: http://www.haskell.org/ghc/  :? for help
Prelude> Leaving GHCi.
FastString stats:
    segments:          256
    buckets:           16384
    entries:           7117
    largest segment:   64
    smallest segment:  64
    longest bucket:    5
    has z-encoding:    0%
```

Also compare the two implementations using

{P187}

The new implementation is on a par with the old version under different
combinations of parameters and performs better when the number of
FastStrings is large.

{P188}

Reviewers: simonmar, bgamari, niteria

Reviewed By: simonmar, bgamari

Subscribers: rwbarton, carter

GHC Trac Issues: #14854

Differential Revision: https://phabricator.haskell.org/D5211
parent 42575701
No related merge requests found
......@@ -36,7 +36,6 @@ module FastString
mkFastStringByteString,
fastZStringToByteString,
unsafeMkByteString,
hashByteString,
-- * FastZString
FastZString,
......@@ -104,6 +103,7 @@ import FastFunctions
import Panic
import Util
import Control.Concurrent.MVar
import Control.DeepSeq
import Control.Monad
import Data.ByteString (ByteString)
......@@ -116,13 +116,12 @@ import GHC.Exts
import System.IO
import System.IO.Unsafe ( unsafePerformIO )
import Data.Data
import Data.IORef ( IORef, newIORef, readIORef, atomicModifyIORef' )
import Data.IORef
import Data.Maybe ( isJust )
import Data.Char
import Data.List ( elemIndex )
import Data.Semigroup as Semi
import GHC.IO ( IO(..), unsafeDupablePerformIO )
import GHC.IO ( IO(..), unIO, unsafeDupablePerformIO )
import Foreign
......@@ -132,9 +131,6 @@ import GHC.Conc.Sync (sharedCAF)
import GHC.Base ( unpackCString#, unpackNBytes# )
#define hASH_TBL_SIZE 4091
#define hASH_TBL_SIZE_UNBOXED 4091#
-- | Project the underlying 'ByteString' out of a 'FastString'.
-- O(1): simply reads the @fs_bs@ field, no copying.
fastStringToByteString :: FastString -> ByteString
fastStringToByteString = fs_bs
......@@ -146,8 +142,8 @@ fastZStringToByteString (FastZString bs) = bs
-- | Pack a 'String' into a 'ByteString' with 'BSC.pack'.
-- \"Unsafe\" because 'BSC.pack' truncates every 'Char' to 8 bits;
-- callers are expected to supply 8-bit text -- NOTE(review): confirm
-- the intended character range at the call sites.
unsafeMkByteString :: String -> ByteString
unsafeMkByteString str = BSC.pack str
hashByteString :: ByteString -> Int
hashByteString bs
-- | Hash the bytes of a 'FastString' using 'hashStr'.
-- Uses 'inlinePerformIO' over 'BS.unsafeUseAsCStringLen'; presumably safe
-- because the ByteString's storage is never mutated -- the pointer is only
-- read within the callback.
hashFastString :: FastString -> Int
hashFastString (FastString _ _ bs _)
    = inlinePerformIO $ BS.unsafeUseAsCStringLen bs $ \(ptr, len) ->
        return $ hashStr (castPtr ptr) len
......@@ -243,21 +239,85 @@ and updates to multiple buckets with low synchronization overhead.
See Note [Updating the FastString table] on how it's updated.
-}
data FastStringTable =
FastStringTable
{-# UNPACK #-} !(IORef Int) -- the unique ID counter shared with all buckets
(MutableArray# RealWorld (IORef [FastString])) -- the array of mutable buckets
string_table :: FastStringTable
{-# NOINLINE string_table #-}
string_table = unsafePerformIO $ do
-- | The global string-interning table: a shared unique-ID counter plus a
-- fixed-size immutable array of segments (each behind an 'IORef' so a
-- segment can be swapped out when it is resized).
data FastStringTable = FastStringTable
  {-# UNPACK #-} !(IORef Int)  -- the unique ID counter shared with all buckets
  (Array# (IORef FastStringTableSegment))  -- concurrent segments
-- | One segment of the global table: a lock serialising writers to this
-- segment, an element count (used to decide when to grow), and a mutable
-- array of buckets, each bucket being a list of 'FastString's.
data FastStringTableSegment = FastStringTableSegment
  {-# UNPACK #-} !(MVar ())  -- the lock for write in each segment
  {-# UNPACK #-} !(IORef Int)  -- the number of elements
  (MutableArray# RealWorld [FastString])  -- buckets in this segment
{-
Following parameters are determined based on:
* Benchmark based on testsuite/tests/utils/should_run/T14854.hs
* Stats of @echo :browse | ghc --interactive -dfaststring-stats >/dev/null@:
on 2018-10-24, we have 13920 entries.
-}
-- | Sizing parameters of the global FastString table; see the benchmark
-- notes above (testsuite T14854 and @-dfaststring-stats@ counts).
segmentBits, numSegments, segmentMask, initialNumBuckets :: Int
segmentBits = 8
-- Derived from 'segmentBits' instead of hard-coded literals so the three
-- values can never fall out of sync if the segment count is retuned.
numSegments = 2 ^ segmentBits     -- 256
segmentMask = numSegments - 1     -- 0xff
-- Starting bucket-array size per segment; doubled on demand.
initialNumBuckets = 64
-- | Pick the segment for a hash value: the low 'segmentBits' bits of the
-- hash index the segment array, so a given string always maps to the same
-- segment.
hashToSegment# :: Int# -> Int#
hashToSegment# hash# = hash# `andI#` segmentMask#
  where
    !(I# segmentMask#) = segmentMask
-- | Pick the bucket within a segment: discard the segment-selecting low
-- bits (logical shift, so the result is non-negative), then reduce modulo
-- the segment's current bucket count.  Recomputed after every resize since
-- the array size changes.
hashToIndex# :: MutableArray# RealWorld [FastString] -> Int# -> Int#
hashToIndex# buckets# hash# =
    (hash# `uncheckedIShiftRL#` segmentBits#) `remInt#` size#
  where
    !(I# segmentBits#) = segmentBits
    size# = sizeofMutableArray# buckets#
-- | Grow a segment once its element count reaches its bucket count
-- (i.e. load factor 1): allocate a bucket array of double the size,
-- rehash every entry into it, and publish the new segment via the IORef.
-- NOTE(review): assumes the caller holds the segment's write lock -- the
-- only call site visible here (insert in mkFastStringWith) does.
maybeResizeSegment :: IORef FastStringTableSegment -> IO FastStringTableSegment
maybeResizeSegment segmentRef = do
  segment@(FastStringTableSegment lock counter old#) <- readIORef segmentRef
  let oldSize# = sizeofMutableArray# old#
      newSize# = oldSize# *# 2#
  (I# n#) <- readIORef counter
  if isTrue# (n# <# newSize#) -- maximum load of 1
  then return segment
  else do
    -- Allocate the doubled bucket array; lock and counter are reused.
    resizedSegment@(FastStringTableSegment _ _ new#) <- IO $ \s1# ->
      case newArray# newSize# [] s1# of
        (# s2#, arr# #) -> (# s2#, FastStringTableSegment lock counter arr# #)
    -- Rehash every string of every old bucket into the new array.
    forM_ [0 .. (I# oldSize#) - 1] $ \(I# i#) -> do
      fsList <- IO $ readArray# old# i#
      forM_ fsList $ \fs -> do
        let -- Shall we store in hash value in FastString instead?
            !(I# hash#) = hashFastString fs
            idx# = hashToIndex# new# hash#
        -- Cons onto the destination bucket, threading the state token.
        IO $ \s1# ->
          case readArray# new# idx# s1# of
            (# s2#, bucket #) -> case writeArray# new# idx# (fs: bucket) s2# of
              s3# -> (# s3#, () #)
    writeIORef segmentRef resizedSegment
    return resizedSegment
{-# NOINLINE stringTable #-}
stringTable :: FastStringTable
stringTable = unsafePerformIO $ do
let !(I# numSegments#) = numSegments
!(I# initialNumBuckets#) = initialNumBuckets
loop a# i# s1#
| isTrue# (i# ==# numSegments#) = s1#
| otherwise = case newMVar () `unIO` s1# of
(# s2#, lock #) -> case newIORef 0 `unIO` s2# of
(# s3#, counter #) -> case newArray# initialNumBuckets# [] s3# of
(# s4#, buckets# #) -> case newIORef
(FastStringTableSegment lock counter buckets#) `unIO` s4# of
(# s5#, segment #) -> case writeArray# a# i# segment s5# of
s6# -> loop a# (i# +# 1#) s6#
uid <- newIORef 603979776 -- ord '$' * 0x01000000
tab <- IO $ \s1# -> case newArray# hASH_TBL_SIZE_UNBOXED (panic "string_table") s1# of
(# s2#, arr# #) ->
(# s2#, FastStringTable uid arr# #)
forM_ [0.. hASH_TBL_SIZE-1] $ \i -> do
bucket <- newIORef []
updTbl tab i bucket
tab <- IO $ \s1# ->
case newArray# numSegments# (panic "string_table") s1# of
(# s2#, arr# #) -> case loop arr# 0# s2# of
s3# -> case unsafeFreezeArray# arr# s3# of
(# s4#, segments# #) -> (# s4#, FastStringTable uid segments# #)
-- use the support wired into the RTS to share this CAF among all images of
-- libHSghc
......@@ -303,27 +363,27 @@ lower-level `sharedCAF` mechanism that relies on Globals.c.
-}
-- | (Old fixed-size table.)  Read the bucket 'IORef' at index @i@.
-- NOTE(review): this belongs to the removed side of the diff; the new
-- segment-based table does not use it.
lookupTbl :: FastStringTable -> Int -> IO (IORef [FastString])
lookupTbl (FastStringTable _ arr#) (I# i#) =
  IO $ \ s# -> readArray# arr# i# s#
-- | (Old fixed-size table.)  Store a bucket 'IORef' at index @i@.
-- NOTE(review): removed-side code; only used by the old initialisation.
updTbl :: FastStringTable -> Int -> IORef [FastString] -> IO ()
updTbl (FastStringTable _uid arr#) (I# i#) ls = do
  (IO $ \ s# -> case writeArray# arr# i# ls s# of { s2# -> (# s2#, () #) })
-- | Build a 'FastString' from a NUL-terminated primitive address, e.g. an
-- @Addr#@ literal; the length is recovered with 'ptrStrLength'.
mkFastString# :: Addr# -> FastString
mkFastString# a# =
  let ptr = Ptr a#
  in mkFastStringBytes ptr (ptrStrLength ptr)
{- Note [Updating the FastString table]
We use a concurrent hashtable which contains multiple segments; each hash
value always maps to the same segment.  Reads are lock-free; a write to a
segment must acquire that segment's lock to avoid race conditions, while
writes to different segments are independent.
The procedure goes like this:
1. Read the relevant bucket and perform a look up of the string.
2. If it exists, return it.
3. Otherwise grab a unique ID, create a new FastString and atomically attempt
to update the relevant bucket with this FastString:
1. Find out which segment to operate on based on the hash value
2. Read the relevant bucket and perform a look up of the string.
3. If it exists, return it.
4. Otherwise grab a unique ID, create a new FastString and atomically attempt
to update the relevant segment with this FastString:
* Resize the segment by doubling the number of buckets when the number of
FastStrings in this segment grows beyond the threshold.
* Double check that the string is not in the bucket. Another thread may have
inserted it while we were creating our string.
* Return the existing FastString if it exists. The one we preemptively
......@@ -331,43 +391,51 @@ The procedure goes like this:
* Otherwise, insert and return the string we created.
-}
{- Note [Double-checking the bucket]
It is not necessary to check the entire bucket the second time. We only have to
check the strings that are new to the bucket since the last time we read it.
-}
-- | Intern @len@ bytes at @ptr@: return the existing 'FastString' if the
-- table already has one, otherwise build a new one with @mk_fs@ (passing it
-- a fresh unique) and insert it.  See Note [Updating the FastString table].
mkFastStringWith :: (Int -> IO FastString) -> Ptr Word8 -> Int -> IO FastString
mkFastStringWith mk_fs !ptr !len = do
  -- NOTE(review): the following block, down to the atomicModifyIORef'
  -- closure, appears to be residue of the OLD bucket-list implementation
  -- interleaved into this listing by the diff; the new segment-based body
  -- starts at the readIORef of segmentRef below.
  let hash = hashStr ptr len
  bucket <- lookupTbl string_table hash
  ls1 <- readIORef bucket
  res <- bucket_match ls1 len ptr
  case res of
    Just v -> return v
    Nothing -> do
      n <- get_uid
      new_fs <- mk_fs n
      atomicModifyIORef' bucket $ \ls2 ->
        -- Note [Double-checking the bucket]
        let delta_ls = case ls1 of
              [] -> ls2
              l:_ -> case l `elemIndex` ls2 of
                Nothing -> panic "mkFastStringWith"
                Just idx -> take idx ls2
        -- NB: Might as well use inlinePerformIO, since the call to
        -- bucket_match doesn't perform any IO that could be floated
        -- out of this closure or erroneously duplicated.
        in case inlinePerformIO (bucket_match delta_ls len ptr) of
             Nothing -> (new_fs:ls2, new_fs)
             Just fs -> (ls2,fs)
  -- New implementation: optimistic, lock-free read of the bucket first ...
  FastStringTableSegment lock _ buckets# <- readIORef segmentRef
  let idx# = hashToIndex# buckets# hash#
  bucket <- IO $ readArray# buckets# idx#
  res <- bucket_match bucket len ptr
  case res of
    Just found -> return found
    Nothing -> do
      n <- get_uid
      new_fs <- mk_fs n
      -- ... and only take the segment's write lock when we must insert.
      withMVar lock $ \_ -> insert new_fs
  where
    -- NOTE(review): old-implementation residue; shadows the binding below.
    !(FastStringTable uid _arr) = string_table
    !(FastStringTable uid segments#) = stringTable
    -- Claim the next unique ID from the shared counter.
    get_uid = atomicModifyIORef' uid $ \n -> (n+1,n)
    !(I# hash#) = hashStr ptr len
    -- The segment for this hash; indexArray# returns an unboxed tuple to
    -- avoid forcing the IORef's contents here.
    (# segmentRef #) = indexArray# segments# (hashToSegment# hash#)
    -- Runs under the segment lock: resize if needed, re-check the bucket
    -- (another thread may have inserted meanwhile), then cons the new entry.
    insert fs = do
      FastStringTableSegment _ counter buckets# <- maybeResizeSegment segmentRef
      let idx# = hashToIndex# buckets# hash#
      bucket <- IO $ readArray# buckets# idx#
      res <- bucket_match bucket len ptr
      case res of
        -- The FastString was added by another thread after previous read and
        -- before we acquired the write lock.
        Just found -> return found
        Nothing -> do
          IO $ \s1# ->
            case writeArray# buckets# idx# (fs: bucket) s1# of
              s2# -> (# s2#, () #)
          modifyIORef' counter succ
          return fs
-- | Linear search of one bucket for a string of @len@ bytes at @ptr@.
-- Cheap length comparison first, then a raw byte comparison via
-- 'cmpStringPrefix' on the pinned buffer.
bucket_match :: [FastString] -> Int -> Ptr Word8 -> IO (Maybe FastString)
bucket_match [] _ _ = return Nothing
bucket_match (v@(FastString _ _ bs _):ls) len ptr
  | len == BS.length bs = do
      b <- BS.unsafeUseAsCString bs $ \buf ->
        cmpStringPrefix ptr (castPtr buf) len
      if b then return (Just v)
           else bucket_match ls len ptr
  | otherwise =
      bucket_match ls len ptr
mkFastStringBytes :: Ptr Word8 -> Int -> FastString
mkFastStringBytes !ptr !len =
-- NB: Might as well use unsafeDupablePerformIO, since mkFastStringWith is
......@@ -416,17 +484,6 @@ mkFastStringByteList str =
mkZFastString :: String -> FastZString
mkZFastString = mkFastZStringString
-- NOTE(review): duplicate of the bucket_match defined earlier -- the diff
-- moved this function and this is the pre-move copy; only one definition
-- should remain in the final file.
-- | Linear search of one bucket for a string of @len@ bytes at @ptr@.
bucket_match :: [FastString] -> Int -> Ptr Word8 -> IO (Maybe FastString)
bucket_match [] _ _ = return Nothing
bucket_match (v@(FastString _ _ bs _):ls) len ptr
  | len == BS.length bs = do
      b <- BS.unsafeUseAsCString bs $ \buf ->
        cmpStringPrefix ptr (castPtr buf) len
      if b then return (Just v)
           else bucket_match ls len ptr
  | otherwise =
      bucket_match ls len ptr
mkNewFastString :: ForeignPtr Word8 -> Ptr Word8 -> Int -> Int
-> IO FastString
mkNewFastString fp ptr len uid = do
......@@ -466,9 +523,9 @@ hashStr (Ptr a#) (I# len#) = loop 0# 0#
where
loop h n | isTrue# (n ==# len#) = I# h
| otherwise = loop h2 (n +# 1#)
where !c = ord# (indexCharOffAddr# a# n)
!h2 = (c +# (h *# 128#)) `remInt#`
hASH_TBL_SIZE#
where
!c = ord# (indexCharOffAddr# a# n)
!h2 = (h *# 16777619#) `xorI#` c
-- -----------------------------------------------------------------------------
-- Operations
......@@ -547,12 +604,16 @@ nilFS = mkFastString ""
-- -----------------------------------------------------------------------------
-- Stats
-- | (Old fixed-size table.)  Snapshot the contents of every bucket.
-- NOTE(review): removed-side code, superseded by the segment-based version.
getFastStringTable :: IO [[FastString]]
getFastStringTable = do
  buckets <- forM [0.. hASH_TBL_SIZE-1] $ \idx -> do
    bucket <- lookupTbl string_table idx
    readIORef bucket
  return buckets
-- | Snapshot the whole table (for @-dfaststring-stats@): one list per
-- segment, each containing one list per bucket.  Reads take no locks, so
-- with concurrent writers the result is only a point-in-time approximation.
getFastStringTable :: IO [[[FastString]]]
getFastStringTable =
  forM [0 .. numSegments - 1] $ \(I# i#) -> do
    let (# segmentRef #) = indexArray# segments# i#
    FastStringTableSegment _ _ buckets# <- readIORef segmentRef
    let bucketSize = I# (sizeofMutableArray# buckets#)
    forM [0 .. bucketSize - 1] $ \(I# j#) ->
      IO $ readArray# buckets# j#
  where
    !(FastStringTable _ segments#) = stringTable
-- -----------------------------------------------------------------------------
-- Outputting 'FastString's
......
......@@ -805,14 +805,21 @@ dumpFinalStats dflags =
dumpFastStringStats :: DynFlags -> IO ()
dumpFastStringStats dflags = do
buckets <- getFastStringTable
let (entries, longest, has_z) = countFS 0 0 0 buckets
msg = text "FastString stats:" $$
nest 4 (vcat [text "size: " <+> int (length buckets),
text "entries: " <+> int entries,
text "longest chain: " <+> int longest,
text "has z-encoding: " <+> (has_z `pcntOf` entries)
])
segments <- getFastStringTable
let buckets = concat segments
bucketsPerSegment = map length segments
entriesPerBucket = map length buckets
entries = sum entriesPerBucket
hasZ = sum $ map (length . filter hasZEncoding) buckets
msg = text "FastString stats:" $$ nest 4 (vcat
[ text "segments: " <+> int (length segments)
, text "buckets: " <+> int (sum bucketsPerSegment)
, text "entries: " <+> int entries
, text "largest segment: " <+> int (maximum bucketsPerSegment)
, text "smallest segment: " <+> int (minimum bucketsPerSegment)
, text "longest bucket: " <+> int (maximum entriesPerBucket)
, text "has z-encoding: " <+> (hasZ `pcntOf` entries)
])
-- we usually get more "has z-encoding" than "z-encoded", because
-- when we z-encode a string it might hash to the exact same string,
-- which is not counted as "z-encoded". Only strings whose
......@@ -822,17 +829,6 @@ dumpFastStringStats dflags = do
where
x `pcntOf` y = int ((x * 100) `quot` y) Outputable.<> char '%'
-- | Fold over a list of buckets, accumulating the total number of entries,
-- the length of the longest bucket, and how many strings have a z-encoding.
countFS :: Int -> Int -> Int -> [[FastString]] -> (Int, Int, Int)
countFS entries longest has_z [] = (entries, longest, has_z)
countFS entries longest has_z (bucket:rest) =
    countFS (entries + size) (max size longest) (has_z + zCount) rest
  where
    size   = length bucket
    zCount = length (filter hasZEncoding bucket)
showPackages, dumpPackages, dumpPackagesSimple :: DynFlags -> IO ()
showPackages dflags = putStrLn (showSDoc dflags (pprPackages dflags))
dumpPackages dflags = putMsg dflags (pprPackages dflags)
......
# Standard GHC testsuite boilerplate; TOP points at the testsuite root.
TOP=../..
include $(TOP)/mk/boilerplate.mk
include $(TOP)/mk/test.mk
# Standard GHC testsuite boilerplate; TOP points at the testsuite root
# (one level deeper than the sibling Makefile).
TOP=../../..
include $(TOP)/mk/boilerplate.mk
include $(TOP)/mk/test.mk
{-# LANGUAGE RecordWildCards #-}
module Main (main) where
import FastString
import Control.Concurrent
import Control.DeepSeq
import Control.Exception
import Control.Monad
import Data.ByteString (ByteString)
import Data.ByteString.Builder
import qualified Data.ByteString.Char8 as Char
import Data.ByteString.Lazy (toStrict)
import Data.List
import Data.Monoid
import qualified Data.Sequence as Seq
import Data.Time
import GHC.Conc
import System.IO
import System.Random
import Text.Printf
-- | Parameters of the T14854 concurrent-interning benchmark.
data Options = Options
  { optThreads :: Int -- ^ the number of threads to run concurrently
  , optRepeat  :: Int -- ^ how many times do we create the same 'FastString'
  , optCount   :: Int -- ^ the total number of different 'FastString's
  , optPrefix  :: Int -- ^ the length of prefix in each 'FastString'
  }
-- | Defaults: 8 threads interning 10000 distinct strings, each created
-- 16 times, with no shared prefix.
defOptions :: Options
defOptions = Options
  { optThreads = 8
  , optRepeat  = 16
  , optCount   = 10000
  , optPrefix  = 0
  }
-- | Run one job per capability concurrently.  Each worker applies @op@ to
-- every ByteString in its job, keeps the maximum result, and hands it back
-- through an MVar.  The overall maximum, minus the first unique
-- (603979775), is forced and returned once every worker has finished.
run :: [[ByteString]] -> (ByteString -> Int) -> IO Int
run jobs op = do
  doneVars <- forM (zip [0 ..] jobs) $ \(cpu, job) -> do
    done <- newEmptyMVar
    forkOn cpu $ do
      best <- evaluate $ force $ maximum $ map op job
      putMVar done best
    return done
  results <- mapM takeMVar doneVars
  evaluate $ force $ maximum results - 603979775
-- | Force a full table snapshot and return the total number of elements
-- across every segment and bucket.
summary :: IO [[[a]]] -> IO Int
summary getTable =
  getTable >>= evaluate . force . length . concat . concat
-- | Run an IO action and report its wall-clock duration in milliseconds
-- on stderr, tagged with @name@; the action's result is passed through.
timeIt :: String -> IO a -> IO a
timeIt name io = do
  start <- getCurrentTime
  result <- io
  end <- getCurrentTime
  hPrintf stderr "%s: %.2fms\n" name
    (realToFrac $ diffUTCTime end start * 1000 :: Double)
  return result
-- | Generate optCount*optRepeat strings, shuffle them, split them
-- round-robin into one job per capability, intern them all concurrently,
-- then check the unique range and that no duplicates reached the table.
main :: IO ()
main = do
  seed <- randomIO
  let Options{..} = defOptions
      -- Shuffle driven by an infinite stream of random indices: repeatedly
      -- split the sequence at a random position and pull out one element.
      -- (Partial match on (i:is) is fine: 'randoms' is infinite.)
      shuffle (i:is) s
        | Seq.null s = []
        | otherwise = m: shuffle is (l <> r)
        where
          (l, m Seq.:< r) = Seq.viewl <$> Seq.splitAt (i `rem` Seq.length s) s
      -- optRepeat copies of ["<prefix>0" .. "<prefix>(optCount-1)"], shuffled.
      inputs =
        shuffle (randoms $ mkStdGen seed) $
        mconcat $ replicate optRepeat $
        Seq.fromFunction optCount $ \i -> toStrict $ toLazyByteString $
          byteString (Char.replicate optPrefix '_') <> intDec i
  -- Deal the inputs round-robin into optThreads-sized columns.
  jobs <- evaluate $ force $ transpose $
    map (take optThreads) $
    takeWhile (not . null) $
    iterate (drop optThreads) inputs
  setNumCapabilities (length jobs)
  -- The maximum unique may be greater than 'optCount'
  u <- timeIt "run" $ run jobs $ uniqueOfFS . mkFastStringByteString
  print $ optCount <= u && u <= min optThreads optRepeat * optCount
  -- But we should never have duplicate 'FastString's in the table
  n <- timeIt "summary" $ summary getFastStringTable
  print $ n == optCount
True
True
# T14854: concurrent FastString interning stress test.  Threaded ways only
# (it exercises multi-capability insertion), GHCi is skipped, the 'random'
# package is required, and timing output on stderr is ignored.
test('T14854',
     [only_ways(threaded_ways),
      omit_ways('ghci'),
      reqlib('random'),
      ignore_stderr],
     compile_and_run,
     ['-package ghc'])
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment