FastString.hs 23.2 KB
Newer Older
Austin Seipp's avatar
Austin Seipp committed
1
2
-- (c) The University of Glasgow, 1997-2006

3
4
{-# LANGUAGE BangPatterns, CPP, MagicHash, UnboxedTuples,
    GeneralizedNewtypeDeriving #-}
5
{-# OPTIONS_GHC -O2 -funbox-strict-fields #-}
6
7
8
-- We always optimise this, otherwise performance of a non-optimised
-- compiler is severely affected

9
10
11
-- |
-- There are two principal string types used internally by GHC:
--
Mateusz Kowalczyk's avatar
Mateusz Kowalczyk committed
12
-- ['FastString']
13
--
Mateusz Kowalczyk's avatar
Mateusz Kowalczyk committed
14
15
16
17
18
--   * A compact, hash-consed, representation of character strings.
--   * Comparison is O(1), and you can get a 'Unique.Unique' from them.
--   * Generated by 'fsLit'.
--   * Turn into 'Outputable.SDoc' with 'Outputable.ftext'.
--
Sylvain Henry's avatar
Sylvain Henry committed
19
-- ['PtrString']
Mateusz Kowalczyk's avatar
Mateusz Kowalczyk committed
20
--
Sylvain Henry's avatar
Sylvain Henry committed
21
--   * Pointer and size of a Latin-1 encoded string.
Mateusz Kowalczyk's avatar
Mateusz Kowalczyk committed
22
23
24
25
--   * Practically no operations.
--   * Outputing them is fast.
--   * Generated by 'sLit'.
--   * Turn into 'Outputable.SDoc' with 'Outputable.ptext'
26
27
28
29
--   * Requires manual memory management.
--     Improper use may lead to memory leaks or dangling pointers.
--   * It assumes Latin-1 as the encoding, therefore it cannot represent
--     arbitrary Unicode strings.
30
--
Sylvain Henry's avatar
Sylvain Henry committed
31
-- Use 'PtrString' unless you want the facilities of 'FastString'.
32
33
module FastString
       (
34
35
        -- * ByteString
        fastStringToByteString,
36
        mkFastStringByteString,
37
        fastZStringToByteString,
38
        unsafeMkByteString,
39

Ian Lynagh's avatar
Ian Lynagh committed
40
41
42
43
44
45
        -- * FastZString
        FastZString,
        hPutFZS,
        zString,
        lengthFZS,

Ian Lynagh's avatar
Ian Lynagh committed
46
47
        -- * FastStrings
        FastString(..),     -- not abstract, for now.
48

Ian Lynagh's avatar
Ian Lynagh committed
49
        -- ** Construction
50
        fsLit,
51
        mkFastString,
Ian Lynagh's avatar
Ian Lynagh committed
52
        mkFastStringBytes,
53
        mkFastStringByteList,
Ian Lynagh's avatar
Ian Lynagh committed
54
55
        mkFastStringForeignPtr,
        mkFastString#,
56

Ian Lynagh's avatar
Ian Lynagh committed
57
58
59
        -- ** Deconstruction
        unpackFS,           -- :: FastString -> String
        bytesFS,            -- :: FastString -> [Word8]
60

Ian Lynagh's avatar
Ian Lynagh committed
61
62
        -- ** Encoding
        zEncodeFS,
63

Ian Lynagh's avatar
Ian Lynagh committed
64
        -- ** Operations
65
        uniqueOfFS,
Ian Lynagh's avatar
Ian Lynagh committed
66
67
68
        lengthFS,
        nullFS,
        appendFS,
69
70
        headFS,
        tailFS,
Ian Lynagh's avatar
Ian Lynagh committed
71
        concatFS,
72
        consFS,
Ian Lynagh's avatar
Ian Lynagh committed
73
        nilFS,
74

Ian Lynagh's avatar
Ian Lynagh committed
75
        -- ** Outputing
76
77
        hPutFS,

Ian Lynagh's avatar
Ian Lynagh committed
78
79
80
        -- ** Internal
        getFastStringTable,
        hasZEncoding,
81

Sylvain Henry's avatar
Sylvain Henry committed
82
83
        -- * PtrStrings
        PtrString (..),
Jan Stolarek's avatar
Jan Stolarek committed
84

85
86
        -- ** Construction
        sLit,
Sylvain Henry's avatar
Sylvain Henry committed
87
88
        mkPtrString#,
        mkPtrString,
Jan Stolarek's avatar
Jan Stolarek committed
89

90
        -- ** Deconstruction
Sylvain Henry's avatar
Sylvain Henry committed
91
        unpackPtrString,
Jan Stolarek's avatar
Jan Stolarek committed
92

93
        -- ** Operations
Sylvain Henry's avatar
Sylvain Henry committed
94
        lengthPS
95
96
       ) where

97
#include "HsVersions.h"
98

99
100
import GhcPrelude as Prelude

101
import Encoding
102
import FastFunctions
103
import Panic
104
import Util
105

106
import Control.Concurrent.MVar
107
import Control.DeepSeq
108
import Control.Monad
109
import Data.ByteString (ByteString)
110
111
112
113
import qualified Data.ByteString          as BS
import qualified Data.ByteString.Char8    as BSC
import qualified Data.ByteString.Internal as BS
import qualified Data.ByteString.Unsafe   as BS
114
import Foreign.C
thomie's avatar
thomie committed
115
import GHC.Exts
116
import System.IO
117
import Data.Data
118
import Data.IORef
Ian Lynagh's avatar
Ian Lynagh committed
119
import Data.Maybe       ( isJust )
120
import Data.Char
121
import Data.Semigroup as Semi
122

123
import GHC.IO
124

125
import Foreign
126

127
128
129
130
#if STAGE >= 2
import GHC.Conc.Sync    (sharedCAF)
#endif

Sylvain Henry's avatar
Sylvain Henry committed
131
import GHC.Base         ( unpackCString#, unpackNBytes# )
132
133


134
135
fastStringToByteString :: FastString -> ByteString
fastStringToByteString f = fs_bs f
136

137
138
fastZStringToByteString :: FastZString -> ByteString
fastZStringToByteString (FastZString bs) = bs
Ian Lynagh's avatar
Ian Lynagh committed
139

140
-- This will drop information if any character > '\xFF'
141
142
unsafeMkByteString :: String -> ByteString
unsafeMkByteString = BSC.pack
143

144
145
hashFastString :: FastString -> Int
hashFastString (FastString _ _ bs _)
146
147
    = inlinePerformIO $ BS.unsafeUseAsCStringLen bs $ \(ptr, len) ->
      return $ hashStr (castPtr ptr) len
148

Ian Lynagh's avatar
Ian Lynagh committed
149
150
-- -----------------------------------------------------------------------------

151
newtype FastZString = FastZString ByteString
152
  deriving NFData
Ian Lynagh's avatar
Ian Lynagh committed
153
154

hPutFZS :: Handle -> FastZString -> IO ()
155
hPutFZS handle (FastZString bs) = BS.hPut handle bs
Ian Lynagh's avatar
Ian Lynagh committed
156
157

zString :: FastZString -> String
158
159
zString (FastZString bs) =
    inlinePerformIO $ BS.unsafeUseAsCStringLen bs peekCAStringLen
Ian Lynagh's avatar
Ian Lynagh committed
160
161

lengthFZS :: FastZString -> Int
162
lengthFZS (FastZString bs) = BS.length bs
Ian Lynagh's avatar
Ian Lynagh committed
163

164
mkFastZStringString :: String -> FastZString
165
mkFastZStringString str = FastZString (BSC.pack str)
166

Ian Lynagh's avatar
Ian Lynagh committed
167
-- -----------------------------------------------------------------------------
168

169
170
171
172
173
{-|
A 'FastString' is an array of bytes, hashed to support fast O(1)
comparison.  It is also associated with a character encoding, so that
we know how to convert a 'FastString' to the local encoding, or to the
Z-encoding used by the compiler internally.
174

175
176
177
178
'FastString's support a memoized conversion to the Z-encoding via zEncodeFS.
-}

data FastString = FastString {
Ian Lynagh's avatar
Ian Lynagh committed
179
180
      uniq    :: {-# UNPACK #-} !Int, -- unique id
      n_chars :: {-# UNPACK #-} !Int, -- number of chars
181
      fs_bs   :: {-# UNPACK #-} !ByteString,
182
      fs_ref  :: {-# UNPACK #-} !(IORef (Maybe FastZString))
183
  }
184
185
186

instance Eq FastString where
  f1 == f2  =  uniq f1 == uniq f2
187

188
instance Ord FastString where
Ian Lynagh's avatar
Ian Lynagh committed
189
    -- Compares lexicographically, not by unique
190
    a <= b = case cmpFS a b of { LT -> True;  EQ -> True;  GT -> False }
Ian Lynagh's avatar
Ian Lynagh committed
191
    a <  b = case cmpFS a b of { LT -> True;  EQ -> False; GT -> False }
192
    a >= b = case cmpFS a b of { LT -> False; EQ -> True;  GT -> True  }
Ian Lynagh's avatar
Ian Lynagh committed
193
194
195
196
197
    a >  b = case cmpFS a b of { LT -> False; EQ -> False; GT -> True  }
    max x y | x >= y    =  x
            | otherwise =  y
    min x y | x <= y    =  x
            | otherwise =  y
198
    compare a b = cmpFS a b
199

Ben Gamari's avatar
Ben Gamari committed
200
201
202
instance IsString FastString where
    fromString = fsLit

203
204
205
instance Semi.Semigroup FastString where
    (<>) = appendFS

206
207
instance Monoid FastString where
    mempty = nilFS
208
    mappend = (Semi.<>)
209
    mconcat = concatFS
210

211
212
213
instance Show FastString where
   show fs = show (unpackFS fs)

214
215
216
217
218
219
instance Data FastString where
  -- don't traverse?
  toConstr _   = abstractConstr "FastString"
  gunfold _ _  = error "gunfold"
  dataTypeOf _ = mkNoRepType "FastString"

220
cmpFS :: FastString -> FastString -> Ordering
221
cmpFS f1@(FastString u1 _ _ _) f2@(FastString u2 _ _ _) =
222
  if u1 == u2 then EQ else
223
  compare (fastStringToByteString f1) (fastStringToByteString f2)
224

Sylvain Henry's avatar
Sylvain Henry committed
225
foreign import ccall unsafe "memcmp"
226
  memcmp :: Ptr a -> Ptr b -> Int -> IO Int
227

228
229
-- -----------------------------------------------------------------------------
-- Construction
230

231
{-
232
233
234
235
236
237
Internally, the compiler will maintain a fast string symbol table, providing
sharing and fast comparison. Creation of new @FastString@s then covertly does a
lookup, re-using the @FastString@ if there was a hit.

The design of the FastString hash table allows for lockless concurrent reads
and updates to multiple buckets with low synchronization overhead.
238

239
240
See Note [Updating the FastString table] on how it's updated.
-}
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
data FastStringTable = FastStringTable
  {-# UNPACK #-} !(IORef Int) -- the unique ID counter shared with all buckets
  (Array# (IORef FastStringTableSegment)) -- concurrent segments

data FastStringTableSegment = FastStringTableSegment
  {-# UNPACK #-} !(MVar ()) -- the lock for write in each segment
  {-# UNPACK #-} !(IORef Int) -- the number of elements
  (MutableArray# RealWorld [FastString]) -- buckets in this segment

{-
Following parameters are determined based on:

* Benchmark based on testsuite/tests/utils/should_run/T14854.hs
* Stats of @echo :browse | ghc --interactive -dfaststring-stats >/dev/null@:
  on 2018-10-24, we have 13920 entries.
-}
segmentBits, numSegments, segmentMask, initialNumBuckets :: Int
segmentBits = 8
numSegments = 256   -- bit segmentBits
segmentMask = 0xff  -- bit segmentBits - 1
initialNumBuckets = 64

hashToSegment# :: Int# -> Int#
hashToSegment# hash# = hash# `andI#` segmentMask#
  where
    !(I# segmentMask#) = segmentMask

hashToIndex# :: MutableArray# RealWorld [FastString] -> Int# -> Int#
hashToIndex# buckets# hash# =
  (hash# `uncheckedIShiftRL#` segmentBits#) `remInt#` size#
  where
    !(I# segmentBits#) = segmentBits
    size# = sizeofMutableArray# buckets#

maybeResizeSegment :: IORef FastStringTableSegment -> IO FastStringTableSegment
maybeResizeSegment segmentRef = do
  segment@(FastStringTableSegment lock counter old#) <- readIORef segmentRef
  let oldSize# = sizeofMutableArray# old#
      newSize# = oldSize# *# 2#
  (I# n#) <- readIORef counter
  if isTrue# (n# <# newSize#) -- maximum load of 1
  then return segment
  else do
    resizedSegment@(FastStringTableSegment _ _ new#) <- IO $ \s1# ->
      case newArray# newSize# [] s1# of
        (# s2#, arr# #) -> (# s2#, FastStringTableSegment lock counter arr# #)
    forM_ [0 .. (I# oldSize#) - 1] $ \(I# i#) -> do
      fsList <- IO $ readArray# old# i#
      forM_ fsList $ \fs -> do
        let -- Shall we store in hash value in FastString instead?
            !(I# hash#) = hashFastString fs
            idx# = hashToIndex# new# hash#
        IO $ \s1# ->
          case readArray# new# idx# s1# of
            (# s2#, bucket #) -> case writeArray# new# idx# (fs: bucket) s2# of
              s3# -> (# s3#, () #)
    writeIORef segmentRef resizedSegment
    return resizedSegment

{-# NOINLINE stringTable #-}
stringTable :: FastStringTable
stringTable = unsafePerformIO $ do
  let !(I# numSegments#) = numSegments
      !(I# initialNumBuckets#) = initialNumBuckets
      loop a# i# s1#
        | isTrue# (i# ==# numSegments#) = s1#
        | otherwise = case newMVar () `unIO` s1# of
            (# s2#, lock #) -> case newIORef 0 `unIO` s2# of
              (# s3#, counter #) -> case newArray# initialNumBuckets# [] s3# of
                (# s4#, buckets# #) -> case newIORef
                    (FastStringTableSegment lock counter buckets#) `unIO` s4# of
                  (# s5#, segment #) -> case writeArray# a# i# segment s5# of
                    s6# -> loop a# (i# +# 1#) s6#
314
  uid <- newIORef 603979776 -- ord '$' * 0x01000000
315
316
317
318
319
  tab <- IO $ \s1# ->
    case newArray# numSegments# (panic "string_table") s1# of
      (# s2#, arr# #) -> case loop arr# 0# s2# of
        s3# -> case unsafeFreezeArray# arr# s3# of
          (# s4#, segments# #) -> (# s4#, FastStringTable uid segments# #)
320

321
322
323
  -- use the support wired into the RTS to share this CAF among all images of
  -- libHSghc
#if STAGE < 2
324
  return tab
325
#else
326
  sharedCAF tab getOrSetLibHSghcFastStringTable
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363

-- from the RTS; thus we cannot use this mechanism when STAGE<2; the previous
-- RTS might not have this symbol
foreign import ccall unsafe "getOrSetLibHSghcFastStringTable"
  getOrSetLibHSghcFastStringTable :: Ptr a -> IO (Ptr a)
#endif

{-

We include the FastString table in the `sharedCAF` mechanism because we'd like
FastStrings created by a Core plugin to have the same uniques as corresponding
strings created by the host compiler itself.  For example, this allows plugins
to lookup known names (eg `mkTcOcc "MySpecialType"`) in the GlobalRdrEnv or
even re-invoke the parser.

In particular, the following little sanity test was failing in a plugin
prototyping safe newtype-coercions: GHC.NT.Type.NT was imported, but could not
be looked up /by the plugin/.

   let rdrName = mkModuleName "GHC.NT.Type" `mkRdrQual` mkTcOcc "NT"
   putMsgS $ showSDoc dflags $ ppr $ lookupGRE_RdrName rdrName $ mg_rdr_env guts

`mkTcOcc` involves the lookup (or creation) of a FastString.  Since the
plugin's FastString.string_table is empty, constructing the RdrName also
allocates new uniques for the FastStrings "GHC.NT.Type" and "NT".  These
uniques are almost certainly unequal to the ones that the host compiler
originally assigned to those FastStrings.  Thus the lookup fails since the
domain of the GlobalRdrEnv is affected by the RdrName's OccName's FastString's
unique.

Maintaining synchronization of the two instances of this global is rather
difficult because of the uses of `unsafePerformIO` in this module.  Not
synchronizing them risks breaking the rather major invariant that two
FastStrings with the same unique have the same string. Thus we use the
lower-level `sharedCAF` mechanism that relies on Globals.c.

-}
364

365
mkFastString# :: Addr# -> FastString
366
mkFastString# a# = mkFastStringBytes ptr (ptrStrLength ptr)
367
  where ptr = Ptr a#
368

369
370
{- Note [Updating the FastString table]

371
372
373
374
375
We use a concurrent hashtable which contains multiple segments, each hash value
always maps to the same segment. Read is lock-free, write to the a segment
should acquire a lock for that segment to avoid race condition, writes to
different segments are independent.

376
377
The procedure goes like this:

378
379
380
381
382
1. Find out which segment to operate on based on the hash value
2. Read the relevant bucket and perform a look up of the string.
3. If it exists, return it.
4. Otherwise grab a unique ID, create a new FastString and atomically attempt
   to update the relevant segment with this FastString:
383

384
385
   * Resize the segment by doubling the number of buckets when the number of
     FastStrings in this segment grows beyond the threshold.
386
387
388
389
390
391
392
393
394
   * Double check that the string is not in the bucket. Another thread may have
     inserted it while we were creating our string.
   * Return the existing FastString if it exists. The one we preemptively
     created will get GCed.
   * Otherwise, insert and return the string we created.
-}

mkFastStringWith :: (Int -> IO FastString) -> Ptr Word8 -> Int -> IO FastString
mkFastStringWith mk_fs !ptr !len = do
395
396
397
398
399
400
401
  FastStringTableSegment lock _ buckets# <- readIORef segmentRef
  let idx# = hashToIndex# buckets# hash#
  bucket <- IO $ readArray# buckets# idx#
  res <- bucket_match bucket len ptr
  case res of
    Just found -> return found
    Nothing -> do
402
403
404
      -- The withMVar below is not dupable. It can lead to deadlock if it is
      -- only run partially and putMVar is not called after takeMVar.
      noDuplicate
405
406
407
      n <- get_uid
      new_fs <- mk_fs n
      withMVar lock $ \_ -> insert new_fs
408
  where
409
    !(FastStringTable uid segments#) = stringTable
410
    get_uid = atomicModifyIORef' uid $ \n -> (n+1,n)
411

412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
    !(I# hash#) = hashStr ptr len
    (# segmentRef #) = indexArray# segments# (hashToSegment# hash#)
    insert fs = do
      FastStringTableSegment _ counter buckets# <- maybeResizeSegment segmentRef
      let idx# = hashToIndex# buckets# hash#
      bucket <- IO $ readArray# buckets# idx#
      res <- bucket_match bucket len ptr
      case res of
        -- The FastString was added by another thread after previous read and
        -- before we acquired the write lock.
        Just found -> return found
        Nothing -> do
          IO $ \s1# ->
            case writeArray# buckets# idx# (fs: bucket) s1# of
              s2# -> (# s2#, () #)
          modifyIORef' counter succ
          return fs

bucket_match :: [FastString] -> Int -> Ptr Word8 -> IO (Maybe FastString)
bucket_match [] _ _ = return Nothing
bucket_match (v@(FastString _ _ bs _):ls) len ptr
      | len == BS.length bs = do
         b <- BS.unsafeUseAsCString bs $ \buf ->
             cmpStringPrefix ptr (castPtr buf) len
         if b then return (Just v)
              else bucket_match ls len ptr
      | otherwise =
         bucket_match ls len ptr

441
mkFastStringBytes :: Ptr Word8 -> Int -> FastString
442
443
444
445
446
mkFastStringBytes !ptr !len =
    -- NB: Might as well use unsafeDupablePerformIO, since mkFastStringWith is
    -- idempotent.
    unsafeDupablePerformIO $
        mkFastStringWith (copyNewFastString ptr len) ptr len
447
448
449
450
451

-- | Create a 'FastString' from an existing 'ForeignPtr'; the difference
-- between this and 'mkFastStringBytes' is that we don't have to copy
-- the bytes if the string is new to the table.
mkFastStringForeignPtr :: Ptr Word8 -> ForeignPtr Word8 -> Int -> IO FastString
452
453
mkFastStringForeignPtr ptr !fp len
    = mkFastStringWith (mkNewFastString fp ptr len) ptr len
454

455
456
457
-- | Create a 'FastString' from an existing 'ForeignPtr'; the difference
-- between this and 'mkFastStringBytes' is that we don't have to copy
-- the bytes if the string is new to the table.
458
459
460
461
462
463
mkFastStringByteString :: ByteString -> FastString
mkFastStringByteString bs =
    inlinePerformIO $
      BS.unsafeUseAsCStringLen bs $ \(ptr, len) -> do
        let ptr' = castPtr ptr
        mkFastStringWith (mkNewFastStringByteString bs ptr' len) ptr' len
464

465
466
-- | Creates a UTF-8 encoded 'FastString' from a 'String'
mkFastString :: String -> FastString
Ian Lynagh's avatar
Ian Lynagh committed
467
mkFastString str =
468
469
470
471
472
  inlinePerformIO $ do
    let l = utf8EncodedLength str
    buf <- mallocForeignPtrBytes l
    withForeignPtr buf $ \ptr -> do
      utf8EncodeString ptr str
Ian Lynagh's avatar
Ian Lynagh committed
473
      mkFastStringForeignPtr ptr buf l
474

475
476
-- | Creates a 'FastString' from a UTF-8 encoded @[Word8]@
mkFastStringByteList :: [Word8] -> FastString
Ian Lynagh's avatar
Ian Lynagh committed
477
mkFastStringByteList str =
478
479
480
481
482
  inlinePerformIO $ do
    let l = Prelude.length str
    buf <- mallocForeignPtrBytes l
    withForeignPtr buf $ \ptr -> do
      pokeArray (castPtr ptr) str
Ian Lynagh's avatar
Ian Lynagh committed
483
      mkFastStringForeignPtr ptr buf l
484
485

-- | Creates a Z-encoded 'FastString' from a 'String'
Ian Lynagh's avatar
Ian Lynagh committed
486
mkZFastString :: String -> FastZString
487
mkZFastString = mkFastZStringString
488

489
mkNewFastString :: ForeignPtr Word8 -> Ptr Word8 -> Int -> Int
490
                -> IO FastString
491
mkNewFastString fp ptr len uid = do
492
493
  ref <- newIORef Nothing
  n_chars <- countUTF8Chars ptr len
494
495
  return (FastString uid n_chars (BS.fromForeignPtr fp 0 len) ref)

496
mkNewFastStringByteString :: ByteString -> Ptr Word8 -> Int -> Int
497
                          -> IO FastString
498
mkNewFastStringByteString bs ptr len uid = do
499
500
501
  ref <- newIORef Nothing
  n_chars <- countUTF8Chars ptr len
  return (FastString uid n_chars bs ref)
502

503
504
copyNewFastString :: Ptr Word8 -> Int -> Int -> IO FastString
copyNewFastString ptr len uid = do
505
506
507
  fp <- copyBytesToForeignPtr ptr len
  ref <- newIORef Nothing
  n_chars <- countUTF8Chars ptr len
508
  return (FastString uid n_chars (BS.fromForeignPtr fp 0 len) ref)
509

510
copyBytesToForeignPtr :: Ptr Word8 -> Int -> IO (ForeignPtr Word8)
511
512
513
514
515
copyBytesToForeignPtr ptr len = do
  fp <- mallocForeignPtrBytes len
  withForeignPtr fp $ \ptr' -> copyBytes ptr' ptr len
  return fp

516
517
518
cmpStringPrefix :: Ptr Word8 -> Ptr Word8 -> Int -> IO Bool
cmpStringPrefix ptr1 ptr2 len =
 do r <- memcmp ptr1 ptr2 len
519
520
521
522
    return (r == 0)


hashStr  :: Ptr Word8 -> Int -> Int
523
 -- use the Addr to produce a hash value between 0 & m (inclusive)
524
hashStr (Ptr a#) (I# len#) = loop 0# 0#
Ian Lynagh's avatar
Ian Lynagh committed
525
   where
thomie's avatar
thomie committed
526
527
    loop h n | isTrue# (n ==# len#) = I# h
             | otherwise  = loop h2 (n +# 1#)
528
529
530
          where
            !c = ord# (indexCharOffAddr# a# n)
            !h2 = (h *# 16777619#) `xorI#` c
531

532
533
-- -----------------------------------------------------------------------------
-- Operations
534

535
536
537
-- | Returns the length of the 'FastString' in characters
lengthFS :: FastString -> Int
lengthFS f = n_chars f
538

539
-- | Returns @True@ if this 'FastString' is not Z-encoded but already has
540
541
-- a Z-encoding cached (used in producing stats).
hasZEncoding :: FastString -> Bool
542
hasZEncoding (FastString _ _ _ ref) =
543
544
      inlinePerformIO $ do
        m <- readIORef ref
Ian Lynagh's avatar
Ian Lynagh committed
545
        return (isJust m)
546

547
-- | Returns @True@ if the 'FastString' is empty
548
nullFS :: FastString -> Bool
549
nullFS f = BS.null (fs_bs f)
550

551
-- | Unpacks and decodes the FastString
552
unpackFS :: FastString -> String
Ben Gamari's avatar
Ben Gamari committed
553
unpackFS (FastString _ _ bs _) = utf8DecodeByteString bs
554

555
-- | Gives the UTF-8 encoded bytes corresponding to a 'FastString'
556
bytesFS :: FastString -> [Word8]
557
bytesFS fs = BS.unpack $ fastStringToByteString fs
558

559
-- | Returns a Z-encoded version of a 'FastString'.  This might be the
560
561
562
563
-- original, if it was already Z-encoded.  The first time this
-- function is applied to a particular 'FastString', the results are
-- memoized.
--
Ian Lynagh's avatar
Ian Lynagh committed
564
zEncodeFS :: FastString -> FastZString
565
zEncodeFS fs@(FastString _ _ _ ref) =
566
      inlinePerformIO $ do
567
568
        m <- readIORef ref
        case m of
Ian Lynagh's avatar
Ian Lynagh committed
569
          Just zfs -> return zfs
570
          Nothing -> do
571
            atomicModifyIORef' ref $ \m' -> case m' of
572
573
574
              Nothing  -> let zfs = mkZFastString (zEncodeString (unpackFS fs))
                          in (Just zfs, zfs)
              Just zfs -> (m', zfs)
575
576

appendFS :: FastString -> FastString -> FastString
577
appendFS fs1 fs2 = mkFastStringByteString
578
579
                 $ BS.append (fastStringToByteString fs1)
                             (fastStringToByteString fs2)
580
581

concatFS :: [FastString] -> FastString
582
concatFS = mkFastStringByteString . BS.concat . map fs_bs
583
584

headFS :: FastString -> Char
585
headFS (FastString _ 0 _ _) = panic "headFS: Empty FastString"
586
587
588
headFS (FastString _ _ bs _) =
  inlinePerformIO $ BS.unsafeUseAsCString bs $ \ptr ->
         return (fst (utf8DecodeChar (castPtr ptr)))
589
590

tailFS :: FastString -> FastString
591
tailFS (FastString _ 0 _ _) = panic "tailFS: Empty FastString"
592
593
tailFS (FastString _ _ bs _) =
    inlinePerformIO $ BS.unsafeUseAsCString bs $ \ptr ->
594
    do let (_, n) = utf8DecodeChar (castPtr ptr)
595
       return $! mkFastStringByteString (BS.drop n bs)
596
597
598
599

consFS :: Char -> FastString -> FastString
consFS c fs = mkFastString (c : unpackFS fs)

600
601
uniqueOfFS :: FastString -> Int
uniqueOfFS (FastString u _ _ _) = u
602

603
nilFS :: FastString
604
nilFS = mkFastString ""
605

606
607
608
-- -----------------------------------------------------------------------------
-- Stats

609
610
611
612
613
614
615
616
617
618
getFastStringTable :: IO [[[FastString]]]
getFastStringTable =
  forM [0 .. numSegments - 1] $ \(I# i#) -> do
    let (# segmentRef #) = indexArray# segments# i#
    FastStringTableSegment _ _ buckets# <- readIORef segmentRef
    let bucketSize = I# (sizeofMutableArray# buckets#)
    forM [0 .. bucketSize - 1] $ \(I# j#) ->
      IO $ readArray# buckets# j#
  where
    !(FastStringTable _ segments#) = stringTable
619

620
621
622
-- -----------------------------------------------------------------------------
-- Outputting 'FastString's

623
624
-- |Outputs a 'FastString' with /no decoding at all/, that is, you
-- get the actual bytes in the 'FastString' written to the 'Handle'.
625
hPutFS :: Handle -> FastString -> IO ()
626
hPutFS handle fs = BS.hPut handle $ fastStringToByteString fs
627

628
629
-- ToDo: we'll probably want an hPutFSLocal, or something, to output
-- in the current locale's encoding (for error messages and suchlike).
630

631
-- -----------------------------------------------------------------------------
Sylvain Henry's avatar
Sylvain Henry committed
632
-- PtrStrings, here for convenience only.
633

Sylvain Henry's avatar
Sylvain Henry committed
634
635
-- | A 'PtrString' is a pointer to some array of Latin-1 encoded chars.
data PtrString = PtrString !(Ptr Word8) !Int
636

Sylvain Henry's avatar
Sylvain Henry committed
637
638
639
-- | Wrap an unboxed address into a 'PtrString'.
mkPtrString# :: Addr# -> PtrString
mkPtrString# a# = PtrString (Ptr a#) (ptrStrLength (Ptr a#))
640

Sylvain Henry's avatar
Sylvain Henry committed
641
-- | Encode a 'String' into a newly allocated 'PtrString' using Latin-1
642
643
-- encoding.  The original string must not contain non-Latin-1 characters
-- (above codepoint @0xff@).
Sylvain Henry's avatar
Sylvain Henry committed
644
645
646
{-# INLINE mkPtrString #-}
mkPtrString :: String -> PtrString
mkPtrString s =
Sylvain Henry's avatar
Sylvain Henry committed
647
648
 -- we don't use `unsafeDupablePerformIO` here to avoid potential memory leaks
 -- and because someone might be using `eqAddr#` to check for string equality.
649
 unsafePerformIO (do
Sylvain Henry's avatar
Sylvain Henry committed
650
651
   let len = length s
   p <- mallocBytes len
652
653
   let
     loop :: Int -> String -> IO ()
Sylvain Henry's avatar
Sylvain Henry committed
654
     loop !_ []    = return ()
655
656
657
658
     loop n (c:cs) = do
        pokeByteOff p n (fromIntegral (ord c) :: Word8)
        loop (1+n) cs
   loop 0 s
Sylvain Henry's avatar
Sylvain Henry committed
659
   return (PtrString p len)
660
661
 )

Sylvain Henry's avatar
Sylvain Henry committed
662
663
664
665
-- | Decode a 'PtrString' back into a 'String' using Latin-1 encoding.
-- This does not free the memory associated with 'PtrString'.
unpackPtrString :: PtrString -> String
unpackPtrString (PtrString (Ptr p#) (I# n#)) = unpackNBytes# p# n#
666

Sylvain Henry's avatar
Sylvain Henry committed
667
668
669
-- | Return the length of a 'PtrString'
lengthPS :: PtrString -> Int
lengthPS (PtrString _ n) = n
670

671
672
673
-- -----------------------------------------------------------------------------
-- under the carpet

Sylvain Henry's avatar
Sylvain Henry committed
674
foreign import ccall unsafe "strlen"
675
  ptrStrLength :: Ptr Word8 -> Int
676

Ian Lynagh's avatar
Ian Lynagh committed
677
{-# NOINLINE sLit #-}
Sylvain Henry's avatar
Sylvain Henry committed
678
679
sLit :: String -> PtrString
sLit x  = mkPtrString x
Ian Lynagh's avatar
Ian Lynagh committed
680
681
682
683
684
685

{-# NOINLINE fsLit #-}
fsLit :: String -> FastString
fsLit x = mkFastString x

{-# RULES "slit"
Sylvain Henry's avatar
Sylvain Henry committed
686
    forall x . sLit  (unpackCString# x) = mkPtrString#  x #-}
Ian Lynagh's avatar
Ian Lynagh committed
687
688
{-# RULES "fslit"
    forall x . fsLit (unpackCString# x) = mkFastString# x #-}