Commit 509d2ad2 authored by Ian Lynagh's avatar Ian Lynagh
Browse files

Add a separate FastZString type

FastStrings are now always UTF8-encoded.

There's no StringTable for FastZString, but I don't think one is needed.
We only ever make a FastZString by running zEncodeFS on a FastString,
and the FastStrings are shared via the FastString StringTable, so we get
the same FastZString from the IORef.
parent 2f01debc
......@@ -191,7 +191,7 @@ pprModuleName :: ModuleName -> SDoc
pprModuleName (ModuleName nm) =
getPprStyle $ \ sty ->
if codeStyle sty
then ftext (zEncodeFS nm)
then ztext (zEncodeFS nm)
else ftext nm
moduleNameFS :: ModuleName -> FastString
......@@ -271,7 +271,7 @@ pprPackagePrefix p mod = getPprStyle doc
| codeStyle sty =
if p == mainPackageId
then empty -- never qualify the main package in code
else ftext (zEncodeFS (packageIdFS p)) <> char '_'
else ztext (zEncodeFS (packageIdFS p)) <> char '_'
| qualModule sty mod = ftext (packageIdFS (modulePackageId mod)) <> char ':'
-- the PrintUnqualified tells us which modules have to
-- be qualified with package names
......
......@@ -514,7 +514,7 @@ ppr_occ_name occ = ftext (occNameFS occ)
-- In code style, we Z-encode the strings. The results of Z-encoding each FastString are
-- cached behind the scenes in the FastString implementation.
ppr_z_occ_name :: OccName -> SDoc
ppr_z_occ_name occ = ftext (zEncodeFS (occNameFS occ))
ppr_z_occ_name occ = ztext (zEncodeFS (occNameFS occ))
-- Prints (if mod information is available) "Defined at <loc>" or
-- "Defined in <mod>" information for a Name.
......
......@@ -265,7 +265,7 @@ pprOccName :: OccName -> SDoc
pprOccName (OccName sp occ)
= getPprStyle $ \ sty ->
if codeStyle sty
then ftext (zEncodeFS occ)
then ztext (zEncodeFS occ)
else pp_occ <> pp_debug sty
where
pp_debug sty | debugStyle sty = braces (pprNameSpaceBrief sp)
......
......@@ -408,7 +408,10 @@ dsFExportDynamic id co0 cconv = do
dflags <- getDynFlags
let
-- hack: need to get at the name of the C stub we're about to generate.
fe_nm = mkFastString (unpackFS (zEncodeFS (moduleNameFS (moduleName mod))) ++ "_" ++ toCName dflags fe_id)
-- TODO: There's no real need to go via String with
-- (mkFastString . zString). In fact, is there a reason to convert
-- to FastString at all now, rather than sticking with FastZString?
fe_nm = mkFastString (zString (zEncodeFS (moduleNameFS (moduleName mod))) ++ "_" ++ toCName dflags fe_id)
cback <- newSysLocalDs arg_ty
newStablePtrId <- dsLookupGlobalId newStablePtrName
......
......@@ -261,15 +261,15 @@ nameToCLabel n suffix
where
pkgid = modulePackageId mod
mod = ASSERT( isExternalName n ) nameModule n
package_part = unpackFS (zEncodeFS (packageIdFS (modulePackageId mod)))
module_part = unpackFS (zEncodeFS (moduleNameFS (moduleName mod)))
occ_part = unpackFS (zEncodeFS (occNameFS (nameOccName n)))
package_part = zString (zEncodeFS (packageIdFS (modulePackageId mod)))
module_part = zString (zEncodeFS (moduleNameFS (moduleName mod)))
occ_part = zString (zEncodeFS (occNameFS (nameOccName n)))
qual_name = module_part ++ '_':occ_part ++ '_':suffix
primopToCLabel :: PrimOp -> String{-suffix-} -> String
primopToCLabel primop suffix
= let str = "ghczmprim_GHCziPrimopWrappers_" ++ unpackFS (zEncodeFS (occNameFS (primOpOcc primop))) ++ '_':suffix
= let str = "ghczmprim_GHCziPrimopWrappers_" ++ zString (zEncodeFS (occNameFS (primOpOcc primop))) ++ '_':suffix
in --trace ("primopToCLabel: " ++ str)
str
\end{code}
......
......@@ -277,7 +277,7 @@ ppCostCentreLbl :: CostCentre -> SDoc
ppCostCentreLbl (AllCafsCC {cc_mod = m}) = ppr m <> text "_CAFs_cc"
ppCostCentreLbl (NormalCC {cc_key = k, cc_name = n, cc_mod = m,
cc_is_caf = is_caf})
= ppr m <> char '_' <> ftext (zEncodeFS n) <> char '_' <>
= ppr m <> char '_' <> ztext (zEncodeFS n) <> char '_' <>
case is_caf of { CafCC -> ptext (sLit "CAF"); _ -> ppr (mkUniqueGrimily k)} <> text "_cc"
-- This is the name to go in the user-displayed string,
......
......@@ -23,6 +23,7 @@ module BufWrite (
bPutChar,
bPutStr,
bPutFS,
bPutFZS,
bPutLitString,
bFlush,
) where
......@@ -84,7 +85,13 @@ bPutStr (BufHandle buf r hdl) str = do
loop cs (i+1)
bPutFS :: BufHandle -> FastString -> IO ()
bPutFS b@(BufHandle buf r hdl) fs@(FastString _ len _ fp _) =
bPutFS b fs = bPutFB b $ fastStringToFastBytes fs
bPutFZS :: BufHandle -> FastZString -> IO ()
bPutFZS b fs = bPutFB b $ fastZStringToFastBytes fs
bPutFB :: BufHandle -> FastBytes -> IO ()
bPutFB b@(BufHandle buf r hdl) fb@(FastBytes len fp) =
withForeignPtr fp $ \ptr -> do
i <- readFastMutInt r
if (i + len) >= buf_size
......@@ -92,7 +99,7 @@ bPutFS b@(BufHandle buf r hdl) fs@(FastString _ len _ fp _) =
writeFastMutInt r 0
if (len >= buf_size)
then hPutBuf hdl ptr len
else bPutFS b fs
else bPutFB b fb
else do
copyBytes (buf `plusPtr` i) ptr len
writeFastMutInt r (i+len)
......
......@@ -31,12 +31,19 @@ module FastString
mkFastStringFastBytes,
foreignPtrToFastBytes,
fastStringToFastBytes,
fastZStringToFastBytes,
mkFastBytesByteList,
bytesFB,
hashFB,
lengthFB,
appendFB,
-- * FastZString
FastZString,
hPutFZS,
zString,
lengthFZS,
-- * FastStrings
FastString(..), -- not abstract, for now.
......@@ -49,15 +56,12 @@ module FastString
#if defined(__GLASGOW_HASKELL__)
mkFastString#,
#endif
mkZFastString,
mkZFastStringBytes,
-- ** Deconstruction
unpackFS, -- :: FastString -> String
bytesFS, -- :: FastString -> [Word8]
-- ** Encoding
isZEncoded,
zEncodeFS,
-- ** Operations
......@@ -163,6 +167,9 @@ mkFastStringFastBytes (FastBytes len fp)
fastStringToFastBytes :: FastString -> FastBytes
fastStringToFastBytes f = FastBytes (n_bytes f) (buf f)
fastZStringToFastBytes :: FastZString -> FastBytes
fastZStringToFastBytes (FastZString fb) = fb
mkFastBytesByteList :: [Word8] -> FastBytes
mkFastBytesByteList bs =
inlinePerformIO $ do
......@@ -199,6 +206,27 @@ appendFB fb1 fb2 =
len1 = fb_n_bytes fb1
len2 = fb_n_bytes fb2
hPutFB :: Handle -> FastBytes -> IO ()
hPutFB handle (FastBytes len fp)
| len == 0 = return ()
| otherwise = do withForeignPtr fp $ \ptr -> hPutBuf handle ptr len
-- -----------------------------------------------------------------------------
newtype FastZString = FastZString FastBytes
hPutFZS :: Handle -> FastZString -> IO ()
hPutFZS handle (FastZString fb) = hPutFB handle fb
zString :: FastZString -> String
zString (FastZString (FastBytes n_bytes buf)) =
inlinePerformIO $ withForeignPtr buf $ \ptr ->
peekCAStringLen (castPtr ptr, n_bytes)
lengthFZS :: FastZString -> Int
lengthFZS (FastZString fb) = lengthFB fb
-- -----------------------------------------------------------------------------
{-|
A 'FastString' is an array of bytes, hashed to support fast O(1)
......@@ -214,15 +242,9 @@ data FastString = FastString {
n_bytes :: {-# UNPACK #-} !Int, -- number of bytes
n_chars :: {-# UNPACK #-} !Int, -- number of chars
buf :: {-# UNPACK #-} !(ForeignPtr Word8),
enc :: FSEncoding
ref :: {-# UNPACK #-} !(IORef (Maybe FastZString))
} deriving Typeable
data FSEncoding
-- including strings that don't need any encoding
= ZEncoded
-- A UTF-8 string with a memoized Z-encoding
| UTF8Encoded {-# UNPACK #-} !(IORef (Maybe FastString))
instance Eq FastString where
f1 == f2 = uniq f1 == uniq f2
......@@ -328,26 +350,6 @@ mkFastStringBytes ptr len = unsafePerformIO $ do
Nothing -> add_it ls
Just v -> {- _trace ("re-use: "++show v) $ -} return v
mkZFastStringBytes :: Ptr Word8 -> Int -> FastString
mkZFastStringBytes ptr len = unsafePerformIO $ do
ft@(FastStringTable uid _) <- readIORef string_table
let
h = hashStr ptr len
add_it ls = do
fs <- copyNewZFastString uid ptr len
updTbl string_table ft h (fs:ls)
{- _trace ("new: " ++ show f_str) $ -}
return fs
--
lookup_result <- lookupTbl ft h
case lookup_result of
[] -> add_it []
ls -> do
b <- bucket_match ls len ptr
case b of
Nothing -> add_it ls
Just v -> {- _trace ("re-use: "++show v) $ -} return v
-- | Create a 'FastString' from an existing 'ForeignPtr'; the difference
-- between this and 'mkFastStringBytes' is that we don't have to copy
-- the bytes if the string is new to the table.
......@@ -372,28 +374,6 @@ mkFastStringForeignPtr ptr fp len = do
Nothing -> add_it ls
Just v -> {- _trace ("re-use: "++show v) $ -} return v
mkZFastStringForeignPtr :: Ptr Word8 -> ForeignPtr Word8 -> Int -> IO FastString
mkZFastStringForeignPtr ptr fp len = do
ft@(FastStringTable uid _) <- readIORef string_table
-- _trace ("hashed: "++show (I# h)) $
let
h = hashStr ptr len
add_it ls = do
fs <- mkNewZFastString uid ptr fp len
updTbl string_table ft h (fs:ls)
{- _trace ("new: " ++ show f_str) $ -}
return fs
--
lookup_result <- lookupTbl ft h
case lookup_result of
[] -> add_it []
ls -> do
b <- bucket_match ls len ptr
case b of
Nothing -> add_it ls
Just v -> {- _trace ("re-use: "++show v) $ -} return v
-- | Creates a UTF-8 encoded 'FastString' from a 'String'
mkFastString :: String -> FastString
mkFastString str =
......@@ -415,14 +395,9 @@ mkFastStringByteList str =
mkFastStringForeignPtr ptr buf l
-- | Creates a Z-encoded 'FastString' from a 'String'
mkZFastString :: String -> FastString
mkZFastString str =
inlinePerformIO $ do
let l = Prelude.length str
buf <- mallocForeignPtrBytes l
withForeignPtr buf $ \ptr -> do
pokeCAString (castPtr ptr) str
mkZFastStringForeignPtr ptr buf l
mkZFastString :: String -> FastZString
mkZFastString str = FastZString
$ mkFastBytesByteList $ map (fromIntegral . ord) str
bucket_match :: [FastString] -> Int -> Ptr Word8 -> IO (Maybe FastString)
bucket_match [] _ _ = return Nothing
......@@ -439,24 +414,14 @@ mkNewFastString :: Int -> Ptr Word8 -> ForeignPtr Word8 -> Int
mkNewFastString uid ptr fp len = do
ref <- newIORef Nothing
n_chars <- countUTF8Chars ptr len
return (FastString uid len n_chars fp (UTF8Encoded ref))
mkNewZFastString :: Int -> Ptr Word8 -> ForeignPtr Word8 -> Int
-> IO FastString
mkNewZFastString uid _ fp len = do
return (FastString uid len len fp ZEncoded)
return (FastString uid len n_chars fp ref)
copyNewFastString :: Int -> Ptr Word8 -> Int -> IO FastString
copyNewFastString uid ptr len = do
fp <- copyBytesToForeignPtr ptr len
ref <- newIORef Nothing
n_chars <- countUTF8Chars ptr len
return (FastString uid len n_chars fp (UTF8Encoded ref))
copyNewZFastString :: Int -> Ptr Word8 -> Int -> IO FastString
copyNewZFastString uid ptr len = do
fp <- copyBytesToForeignPtr ptr len
return (FastString uid len len fp ZEncoded)
return (FastString uid len n_chars fp ref)
copyBytesToForeignPtr :: Ptr Word8 -> Int -> IO (ForeignPtr Word8)
copyBytesToForeignPtr ptr len = do
......@@ -488,18 +453,10 @@ hashStr (Ptr a#) (I# len#) = loop 0# 0#
lengthFS :: FastString -> Int
lengthFS f = n_chars f
-- | Returns @True@ if the 'FastString' is Z-encoded
isZEncoded :: FastString -> Bool
isZEncoded fs | ZEncoded <- enc fs = True
| otherwise = False
-- | Returns @True@ if this 'FastString' is not Z-encoded but already has
-- a Z-encoding cached (used in producing stats).
hasZEncoding :: FastString -> Bool
hasZEncoding (FastString _ _ _ _ enc) =
case enc of
ZEncoded -> False
UTF8Encoded ref ->
hasZEncoding (FastString _ _ _ _ ref) =
inlinePerformIO $ do
m <- readIORef ref
return (isJust m)
......@@ -510,11 +467,9 @@ nullFS f = n_bytes f == 0
-- | Unpacks and decodes the FastString
unpackFS :: FastString -> String
unpackFS (FastString _ n_bytes _ buf enc) =
unpackFS (FastString _ n_bytes _ buf _) =
inlinePerformIO $ withForeignPtr buf $ \ptr ->
case enc of
ZEncoded -> peekCAStringLen (castPtr ptr,n_bytes)
UTF8Encoded _ -> utf8DecodeString ptr n_bytes
utf8DecodeString ptr n_bytes
-- | Gives the UTF-8 encoded bytes corresponding to a 'FastString'
bytesFS :: FastString -> [Word8]
......@@ -525,19 +480,16 @@ bytesFS fs = bytesFB $ fastStringToFastBytes fs
-- function is applied to a particular 'FastString', the results are
-- memoized.
--
zEncodeFS :: FastString -> FastString
zEncodeFS fs@(FastString _ _ _ _ enc) =
case enc of
ZEncoded -> fs
UTF8Encoded ref ->
zEncodeFS :: FastString -> FastZString
zEncodeFS fs@(FastString _ _ _ _ ref) =
inlinePerformIO $ do
m <- readIORef ref
case m of
Just fs -> return fs
Just zfs -> return zfs
Nothing -> do
let efs = mkZFastString (zEncodeString (unpackFS fs))
writeIORef ref (Just efs)
return efs
let zfs = mkZFastString (zEncodeString (unpackFS fs))
writeIORef ref (Just zfs)
return zfs
appendFS :: FastString -> FastString -> FastString
appendFS fs1 fs2 = inlinePerformIO
......@@ -550,23 +502,14 @@ concatFS ls = mkFastString (Prelude.concat (map unpackFS ls)) -- ToDo: do better
headFS :: FastString -> Char
headFS (FastString _ 0 _ _ _) = panic "headFS: Empty FastString"
headFS (FastString _ _ _ buf enc) =
headFS (FastString _ _ _ buf _) =
inlinePerformIO $ withForeignPtr buf $ \ptr -> do
case enc of
ZEncoded -> do
w <- peek (castPtr ptr)
return (castCCharToChar w)
UTF8Encoded _ ->
return (fst (utf8DecodeChar ptr))
tailFS :: FastString -> FastString
tailFS (FastString _ 0 _ _ _) = panic "tailFS: Empty FastString"
tailFS (FastString _ n_bytes _ buf enc) =
tailFS (FastString _ n_bytes _ buf _) =
inlinePerformIO $ withForeignPtr buf $ \ptr -> do
case enc of
ZEncoded -> do
return $! mkZFastStringBytes (ptr `plusPtr` 1) (n_bytes - 1)
UTF8Encoded _ -> do
let (_,ptr') = utf8DecodeChar ptr
let off = ptr' `minusPtr` ptr
return $! mkFastStringBytes (ptr `plusPtr` off) (n_bytes - off)
......@@ -595,9 +538,7 @@ getFastStringTable = do
-- |Outputs a 'FastString' with /no decoding at all/, that is, you
-- get the actual bytes in the 'FastString' written to the 'Handle'.
hPutFS :: Handle -> FastString -> IO ()
hPutFS handle (FastString _ len _ fp _)
| len == 0 = return ()
| otherwise = do withForeignPtr fp $ \ptr -> hPutBuf handle ptr len
hPutFS handle fs = hPutFB handle $ fastStringToFastBytes fs
-- ToDo: we'll probably want an hPutFSLocal, or something, to output
-- in the current locale's encoding (for error messages and suchlike).
......@@ -675,17 +616,6 @@ lengthLS = length
foreign import ccall unsafe "ghc_strlen"
ptrStrLength :: Ptr Word8 -> Int
-- NB. does *not* add a '\0'-terminator.
-- We only use CChar here to be parallel to the imported
-- peekC(A)StringLen.
pokeCAString :: Ptr CChar -> String -> IO ()
pokeCAString ptr str =
let
go [] _ = return ()
go (c:cs) n = do pokeElemOff ptr n (castCharToCChar c); go cs (n+1)
in
go str 0
{-# NOINLINE sLit #-}
sLit :: String -> LitString
sLit x = mkLitString x
......
......@@ -20,7 +20,7 @@ module Outputable (
interppSP, interpp'SP, pprQuotedList, pprWithCommas, quotedListWithOr,
empty, nest,
char,
text, ftext, ptext,
text, ftext, ptext, ztext,
int, intWithCommas, integer, float, double, rational,
parens, cparen, brackets, braces, quotes, quote,
doubleQuotes, angleBrackets, paBrackets,
......@@ -419,6 +419,7 @@ char :: Char -> SDoc
text :: String -> SDoc
ftext :: FastString -> SDoc
ptext :: LitString -> SDoc
ztext :: FastZString -> SDoc
int :: Int -> SDoc
integer :: Integer -> SDoc
float :: Float -> SDoc
......@@ -430,6 +431,7 @@ char c = docToSDoc $ Pretty.char c
text s = docToSDoc $ Pretty.text s
ftext s = docToSDoc $ Pretty.ftext s
ptext s = docToSDoc $ Pretty.ptext s
ztext s = docToSDoc $ Pretty.ztext s
int n = docToSDoc $ Pretty.int n
integer n = docToSDoc $ Pretty.integer n
float n = docToSDoc $ Pretty.float n
......
......@@ -163,7 +163,7 @@ module Pretty (
empty, isEmpty, nest,
char, text, ftext, ptext, zeroWidthText,
char, text, ftext, ptext, ztext, zeroWidthText,
int, integer, float, double, rational,
parens, brackets, braces, quotes, quote, doubleQuotes,
semi, comma, colon, space, equals,
......@@ -464,6 +464,7 @@ reduceDoc p = p
data TextDetails = Chr {-#UNPACK#-}!Char
| Str String
| PStr FastString -- a hashed string
| ZStr FastZString -- a z-encoded string
| LStr {-#UNPACK#-}!LitString FastInt -- a '\0'-terminated
-- array of bytes
......@@ -563,6 +564,8 @@ ftext :: FastString -> Doc
ftext s = case iUnbox (lengthFS s) of {sl -> textBeside_ (PStr s) sl Empty}
ptext :: LitString -> Doc
ptext s = case iUnbox (lengthLS s) of {sl -> textBeside_ (LStr s sl) sl Empty}
ztext :: FastZString -> Doc
ztext s = case iUnbox (lengthFZS s) of {sl -> textBeside_ (ZStr s) sl Empty}
zeroWidthText s = textBeside_ (Str s) (_ILIT(0)) Empty
#if defined(__GLASGOW_HASKELL__)
......@@ -906,6 +909,7 @@ string_txt :: TextDetails -> String -> String
string_txt (Chr c) s = c:s
string_txt (Str s1) s2 = s1 ++ s2
string_txt (PStr s1) s2 = unpackFS s1 ++ s2
string_txt (ZStr s1) s2 = zString s1 ++ s2
string_txt (LStr s1 _) s2 = unpackLitString s1 ++ s2
\end{code}
......@@ -1014,6 +1018,7 @@ printDoc mode pprCols hdl doc
put (PStr s) next = hPutStr hdl (unpackFS s) >> next
-- NB. not hPutFS, we want this to go through
-- the I/O library's encoding layer. (#3398)
put (ZStr s) next = hPutFZS hdl s >> next
put (LStr s l) next = hPutLitString hdl s l >> next
done = hPutChar hdl '\n'
......@@ -1065,6 +1070,7 @@ layLeft b (TextBeside s _ p) = put b s >> layLeft b p
put b (Chr c) = bPutChar b c
put b (Str s) = bPutStr b s
put b (PStr s) = bPutFS b s
put b (ZStr s) = bPutFZS b s
put b (LStr s l) = bPutLitString b s l
layLeft _ _ = panic "layLeft: Unhandled case"
\end{code}
......@@ -715,12 +715,11 @@ dumpFinalStats dflags =
dumpFastStringStats :: DynFlags -> IO ()
dumpFastStringStats dflags = do
buckets <- getFastStringTable
let (entries, longest, is_z, has_z) = countFS 0 0 0 0 buckets
let (entries, longest, has_z) = countFS 0 0 0 buckets
msg = text "FastString stats:" $$
nest 4 (vcat [text "size: " <+> int (length buckets),
text "entries: " <+> int entries,
text "longest chain: " <+> int longest,
text "z-encoded: " <+> (is_z `pcntOf` entries),
text "has z-encoding: " <+> (has_z `pcntOf` entries)
])
-- we usually get more "has z-encoding" than "z-encoded", because
......@@ -732,17 +731,16 @@ dumpFastStringStats dflags = do
where
x `pcntOf` y = int ((x * 100) `quot` y) <> char '%'
countFS :: Int -> Int -> Int -> Int -> [[FastString]] -> (Int, Int, Int, Int)
countFS entries longest is_z has_z [] = (entries, longest, is_z, has_z)
countFS entries longest is_z has_z (b:bs) =
countFS :: Int -> Int -> Int -> [[FastString]] -> (Int, Int, Int)
countFS entries longest has_z [] = (entries, longest, has_z)
countFS entries longest has_z (b:bs) =
let
len = length b
longest' = max len longest
entries' = entries + len
is_zs = length (filter isZEncoded b)
has_zs = length (filter hasZEncoding b)
in
countFS entries' longest' (is_z + is_zs) (has_z + has_zs) bs
countFS entries' longest' (has_z + has_zs) bs
-- -----------------------------------------------------------------------------
-- ABI hash support
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment