Commit 7ae1bec5 authored by Ian Lynagh's avatar Ian Lynagh

Implement FastBytes, and use it for MachStr

This is a first step on the way to refactoring the FastString type.

FastBytes currently has no unique, mainly because there isn't currently
a nice way to produce them in Binary.

Also, we don't currently do the "Dictionary" thing with FastBytes in
Binary. I'm not sure whether this is important.

We can change both decisions later, but in the meantime this gets the
refactoring underway.
parent 18f82197
......@@ -84,7 +84,7 @@ data Literal
-- First the primitive guys
MachChar Char -- ^ @Char#@ - at least 31 bits. Create with 'mkMachChar'
| MachStr FastString -- ^ A string-literal: stored and emitted
| MachStr FastBytes -- ^ A string-literal: stored and emitted
-- UTF-8 encoded, we'll arrange to decode it
-- at runtime. Also emitted with a @'\0'@
-- terminator. Create with 'mkMachString'
......@@ -248,7 +248,8 @@ mkMachChar = MachChar
-- | Creates a 'Literal' of type @Addr#@, which is appropriate for passing to
-- e.g. some of the \"error\" functions in GHC.Err such as @GHC.Err.runtimeError@
mkMachString :: String -> Literal
mkMachString s = MachStr (mkFastString s) -- stored UTF-8 encoded
-- stored UTF-8 encoded
mkMachString s = MachStr (fastStringToFastBytes $ mkFastString s)
mkLitInteger :: Integer -> Type -> Literal
mkLitInteger = LitInteger
......@@ -436,7 +437,7 @@ pprLiteral :: (SDoc -> SDoc) -> Literal -> SDoc
-- to wrap parens around literals that occur in
-- a context requiring an atomic thing
pprLiteral _ (MachChar ch) = pprHsChar ch
pprLiteral _ (MachStr s) = pprHsString s
pprLiteral _ (MachStr s) = pprHsBytes s
pprLiteral _ (MachInt i) = pprIntVal i
pprLiteral _ (MachDouble d) = double (fromRat d)
pprLiteral _ (MachNullAddr) = ptext (sLit "__NULL")
......@@ -469,7 +470,7 @@ Hash values should be zero or a positive integer. No negatives please.
\begin{code}
hashLiteral :: Literal -> Int
hashLiteral (MachChar c) = ord c + 1000 -- Keep it out of range of common ints
hashLiteral (MachStr s) = hashFS s
hashLiteral (MachStr s) = hashFB s
hashLiteral (MachNullAddr) = 0
hashLiteral (MachInt i) = hashInteger i
hashLiteral (MachInt64 i) = hashInteger i
......
......@@ -92,8 +92,7 @@ addIdReps ids = [(idCgRep id, id) | id <- ids]
-------------------------------------------------------------------------
cgLit :: Literal -> FCode CmmLit
cgLit (MachStr s) = newByteStringCLit (bytesFS s)
-- not unpackFS; we want the UTF-8 byte stream.
cgLit (MachStr s) = newByteStringCLit (bytesFB s)
cgLit other_lit = return (mkSimpleLit other_lit)
mkSimpleLit :: Literal -> CmmLit
......
......@@ -90,7 +90,7 @@ import Data.Maybe
-------------------------------------------------------------------------
cgLit :: Literal -> FCode CmmLit
cgLit (MachStr s) = newByteStringCLit (bytesFS s)
cgLit (MachStr s) = newByteStringCLit (bytesFB s)
-- not unpackFS; we want the UTF-8 byte stream.
cgLit other_lit = return (mkSimpleLit other_lit)
......
......@@ -508,7 +508,7 @@ sizeExpr bOMB_OUT_SIZE top_args expr
litSize :: Literal -> Int
-- Used by CoreUnfold.sizeExpr
litSize (LitInteger {}) = 100 -- Note [Size of literal integers]
litSize (MachStr str) = 10 + 10 * ((lengthFS str + 3) `div` 4)
litSize (MachStr str) = 10 + 10 * ((lengthFB str + 3) `div` 4)
-- If size could be 0 then @f "x"@ might be too small
-- [Sept03: make literal strings a bit bigger to avoid fruitless
-- duplication of little strings]
......
......@@ -11,6 +11,8 @@
module ExternalCore where
import Data.Word
data Module
= Module Mname [Tdef] [Vdefg]
......@@ -84,7 +86,7 @@ data Lit
= Lint Integer Ty
| Lrational Rational Ty
| Lchar Char Ty
| Lstring String Ty
| Lstring [Word8] Ty
type Mname = Id
......
......@@ -283,11 +283,11 @@ mkStringExprFS str
| all safeChar chars
= do unpack_id <- lookupId unpackCStringName
return (App (Var unpack_id) (Lit (MachStr str)))
return (App (Var unpack_id) (Lit (MachStr (fastStringToFastBytes str))))
| otherwise
= do unpack_id <- lookupId unpackCStringUtf8Name
return (App (Var unpack_id) (Lit (MachStr str)))
return (App (Var unpack_id) (Lit (MachStr (fastStringToFastBytes str))))
where
chars = unpackFS str
......
......@@ -221,7 +221,7 @@ make_lit dflags l =
-- For a character bigger than 0xff, we represent it in ext-core
-- as an int lit with a char type.
MachChar i -> C.Lint (fromIntegral $ ord i) t
MachStr s -> C.Lstring (unpackFS s) t
MachStr s -> C.Lstring (bytesFB s) t
MachNullAddr -> C.Lint 0 t
MachInt i -> C.Lint i t
MachInt64 i -> C.Lint i t
......
......@@ -199,7 +199,9 @@ plit (Lint i t) = parens (integer i <> text "::" <> pty t)
plit (Lrational r t) = parens (text (show (numerator r)) <+> char '%'
<+> text (show (denominator r)) <> text "::" <> pty t)
plit (Lchar c t) = parens (text ("\'" ++ escape [c] ++ "\'") <> text "::" <> pty t)
plit (Lstring s t) = parens (pstring s <> text "::" <> pty t)
-- This is a little messy. We shouldn't really be going via String.
plit (Lstring bs t) = parens (pstring str <> text "::" <> pty t)
where str = map (chr . fromIntegral) bs
pstring :: String -> Doc
pstring s = doubleQuotes(text (escape s))
......
......@@ -775,7 +775,7 @@ dsEvTerm (EvSuperClass d n)
dsEvTerm (EvDelayedError ty msg) = return $ Var errorId `mkTyApps` [ty] `mkApps` [litMsg]
where
errorId = rUNTIME_ERROR_ID
litMsg = Lit (MachStr msg)
litMsg = Lit (MachStr (fastStringToFastBytes msg))
dsEvTerm (EvLit l) =
case l of
......
......@@ -39,6 +39,7 @@ import TysWiredIn
import Literal
import SrcLoc
import Data.Ratio
import MonadUtils
import Outputable
import BasicTypes
import Util
......@@ -68,7 +69,7 @@ See also below where we look for @DictApps@ for \tr{plusInt}, etc.
\begin{code}
dsLit :: HsLit -> DsM CoreExpr
dsLit (HsStringPrim s) = return (Lit (MachStr s))
dsLit (HsStringPrim s) = return (Lit (MachStr (fastStringToFastBytes s)))
dsLit (HsCharPrim c) = return (Lit (MachChar c))
dsLit (HsIntPrim i) = return (Lit (MachInt i))
dsLit (HsWordPrim w) = return (Lit (MachWord w))
......@@ -123,10 +124,10 @@ hsLitKey (HsWordPrim w) = mkMachWord w
hsLitKey (HsInt64Prim i) = mkMachInt64 i
hsLitKey (HsWord64Prim w) = mkMachWord64 w
hsLitKey (HsCharPrim c) = MachChar c
hsLitKey (HsStringPrim s) = MachStr s
hsLitKey (HsStringPrim s) = MachStr (fastStringToFastBytes s)
hsLitKey (HsFloatPrim f) = MachFloat (fl_value f)
hsLitKey (HsDoublePrim d) = MachDouble (fl_value d)
hsLitKey (HsString s) = MachStr s
hsLitKey (HsString s) = MachStr (fastStringToFastBytes s)
hsLitKey l = pprPanic "hsLitKey" (ppr l)
hsOverLitKey :: OutputableBndr a => HsOverLit a -> Bool -> Literal
......@@ -138,7 +139,7 @@ litValKey (HsIntegral i) False = MachInt i
litValKey (HsIntegral i) True = MachInt (-i)
litValKey (HsFractional r) False = MachFloat (fl_value r)
litValKey (HsFractional r) True = MachFloat (negate (fl_value r))
litValKey (HsIsString s) neg = ASSERT( not neg) MachStr s
litValKey (HsIsString s) neg = ASSERT( not neg) MachStr (fastStringToFastBytes s)
\end{code}
%************************************************************************
......@@ -253,7 +254,10 @@ matchLiterals (var:vars) ty sub_groups
wrap_str_guard :: Id -> (Literal,MatchResult) -> DsM MatchResult
-- Equality check for string literals
wrap_str_guard eq_str (MachStr s, mr)
= do { lit <- mkStringExprFS s
= do { -- We now have to convert back to FastString. Perhaps there
-- should be separate MachBytes and MachStr constructors?
s' <- liftIO $ mkFastStringFastBytes s
; lit <- mkStringExprFS s'
; let pred = mkApps (Var eq_str) [Var var, lit]
; return (mkGuardedMatchResult pred mr) }
wrap_str_guard _ (l, _) = pprPanic "matchLiterals/wrap_str_guard" (ppr l)
......
......@@ -1259,7 +1259,7 @@ pushAtom _ _ (AnnLit lit)
pushStr s
= let getMallocvilleAddr
= case s of
FastString _ n _ fp _ ->
FastBytes n fp ->
-- we could grab the Ptr from the ForeignPtr,
-- but then we have no way to control its lifetime.
-- In reality it'll probably stay alive long enoungh
......
......@@ -303,7 +303,7 @@ lit :: { Literal }
: '(' INTEGER '::' aty ')' { convIntLit $2 $4 }
| '(' RATIONAL '::' aty ')' { convRatLit $2 $4 }
| '(' CHAR '::' aty ')' { MachChar $2 }
| '(' STRING '::' aty ')' { MachStr (mkFastString $2) }
| '(' STRING '::' aty ')' { MachStr (fastStringToFastBytes (mkFastString $2)) }
fs_var_occ :: { FastString }
: NAME { mkFastString $1 }
......
......@@ -737,7 +737,7 @@ match_append_lit _ [Type ty1,
c1 `cheapEqExpr` c2
= ASSERT( ty1 `eqType` ty2 )
Just (Var unpk `App` Type ty1
`App` Lit (MachStr (s1 `appendFS` s2))
`App` Lit (MachStr (s1 `appendFB` s2))
`App` c1
`App` n)
......
......@@ -725,7 +725,14 @@ type SymbolTable = Array Int Name
---------------------------------------------------------
putFS :: BinHandle -> FastString -> IO ()
putFS bh (FastString _ l _ buf _) = do
putFS bh fs = putFB bh $ fastStringToFastBytes fs
getFS :: BinHandle -> IO FastString
getFS bh = do fb <- getFB bh
mkFastStringFastBytes fb
putFB :: BinHandle -> FastBytes -> IO ()
putFB bh (FastBytes l buf) = do
put_ bh l
withForeignPtr buf $ \ptr ->
let
......@@ -738,19 +745,19 @@ putFS bh (FastString _ l _ buf _) = do
go 0
{- -- possible faster version, not quite there yet:
getFS bh@BinMem{} = do
getFB bh@BinMem{} = do
(I# l) <- get bh
arr <- readIORef (arr_r bh)
off <- readFastMutInt (off_r bh)
return $! (mkFastSubStringBA# arr off l)
return $! (mkFastSubBytesBA# arr off l)
-}
getFS :: BinHandle -> IO FastString
getFS bh = do
getFB :: BinHandle -> IO FastBytes
getFB bh = do
l <- get bh
fp <- mallocForeignPtrBytes l
withForeignPtr fp $ \ptr -> do
let
go n | n == l = mkFastStringForeignPtr ptr fp l
go n | n == l = return $ foreignPtrToFastBytes fp l
| otherwise = do
b <- getByte bh
pokeElemOff ptr n b
......@@ -758,6 +765,10 @@ getFS bh = do
--
go 0
instance Binary FastBytes where
put_ bh f = putFB bh f
get bh = getFB bh
instance Binary FastString where
put_ bh f =
case getUserData bh of
......
......@@ -26,6 +26,16 @@
-- Use 'LitString' unless you want the facilities of 'FastString'.
module FastString
(
-- * FastBytes
FastBytes(..),
mkFastStringFastBytes,
foreignPtrToFastBytes,
fastStringToFastBytes,
bytesFB,
hashFB,
lengthFB,
appendFB,
-- * FastStrings
FastString(..), -- not abstract, for now.
......@@ -117,6 +127,61 @@ import GHC.Base ( unpackCString# )
#define hASH_TBL_SIZE_UNBOXED 4091#
data FastBytes = FastBytes {
fb_n_bytes :: {-# UNPACK #-} !Int, -- number of bytes
fb_buf :: {-# UNPACK #-} !(ForeignPtr Word8)
} deriving Typeable
instance Data FastBytes where
-- don't traverse?
toConstr _ = abstractConstr "FastBytes"
gunfold _ _ = error "gunfold"
dataTypeOf _ = mkNoRepType "FastBytes"
instance Eq FastBytes where
x == y = (x `compare` y) == EQ
instance Ord FastBytes where
compare = cmpFB
foreignPtrToFastBytes :: ForeignPtr Word8 -> Int -> FastBytes
foreignPtrToFastBytes fp len = FastBytes len fp
mkFastStringFastBytes :: FastBytes -> IO FastString
mkFastStringFastBytes (FastBytes len fp)
= withForeignPtr fp $ \ptr -> mkFastStringForeignPtr ptr fp len
fastStringToFastBytes :: FastString -> FastBytes
fastStringToFastBytes f = FastBytes (n_bytes f) (buf f)
-- | Gives the UTF-8 encoded bytes corresponding to a 'FastString'
bytesFB :: FastBytes -> [Word8]
bytesFB (FastBytes n_bytes buf) =
inlinePerformIO $ withForeignPtr buf $ \ptr ->
peekArray n_bytes ptr
hashFB :: FastBytes -> Int
hashFB (FastBytes len buf)
= inlinePerformIO $ withForeignPtr buf $ \ptr -> return $ hashStr ptr len
lengthFB :: FastBytes -> Int
lengthFB f = fb_n_bytes f
appendFB :: FastBytes -> FastBytes -> FastBytes
appendFB fb1 fb2 =
inlinePerformIO $ do
r <- mallocForeignPtrBytes len
withForeignPtr r $ \ r' -> do
withForeignPtr (fb_buf fb1) $ \ fb1Ptr -> do
withForeignPtr (fb_buf fb2) $ \ fb2Ptr -> do
copyBytes r' fb1Ptr len1
copyBytes (advancePtr r' len1) fb2Ptr len2
return $ foreignPtrToFastBytes r len
where len = len1 + len2
len1 = fb_n_bytes fb1
len2 = fb_n_bytes fb2
{-|
A 'FastString' is an array of bytes, hashed to support fast O(1)
comparison. It is also associated with a character encoding, so that
......@@ -165,8 +230,12 @@ instance Data FastString where
dataTypeOf _ = mkNoRepType "FastString"
cmpFS :: FastString -> FastString -> Ordering
cmpFS (FastString u1 l1 _ buf1 _) (FastString u2 l2 _ buf2 _) =
cmpFS f1@(FastString u1 _ _ _ _) f2@(FastString u2 _ _ _ _) =
if u1 == u2 then EQ else
cmpFB (fastStringToFastBytes f1) (fastStringToFastBytes f2)
cmpFB :: FastBytes -> FastBytes -> Ordering
cmpFB (FastBytes l1 buf1) (FastBytes l2 buf2) =
case unsafeMemcmp buf1 buf2 (min l1 l2) `compare` 0 of
LT -> LT
EQ -> compare l1 l2
......@@ -431,9 +500,7 @@ unpackFS (FastString _ n_bytes _ buf enc) =
-- | Gives the UTF-8 encoded bytes corresponding to a 'FastString'
bytesFS :: FastString -> [Word8]
bytesFS (FastString _ n_bytes _ buf _) =
inlinePerformIO $ withForeignPtr buf $ \ptr ->
peekArray n_bytes ptr
bytesFS fs = bytesFB $ fastStringToFastBytes fs
-- | Returns a Z-encoded version of a 'FastString'. This might be the
-- original, if it was already Z-encoded. The first time this
......
......@@ -48,7 +48,7 @@ module Outputable (
renderWithStyle,
pprInfixVar, pprPrefixVar,
pprHsChar, pprHsString,
pprHsChar, pprHsString, pprHsBytes,
pprFastFilePath,
-- * Controlling the style in which output is printed
......@@ -743,6 +743,16 @@ pprHsChar c | c > '\x10ffff' = char '\\' <> text (show (fromIntegral (ord c) ::
pprHsString :: FastString -> SDoc
pprHsString fs = vcat (map text (showMultiLineString (unpackFS fs)))
-- | Special combinator for showing string literals.
pprHsBytes :: FastBytes -> SDoc
pprHsBytes fb = let escaped = concatMap escape $ bytesFB fb
in vcat (map text (showMultiLineString escaped)) <> char '#'
where escape :: Word8 -> String
escape w = let c = chr (fromIntegral w)
in if isAscii c
then [c]
else '\\' : show w
---------------------
-- Put a name in parens if it's an operator
pprPrefixVar :: Bool -> SDoc -> SDoc
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment