Commit 7c665f9c authored by Sylvain Henry's avatar Sylvain Henry Committed by Ben Gamari

Refactor LitString

Refactor LitString so that the string length is computed at most once
and then stored.

Also remove strlen and memcmp wrappers (it seems like they were a
workaround for a very old GCC when using -fvia-C).

Bumps haddock submodule.

Reviewers: bgamari, dfeuer, nickkuk

Reviewed By: bgamari, nickkuk

Subscribers: nickkuk, dfeuer, thomie, carter

Differential Revision: https://phabricator.haskell.org/D4630
parent 21884270
...@@ -13,23 +13,6 @@ places in the GHC library. ...@@ -13,23 +13,6 @@ places in the GHC library.
#include <unistd.h> #include <unistd.h>
#endif #endif
/*
Calling 'strlen' and 'memcpy' directly gives problems with GCC's inliner,
and causes gcc to require too many registers on x84
*/
HsInt
ghc_strlen( HsPtr a )
{
return (strlen((char *)a));
}
HsInt
ghc_memcmp( HsPtr a1, HsPtr a2, HsInt len )
{
return (memcmp((char *)a1, a2, len));
}
void void
enableTimingStats( void ) /* called from the driver */ enableTimingStats( void ) /* called from the driver */
{ {
......
...@@ -6,10 +6,5 @@ ...@@ -6,10 +6,5 @@
#include "HsFFI.h" #include "HsFFI.h"
// Out-of-line string functions, see compiler/utils/FastString.hs
HsInt ghc_strlen( HsAddr a );
HsInt ghc_memcmp( HsAddr a1, HsAddr a2, HsInt len );
void enableTimingStats( void ); void enableTimingStats( void );
void setHeapSize( HsInt size ); void setHeapSize( HsInt size );
...@@ -20,6 +20,7 @@ module BufWrite ( ...@@ -20,6 +20,7 @@ module BufWrite (
bPutFS, bPutFS,
bPutFZS, bPutFZS,
bPutLitString, bPutLitString,
bPutReplicate,
bFlush, bFlush,
) where ) where
...@@ -97,19 +98,45 @@ bPutCStringLen b@(BufHandle buf r hdl) cstr@(ptr, len) = do ...@@ -97,19 +98,45 @@ bPutCStringLen b@(BufHandle buf r hdl) cstr@(ptr, len) = do
copyBytes (buf `plusPtr` i) ptr len copyBytes (buf `plusPtr` i) ptr len
writeFastMutInt r (i + len) writeFastMutInt r (i + len)
bPutLitString :: BufHandle -> LitString -> Int -> IO () bPutLitString :: BufHandle -> LitString -> IO ()
bPutLitString b@(BufHandle buf r hdl) a len = a `seq` do bPutLitString b@(BufHandle buf r hdl) l@(LitString a len) = l `seq` do
i <- readFastMutInt r i <- readFastMutInt r
if (i+len) >= buf_size if (i+len) >= buf_size
then do hPutBuf hdl buf i then do hPutBuf hdl buf i
writeFastMutInt r 0 writeFastMutInt r 0
if (len >= buf_size) if (len >= buf_size)
then hPutBuf hdl a len then hPutBuf hdl a len
else bPutLitString b a len else bPutLitString b l
else do else do
copyBytes (buf `plusPtr` i) a len copyBytes (buf `plusPtr` i) a len
writeFastMutInt r (i+len) writeFastMutInt r (i+len)
-- | Replicate an 8-bit character
bPutReplicate :: BufHandle -> Int -> Char -> IO ()
bPutReplicate (BufHandle buf r hdl) len c = do
i <- readFastMutInt r
let oc = fromIntegral (ord c)
if (i+len) < buf_size
then do
fillBytes (buf `plusPtr` i) oc len
writeFastMutInt r (i+len)
else do
-- flush the current buffer
when (i /= 0) $ hPutBuf hdl buf i
if (len < buf_size)
then do
fillBytes buf oc len
writeFastMutInt r len
else do
-- fill a full buffer
fillBytes buf oc buf_size
-- flush it as many times as necessary
let go n | n >= buf_size = do
hPutBuf hdl buf buf_size
go (n-buf_size)
| otherwise = writeFastMutInt r n
go len
bFlush :: BufHandle -> IO () bFlush :: BufHandle -> IO ()
bFlush (BufHandle buf r hdl) = do bFlush (BufHandle buf r hdl) = do
i <- readFastMutInt r i <- readFastMutInt r
......
...@@ -18,7 +18,7 @@ ...@@ -18,7 +18,7 @@
-- --
-- ['LitString'] -- ['LitString']
-- --
-- * Just a wrapper for the @Addr#@ of a C string (@Ptr CChar@). -- * Pointer and size of a Latin-1 encoded string.
-- * Practically no operations. -- * Practically no operations.
-- * Outputing them is fast. -- * Outputing them is fast.
-- * Generated by 'sLit'. -- * Generated by 'sLit'.
...@@ -81,7 +81,7 @@ module FastString ...@@ -81,7 +81,7 @@ module FastString
hasZEncoding, hasZEncoding,
-- * LitStrings -- * LitStrings
LitString, LitString (..),
-- ** Construction -- ** Construction
sLit, sLit,
...@@ -130,7 +130,7 @@ import Foreign ...@@ -130,7 +130,7 @@ import Foreign
import GHC.Conc.Sync (sharedCAF) import GHC.Conc.Sync (sharedCAF)
#endif #endif
import GHC.Base ( unpackCString# ) import GHC.Base ( unpackCString#, unpackNBytes# )
#define hASH_TBL_SIZE 4091 #define hASH_TBL_SIZE 4091
#define hASH_TBL_SIZE_UNBOXED 4091# #define hASH_TBL_SIZE_UNBOXED 4091#
...@@ -227,7 +227,7 @@ cmpFS f1@(FastString u1 _ _ _) f2@(FastString u2 _ _ _) = ...@@ -227,7 +227,7 @@ cmpFS f1@(FastString u1 _ _ _) f2@(FastString u2 _ _ _) =
if u1 == u2 then EQ else if u1 == u2 then EQ else
compare (fastStringToByteString f1) (fastStringToByteString f2) compare (fastStringToByteString f1) (fastStringToByteString f2)
foreign import ccall unsafe "ghc_memcmp" foreign import ccall unsafe "memcmp"
memcmp :: Ptr a -> Ptr b -> Int -> IO Int memcmp :: Ptr a -> Ptr b -> Int -> IO Int
-- ----------------------------------------------------------------------------- -- -----------------------------------------------------------------------------
...@@ -568,15 +568,12 @@ hPutFS handle fs = BS.hPut handle $ fastStringToByteString fs ...@@ -568,15 +568,12 @@ hPutFS handle fs = BS.hPut handle $ fastStringToByteString fs
-- ----------------------------------------------------------------------------- -- -----------------------------------------------------------------------------
-- LitStrings, here for convenience only. -- LitStrings, here for convenience only.
-- | A 'LitString' is a pointer to some null-terminated array of bytes. -- | A 'LitString' is a pointer to some array of Latin-1 encoded chars.
type LitString = Ptr Word8 data LitString = LitString !(Ptr Word8) !Int
--Why do we recalculate length every time it's requested?
--If it's commonly needed, we should perhaps have
--data LitString = LitString {-#UNPACK#-}!Addr# {-#UNPACK#-}!Int#
-- | Wrap an unboxed address into a 'LitString'. -- | Wrap an unboxed address into a 'LitString'.
mkLitString# :: Addr# -> LitString mkLitString# :: Addr# -> LitString
mkLitString# a# = Ptr a# mkLitString# a# = LitString (Ptr a#) (ptrStrLength (Ptr a#))
-- | Encode a 'String' into a newly allocated 'LitString' using Latin-1 -- | Encode a 'String' into a newly allocated 'LitString' using Latin-1
-- encoding. The original string must not contain non-Latin-1 characters -- encoding. The original string must not contain non-Latin-1 characters
...@@ -584,32 +581,34 @@ mkLitString# a# = Ptr a# ...@@ -584,32 +581,34 @@ mkLitString# a# = Ptr a#
{-# INLINE mkLitString #-} {-# INLINE mkLitString #-}
mkLitString :: String -> LitString mkLitString :: String -> LitString
mkLitString s = mkLitString s =
-- we don't use `unsafeDupablePerformIO` here to avoid potential memory leaks
-- and because someone might be using `eqAddr#` to check for string equality.
unsafePerformIO (do unsafePerformIO (do
p <- mallocBytes (length s + 1) let len = length s
p <- mallocBytes len
let let
loop :: Int -> String -> IO () loop :: Int -> String -> IO ()
loop !n [] = pokeByteOff p n (0 :: Word8) loop !_ [] = return ()
loop n (c:cs) = do loop n (c:cs) = do
pokeByteOff p n (fromIntegral (ord c) :: Word8) pokeByteOff p n (fromIntegral (ord c) :: Word8)
loop (1+n) cs loop (1+n) cs
loop 0 s loop 0 s
return p return (LitString p len)
) )
-- | Decode a 'LitString' back into a 'String' using Latin-1 encoding. -- | Decode a 'LitString' back into a 'String' using Latin-1 encoding.
-- This does not free the memory associated with 'LitString'. -- This does not free the memory associated with 'LitString'.
unpackLitString :: LitString -> String unpackLitString :: LitString -> String
unpackLitString (Ptr p) = unpackCString# p unpackLitString (LitString (Ptr p#) (I# n#)) = unpackNBytes# p# n#
-- | Compute the length of a 'LitString', which must necessarily be -- | Return the length of a 'LitString'
-- null-terminated.
lengthLS :: LitString -> Int lengthLS :: LitString -> Int
lengthLS = ptrStrLength lengthLS (LitString _ n) = n
-- ----------------------------------------------------------------------------- -- -----------------------------------------------------------------------------
-- under the carpet -- under the carpet
foreign import ccall unsafe "ghc_strlen" foreign import ccall unsafe "strlen"
ptrStrLength :: Ptr Word8 -> Int ptrStrLength :: Ptr Word8 -> Int
{-# NOINLINE sLit #-} {-# NOINLINE sLit #-}
......
...@@ -103,7 +103,7 @@ module Pretty ( ...@@ -103,7 +103,7 @@ module Pretty (
Mode(..), Mode(..),
-- ** General rendering -- ** General rendering
fullRender, fullRender, txtPrinter,
-- ** GHC-specific rendering -- ** GHC-specific rendering
printDoc, printDoc_, printDoc, printDoc_,
...@@ -120,7 +120,7 @@ import System.IO ...@@ -120,7 +120,7 @@ import System.IO
import Numeric (showHex) import Numeric (showHex)
--for a RULES --for a RULES
import GHC.Base ( unpackCString# ) import GHC.Base ( unpackCString#, unpackNBytes#, Int(..) )
import GHC.Ptr ( Ptr(..) ) import GHC.Ptr ( Ptr(..) )
-- Don't import Util( assertPanic ) because it makes a loop in the module structure -- Don't import Util( assertPanic ) because it makes a loop in the module structure
...@@ -270,8 +270,10 @@ data TextDetails = Chr {-# UNPACK #-} !Char -- ^ A single Char fragment ...@@ -270,8 +270,10 @@ data TextDetails = Chr {-# UNPACK #-} !Char -- ^ A single Char fragment
| Str String -- ^ A whole String fragment | Str String -- ^ A whole String fragment
| PStr FastString -- a hashed string | PStr FastString -- a hashed string
| ZStr FastZString -- a z-encoded string | ZStr FastZString -- a z-encoded string
| LStr {-# UNPACK #-} !LitString {-#UNPACK #-} !Int | LStr {-# UNPACK #-} !LitString
-- a '\0'-terminated array of bytes -- a '\0'-terminated array of bytes
| RStr {-# UNPACK #-} !Int {-# UNPACK #-} !Char
-- a repeated character (e.g., ' ')
instance Show Doc where instance Show Doc where
showsPrec _ doc cont = fullRender (mode style) (lineLength style) showsPrec _ doc cont = fullRender (mode style) (lineLength style)
...@@ -296,25 +298,28 @@ char c = textBeside_ (Chr c) 1 Empty ...@@ -296,25 +298,28 @@ char c = textBeside_ (Chr c) 1 Empty
-- The side condition on the last law is necessary because @'text' \"\"@ -- The side condition on the last law is necessary because @'text' \"\"@
-- has height 1, while 'empty' has no height. -- has height 1, while 'empty' has no height.
text :: String -> Doc text :: String -> Doc
text s = case length s of {sl -> textBeside_ (Str s) sl Empty} text s = textBeside_ (Str s) (length s) Empty
{-# NOINLINE [0] text #-} -- Give the RULE a chance to fire {-# NOINLINE [0] text #-} -- Give the RULE a chance to fire
-- It must wait till after phase 1 when -- It must wait till after phase 1 when
-- the unpackCString first is manifested -- the unpackCString first is manifested
-- RULE that turns (text "abc") into (ptext (A# "abc"#)) to avoid the -- RULE that turns (text "abc") into (ptext (A# "abc"#)) to avoid the
-- intermediate packing/unpacking of the string. -- intermediate packing/unpacking of the string.
{-# RULES {-# RULES "text/str"
"text/str" forall a. text (unpackCString# a) = ptext (Ptr a) forall a. text (unpackCString# a) = ptext (mkLitString# a)
#-} #-}
{-# RULES "text/unpackNBytes#"
forall p n. text (unpackNBytes# p n) = ptext (LitString (Ptr p) (I# n))
#-}
ftext :: FastString -> Doc ftext :: FastString -> Doc
ftext s = case lengthFS s of {sl -> textBeside_ (PStr s) sl Empty} ftext s = textBeside_ (PStr s) (lengthFS s) Empty
ptext :: LitString -> Doc ptext :: LitString -> Doc
ptext s = case lengthLS s of {sl -> textBeside_ (LStr s sl) sl Empty} ptext s = textBeside_ (LStr s) (lengthLS s) Empty
ztext :: FastZString -> Doc ztext :: FastZString -> Doc
ztext s = case lengthFZS s of {sl -> textBeside_ (ZStr s) sl Empty} ztext s = textBeside_ (ZStr s) (lengthFZS s) Empty
-- | Some text with any width. (@text s = sizedText (length s) s@) -- | Some text with any width. (@text s = sizedText (length s) s@)
sizedText :: Int -> String -> Doc sizedText :: Int -> String -> Doc
...@@ -336,12 +341,6 @@ isEmpty :: Doc -> Bool ...@@ -336,12 +341,6 @@ isEmpty :: Doc -> Bool
isEmpty Empty = True isEmpty Empty = True
isEmpty _ = False isEmpty _ = False
-- | Produce spacing for indenting the amount specified.
--
-- an old version inserted tabs being 8 columns apart in the output.
spaces :: Int -> String
spaces !n = replicate n ' '
{- {-
Q: What is the reason for negative indentation (i.e. argument to indent Q: What is the reason for negative indentation (i.e. argument to indent
is < 0) ? is < 0) ?
...@@ -655,7 +654,7 @@ nilAboveNest _ _ Empty = Empty ...@@ -655,7 +654,7 @@ nilAboveNest _ _ Empty = Empty
-- Here's why the "text s <>" is in the spec! -- Here's why the "text s <>" is in the spec!
nilAboveNest g k (Nest k1 q) = nilAboveNest g (k + k1) q nilAboveNest g k (Nest k1 q) = nilAboveNest g (k + k1) q
nilAboveNest g k q | not g && k > 0 -- No newline if no overlap nilAboveNest g k q | not g && k > 0 -- No newline if no overlap
= textBeside_ (Str (spaces k)) k q = textBeside_ (RStr k ' ') k q
| otherwise -- Put them really above | otherwise -- Put them really above
= nilAbove_ (mkNest k q) = nilAbove_ (mkNest k q)
...@@ -938,11 +937,12 @@ renderStyle s = fullRender (mode s) (lineLength s) (ribbonsPerLine s) ...@@ -938,11 +937,12 @@ renderStyle s = fullRender (mode s) (lineLength s) (ribbonsPerLine s)
-- | Default TextDetails printer -- | Default TextDetails printer
txtPrinter :: TextDetails -> String -> String txtPrinter :: TextDetails -> String -> String
txtPrinter (Chr c) s = c:s txtPrinter (Chr c) s = c:s
txtPrinter (Str s1) s2 = s1 ++ s2 txtPrinter (Str s1) s2 = s1 ++ s2
txtPrinter (PStr s1) s2 = unpackFS s1 ++ s2 txtPrinter (PStr s1) s2 = unpackFS s1 ++ s2
txtPrinter (ZStr s1) s2 = zString s1 ++ s2 txtPrinter (ZStr s1) s2 = zString s1 ++ s2
txtPrinter (LStr s1 _) s2 = unpackLitString s1 ++ s2 txtPrinter (LStr s1) s2 = unpackLitString s1 ++ s2
txtPrinter (RStr n c) s2 = replicate n c ++ s2
-- | The general rendering interface. -- | The general rendering interface.
fullRender :: Mode -- ^ Rendering mode fullRender :: Mode -- ^ Rendering mode
...@@ -1028,10 +1028,7 @@ display m !page_width !ribbon_width txt end doc ...@@ -1028,10 +1028,7 @@ display m !page_width !ribbon_width txt end doc
lay2 _ NoDoc = error "display lay2 NoDoc" lay2 _ NoDoc = error "display lay2 NoDoc"
lay2 _ (Union {}) = error "display lay2 Union" lay2 _ (Union {}) = error "display lay2 Union"
-- optimise long indentations using LitString chunks of 8 spaces indent !n r = RStr n ' ' `txt` r
indent !n r | n >= 8 = LStr (sLit " ") 8 `txt`
indent (n - 8) r
| otherwise = Str (spaces n) `txt` r
in in
lay 0 doc lay 0 doc
}} }}
...@@ -1050,21 +1047,21 @@ printDoc_ mode pprCols hdl doc ...@@ -1050,21 +1047,21 @@ printDoc_ mode pprCols hdl doc
= do { fullRender mode pprCols 1.5 put done doc ; = do { fullRender mode pprCols 1.5 put done doc ;
hFlush hdl } hFlush hdl }
where where
put (Chr c) next = hPutChar hdl c >> next put (Chr c) next = hPutChar hdl c >> next
put (Str s) next = hPutStr hdl s >> next put (Str s) next = hPutStr hdl s >> next
put (PStr s) next = hPutStr hdl (unpackFS s) >> next put (PStr s) next = hPutStr hdl (unpackFS s) >> next
-- NB. not hPutFS, we want this to go through -- NB. not hPutFS, we want this to go through
-- the I/O library's encoding layer. (#3398) -- the I/O library's encoding layer. (#3398)
put (ZStr s) next = hPutFZS hdl s >> next put (ZStr s) next = hPutFZS hdl s >> next
put (LStr s l) next = hPutLitString hdl s l >> next put (LStr s) next = hPutLitString hdl s >> next
put (RStr n c) next = hPutStr hdl (replicate n c) >> next
done = return () -- hPutChar hdl '\n' done = return () -- hPutChar hdl '\n'
-- some versions of hPutBuf will barf if the length is zero -- some versions of hPutBuf will barf if the length is zero
hPutLitString :: Handle -> Ptr a -> Int -> IO () hPutLitString :: Handle -> LitString -> IO ()
hPutLitString handle a l = if l == 0 hPutLitString _handle (LitString _ 0) = return ()
then return () hPutLitString handle (LitString a l) = hPutBuf handle a l
else hPutBuf handle a l
-- Printing output in LeftMode is performance critical: it's used when -- Printing output in LeftMode is performance critical: it's used when
-- dumping C and assembly output, so we allow ourselves a few dirty -- dumping C and assembly output, so we allow ourselves a few dirty
...@@ -1102,7 +1099,8 @@ layLeft b (TextBeside s _ p) = s `seq` (put b s >> layLeft b p) ...@@ -1102,7 +1099,8 @@ layLeft b (TextBeside s _ p) = s `seq` (put b s >> layLeft b p)
put b (Str s) = bPutStr b s put b (Str s) = bPutStr b s
put b (PStr s) = bPutFS b s put b (PStr s) = bPutFS b s
put b (ZStr s) = bPutFZS b s put b (ZStr s) = bPutFZS b s
put b (LStr s l) = bPutLitString b s l put b (LStr s) = bPutLitString b s
put b (RStr n c) = bPutReplicate b n c
layLeft _ _ = panic "layLeft: Unhandled case" layLeft _ _ = panic "layLeft: Unhandled case"
-- Define error=panic, for easier comparison with libraries/pretty. -- Define error=panic, for easier comparison with libraries/pretty.
......
...@@ -596,7 +596,7 @@ test('T5321FD', ...@@ -596,7 +596,7 @@ test('T5321FD',
# (due to better optCoercion, 5e7406d9, #9233) # (due to better optCoercion, 5e7406d9, #9233)
# 2016-04-06: 250757460 (x86/Linux) # 2016-04-06: 250757460 (x86/Linux)
(wordsize(64), 415136648, 10)]) (wordsize(64), 371826136, 10)])
# prev: 418306336 # prev: 418306336
# 29/08/2012: 492905640 # 29/08/2012: 492905640
# (increase due to new codegen) # (increase due to new codegen)
...@@ -618,6 +618,7 @@ test('T5321FD', ...@@ -618,6 +618,7 @@ test('T5321FD',
# 2016-07-16: 477840432 # 2016-07-16: 477840432
# Optimize handling of built-in OccNames # Optimize handling of built-in OccNames
# 2017-05-14: 415136648 (amd64/Linux) Two-pass CmmLayoutStack # 2017-05-14: 415136648 (amd64/Linux) Two-pass CmmLayoutStack
# 2018-04-24: 371826136 (amd64/Linux) Store size in LitString
], ],
compile,['']) compile,[''])
......
Subproject commit 271a9cb0c7a070deef8df2d4fb54ebe47a0bf560 Subproject commit 46ff2306f580c44915a6f3adb652f02b7f4edfe9
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment