Commit 49301ad6 authored by Andrew Martin's avatar Andrew Martin Committed by Marge Bot

Implement cstringLength# and FinalPtr

This function and its accompanying rule resolve issue #5218.
A future PR to the bytestring library will make the internal
Data.ByteString.Internal.unsafePackAddress compute string length
with cstringLength#. This will improve the status quo because it is
eligible for constant folding.

Additionally, introduce a new data constructor to ForeignPtrContents
named FinalPtr. This additional data constructor, when used in the
IsString instance for ByteString, leads to more Core-to-Core
optimization opportunities, fewer runtime allocations, and smaller
binaries.

Also, this commit re-exports all the functions from GHC.CString
(including cstringLength#) in GHC.Exts. It also adds a new test
driver. This test driver is used to perform substring matches on Core
that is dumped after all the simplifier passes. In this commit, it is
used to check that constant folding of cstringLength# works.
parent d830bbc9
Pipeline #19692 passed with stages
in 438 minutes and 32 seconds
......@@ -349,6 +349,7 @@ basicKnownKeyNames
-- Strings and lists
unpackCStringName,
unpackCStringFoldrName, unpackCStringUtf8Name,
cstringLengthName,
-- Overloaded lists
isListClassName,
......@@ -1014,10 +1015,11 @@ modIntName = varQual gHC_CLASSES (fsLit "modInt#") modIntIdKey
-- Base strings Strings
unpackCStringName, unpackCStringFoldrName,
unpackCStringUtf8Name, eqStringName :: Name
unpackCStringUtf8Name, eqStringName, cstringLengthName :: Name
unpackCStringName = varQual gHC_CSTRING (fsLit "unpackCString#") unpackCStringIdKey
unpackCStringFoldrName = varQual gHC_CSTRING (fsLit "unpackFoldrCString#") unpackCStringFoldrIdKey
unpackCStringUtf8Name = varQual gHC_CSTRING (fsLit "unpackCStringUtf8#") unpackCStringUtf8IdKey
cstringLengthName = varQual gHC_CSTRING (fsLit "cstringLength#") cstringLengthIdKey
eqStringName = varQual gHC_BASE (fsLit "eqString") eqStringIdKey
-- The 'inline' function
......@@ -2097,7 +2099,7 @@ wildCardKey, absentErrorIdKey, augmentIdKey, appendIdKey,
unpackCStringUtf8IdKey, unpackCStringAppendIdKey,
unpackCStringFoldrIdKey, unpackCStringIdKey,
typeErrorIdKey, divIntIdKey, modIntIdKey,
absentSumFieldErrorIdKey :: Unique
absentSumFieldErrorIdKey, cstringLengthIdKey :: Unique
wildCardKey = mkPreludeMiscIdUnique 0 -- See Note [WildCard binders]
absentErrorIdKey = mkPreludeMiscIdUnique 1
......@@ -2124,6 +2126,7 @@ voidPrimIdKey = mkPreludeMiscIdUnique 21
typeErrorIdKey = mkPreludeMiscIdUnique 22
divIntIdKey = mkPreludeMiscIdUnique 23
modIntIdKey = mkPreludeMiscIdUnique 24
cstringLengthIdKey = mkPreludeMiscIdUnique 25
concatIdKey, filterIdKey, zipIdKey,
bindIOIdKey, returnIOIdKey, newStablePtrIdKey,
......
......@@ -66,6 +66,7 @@ import qualified Data.ByteString as BS
import Data.Int
import Data.Ratio
import Data.Word
import Data.Maybe (fromMaybe)
{-
Note [Constant folding]
......@@ -1257,6 +1258,8 @@ builtinRules
ru_nargs = 4, ru_try = match_append_lit },
BuiltinRule { ru_name = fsLit "EqString", ru_fn = eqStringName,
ru_nargs = 2, ru_try = match_eq_string },
BuiltinRule { ru_name = fsLit "CStringLength", ru_fn = cstringLengthName,
ru_nargs = 1, ru_try = match_cstring_length },
BuiltinRule { ru_name = fsLit "Inline", ru_fn = inlineIdName,
ru_nargs = 2, ru_try = \_ _ _ -> match_inline },
BuiltinRule { ru_name = fsLit "MagicDict", ru_fn = idName magicDictId,
......@@ -1477,6 +1480,30 @@ match_eq_string _ id_unf _
match_eq_string _ _ _ _ = Nothing
-----------------------------------------------------------------------
-- Illustration of this rule:
--
-- cstringLength# "foobar"# --> 6
-- cstringLength# "fizz\NULzz"# --> 4
--
-- Nota bene: Addr# literals are suffixed by a NUL byte when they are
-- compiled to read-only data sections. That's why cstringLength# is
-- well defined on Addr# literals that do not explicitly have an embedded
-- NUL byte.
--
-- See GHC issue #5218, MR 2165, and bytestring PR 191. This is particularly
-- helpful when using OverloadedStrings to create a ByteString since the
-- function computing the length of such ByteStrings can often be constant
-- folded.
match_cstring_length :: RuleFun
match_cstring_length env id_unf _ [lit1]
| Just (LitString str) <- exprIsLiteral_maybe id_unf lit1
-- If elemIndex returns Just, it has the index of the first embedded NUL
-- in the string. If no NUL bytes are present (the common case) then use
-- full length of the byte string.
= let len = fromMaybe (BS.length str) (BS.elemIndex 0 str)
in Just (Lit (mkLitInt (roPlatform env) (fromIntegral len)))
match_cstring_length _ _ _ _ = Nothing
---------------------------------------------------
-- The rule is this:
......
......@@ -128,8 +128,9 @@ import Foreign
import GHC.Conc.Sync (sharedCAF)
#endif
import GHC.Base ( unpackCString#, unpackNBytes# )
#if __GLASGOW_HASKELL__ < 811
import GHC.Base (unpackCString#,unpackNBytes#)
#endif
-- | Gives the UTF-8 encoded bytes corresponding to a 'FastString'
bytesFS :: FastString -> ByteString
......
......@@ -53,7 +53,7 @@ data HsLit x
-- ^ Unboxed character
| HsString (XHsString x) {- SourceText -} FastString
-- ^ String
| HsStringPrim (XHsStringPrim x) {- SourceText -} ByteString
| HsStringPrim (XHsStringPrim x) {- SourceText -} !ByteString
-- ^ Packed bytes
| HsInt (XHsInt x) IntegralLit
-- ^ Genuinely an Int; arises from
......
......@@ -6,6 +6,7 @@
This module converts Template Haskell syntax into Hs syntax
-}
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE DeriveFunctor #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE ScopedTypeVariables #-}
......@@ -1232,8 +1233,7 @@ cvtLit (CharPrimL c) = do { force c; return $ HsCharPrim NoSourceText c }
cvtLit (StringL s) = do { let { s' = mkFastString s }
; force s'
; return $ HsString (quotedSourceText s) s' }
cvtLit (StringPrimL s) = do { let { s' = BS.pack s }
; force s'
cvtLit (StringPrimL s) = do { let { !s' = BS.pack s }
; return $ HsStringPrim NoSourceText s' }
cvtLit (BytesPrimL (Bytes fptr off sz)) = do
let bs = unsafePerformIO $ withForeignPtr fptr $ \ptr ->
......
......@@ -114,7 +114,7 @@ data Literal
-- See Note [Types of LitNumbers] below for the
-- Type field.
| LitString ByteString -- ^ A string-literal: stored and emitted
| LitString !ByteString -- ^ A string-literal: stored and emitted
-- UTF-8 encoded, we'll arrange to decode it
-- at runtime. Also emitted with a @\'\\0\'@
-- terminator. Create with 'mkLitString'
......
......@@ -144,6 +144,9 @@ Arrow notation
``ghc-prim`` library
~~~~~~~~~~~~~~~~~~~~
- Add a known-key ``cstringLength#`` to ``GHC.CString`` that is eligible
for constant folding by a built-in rule.
``ghc`` library
~~~~~~~~~~~~~~~
......@@ -181,6 +184,15 @@ Arrow notation
``base`` library
~~~~~~~~~~~~~~~~
- ``ForeignPtrContents`` has a new nullary data constructor ``FinalPtr``.
``FinalPtr`` is intended for turning a primitive string literal into a
``ForeignPtr``. Unlike ``PlainForeignPtr``, ``FinalPtr`` does not have
a finalizer. Replacing ``PlainForeignPtr`` that has ``NoFinalizers`` with
``FinalPtr`` reduces allocations, reduces the size of compiled binaries,
and unlocks important Core-to-Core optimizations. ``FinalPtr`` will be used
in an upcoming ``bytestring`` release to improve the performance of
``ByteString`` literals created with ``OverloadedStrings``.
Build system
~~~~~~~~~~~~
......
......@@ -54,6 +54,14 @@ module GHC.Exts
-- * Overloaded string literals
IsString(..),
-- * CString
unpackCString#,
unpackAppendCString#,
unpackFoldrCString#,
unpackCStringUtf8#,
unpackNBytes#,
cstringLength#,
-- * Debugging
breakpoint, breakpointCond,
......
This diff is collapsed.
{-# LANGUAGE MagicHash, NoImplicitPrelude, BangPatterns #-}
{-# LANGUAGE MagicHash, NoImplicitPrelude, BangPatterns, UnliftedFFITypes #-}
-----------------------------------------------------------------------------
-- |
-- Module : GHC.CString
......@@ -18,7 +17,7 @@
module GHC.CString (
unpackCString#, unpackAppendCString#, unpackFoldrCString#,
unpackCStringUtf8#, unpackNBytes#
unpackCStringUtf8#, unpackNBytes#, cstringLength#
) where
import GHC.Types
......@@ -174,3 +173,17 @@ unpackNBytes# addr len# = unpack [] (len# -# 1#)
case indexCharOffAddr# addr i# of
ch -> unpack (C# ch : acc) (i# -# 1#)
-- The return type is not correct here. We really want CSize,
-- but that type is defined in base. However, CSize should always
-- match the size of a machine word (I hope), so this is probably
-- alright on all platforms that GHC supports.
foreign import ccall unsafe "strlen" c_strlen :: Addr# -> Int#
-- | Compute the length of a NUL-terminated string. This address
-- must refer to immutable memory. GHC includes a built-in rule for
-- constant folding when the argument is a statically-known literal.
-- That is, a core-to-core pass reduces the expression
-- @cstringLength# "hello"#@ to the constant @5#@.
cstringLength# :: Addr# -> Int#
{-# INLINE[0] cstringLength# #-}
cstringLength# = c_strlen
## 0.6.2 (edit as necessary)
- Shipped with GHC 8.12.1
- Add known-key `cstringLength#` to `GHC.CString`. This is just the
C function `strlen`, but a built-in rewrite rule allows GHC to
compute the result at compile time when the argument is known.
## 0.6.1 (edit as necessary)
- Shipped with GHC 8.10.1
......
......@@ -43,6 +43,7 @@ Thumbs.db
*.prof.sample.normalised
*.run.stdout
*.run.stderr
*.dump-simpl
*.hp
tests/**/*.ps
......
......@@ -1344,6 +1344,26 @@ def compile_grep_asm(name: TestName,
# no problems found, this test passed
return passed()
def compile_grep_core(name: TestName,
way: WayName,
extra_hc_opts: str
) -> PassFail:
print('Compile only, extra args = ', extra_hc_opts)
result = simple_build(name + '.hs', way, '-ddump-to-file -dsuppress-all -ddump-simpl -O ' + extra_hc_opts, False, None, False, False)
if badResult(result):
return result
expected_pat_file = find_expected_file(name, 'substr-simpl')
actual_core_file = add_suffix(name, 'dump-simpl')
if not grep_output(join_normalisers(normalise_errmsg),
expected_pat_file, actual_core_file):
return failBecause('simplified core mismatch')
# no problems found, this test passed
return passed()
# -----------------------------------------------------------------------------
# Compile-and-run tests
......
{-# language MagicHash #-}
module CStringLengthCore
( ozymandias
) where
import GHC.Exts
ozymandias :: Int
ozymandias =
I# (cstringLength# "I met a traveller from an antique land"#)
test('CStringLength_core', normal, compile_grep_core, [''])
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE MagicHash #-}
{-# LANGUAGE UnboxedTuples #-}
import GHC.Exts
main :: IO ()
main = do
putStr "A: "
print $
I# (cstringLength# "hello_world"#)
==
naiveStrlen "hello_world"# 0
putStr "B: "
print $
I# (cstringLength# "aaaaaaaaaaaaa\x00b"#)
==
naiveStrlen "aaaaaaaaaaaaa\x00b"# 0
putStr "C: "
print $
I# (cstringLength# "cccccccccccccccccc\x00b"#)
==
naiveStrlen "cccccccccccccccccc\x00b"# 0
putStr "D: "
print $
I# (cstringLength# "araña\NULb"#)
==
naiveStrlen "araña\NULb"# 0
naiveStrlen :: Addr# -> Int -> Int
naiveStrlen addr !n = case indexWord8OffAddr# addr 0# of
0## -> n
_ -> naiveStrlen (plusAddr# addr 1#) (n + 1)
......@@ -29,3 +29,4 @@ test('CmpWord16', normal, compile_and_run, [''])
test('ShrinkSmallMutableArrayA', normal, compile_and_run, [''])
test('ShrinkSmallMutableArrayB', normal, compile_and_run, [''])
test('T14664', normal, compile_and_run, [''])
test('CStringLength', normal, compile_and_run, ['-O2'])
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment