...
 
Commits (2)
  • Adam Sandberg Eriksson's avatar
    docs: fix formatting and add some links · d830bbc9
    Adam Sandberg Eriksson authored
    [skip ci]
    d830bbc9
  • Andrew Martin's avatar
    Implement cstringLength# and FinalPtr · 49301ad6
    Andrew Martin authored
    This function and its accompanying rule resolve issue #5218.
    A future PR to the bytestring library will make the internal
    Data.ByteString.Internal.unsafePackAddress compute string length
    with cstringLength#. This will improve the status quo because it is
    eligible for constant folding.
    
    Additionally, introduce a new data constructor to ForeignPtrContents
    named FinalPtr. This additional data constructor, when used in the
    IsString instance for ByteString, leads to more Core-to-Core
    optimization opportunities, fewer runtime allocations, and smaller
    binaries.
    
    Also, this commit re-exports all the functions from GHC.CString
    (including cstringLength#) in GHC.Exts. It also adds a new test
    driver. This test driver is used to perform substring matches on Core
    that is dumped after all the simplifier passes. In this commit, it is
    used to check that constant folding of cstringLength# works.
    49301ad6
......@@ -349,6 +349,7 @@ basicKnownKeyNames
-- Strings and lists
unpackCStringName,
unpackCStringFoldrName, unpackCStringUtf8Name,
cstringLengthName,
-- Overloaded lists
isListClassName,
......@@ -1014,10 +1015,11 @@ modIntName = varQual gHC_CLASSES (fsLit "modInt#") modIntIdKey
-- Base strings Strings
unpackCStringName, unpackCStringFoldrName,
unpackCStringUtf8Name, eqStringName :: Name
unpackCStringUtf8Name, eqStringName, cstringLengthName :: Name
unpackCStringName = varQual gHC_CSTRING (fsLit "unpackCString#") unpackCStringIdKey
unpackCStringFoldrName = varQual gHC_CSTRING (fsLit "unpackFoldrCString#") unpackCStringFoldrIdKey
unpackCStringUtf8Name = varQual gHC_CSTRING (fsLit "unpackCStringUtf8#") unpackCStringUtf8IdKey
cstringLengthName = varQual gHC_CSTRING (fsLit "cstringLength#") cstringLengthIdKey
eqStringName = varQual gHC_BASE (fsLit "eqString") eqStringIdKey
-- The 'inline' function
......@@ -2097,7 +2099,7 @@ wildCardKey, absentErrorIdKey, augmentIdKey, appendIdKey,
unpackCStringUtf8IdKey, unpackCStringAppendIdKey,
unpackCStringFoldrIdKey, unpackCStringIdKey,
typeErrorIdKey, divIntIdKey, modIntIdKey,
absentSumFieldErrorIdKey :: Unique
absentSumFieldErrorIdKey, cstringLengthIdKey :: Unique
wildCardKey = mkPreludeMiscIdUnique 0 -- See Note [WildCard binders]
absentErrorIdKey = mkPreludeMiscIdUnique 1
......@@ -2124,6 +2126,7 @@ voidPrimIdKey = mkPreludeMiscIdUnique 21
typeErrorIdKey = mkPreludeMiscIdUnique 22
divIntIdKey = mkPreludeMiscIdUnique 23
modIntIdKey = mkPreludeMiscIdUnique 24
cstringLengthIdKey = mkPreludeMiscIdUnique 25
concatIdKey, filterIdKey, zipIdKey,
bindIOIdKey, returnIOIdKey, newStablePtrIdKey,
......
......@@ -66,6 +66,7 @@ import qualified Data.ByteString as BS
import Data.Int
import Data.Ratio
import Data.Word
import Data.Maybe (fromMaybe)
{-
Note [Constant folding]
......@@ -1257,6 +1258,8 @@ builtinRules
ru_nargs = 4, ru_try = match_append_lit },
BuiltinRule { ru_name = fsLit "EqString", ru_fn = eqStringName,
ru_nargs = 2, ru_try = match_eq_string },
BuiltinRule { ru_name = fsLit "CStringLength", ru_fn = cstringLengthName,
ru_nargs = 1, ru_try = match_cstring_length },
BuiltinRule { ru_name = fsLit "Inline", ru_fn = inlineIdName,
ru_nargs = 2, ru_try = \_ _ _ -> match_inline },
BuiltinRule { ru_name = fsLit "MagicDict", ru_fn = idName magicDictId,
......@@ -1477,6 +1480,30 @@ match_eq_string _ id_unf _
match_eq_string _ _ _ _ = Nothing
-----------------------------------------------------------------------
-- Illustration of this rule:
--
-- cstringLength# "foobar"# --> 6
-- cstringLength# "fizz\NULzz"# --> 4
--
-- Nota bene: Addr# literals are suffixed by a NUL byte when they are
-- compiled to read-only data sections. That's why cstringLength# is
-- well defined on Addr# literals that do not explicitly have an embedded
-- NUL byte.
--
-- See GHC issue #5218, MR 2165, and bytestring PR 191. This is particularly
-- helpful when using OverloadedStrings to create a ByteString since the
-- function computing the length of such ByteStrings can often be constant
-- folded.
match_cstring_length :: RuleFun
match_cstring_length env id_unf _ [lit1]
| Just (LitString str) <- exprIsLiteral_maybe id_unf lit1
-- If elemIndex returns Just, it has the index of the first embedded NUL
-- in the string. If no NUL bytes are present (the common case) then use
-- full length of the byte string.
= let len = fromMaybe (BS.length str) (BS.elemIndex 0 str)
in Just (Lit (mkLitInt (roPlatform env) (fromIntegral len)))
match_cstring_length _ _ _ _ = Nothing
---------------------------------------------------
-- The rule is this:
......
......@@ -128,8 +128,9 @@ import Foreign
import GHC.Conc.Sync (sharedCAF)
#endif
import GHC.Base ( unpackCString#, unpackNBytes# )
#if __GLASGOW_HASKELL__ < 811
import GHC.Base (unpackCString#,unpackNBytes#)
#endif
-- | Gives the UTF-8 encoded bytes corresponding to a 'FastString'
bytesFS :: FastString -> ByteString
......
......@@ -53,7 +53,7 @@ data HsLit x
-- ^ Unboxed character
| HsString (XHsString x) {- SourceText -} FastString
-- ^ String
| HsStringPrim (XHsStringPrim x) {- SourceText -} ByteString
| HsStringPrim (XHsStringPrim x) {- SourceText -} !ByteString
-- ^ Packed bytes
| HsInt (XHsInt x) IntegralLit
-- ^ Genuinely an Int; arises from
......
......@@ -6,6 +6,7 @@
This module converts Template Haskell syntax into Hs syntax
-}
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE DeriveFunctor #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE ScopedTypeVariables #-}
......@@ -1232,8 +1233,7 @@ cvtLit (CharPrimL c) = do { force c; return $ HsCharPrim NoSourceText c }
cvtLit (StringL s) = do { let { s' = mkFastString s }
; force s'
; return $ HsString (quotedSourceText s) s' }
cvtLit (StringPrimL s) = do { let { s' = BS.pack s }
; force s'
cvtLit (StringPrimL s) = do { let { !s' = BS.pack s }
; return $ HsStringPrim NoSourceText s' }
cvtLit (BytesPrimL (Bytes fptr off sz)) = do
let bs = unsafePerformIO $ withForeignPtr fptr $ \ptr ->
......
......@@ -114,7 +114,7 @@ data Literal
-- See Note [Types of LitNumbers] below for the
-- Type field.
| LitString ByteString -- ^ A string-literal: stored and emitted
| LitString !ByteString -- ^ A string-literal: stored and emitted
-- UTF-8 encoded, we'll arrange to decode it
-- at runtime. Also emitted with a @\'\\0\'@
-- terminator. Create with 'mkLitString'
......
......@@ -79,12 +79,15 @@ Language
* GHC now consistently does eager instantiation during type inference.
As a consequence, visible type application (VTA) now only works when
the head of the application is:
* A variable
* An expression with a type signature
For example `(let x = blah in id) @Bool True` no longer typechecks.
You should write `let x = blah in id @Bool True` instead.
This change prepares the way for Quick Look impredicativity.
For example ``(let x = blah in id) @Bool True`` no longer typechecks.
You should write ``let x = blah in id @Bool True`` instead.
This change prepares the way for `Quick Look impredicativity
<https://gitlab.haskell.org/ghc/ghc/issues/18126>`_.
* GHC now allows users to manually define the specificity of type variable
binders. By marking a variable with braces ``{tyvar}`` or ``{tyvar :: kind}``,
......@@ -110,60 +113,65 @@ Runtime system
Template Haskell
~~~~~~~~~~~~~~~~
- Implement the Overloaded Quotations proposal (#246). The type of all quotation
forms have now been generalised in terms of a minimal interface necessary for the
implementation rather than the overapproximation of the ``Q`` monad.
- Implement the `Overloaded Quotations proposal (#246) <https://github.com/ghc-proposals/ghc-proposals/blob/master/proposals/0246-overloaded-bracket.rst>`_.
The type of all quotation forms have now been generalised in terms of a
minimal interface necessary (the ``Quote`` type class) for the
implementation rather than the overapproximation of the ``Q`` monad.
- Template Haskell quotes now handle fixity declarations in ``let`` and
``where`` bindings properly. Previously, such fixity declarations would
be dropped when quoted due to a Template Haskell bug.
- Template Haskell quotes now handle fixity declarations in ``let`` and
``where`` bindings properly. Previously, such fixity declarations would
be dropped when quoted due to a Template Haskell bug.
- The ``-XTemplateHaskellQuotes`` extension now allows nested splices as nested
splices do not lead directly to compile-time evaluation. (!2288)
- The ``-XTemplateHaskellQuotes`` extension now allows nested splices as nested
splices do not lead directly to compile-time evaluation. (Merge request
`!2288 <https://gitlab.haskell.org/ghc/ghc/-/merge_requests/2288>`_)
Arrow notation
~~~~~~~~~~~~~~
- When combined with :extension:`Arrows`, the :extension:`LambdaCase` extension
now additionally allows ``\case`` syntax to be used as a command in ``proc``
notation.
- When combined with :extension:`Arrows`, the :extension:`LambdaCase` extension
now additionally allows ``\case`` syntax to be used as a command in ``proc``
notation.
- When combined with :extension:`Arrows`, the effects of the
:extension:`BlockArguments` extension now also apply to applications of
arrow control operators in ``(|`` banana brackets ``|)``: ::
- When combined with :extension:`Arrows`, the effects of the
:extension:`BlockArguments` extension now also apply to applications of
arrow control operators in ``(|`` banana brackets ``|)``: ::
(| untilA (increment -< x + y) do
within 0.5 -< x
... |)
(| untilA (increment -< x + y) do
within 0.5 -< x
... |)
``ghc-prim`` library
~~~~~~~~~~~~~~~~~~~~
- Add a known-key ``cstringLength#`` to ``GHC.CString`` that is eligible
for constant folding by a built-in rule.
``ghc`` library
~~~~~~~~~~~~~~~
- The type of the ``getAnnotations`` function has changed to better reflect
the fact that it returns two different kinds of annotations, those on
names and those on modules: ::
- The type of the ``getAnnotations`` function has changed to better reflect
the fact that it returns two different kinds of annotations, those on
names and those on modules: ::
getAnnotations :: Typeable a
=> ([Word8] -> a) -> ModGuts
-> CoreM (ModuleEnv [a], NameEnv [a])
getAnnotations :: Typeable a
=> ([Word8] -> a) -> ModGuts
-> CoreM (ModuleEnv [a], NameEnv [a])
- The meaning of the ``hs_fixds`` field of ``HsGroup`` has changed slightly.
It now only contains fixity signatures defined for top-level declarations
and class methods defined *outside* of the class itself. Previously,
``hs_fixds`` would also contain fixity signatures for class methods defined
*inside* the class, such as the fixity signature for ``m`` in the following
example: ::
- The meaning of the ``hs_fixds`` field of ``HsGroup`` has changed slightly.
It now only contains fixity signatures defined for top-level declarations
and class methods defined *outside* of the class itself. Previously,
``hs_fixds`` would also contain fixity signatures for class methods defined
*inside* the class, such as the fixity signature for ``m`` in the following
example: ::
class C a where
infixl 4 `m`
m :: a -> a -> a
class C a where
infixl 4 `m`
m :: a -> a -> a
If you wish to attain the previous behavior of ``hs_fixds``, use the new
``hsGroupTopLevelFixitySigs`` function, which collects all top-level fixity
signatures, including those for class methods defined inside classes.
If you wish to attain the previous behavior of ``hs_fixds``, use the new
``hsGroupTopLevelFixitySigs`` function, which collects all top-level fixity
signatures, including those for class methods defined inside classes.
- The ``Exception`` module was boiled down acknowledging the existence of
the ``exceptions`` dependency. In particular, the ``ExceptionMonad``
......@@ -176,6 +184,15 @@ Arrow notation
``base`` library
~~~~~~~~~~~~~~~~
- ``ForeignPtrContents`` has a new nullary data constructor ``FinalPtr``.
``FinalPtr`` is intended for turning a primitive string literal into a
``ForeignPtr``. Unlike ``PlainForeignPtr``, ``FinalPtr`` does not have
a finalizer. Replacing ``PlainForeignPtr`` that has ``NoFinalizers`` with
``FinalPtr`` reduces allocations, reduces the size of compiled binaries,
and unlocks important Core-to-Core optimizations. ``FinalPtr`` will be used
in an upcoming ``bytestring`` release to improve the performance of
``ByteString`` literals created with ``OverloadedStrings``.
Build system
~~~~~~~~~~~~
......@@ -196,7 +213,7 @@ for further change information.
libraries/containers/containers/containers.cabal: Dependency of ``ghc`` library
libraries/deepseq/deepseq.cabal: Dependency of ``ghc`` library
libraries/directory/directory.cabal: Dependency of ``ghc`` library
libraries/exceptions/exceptions.cabal: Dependency of ``haskeline`` library
libraries/exceptions/exceptions.cabal: Dependency of ``ghc`` and ``haskeline`` library
libraries/filepath/filepath.cabal: Dependency of ``ghc`` library
compiler/ghc.cabal: The compiler itself
libraries/ghci/ghci.cabal: The REPL interface
......
......@@ -74,7 +74,7 @@ The :extension:`TemplateHaskellQuotes` extension is considered safe under
that declaration splices are not allowed anywhere except at top level
(outside any other declarations).
The ``Q`` monad is a monad defined in ``Language.Haskell.TH.Syntax`` which
The ``Q`` monad is a monad defined in :th-ref:`Language.Haskell.TH.Syntax.` which
supports several useful operations during code generation such as reporting
errors or looking up identifiers in the environment.
......@@ -92,9 +92,10 @@ The :extension:`TemplateHaskellQuotes` extension is considered safe under
- ``[p| ... |]``, where the "..." is a pattern; the quotation has
type ``Quote m => m Pat``.
The ``Quote`` type class is the minimal interface necessary to implement
the desugaring of quotations. The ``Q`` monad is an instance of ``Quote`` but
contains many more operations which are not needed for defining quotations.
The ``Quote`` type class (:th-ref:`Language.Haskell.TH.Syntax.Quote`) is
the minimal interface necessary to implement the desugaring of quotations.
The ``Q`` monad is an instance of ``Quote`` but contains many more
operations which are not needed for defining quotations.
See :ref:`pts-where` for using partial type signatures in quotations.
......@@ -402,7 +403,7 @@ Using Template Haskell
----------------------
- The data types and monadic constructor functions for Template Haskell
are in the library ``Language.Haskell.TH.Syntax``.
are in the library :th-ref:`Language.Haskell.TH.Syntax.`.
- You can only run a function at compile time if it is imported from
another module. That is, you can't define a function in a module, and
......@@ -645,7 +646,7 @@ Here are the salient features
(Only the first two are described in the paper.)
- A quoter is a value of type
``Language.Haskell.TH.Quote.QuasiQuoter``, which is defined thus: ::
:th-ref:`Language.Haskell.TH.Quote.QuasiQuoter`, which is defined thus: ::
data QuasiQuoter = QuasiQuoter { quoteExp :: String -> Q Exp,
quotePat :: String -> Q Pat,
......
......@@ -54,6 +54,14 @@ module GHC.Exts
-- * Overloaded string literals
IsString(..),
-- * CString
unpackCString#,
unpackAppendCString#,
unpackFoldrCString#,
unpackCStringUtf8#,
unpackNBytes#,
cstringLength#,
-- * Debugging
breakpoint, breakpointCond,
......
This diff is collapsed.
{-# LANGUAGE MagicHash, NoImplicitPrelude, BangPatterns #-}
{-# LANGUAGE MagicHash, NoImplicitPrelude, BangPatterns, UnliftedFFITypes #-}
-----------------------------------------------------------------------------
-- |
-- Module : GHC.CString
......@@ -18,7 +17,7 @@
module GHC.CString (
unpackCString#, unpackAppendCString#, unpackFoldrCString#,
unpackCStringUtf8#, unpackNBytes#
unpackCStringUtf8#, unpackNBytes#, cstringLength#
) where
import GHC.Types
......@@ -174,3 +173,17 @@ unpackNBytes# addr len# = unpack [] (len# -# 1#)
case indexCharOffAddr# addr i# of
ch -> unpack (C# ch : acc) (i# -# 1#)
-- The return type is not correct here. We really want CSize,
-- but that type is defined in base. However, CSize should always
-- match the size of a machine word (I hope), so this is probably
-- alright on all platforms that GHC supports.
foreign import ccall unsafe "strlen" c_strlen :: Addr# -> Int#
-- | Compute the length of a NUL-terminated string. This address
-- must refer to immutable memory. GHC includes a built-in rule for
-- constant folding when the argument is a statically-known literal.
-- That is, a core-to-core pass reduces the expression
-- @cstringLength# "hello"#@ to the constant @5#@.
cstringLength# :: Addr# -> Int#
{-# INLINE[0] cstringLength# #-}
cstringLength# = c_strlen
## 0.6.2 (edit as necessary)
- Shipped with GHC 8.12.1
- Add known-key `cstringLength#` to `GHC.CString`. This is just the
C function `strlen`, but a built-in rewrite rule allows GHC to
compute the result at compile time when the argument is known.
## 0.6.1 (edit as necessary)
- Shipped with GHC 8.10.1
......
......@@ -43,6 +43,7 @@ Thumbs.db
*.prof.sample.normalised
*.run.stdout
*.run.stderr
*.dump-simpl
*.hp
tests/**/*.ps
......
......@@ -1344,6 +1344,26 @@ def compile_grep_asm(name: TestName,
# no problems found, this test passed
return passed()
def compile_grep_core(name: TestName,
way: WayName,
extra_hc_opts: str
) -> PassFail:
print('Compile only, extra args = ', extra_hc_opts)
result = simple_build(name + '.hs', way, '-ddump-to-file -dsuppress-all -ddump-simpl -O ' + extra_hc_opts, False, None, False, False)
if badResult(result):
return result
expected_pat_file = find_expected_file(name, 'substr-simpl')
actual_core_file = add_suffix(name, 'dump-simpl')
if not grep_output(join_normalisers(normalise_errmsg),
expected_pat_file, actual_core_file):
return failBecause('simplified core mismatch')
# no problems found, this test passed
return passed()
# -----------------------------------------------------------------------------
# Compile-and-run tests
......
{-# language MagicHash #-}
module CStringLengthCore
( ozymandias
) where
import GHC.Exts
ozymandias :: Int
ozymandias =
I# (cstringLength# "I met a traveller from an antique land"#)
test('CStringLength_core', normal, compile_grep_core, [''])
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE MagicHash #-}
{-# LANGUAGE UnboxedTuples #-}
import GHC.Exts
main :: IO ()
main = do
putStr "A: "
print $
I# (cstringLength# "hello_world"#)
==
naiveStrlen "hello_world"# 0
putStr "B: "
print $
I# (cstringLength# "aaaaaaaaaaaaa\x00b"#)
==
naiveStrlen "aaaaaaaaaaaaa\x00b"# 0
putStr "C: "
print $
I# (cstringLength# "cccccccccccccccccc\x00b"#)
==
naiveStrlen "cccccccccccccccccc\x00b"# 0
putStr "D: "
print $
I# (cstringLength# "araña\NULb"#)
==
naiveStrlen "araña\NULb"# 0
naiveStrlen :: Addr# -> Int -> Int
naiveStrlen addr !n = case indexWord8OffAddr# addr 0# of
0## -> n
_ -> naiveStrlen (plusAddr# addr 1#) (n + 1)
......@@ -29,3 +29,4 @@ test('CmpWord16', normal, compile_and_run, [''])
test('ShrinkSmallMutableArrayA', normal, compile_and_run, [''])
test('ShrinkSmallMutableArrayB', normal, compile_and_run, [''])
test('T14664', normal, compile_and_run, [''])
test('CStringLength', normal, compile_and_run, ['-O2'])