Commit 88bf81aa authored by Andreas Klebinger's avatar Andreas Klebinger Committed by Marge Bot

Optimize unpackCString# to allocate less.

unpackCString# is a recursive function which for each iteration
returns a Cons cell containing the current Char, and a thunk for
unpacking the rest of the string.

In this patch we change from storing addr + offset inside this thunk
to storing only the addr, simply incrementing the address on each
iteration.

This saves one word of allocation per unpacked character.
For a program like "main = print "<largishString>" this amounts
to 2-3% fewer % in bytes allocated.

I also removed the now redundant local unpack definitions.
This removes one call per unpack operation.
parent 7d04b9f2
Pipeline #15553 passed with stages
in 470 minutes and 15 seconds
{-# LANGUAGE MagicHash, NoImplicitPrelude, BangPatterns #-}
-----------------------------------------------------------------------------
-- |
-- Module : GHC.CString
......@@ -70,28 +71,45 @@ Moreover, we want to make it CONLIKE, so that:
All of this goes for unpackCStringUtf8# too.
-}
{- Note [unpackCString# iterating over addr]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
When unpacking unpackCString# and friends repeatedly return a cons cell
containing:
* The current character we just unpacked.
* A thunk to unpack the rest of the string.
In order to minimize the size of the thunk we do not index of
the start of the string, offsetting into it, but instead increment
the addr and always use offset 0#.
This works since these two expressions will read from the same address.
* `indexCharOffAddr# a i`
* `indexCharOffAddr (a `plusAddr#` i) 0#`
This way we avoid the need for the thunks to close over both the start of
the string and the current offset, saving a word for each character unpacked.
-}
unpackCString# :: Addr# -> [Char]
{-# NOINLINE CONLIKE unpackCString# #-}
unpackCString# addr
= unpack 0#
where
unpack nh
| isTrue# (ch `eqChar#` '\0'#) = []
| True = C# ch : unpack (nh +# 1#)
| isTrue# (ch `eqChar#` '\0'#) = []
| True = C# ch : unpackCString# (addr `plusAddr#` 1#)
where
!ch = indexCharOffAddr# addr nh
-- See Note [unpackCString# iterating over addr]
!ch = indexCharOffAddr# addr 0#
unpackAppendCString# :: Addr# -> [Char] -> [Char]
{-# NOINLINE unpackAppendCString# #-}
-- See the NOINLINE note on unpackCString#
unpackAppendCString# addr rest
= unpack 0#
where
unpack nh
| isTrue# (ch `eqChar#` '\0'#) = rest
| True = C# ch : unpack (nh +# 1#)
| isTrue# (ch `eqChar#` '\0'#) = rest
| True = C# ch : unpackAppendCString# (addr `plusAddr#` 1#) rest
where
!ch = indexCharOffAddr# addr nh
-- See Note [unpackCString# iterating over addr]
!ch = indexCharOffAddr# addr 0#
unpackFoldrCString# :: Addr# -> (Char -> a -> a) -> a -> a
......@@ -110,45 +128,37 @@ unpackFoldrCString# :: Addr# -> (Char -> a -> a) -> a -> a
-- each is highly gratuitous. See nofib/real/anna/PrettyPrint.
unpackFoldrCString# addr f z
= unpack 0#
| isTrue# (ch `eqChar#` '\0'#) = z
| True = C# ch `f` unpackFoldrCString# (addr `plusAddr#` 1#) f z
where
unpack nh
| isTrue# (ch `eqChar#` '\0'#) = z
| True = C# ch `f` unpack (nh +# 1#)
where
!ch = indexCharOffAddr# addr nh
-- See Note [unpackCString# iterating over addr]
!ch = indexCharOffAddr# addr 0#
-- There's really no point in inlining this for the same reasons as
-- unpackCString. See Note [Inlining unpackCString#] above for details.
unpackCStringUtf8# :: Addr# -> [Char]
{-# NOINLINE CONLIKE unpackCStringUtf8# #-}
unpackCStringUtf8# addr
= unpack 0#
where
-- We take care to strictly evaluate the character decoding as
-- indexCharOffAddr# is marked with the can_fail flag and
-- consequently GHC won't evaluate the expression unless it is absolutely
-- needed.
unpack nh
| isTrue# (ch `eqChar#` '\0'# ) = []
| isTrue# (ch `leChar#` '\x7F'#) = C# ch : unpack (nh +# 1#)
| isTrue# (ch `leChar#` '\xDF'#) =
let !c = C# (chr# (((ord# ch -# 0xC0#) `uncheckedIShiftL#` 6#) +#
(ord# (indexCharOffAddr# addr (nh +# 1#)) -# 0x80#)))
in c : unpack (nh +# 2#)
| isTrue# (ch `leChar#` '\xEF'#) =
let !c = C# (chr# (((ord# ch -# 0xE0#) `uncheckedIShiftL#` 12#) +#
((ord# (indexCharOffAddr# addr (nh +# 1#)) -# 0x80#) `uncheckedIShiftL#` 6#) +#
(ord# (indexCharOffAddr# addr (nh +# 2#)) -# 0x80#)))
in c : unpack (nh +# 3#)
| True =
let !c = C# (chr# (((ord# ch -# 0xF0#) `uncheckedIShiftL#` 18#) +#
((ord# (indexCharOffAddr# addr (nh +# 1#)) -# 0x80#) `uncheckedIShiftL#` 12#) +#
((ord# (indexCharOffAddr# addr (nh +# 2#)) -# 0x80#) `uncheckedIShiftL#` 6#) +#
(ord# (indexCharOffAddr# addr (nh +# 3#)) -# 0x80#)))
in c : unpack (nh +# 4#)
| isTrue# (ch `eqChar#` '\0'# ) = []
| isTrue# (ch `leChar#` '\x7F'#) = C# ch : unpackCStringUtf8# (addr `plusAddr#` 1#)
| isTrue# (ch `leChar#` '\xDF'#) =
let !c = C# (chr# (((ord# ch -# 0xC0#) `uncheckedIShiftL#` 6#) +#
(ord# (indexCharOffAddr# (addr `plusAddr#` 1#) 0#) -# 0x80#)))
in c : unpackCStringUtf8# (addr `plusAddr#` 2#)
| isTrue# (ch `leChar#` '\xEF'#) =
let !c = C# (chr# (((ord# ch -# 0xE0#) `uncheckedIShiftL#` 12#) +#
((ord# (indexCharOffAddr# (addr `plusAddr#` 1#) 0#) -# 0x80#) `uncheckedIShiftL#` 6#) +#
(ord# (indexCharOffAddr# (addr `plusAddr#` 2#) 0#) -# 0x80#)))
in c : unpackCStringUtf8# (addr `plusAddr#` 3#)
| True =
let !c = C# (chr# (((ord# ch -# 0xF0#) `uncheckedIShiftL#` 18#) +#
((ord# (indexCharOffAddr# (addr `plusAddr#` 1#) 0#) -# 0x80#) `uncheckedIShiftL#` 12#) +#
((ord# (indexCharOffAddr# (addr `plusAddr#` 2#) 0#) -# 0x80#) `uncheckedIShiftL#` 6#) +#
(ord# (indexCharOffAddr# (addr `plusAddr#` 3#) 0#) -# 0x80#)))
in c : unpackCStringUtf8# (addr `plusAddr#` 4#)
where
!ch = indexCharOffAddr# addr nh
-- See Note [unpackCString# iterating over addr]
!ch = indexCharOffAddr# addr 0#
-- There's really no point in inlining this for the same reasons as
-- unpackCString. See Note [Inlining unpackCString#] above for details.
......@@ -157,6 +167,7 @@ unpackNBytes# :: Addr# -> Int# -> [Char]
unpackNBytes# _addr 0# = []
unpackNBytes# addr len# = unpack [] (len# -# 1#)
where
unpack :: [Char] -> Int# -> [Char]
unpack acc i#
| isTrue# (i# <# 0#) = acc
| True =
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment