Skip to content
Snippets Groups Projects
Commit ac3aa323 authored by sof's avatar sof
Browse files

[project @ 1997-08-25 22:44:21 by sof]

Moved to ghc system library
parent c13305e9
No related merge requests found
%
% (c) The GRASP/AQUA Project, Glasgow University, 1993-1996
%
\section{Packed strings}
This sits on top of the sequencing/arrays world, notably @ByteArray#@s.
Glorious hacking (all the hard work) by Bryan O'Sullivan.
\begin{code}
{-# OPTIONS -fno-implicit-prelude '-#include "cbits/stgio.h"' #-}
module PackedString (
PackedString, -- abstract
-- Creating the beasts
packString, -- :: [Char] -> PackedString
packStringST, -- :: [Char] -> ST s PackedString
packCBytesST, -- :: Int -> Addr -> ST s PackedString
byteArrayToPS, -- :: ByteArray Int -> PackedString
unsafeByteArrayToPS, -- :: ByteArray a -> Int -> PackedString
psToByteArray, -- :: PackedString -> ByteArray Int
psToByteArrayST, -- :: PackedString -> ST s (ByteArray Int)
unpackPS, -- :: PackedString -> [Char]
{-LATER:
hPutPS, -- :: Handle -> PackedString -> IO ()
putPS, -- :: FILE -> PackedString -> PrimIO () -- ToDo: more sensible type
getPS, -- :: FILE -> Int -> PrimIO PackedString
-}
nilPS, -- :: PackedString
consPS, -- :: Char -> PackedString -> PackedString
headPS, -- :: PackedString -> Char
tailPS, -- :: PackedString -> PackedString
nullPS, -- :: PackedString -> Bool
appendPS, -- :: PackedString -> PackedString -> PackedString
lengthPS, -- :: PackedString -> Int
{- 0-origin indexing into the string -}
indexPS, -- :: PackedString -> Int -> Char
mapPS, -- :: (Char -> Char) -> PackedString -> PackedString
filterPS, -- :: (Char -> Bool) -> PackedString -> PackedString
foldlPS, -- :: (a -> Char -> a) -> a -> PackedString -> a
foldrPS, -- :: (Char -> a -> a) -> a -> PackedString -> a
takePS, -- :: Int -> PackedString -> PackedString
dropPS, -- :: Int -> PackedString -> PackedString
splitAtPS, -- :: Int -> PackedString -> (PackedString, PackedString)
takeWhilePS, -- :: (Char -> Bool) -> PackedString -> PackedString
dropWhilePS, -- :: (Char -> Bool) -> PackedString -> PackedString
spanPS, -- :: (Char -> Bool) -> PackedString -> (PackedString, PackedString)
breakPS, -- :: (Char -> Bool) -> PackedString -> (PackedString, PackedString)
linesPS, -- :: PackedString -> [PackedString]
wordsPS, -- :: PackedString -> [PackedString]
reversePS, -- :: PackedString -> PackedString
splitPS, -- :: Char -> PackedString -> [PackedString]
splitWithPS, -- :: (Char -> Bool) -> PackedString -> [PackedString]
joinPS, -- :: PackedString -> [PackedString] -> PackedString
concatPS, -- :: [PackedString] -> PackedString
elemPS, -- :: Char -> PackedString -> Bool
{-
Pluck out a piece of a PS start and end
chars you want; both 0-origin-specified
-}
substrPS, -- :: PackedString -> Int -> Int -> PackedString
comparePS,
-- Converting to C strings
packCString#,
unpackCString#, unpackCString2#, unpackAppendCString#, unpackFoldrCString#,
unpackCString
) where
import {-# SOURCE #-} IOBase ( error )
import Ix
import PrelList
import STBase
import UnsafeST ( unsafePerformPrimIO )
import ArrBase
import PrelBase
import Foreign ( Addr(..) )
import GHC
\end{code}
%************************************************************************
%* *
\subsection{@PackedString@ type declaration}
%* *
%************************************************************************
\begin{code}
data PackedString
= PS ByteArray# -- the bytes
Int# -- length (*not* including NUL at the end)
Bool -- True <=> contains a NUL
| CPS Addr# -- pointer to the (null-terminated) bytes in C land
Int# -- length, as per strlen
-- definitely doesn't contain a NUL
instance Eq PackedString where
x == y = compare x y == EQ
x /= y = compare x y /= EQ
instance Ord PackedString where
compare = comparePS
x <= y = compare x y /= GT
x < y = compare x y == LT
x >= y = compare x y /= LT
x > y = compare x y == GT
max x y = case (compare x y) of { LT -> y ; EQ -> x ; GT -> x }
min x y = case (compare x y) of { LT -> x ; EQ -> x ; GT -> y }
--instance Read PackedString: ToDo
instance Show PackedString where
showsPrec p ps r = showsPrec p (unpackPS ps) r
showList = showList__ (showsPrec 0)
\end{code}
%************************************************************************
%* *
\subsection{@PackedString@ instances}
%* *
%************************************************************************
We try hard to make this go fast:
\begin{code}
comparePS :: PackedString -> PackedString -> Ordering
comparePS (PS bs1 len1 has_null1) (PS bs2 len2 has_null2)
| not has_null1 && not has_null2
= unsafePerformPrimIO (
_ccall_ strcmp ba1 ba2 >>= \ (I# res) ->
return (
if res <# 0# then LT
else if res ==# 0# then EQ
else GT
))
where
ba1 = ByteArray (0, I# (len1 -# 1#)) bs1
ba2 = ByteArray (0, I# (len2 -# 1#)) bs2
comparePS (PS bs1 len1 has_null1) (CPS bs2 len2)
| not has_null1
= unsafePerformPrimIO (
_ccall_ strcmp ba1 ba2 >>= \ (I# res) ->
return (
if res <# 0# then LT
else if res ==# 0# then EQ
else GT
))
where
ba1 = ByteArray (0, I# (len1 -# 1#)) bs1
ba2 = A# bs2
comparePS (CPS bs1 len1) (CPS bs2 len2)
= unsafePerformPrimIO (
_ccall_ strcmp ba1 ba2 >>= \ (I# res) ->
return (
if res <# 0# then LT
else if res ==# 0# then EQ
else GT
))
where
ba1 = A# bs1
ba2 = A# bs2
comparePS a@(CPS _ _) b@(PS _ _ has_null2)
| not has_null2
= -- try them the other way 'round
case (comparePS b a) of { LT -> GT; EQ -> EQ; GT -> LT }
comparePS ps1 ps2 -- slow catch-all case (esp for "has_null" True)
= looking_at 0#
where
end1 = lengthPS# ps1 -# 1#
end2 = lengthPS# ps2 -# 1#
looking_at char#
= if char# ># end1 then
if char# ># end2 then -- both strings ran out at once
EQ
else -- ps1 ran out before ps2
LT
else if char# ># end2 then
GT -- ps2 ran out before ps1
else
let
ch1 = indexPS# ps1 char#
ch2 = indexPS# ps2 char#
in
if ch1 `eqChar#` ch2 then
looking_at (char# +# 1#)
else if ch1 `ltChar#` ch2 then LT
else GT
\end{code}
%************************************************************************
%* *
\subsection{Constructor functions}
%* *
%************************************************************************
Easy ones first. @packString@ requires getting some heap-bytes and
scribbling stuff into them.
\begin{code}
nilPS :: PackedString
nilPS = CPS ""# 0#
consPS :: Char -> PackedString -> PackedString
consPS c cs = packString (c : (unpackPS cs)) -- ToDo:better
packString :: [Char] -> PackedString
packString str = runST (packStringST str)
packStringST :: [Char] -> ST s PackedString
packStringST str =
let len = length str in
packNCharsST len str
packNCharsST :: Int -> [Char] -> ST s PackedString
packNCharsST len@(I# length#) str =
{-
allocate an array that will hold the string
(not forgetting the NUL byte at the end)
-}
new_ps_array (length# +# 1#) >>= \ ch_array ->
-- fill in packed string from "str"
fill_in ch_array 0# str >>
-- freeze the puppy:
freeze_ps_array ch_array >>= \ (ByteArray _ frozen#) ->
let has_null = byteArrayHasNUL# frozen# length# in
return (PS frozen# length# has_null)
where
fill_in :: MutableByteArray s Int -> Int# -> [Char] -> ST s ()
fill_in arr_in# idx [] =
write_ps_array arr_in# idx (chr# 0#) >>
return ()
fill_in arr_in# idx (C# c : cs) =
write_ps_array arr_in# idx c >>
fill_in arr_in# (idx +# 1#) cs
byteArrayToPS :: ByteArray Int -> PackedString
byteArrayToPS (ByteArray ixs@(_, ix_end) frozen#) =
let
n# =
case (
if null (range ixs)
then 0
else ((index ixs ix_end) + 1)
) of { I# x -> x }
in
PS frozen# n# (byteArrayHasNUL# frozen# n#)
unsafeByteArrayToPS :: ByteArray a -> Int -> PackedString
unsafeByteArrayToPS (ByteArray _ frozen#) (I# n#)
= PS frozen# n# (byteArrayHasNUL# frozen# n#)
psToByteArray :: PackedString -> ByteArray Int
psToByteArray (PS bytes n has_null)
= ByteArray (0, I# (n -# 1#)) bytes
psToByteArray (CPS addr len#)
= let
len = I# len#
byte_array_form = packCBytes len (A# addr)
in
case byte_array_form of { PS bytes _ _ ->
ByteArray (0, len - 1) bytes }
\end{code}
%************************************************************************
%* *
\subsection{Destructor functions (taking @PackedStrings@ apart)}
%* *
%************************************************************************
\begin{code}
-- OK, but this code gets *hammered*:
-- unpackPS ps
-- = [ indexPS ps n | n <- [ 0::Int .. lengthPS ps - 1 ] ]
unpackPS :: PackedString -> [Char]
unpackPS (PS bytes len has_null)
= unpack 0#
where
unpack nh
| nh >=# len = []
| otherwise = C# ch : unpack (nh +# 1#)
where
ch = indexCharArray# bytes nh
unpackPS (CPS addr len)
= unpack 0#
where
unpack nh
| ch `eqChar#` '\0'# = []
| otherwise = C# ch : unpack (nh +# 1#)
where
ch = indexCharOffAddr# addr nh
\end{code}
Output a packed string via a handle:
\begin{code}
{- LATER:
hPutPS :: Handle -> PackedString -> IO ()
hPutPS handle ps =
let
len =
case ps of
PS _ len _ -> len
CPS _ len -> len
in
if len ==# 0# then
return ()
else
_readHandle handle >>= \ htype ->
case htype of
_ErrorHandle ioError ->
_writeHandle handle htype >>
failWith ioError
_ClosedHandle ->
_writeHandle handle htype >>
failWith (IllegalOperation "handle is closed")
_SemiClosedHandle _ _ ->
_writeHandle handle htype >>
failWith (IllegalOperation "handle is closed")
_ReadHandle _ _ _ ->
_writeHandle handle htype >>
failWith (IllegalOperation "handle is not open for writing")
other ->
_getBufferMode other >>= \ other ->
(case _bufferMode other of
Just LineBuffering ->
writeLines (_filePtr other)
Just (BlockBuffering (Just size)) ->
writeBlocks (_filePtr other) size
Just (BlockBuffering Nothing) ->
writeBlocks (_filePtr other) ``BUFSIZ''
_ -> -- Nothing is treated pessimistically as NoBuffering
writeChars (_filePtr other) 0#
) >>= \ success ->
_writeHandle handle (_markHandle other) >>
if success then
return ()
else
_constructError "hPutStr" >>= \ ioError ->
failWith ioError
where
pslen = lengthPS# ps
writeLines :: Addr -> PrimIO Bool
writeLines = writeChunks ``BUFSIZ'' True
writeBlocks :: Addr -> Int -> PrimIO Bool
writeBlocks fp size = writeChunks size False fp
{-
The breaking up of output into lines along \n boundaries
works fine as long as there are newlines to split by.
Avoid the splitting up into lines altogether (doesn't work
for overly long lines like the stuff that showsPrec instances
normally return). Instead, we split them up into fixed size
chunks before blasting them off to the Real World.
Hacked to avoid multiple passes over the strings - unsightly, but
a whole lot quicker. -- SOF 3/96
-}
writeChunks :: Int -> Bool -> Addr -> PrimIO Bool
writeChunks (I# bufLen) chopOnNewLine fp =
newCharArray (0,I# bufLen) >>= \ arr@(MutableByteArray _ arr#) ->
let
shoveString :: Int# -> Int# -> PrimIO Bool
shoveString n i
| i ==# pslen = -- end of string
if n ==# 0# then
return True
else
_ccall_ writeFile arr fp (I# n) >>= \rc ->
return (rc==0)
| otherwise =
(\ (S# s#) ->
case writeCharArray# arr# n (indexPS# ps i) s# of
s1# ->
{- Flushing lines - should we bother? -}
(if n ==# bufLen then
_ccall_ writeFile arr fp (I# (n +# 1#)) >>= \rc ->
if rc == 0 then
shoveString 0# (i +# 1#)
else
return False
else
shoveString (n +# 1#) (i +# 1#)) (S# s1#))
in
shoveString 0# 0#
writeChars :: Addr -> Int# -> PrimIO Bool
writeChars fp i
| i ==# pslen = return True
| otherwise =
_ccall_ filePutc fp (ord (C# (indexPS# ps i))) >>= \ rc ->
if rc == 0 then
writeChars fp (i +# 1#)
else
return False
---------------------------------------------
putPS :: _FILE -> PackedString -> PrimIO ()
putPS file ps@(PS bytes len has_null)
| len ==# 0#
= return ()
| otherwise
= let
byte_array = ByteArray (0, I# (len -# 1#)) bytes
in
_ccall_ fwrite byte_array (1::Int){-size-} (I# len) file
>>= \ (I# written) ->
if written ==# len then
return ()
else
error "putPS: fwrite failed!\n"
putPS file (CPS addr len)
| len ==# 0#
= return ()
| otherwise
= _ccall_ fputs (A# addr) file >>= \ (I# _){-force type-} ->
return ()
\end{code}
The dual to @_putPS@, note that the size of the chunk specified
is the upper bound of the size of the chunk returned.
\begin{code}
getPS :: _FILE -> Int -> PrimIO PackedString
getPS file len@(I# len#)
| len# <=# 0# = return nilPS -- I'm being kind here.
| otherwise =
-- Allocate an array for system call to store its bytes into.
new_ps_array len# >>= \ ch_arr ->
freeze_ps_array ch_arr >>= \ (ByteArray _ frozen#) ->
let
byte_array = ByteArray (0, I# len#) frozen#
in
_ccall_ fread byte_array (1::Int) len file >>= \ (I# read#) ->
if read# ==# 0# then -- EOF or other error
error "getPS: EOF reached or other error"
else
{-
The system call may not return the number of
bytes requested. Instead of failing with an error
if the number of bytes read is less than requested,
a packed string containing the bytes we did manage
to snarf is returned.
-}
let
has_null = byteArrayHasNUL# frozen# read#
in
return (PS frozen# read# has_null)
END LATER -}
\end{code}
%************************************************************************
%* *
\subsection{List-mimicking functions for @PackedStrings@}
%* *
%************************************************************************
First, the basic functions that do look into the representation;
@indexPS@ is the most important one.
\begin{code}
lengthPS :: PackedString -> Int
lengthPS ps = I# (lengthPS# ps)
{-# INLINE lengthPS# #-}
lengthPS# (PS _ i _) = i
lengthPS# (CPS _ i) = i
{-# INLINE strlen# #-}
strlen# :: Addr# -> Int
strlen# a
= unsafePerformPrimIO (
_ccall_ strlen (A# a) >>= \ len@(I# _) ->
return len
)
byteArrayHasNUL# :: ByteArray# -> Int#{-length-} -> Bool
byteArrayHasNUL# bs len
= unsafePerformPrimIO (
_ccall_ byteArrayHasNUL__ ba (I# len) >>= \ (I# res) ->
return (
if res ==# 0# then False else True
))
where
ba = ByteArray (0, I# (len -# 1#)) bs
-----------------------
indexPS :: PackedString -> Int -> Char
indexPS ps (I# n) = C# (indexPS# ps n)
{-# INLINE indexPS# #-}
indexPS# (PS bs i _) n
= --ASSERT (n >=# 0# && n <# i) -- error checking: my eye! (WDP 94/10)
indexCharArray# bs n
indexPS# (CPS a _) n
= indexCharOffAddr# a n
\end{code}
Now, the rest of the functions can be defined without digging
around in the representation.
\begin{code}
headPS :: PackedString -> Char
headPS ps
| nullPS ps = error "headPS: head []"
| otherwise = C# (indexPS# ps 0#)
tailPS :: PackedString -> PackedString
tailPS ps
| len <=# 0# = error "tailPS: tail []"
| len ==# 1# = nilPS
| otherwise = substrPS# ps 1# (len -# 1#)
where
len = lengthPS# ps
nullPS :: PackedString -> Bool
nullPS (PS _ i _) = i ==# 0#
nullPS (CPS _ i) = i ==# 0#
{- (ToDo: some non-lousy implementations...)
Old : _appendPS xs ys = packString (unpackPS xs ++ unpackPS ys)
-}
appendPS :: PackedString -> PackedString -> PackedString
appendPS xs ys
| nullPS xs = ys
| nullPS ys = xs
| otherwise = concatPS [xs,ys]
{- OLD: mapPS f xs = packString (map f (unpackPS xs)) -}
mapPS :: (Char -> Char) -> PackedString -> PackedString {-or String?-}
mapPS f xs =
if nullPS xs then
xs
else
runST (
new_ps_array (length +# 1#) >>= \ ps_arr ->
whizz ps_arr length 0# >>
freeze_ps_array ps_arr >>= \ (ByteArray _ frozen#) ->
let has_null = byteArrayHasNUL# frozen# length in
return (PS frozen# length has_null))
where
length = lengthPS# xs
whizz :: MutableByteArray s Int -> Int# -> Int# -> ST s ()
whizz arr# n i
| n ==# 0#
= write_ps_array arr# i (chr# 0#) >>
return ()
| otherwise
= let
ch = indexPS# xs i
in
write_ps_array arr# i (case f (C# ch) of { (C# x) -> x}) >>
whizz arr# (n -# 1#) (i +# 1#)
filterPS :: (Char -> Bool) -> PackedString -> PackedString {-or String?-}
filterPS pred ps =
if nullPS ps then
ps
else
{-
Filtering proceeds as follows:
* traverse the list, applying the pred. to each element,
remembering the positions where it was satisfied.
Encode these positions using a run-length encoding of the gaps
between the matching positions.
* Allocate a MutableByteArray in the heap big enough to hold
all the matched entries, and copy the elements that matched over.
A better solution that merges the scan&copy passes into one,
would be to copy the filtered elements over into a growable
buffer. No such operation currently supported over
MutableByteArrays (could of course use malloc&realloc)
But, this solution may in the case of repeated realloc's
be worse than the current solution.
-}
runST (
let
(rle,len_filtered) = filter_ps len# 0# 0# []
len_filtered# = case len_filtered of { I# x# -> x#}
in
if len# ==# len_filtered# then
{- not much filtering as everything passed through. -}
return ps
else if len_filtered# ==# 0# then
return nilPS
else
new_ps_array (len_filtered# +# 1#) >>= \ ps_arr ->
copy_arr ps_arr rle 0# 0# >>
freeze_ps_array ps_arr >>= \ (ByteArray _ frozen#) ->
let has_null = byteArrayHasNUL# frozen# len_filtered# in
return (PS frozen# len_filtered# has_null))
where
len# = lengthPS# ps
matchOffset :: Int# -> [Char] -> (Int,[Char])
matchOffset off [] = (I# off,[])
matchOffset off (C# c:cs) =
let
x = ord# c
off' = off +# x
in
if x==# 0# then -- escape code, add 255#
matchOffset off' cs
else
(I# off', cs)
copy_arr :: MutableByteArray s Int -> [Char] -> Int# -> Int# -> ST s ()
copy_arr arr# [_] _ _ = return ()
copy_arr arr# ls n i =
let
(x,ls') = matchOffset 0# ls
n' = n +# (case x of { (I# x#) -> x#}) -# 1#
ch = indexPS# ps n'
in
write_ps_array arr# i ch >>
copy_arr arr# ls' (n' +# 1#) (i +# 1#)
esc :: Int# -> Int# -> [Char] -> [Char]
esc v 0# ls = (C# (chr# v)):ls
esc v n ls = esc v (n -# 1#) (C# (chr# 0#):ls)
filter_ps :: Int# -> Int# -> Int# -> [Char] -> ([Char],Int)
filter_ps n hits run acc
| n <# 0# =
let
escs = run `quotInt#` 255#
v = run `remInt#` 255#
in
(esc (v +# 1#) escs acc, I# hits)
| otherwise
= let
ch = indexPS# ps n
n' = n -# 1#
in
if pred (C# ch) then
let
escs = run `quotInt#` 255#
v = run `remInt#` 255#
acc' = esc (v +# 1#) escs acc
in
filter_ps n' (hits +# 1#) 0# acc'
else
filter_ps n' hits (run +# 1#) acc
foldlPS :: (a -> Char -> a) -> a -> PackedString -> a
foldlPS f b ps
= if nullPS ps then
b
else
whizzLR b 0#
where
len = lengthPS# ps
--whizzLR :: a -> Int# -> a
whizzLR b idx
| idx ==# len = b
| otherwise = whizzLR (f b (C# (indexPS# ps idx))) (idx +# 1#)
foldrPS :: (Char -> a -> a) -> a -> PackedString -> a
foldrPS f b ps
= if nullPS ps then
b
else
whizzRL b len
where
len = lengthPS# ps
--whizzRL :: a -> Int# -> a
whizzRL b idx
| idx <# 0# = b
| otherwise = whizzRL (f (C# (indexPS# ps idx)) b) (idx -# 1#)
takePS :: Int -> PackedString -> PackedString
takePS (I# n) ps
| n ==# 0# = nilPS
| otherwise = substrPS# ps 0# (n -# 1#)
dropPS :: Int -> PackedString -> PackedString
dropPS (I# n) ps
| n ==# len = ps
| otherwise = substrPS# ps n (lengthPS# ps -# 1#)
where
len = lengthPS# ps
splitAtPS :: Int -> PackedString -> (PackedString, PackedString)
splitAtPS n ps = (takePS n ps, dropPS n ps)
takeWhilePS :: (Char -> Bool) -> PackedString -> PackedString
takeWhilePS pred ps
= let
break_pt = char_pos_that_dissatisfies
(\ c -> pred (C# c))
ps
(lengthPS# ps)
0#
in
if break_pt ==# 0# then
nilPS
else
substrPS# ps 0# (break_pt -# 1#)
dropWhilePS :: (Char -> Bool) -> PackedString -> PackedString
dropWhilePS pred ps
= let
len = lengthPS# ps
break_pt = char_pos_that_dissatisfies
(\ c -> pred (C# c))
ps
len
0#
in
if len ==# break_pt then
nilPS
else
substrPS# ps break_pt (len -# 1#)
elemPS :: Char -> PackedString -> Bool
elemPS (C# ch) ps
= let
len = lengthPS# ps
break_pt = first_char_pos_that_satisfies
(`eqChar#` ch)
ps
len
0#
in
break_pt <# len
char_pos_that_dissatisfies :: (Char# -> Bool) -> PackedString -> Int# -> Int# -> Int#
char_pos_that_dissatisfies p ps len pos
| pos >=# len = pos -- end
| p (indexPS# ps pos) = -- predicate satisfied; keep going
char_pos_that_dissatisfies p ps len (pos +# 1#)
| otherwise = pos -- predicate not satisfied
first_char_pos_that_satisfies :: (Char# -> Bool) -> PackedString -> Int# -> Int# -> Int#
first_char_pos_that_satisfies p ps len pos
| pos >=# len = pos -- end
| p (indexPS# ps pos) = pos -- got it!
| otherwise = first_char_pos_that_satisfies p ps len (pos +# 1#)
-- ToDo: could certainly go quicker
spanPS :: (Char -> Bool) -> PackedString -> (PackedString, PackedString)
spanPS p ps = (takeWhilePS p ps, dropWhilePS p ps)
breakPS :: (Char -> Bool) -> PackedString -> (PackedString, PackedString)
breakPS p ps = spanPS (not . p) ps
linesPS :: PackedString -> [PackedString]
linesPS ps = splitPS '\n' ps
wordsPS :: PackedString -> [PackedString]
wordsPS ps = splitWithPS isSpace ps
reversePS :: PackedString -> PackedString
reversePS ps =
if nullPS ps then -- don't create stuff unnecessarily.
ps
else
runST (
new_ps_array (length +# 1#) >>= \ arr# -> -- incl NUL byte!
fill_in arr# (length -# 1#) 0# >>
freeze_ps_array arr# >>= \ (ByteArray _ frozen#) ->
let has_null = byteArrayHasNUL# frozen# length in
return (PS frozen# length has_null))
where
length = lengthPS# ps
fill_in :: MutableByteArray s Int -> Int# -> Int# -> ST s ()
fill_in arr_in# n i =
let
ch = indexPS# ps n
in
write_ps_array arr_in# i ch >>
if n ==# 0# then
write_ps_array arr_in# (i +# 1#) (chr# 0#) >>
return ()
else
fill_in arr_in# (n -# 1#) (i +# 1#)
concatPS :: [PackedString] -> PackedString
concatPS [] = nilPS
concatPS pss
= let
tot_len# = case (foldr ((+) . lengthPS) 0 pss) of { I# x -> x }
tot_len = I# tot_len#
in
runST (
new_ps_array (tot_len# +# 1#) >>= \ arr# -> -- incl NUL byte!
packum arr# pss 0# >>
freeze_ps_array arr# >>= \ (ByteArray _ frozen#) ->
let has_null = byteArrayHasNUL# frozen# tot_len# in
return (PS frozen# tot_len# has_null)
)
where
packum :: MutableByteArray s Int -> [PackedString] -> Int# -> ST s ()
packum arr [] pos
= write_ps_array arr pos (chr# 0#) >>
return ()
packum arr (ps : pss) pos
= fill arr pos ps 0# (lengthPS# ps) >>= \ (I# next_pos) ->
packum arr pss next_pos
fill :: MutableByteArray s Int -> Int# -> PackedString -> Int# -> Int# -> ST s Int
fill arr arr_i ps ps_i ps_len
| ps_i ==# ps_len
= return (I# (arr_i +# ps_len))
| otherwise
= write_ps_array arr (arr_i +# ps_i) (indexPS# ps ps_i) >>
fill arr arr_i ps (ps_i +# 1#) ps_len
------------------------------------------------------------
joinPS :: PackedString -> [PackedString] -> PackedString
joinPS filler pss = concatPS (splice pss)
where
splice [] = []
splice [x] = [x]
splice (x:y:xs) = x:filler:splice (y:xs)
-- ToDo: the obvious generalisation
{-
Some properties that hold:
* splitPS x ls = ls'
where False = any (map (x `elemPS`) ls')
False = any (map (nullPS) ls')
* all x's have been chopped out.
* no empty PackedStrings in returned list. A conseq.
of this is:
splitPS x nilPS = []
* joinPS (packString [x]) (_splitPS x ls) = ls
-}
splitPS :: Char -> PackedString -> [PackedString]
splitPS (C# ch) = splitWithPS (\ (C# c) -> c `eqChar#` ch)
splitWithPS :: (Char -> Bool) -> PackedString -> [PackedString]
splitWithPS pred ps =
splitify 0#
where
len = lengthPS# ps
splitify n
| n >=# len = []
| otherwise =
let
break_pt =
first_char_pos_that_satisfies
(\ c -> pred (C# c))
ps
len
n
in
if break_pt ==# n then -- immediate match, no substring to cut out.
splitify (break_pt +# 1#)
else
substrPS# ps n (break_pt -# 1#): -- leave out the matching character
splitify (break_pt +# 1#)
\end{code}
%************************************************************************
%* *
\subsection{Local utility functions}
%* *
%************************************************************************
The definition of @_substrPS@ is essentially:
@take (end - begin + 1) (drop begin str)@.
\begin{code}
substrPS :: PackedString -> Int -> Int -> PackedString
substrPS ps (I# begin) (I# end) = substrPS# ps begin end
substrPS# ps s e
| s <# 0# || e <# s
= error "substrPS: bounds out of range"
| s >=# len || result_len# <=# 0#
= nilPS
| otherwise
= runST (
new_ps_array (result_len# +# 1#) >>= \ ch_arr -> -- incl NUL byte!
fill_in ch_arr 0# >>
freeze_ps_array ch_arr >>= \ (ByteArray _ frozen#) ->
let has_null = byteArrayHasNUL# frozen# result_len# in
return (PS frozen# result_len# has_null)
)
where
len = lengthPS# ps
result_len# = (if e <# len then (e +# 1#) else len) -# s
result_len = I# result_len#
-----------------------
fill_in :: MutableByteArray s Int -> Int# -> ST s ()
fill_in arr_in# idx
| idx ==# result_len#
= write_ps_array arr_in# idx (chr# 0#) >>
return ()
| otherwise
= let
ch = indexPS# ps (s +# idx)
in
write_ps_array arr_in# idx ch >>
fill_in arr_in# (idx +# 1#)
\end{code}
(Very :-) ``Specialised'' versions of some CharArray things...
\begin{code}
new_ps_array :: Int# -> ST s (MutableByteArray s Int)
write_ps_array :: MutableByteArray s Int -> Int# -> Char# -> ST s ()
freeze_ps_array :: MutableByteArray s Int -> ST s (ByteArray Int)
new_ps_array size = ST $ \ (S# s) ->
case (newCharArray# size s) of { StateAndMutableByteArray# s2# barr# ->
(MutableByteArray bot barr#, S# s2#)}
where
bot = error "new_ps_array"
write_ps_array (MutableByteArray _ barr#) n ch = ST $ \ (S# s#) ->
case writeCharArray# barr# n ch s# of { s2# ->
((), S# s2#)}
-- same as unsafeFreezeByteArray
freeze_ps_array (MutableByteArray ixs arr#) = ST $ \ (S# s#) ->
case unsafeFreezeByteArray# arr# s# of { StateAndByteArray# s2# frozen# ->
(ByteArray ixs frozen#, S# s2#) }
\end{code}
%*********************************************************
%* *
\subsection{Packing and unpacking C strings}
%* *
%*********************************************************
\begin{code}
unpackCString :: Addr -> [Char]
-- Calls to the next four are injected by the compiler itself,
-- to deal with literal strings
packCString# :: [Char] -> ByteArray#
unpackCString# :: Addr# -> [Char]
unpackCString2# :: Addr# -> Int# -> [Char]
unpackAppendCString# :: Addr# -> [Char] -> [Char]
unpackFoldrCString# :: Addr# -> (Char -> a -> a) -> a -> a
packCString# str = case (packString str) of { PS bytes _ _ -> bytes }
unpackCString (A# addr) = unpackCString# addr
unpackCString# addr
= unpack 0#
where
unpack nh
| ch `eqChar#` '\0'# = []
| otherwise = C# ch : unpack (nh +# 1#)
where
ch = indexCharOffAddr# addr nh
unpackCString2# addr len
-- This one is called by the compiler to unpack literal strings with NULs in them; rare.
= unpackPS (packCBytes (I# len) (A# addr))
unpackAppendCString# addr rest
= unpack 0#
where
unpack nh
| ch `eqChar#` '\0'# = rest
| otherwise = C# ch : unpack (nh +# 1#)
where
ch = indexCharOffAddr# addr nh
unpackFoldrCString# addr f z
= unpack 0#
where
unpack nh
| ch `eqChar#` '\0'# = z
| otherwise = C# ch `f` unpack (nh +# 1#)
where
ch = indexCharOffAddr# addr nh
cStringToPS :: Addr -> PackedString
cStringToPS (A# a#) = -- the easy one; we just believe the caller
CPS a# len
where
len = case (strlen# a#) of { I# x -> x }
packBytesForC :: [Char] -> ByteArray Int
packBytesForC str = psToByteArray (packString str)
psToByteArrayST :: [Char] -> ST s (ByteArray Int)
psToByteArrayST str =
packStringST str >>= \ (PS bytes n has_null) ->
--later? ASSERT(not has_null)
return (ByteArray (0, I# (n -# 1#)) bytes)
packNBytesForCST :: Int -> [Char] -> ST s (ByteArray Int)
packNBytesForCST len str =
packNCharsST len str >>= \ (PS bytes n has_null) ->
return (ByteArray (0, I# (n -# 1#)) bytes)
packCBytes :: Int -> Addr -> PackedString
packCBytes len addr = runST (packCBytesST len addr)
packCBytesST :: Int -> Addr -> ST s PackedString
packCBytesST len@(I# length#) (A# addr) =
{-
allocate an array that will hold the string
(not forgetting the NUL byte at the end)
-}
new_ps_array (length# +# 1#) >>= \ ch_array ->
-- fill in packed string from "addr"
fill_in ch_array 0# >>
-- freeze the puppy:
freeze_ps_array ch_array >>= \ (ByteArray _ frozen#) ->
let has_null = byteArrayHasNUL# frozen# length# in
return (PS frozen# length# has_null)
where
fill_in :: MutableByteArray s Int -> Int# -> ST s ()
fill_in arr_in# idx
| idx ==# length#
= write_ps_array arr_in# idx (chr# 0#) >>
return ()
| otherwise
= case (indexCharOffAddr# addr idx) of { ch ->
write_ps_array arr_in# idx ch >>
fill_in arr_in# (idx +# 1#) }
\end{code}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment