Commit c7bd3896 authored by simonpj's avatar simonpj
Browse files

[project @ 2005-07-08 16:30:58 by simonpj]

Add a test for the specialiser
parent f7e2c3ef
......@@ -14,3 +14,5 @@ test('simpl010', normal, compile, [''])
test('simpl011', normal, compile, [''])
test('simpl012', normal, compile, [''])
test('spec001', normal, compile, [''])
{-# OPTIONS -fglasgow-exts -O -cpp #-}
-- In GHC 6.4, compiling this module gave a Core Lint failure following the
-- specialiser, because a function was floated out that had a RULE that
-- mentioned another function (unpack, in fact), but the latter wasn't
-- floated because we didn't take the RULES into account properly; result:
-- variable out of scope.
-- It's hard to cut this test down.
module Data.PackedString.Latin1 (
-- * The @PackedString@ type
PackedString, -- abstract, instances: Eq, Ord, Show, Typeable
-- * Converting to and from @PackedString@s
pack,
unpack,
-- * I\/O with @PackedString@s
hPut, hGet,
-- * List-like manipulation functions
nil,
cons,
head,
tail,
null,
append,
length,
index,
map,
filter,
reverse,
concat,
elem,
substr,
take,
drop,
splitAt,
foldl,
foldr,
takeWhile,
dropWhile,
span,
break,
lines,
unlines,
words,
unwords,
split,
splitWith,
join,
-- unpackList, -- eek, otherwise it gets thrown away by the simplifier
) where
import qualified Prelude
import Prelude hiding (
head,
tail,
null,
length,
(!!),
map,
filter,
reverse,
concat,
elem,
take,
drop,
foldl,
foldr,
splitAt,
takeWhile,
dropWhile,
span,
break,
lines,
unlines,
words,
unwords
)
import GHC.Exts hiding (split)
import GHC.IOBase (IO(..))
import Foreign
import Data.Typeable
import Data.Char
import qualified Data.List
import System.IO
-- -----------------------------------------------------------------------------
-- PackedString type declaration
-- | A space-efficient representation of a 'String', which supports
-- various efficient operations. A 'PackedString' contains Latin1
-- (8-bit) characters only.
--
-- The three strict, unpacked fields are, in order: the offset of the first
-- character within the buffer, the number of characters, and the buffer.
data PackedString
  = PS
      {-# UNPACK #-} !Int                 -- offset into the buffer
      {-# UNPACK #-} !Int                 -- length in bytes/characters
      {-# UNPACK #-} !(ForeignPtr Word8)  -- underlying byte buffer
-- This is a pretty efficient representation, and can be
-- converted to/from a StorableArray.
-- When the ForeignPtr is unpacked, we get the Addr# stored
-- directly in the PS constructor.
-- Slices share the buffer of the string they were cut from; perhaps making a
-- slice should be conditional on the ratio of the slice/string size, to limit
-- memory leaks.
-- Equality is defined through 'comparePS': two strings are equal exactly
-- when that comparison yields 'EQ'.
instance Eq PackedString where
  a == b = case comparePS a b of
    EQ -> True
    _  -> False
-- Ordering delegates entirely to 'comparePS'.
instance Ord PackedString where
  compare a b = comparePS a b
-- Lexicographic comparison of two packed strings, byte by byte.
--
-- Both buffers are kept alive with 'withForeignPtr' while the bytes are read.
-- A string that is a proper prefix of the other compares as 'LT'.
--
-- The scan must start at index 0. Starting it at @len1@ (as this code used
-- to) makes the first guard fire immediately, so no byte is ever inspected
-- and only the two lengths are compared.
comparePS :: PackedString -> PackedString -> Ordering
comparePS (PS off1 len1 fp1) (PS off2 len2 fp2)
  = inlinePerformIO $
      withForeignPtr fp1 $ \p1 ->
      withForeignPtr fp2 $ \p2 ->
        cmp (p1 `plusPtr` off1) (p2 `plusPtr` off2) 0
  where
    -- @cmp a b n@: the first @n@ bytes are known to be equal.
    cmp :: Ptr Word8 -> Ptr Word8 -> Int -> IO Ordering
    cmp p1 p2 n
      | n == len1 = if n == len2 then return EQ else return LT
      | n == len2 = return GT
      | otherwise = do
          a <- peekElemOff p1 n
          b <- peekElemOff p2 n
          case a `compare` b of
            EQ -> cmp p1 p2 (n+1)
            LT -> return LT
            GT -> return GT
--instance Read PackedString: ToDo
-- A 'PackedString' is shown exactly as the 'String' obtained by unpacking it.
instance Show PackedString where
  showsPrec d ps = showsPrec d (unpack ps)
-- The macro used below comes from Typeable.h (hence -cpp in the OPTIONS
-- pragma). Presumably it expands to the Typeable instance advertised in the
-- export list, binding packedStringTc as the type constructor representation
-- (TODO: confirm against the header).
#include "Typeable.h"
INSTANCE_TYPEABLE0(PackedString,packedStringTc,"PackedString")
-- -----------------------------------------------------------------------------
-- Constructor functions
-- | 'nil' is the empty string: offset 0, length 0, with a buffer that is
-- just 'nullPtr' wrapped by 'newForeignPtr_'.
nil :: PackedString
nil = inlinePerformIO (fmap (PS 0 0) (newForeignPtr_ nullPtr))
-- | 'cons' prepends the given character to the given string.
-- The whole string is unpacked to a list and packed again, so this copies
-- every character. -- ToDo: better
cons :: Char -> PackedString -> PackedString
cons c cs = pack (c : rest)
  where
    rest = unpack cs
-- | Convert a 'String' whose length is already known into a 'PackedString'.
--
-- Allocates @len@ bytes and writes the characters of @str@ into them from
-- index 0 upwards. Nothing here checks that @len@ really is the length of
-- @str@; callers are responsible for passing a matching value.
packLen :: Int -> String -> PackedString
packLen len str = inlinePerformIO $ do
  buf <- mallocForeignPtrBytes len
  withForeignPtr buf (\dst -> fill_it_in dst 0 str)
  return (PS 0 len buf)
-- Write each character of the list, narrowed with 'c2w', into consecutive
-- byte slots of the buffer starting at index @i@. No bounds are checked.
fill_it_in :: Ptr Word8 -> Int -> String -> IO ()
fill_it_in p i cs =
  mapM_ (\(slot, ch) -> pokeElemOff p slot (c2w ch)) (zip [i ..] cs)
-- | Convert a 'String' into a 'PackedString'.
-- The list is traversed once to measure it and once more to copy it.
pack :: String -> PackedString
pack str = packLen n str
  where
    n = Prelude.length str
-- Widen a byte to the 'Char' with the same code point.
-- Kept as a lambda (no arguments left of the @=@) so the INLINE pragma
-- applies to the same zero-argument binding as before.
{-# INLINE w2c #-}
w2c :: Word8 -> Char
w2c = \w -> chr (fromIntegral w)
-- Narrow a 'Char' to a byte via its code point; 'fromIntegral' to 'Word8'
-- keeps only the low 8 bits, so characters above Latin-1 wrap around.
-- Kept as a lambda (no arguments left of the @=@) so the INLINE pragma
-- applies to the same zero-argument binding as before.
{-# INLINE c2w #-}
c2w :: Char -> Word8
c2w = \c -> fromIntegral (ord c)
-- -----------------------------------------------------------------------------
-- List-mimicking functions for PackedStrings
-- | 'length' returns the number of characters in the string.
-- Analogous to 'Prelude.length', but it just reads the stored length field.
length :: PackedString -> Int
length ps = case ps of
  PS _ n _ -> n
-- | 'index' returns the character at the given (zero-based) position.
-- Calls 'error' if the position is negative or not below the string length.
index :: PackedString -> Int -> Char
index ps i
  | i < 0 || i >= length ps =
      error "Data.PackedString.Latin1.index: index out of range"
  | otherwise = unsafeIndex ps i
-- Read the byte at position @i@ (relative to the string's offset) without
-- any bounds check, and convert it to a 'Char'. The conversion is forced
-- with '$!' before the result is returned.
unsafeIndex :: PackedString -> Int -> Char
unsafeIndex (PS off _ fp) i =
  withPackedString fp $ \base ->
    peekElemOff (base `plusPtr` off) i >>= \w -> return $! w2c w
-- | 'head' returns the first character of a 'PackedString', or calls
-- 'error' if the string is empty.
head :: PackedString -> Char
head ps
  | length ps > 0 = index ps 0
  | otherwise     = error "Data.PackedString.Latin1.head: head []"
-- | 'tail' returns everything after the first character of a
-- 'PackedString', or calls 'error' if the string is empty.
-- A one-character string yields 'nil'; longer strings yield a slice made
-- with 'substr'.
tail :: PackedString -> PackedString
tail ps = case length ps of
  len | len <= 0  -> error "Data.PackedString.Latin1.tail: tail []"
      | len == 1  -> nil
      | otherwise -> substr ps 1 (len - 1)
-- | 'null' is True iff the string has length zero.
null :: PackedString -> Bool
null ps = case ps of
  PS _ n _ -> n == 0
-- | 'append' appends the second string onto the first.
-- If either side is empty the other is returned unchanged; otherwise both
-- are combined through 'concat'.
append :: PackedString -> PackedString -> PackedString
append xs ys = case (null xs, null ys) of
  (True, _) -> ys
  (_, True) -> xs
  _         -> concat [xs, ys]
-- | 'map' applies a function to each character in the string.
-- The result has the same length as the input, so that length is passed
-- straight to 'packLen' instead of being recomputed.
map :: (Char -> Char) -> PackedString -> PackedString
map f ps = packLen (length ps) mapped
  where
    mapped = Prelude.map f (unpack ps)
-- | 'filter' keeps only the characters that satisfy the predicate,
-- building a new string from them.
filter :: (Char -> Bool) -> PackedString -> PackedString {-or String?-}
filter keep ps = pack (Prelude.filter keep (unpack ps))
-- | 'foldl' is the list 'Prelude.foldl' applied to the unpacked characters
-- of a 'PackedString'.
foldl :: (a -> Char -> a) -> a -> PackedString -> a
foldl f b ps = Prelude.foldl f b (unpack ps)
-- | 'foldr' is the list 'Prelude.foldr' applied to the unpacked characters
-- of a 'PackedString'.
foldr :: (Char -> a -> a) -> a -> PackedString -> a
foldr f v ps = Prelude.foldr f v (unpack ps) -- no intermediate list, we hope
-- | 'take' returns the first @n@ characters of a 'PackedString'.
--
-- As with 'Prelude.take', the count is clamped: a non-positive @n@ gives an
-- empty string and an @n@ beyond the end gives the whole string. Without the
-- clamp the result could claim more characters than the buffer holds, or
-- carry a negative length. The result is a slice sharing the input's buffer.
take :: Int -> PackedString -> PackedString
take n ps = substr ps 0 (count - 1)
  where
    -- number of characters actually available to take
    count = max 0 (min n (length ps))
-- | 'drop' removes the first @n@ characters of a 'PackedString'.
--
-- As with 'Prelude.drop', the count is clamped: a non-positive @n@ returns
-- the whole string and an @n@ beyond the end returns an empty string.
-- Without the clamp the result could start before the buffer or carry a
-- negative length. The result is a slice sharing the input's buffer.
drop :: Int -> PackedString -> PackedString
drop n ps = substr ps start (len - 1)
  where
    len   = length ps
    -- first index kept, confined to [0, len]
    start = max 0 (min n len)
-- | 'splitAt' splits a 'PackedString' at a given index, returning the
-- pair of 'take' and 'drop' for that index.
splitAt :: Int -> PackedString -> (PackedString, PackedString)
splitAt n ps = (front, back)
  where
    front = take n ps
    back  = drop n ps
-- | 'takeWhile' returns the longest prefix whose characters all satisfy
-- the predicate, analogous to 'Prelude.takeWhile'. The prefix is copied
-- into a new string.
takeWhile :: (Char -> Bool) -> PackedString -> PackedString
takeWhile keep ps = pack (Prelude.takeWhile keep (unpack ps))
-- | 'dropWhile' removes the longest prefix whose characters all satisfy
-- the predicate, analogous to 'Prelude.dropWhile'. The remainder is copied
-- into a new string.
dropWhile :: (Char -> Bool) -> PackedString -> PackedString
dropWhile skip ps = pack (Prelude.dropWhile skip (unpack ps))
-- | 'elem' is True iff the given character occurs in the string.
elem :: Char -> PackedString -> Bool
elem c ps = Prelude.elem c (unpack ps)
-- | 'span' returns the pair of 'takeWhile' and 'dropWhile' run with the
-- same predicate on the same string.
span :: (Char -> Bool) -> PackedString -> (PackedString, PackedString)
span p ps = (matching, rest)
  where
    matching = takeWhile p ps
    rest     = dropWhile p ps
-- | 'break' splits a string at the first position which satisfies the
-- predicate; it is 'span' with the predicate negated.
break :: (Char -> Bool) -> PackedString -> (PackedString, PackedString)
break p ps = span notP ps
  where
    notP c = not (p c)
-- | 'lines' splits the input on line-breaks.
-- It is 'splitWith' on the newline character, so the result follows the
-- conventions of 'splitWith' rather than those of 'Prelude.lines'.
lines :: PackedString -> [PackedString]
lines ps = splitWith (== '\n') ps
-- | 'unlines' concatenates the input list after interspersing newlines
-- between its elements (via 'join').
unlines :: [PackedString] -> PackedString
unlines pss = join newline pss
  where
    newline = pack "\n"
-- | 'words' splits the string at every character for which 'isSpace'
-- holds and discards the empty pieces, analogous to 'Prelude.words'.
words :: PackedString -> [PackedString]
words ps = [w | w <- splitWith isSpace ps, not (null w)]
-- | 'unwords' concatenates the input list after interspersing single
-- spaces between its elements (via 'join').
unwords :: [PackedString] -> PackedString
unwords pss = join space pss
  where
    space = pack " "
-- | 'reverse' returns a new string with the characters in reverse order.
reverse :: PackedString -> PackedString
reverse ps = pack (Prelude.reverse (unpack ps))
-- | 'concat' concatenates a list of 'PackedString's into one new string.
concat :: [PackedString] -> PackedString
concat pss = pack (Prelude.concatMap unpack pss)
------------------------------------------------------------
-- | 'join' takes a separator and a list of 'PackedString's, places the
-- separator between each pair of adjacent elements, and concatenates the
-- result. An empty or one-element list gets no separator at all.
join :: PackedString -> [PackedString] -> PackedString
join filler pss = concat (Data.List.intersperse filler pss)
-- ToDo: the obvious generalisation
{-
Some properties that hold:
* split x ls = ls'
where False = any (map (x `elem`) ls')
* join (pack [x]) (split x ls) = ls
-}
-- | 'split' splits the input string on each occurrence of the given 'Char'.
split :: Char -> PackedString -> [PackedString]
split c ps = splitWith isSep ps
  where
    isSep ch = ch == c
-- | 'splitWith' splits the string at every character satisfying the
-- predicate; the matching characters themselves are dropped.
-- An empty input yields the empty list.
splitWith :: (Char -> Bool) -> PackedString -> [PackedString]
splitWith pred (PS off len fp)
  | len == 0  = []
  | otherwise = splitWith' pred off len fp
-- Worker for 'splitWith' on the unpacked fields of a string: runs
-- 'splitLoop' from index 0 over the region described by @off@ and @len@.
splitWith' :: (Char -> Bool) -> Int -> Int -> ForeignPtr Word8 -> [PackedString]
splitWith' pred off len fp =
  withPackedString fp (\p -> splitLoop pred p 0 off len fp)
-- Scan forward from @idx@ looking for the next separator.
--
-- @p@ is the base pointer of the buffer, @off@ the start of the current
-- piece and @len@ the number of characters left from @off@. When a
-- separator is found at @idx@, the piece @[off, off+idx)@ is emitted and
-- the rest is split by 'splitWith'' starting just past the separator.
-- Reaching the end emits the final piece, which may be empty.
splitLoop :: (Char -> Bool) -> Ptr Word8 -> Int -> Int -> Int
          -> ForeignPtr Word8 -> IO [PackedString]
splitLoop pred p idx off len fp
  -- Never-taken guard whose only job is to force these arguments.
  | p `seq` idx `seq` off `seq` fp `seq` False = undefined
  | idx >= len = return [PS off idx fp]
  | otherwise  = do
      ch <- fmap w2c (peekElemOff p (off + idx))
      if pred ch
        then return (PS off idx fp
                       : splitWith' pred (off + idx + 1) (len - idx - 1) fp)
        else splitLoop pred p (idx + 1) off len fp
-- -----------------------------------------------------------------------------
-- Local utility functions
-- The definition of @substr@ is essentially:
-- @take (end - begin + 1) (drop begin str)@.
-- | 'substr' takes a 'PackedString' and two indices and returns the
-- substring between (and including) these indices. The result shares the
-- input's buffer; the indices are not checked against the string's length.
substr :: PackedString -> Int -> Int -> PackedString
substr (PS off _ fp) begin end = PS start count fp
  where
    start = off + begin
    count = end - begin + 1
-- -----------------------------------------------------------------------------
-- hPut
-- | Outputs a 'PackedString' to the specified 'Handle'.
--
-- NOTE: the string will be output directly in Latin-1: the bytes of the
-- buffer are handed to 'hPutBuf' as they are.
--
hPut :: Handle -> PackedString -> IO ()
hPut h (PS off l fp) =
  withForeignPtr fp (\base -> hPutBuf h (base `plusPtr` off) l)
-- -----------------------------------------------------------------------------
-- hGet
-- | Read a 'PackedString' directly from the specified 'Handle'.
-- This is far more efficient than reading the characters into a 'String'
-- and then using 'pack'.
--
-- A buffer of @i@ bytes is allocated; the length of the result is whatever
-- count 'hGetBuf' reports for the read.
--
-- NOTE: as with 'hPut', the string representation in the file is
-- assumed to be Latin-1.
hGet :: Handle -> Int -> IO PackedString
hGet h i = do
  buf <- mallocForeignPtrBytes i
  got <- withForeignPtr buf (\p -> hGetBuf h p i)
  return (PS 0 got buf)
-- -----------------------------------------------------------------------------
-- unpacking
-- | Convert a 'PackedString' into a 'String'.
-- Written as 'build' over 'unpackFoldr' and marked INLINE, presumably so
-- that list consumers can fuse with it (TODO: confirm).
{-# INLINE unpack #-}
unpack :: PackedString -> String
unpack ps = build (unpackFoldr ps)
-- Where 'unpackFoldr' ends up applied to (:) and [], rewrite the call to the
-- list-specific 'unpackList'. The rule carries the phase annotation [1];
-- 'unpackFoldr' itself is declared INLINE [0].
{-# RULES
"unpack-list" [1] forall p . unpackFoldr p (:) [] = unpackList p
#-}
-- Build the list of characters directly, walking the buffer from the last
-- character down to the first and consing each one onto the accumulator.
-- This is the right-hand side of the "unpack-list" rule.
unpackList :: PackedString -> [Char]
unpackList (PS off len fp) =
  withPackedString fp $ \base -> walk (base `plusPtr` off) (len - 1) []
  where
    walk :: Ptr Word8 -> Int -> [Char] -> IO [Char]
    walk ptr ix acc
      | ix == (-1) = return acc
      | otherwise  = do
          w <- peekElemOff ptr ix
          walk ptr (ix - 1) (w2c w : acc)
-- Right fold over the characters of a string: the buffer is walked from the
-- last character down to the first, combining each character with the
-- accumulator using @f@, starting from @c@.
-- The pragma and the three-argument left-hand side are kept as they were,
-- because the "unpack-list" rule matches on applications of this function.
{-# INLINE [0] unpackFoldr #-}
unpackFoldr :: PackedString -> (Char -> a -> a) -> a -> a
unpackFoldr (PS off len fp) f c =
  withPackedString fp $ \base ->
    let start = base `plusPtr` off
        go ix acc
          | ix == (-1) = return acc
          | otherwise  = do
              w <- peekElemOff start ix
              go (ix - 1) (w2c w `f` acc)
    in go (len - 1) c
-- -----------------------------------------------------------------------------
-- Utils
-- Just like unsafePerformIO, but we inline it.
-- The IO action is unwrapped and applied directly to the realWorld# token;
-- the state token that comes back is discarded and only the value is kept.
-- NOTE(review): presumably this is only sound for actions whose effects
-- cannot be observed from pure code; confirm at each call site.
{-# INLINE inlinePerformIO #-}
inlinePerformIO :: IO a -> a
inlinePerformIO (IO m) = case m realWorld# of (# _, r #) -> r
-- Run an IO action on the raw pointer behind a 'ForeignPtr' and return its
-- result as a pure value, combining 'withForeignPtr' with 'inlinePerformIO'.
withPackedString :: ForeignPtr a -> (Ptr a -> IO b) -> b
withPackedString fp io = inlinePerformIO $ withForeignPtr fp io
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment