Commit fc865a71 authored by ross's avatar ross
Browse files

[project @ 2004-06-22 12:45:55 by ross]

Change the interface (but not the implementation) to match the FFI spec:

* added charIsRepresentable, *CAString and *CWString

* currently (and in violation of the spec), fooCString = fooCAString

* The WString versions use UTF-16 under Windows and UTF-32 elsewhere
  (valid only if the compiler defines __STDC_ISO_10646__).
parent f75fa294
......@@ -9,14 +9,29 @@
-- Stability : provisional
-- Portability : portable
--
-- Utilities for primitive marshaling
-- Utilities for primitive marshalling of C strings.
--
-- The marshalling converts each Haskell character, representing a Unicode
-- code point, to one or more bytes in a manner that, by default, is
-- determined by the current locale. As a consequence, no guarantees
-- can be made about the relative length of a Haskell string and its
-- corresponding C string, and therefore all the marshalling routines
-- include memory allocation. The translation between Unicode and the
-- encoding of the current locale may be lossy.
--
-----------------------------------------------------------------------------
module Foreign.C.String ( -- representation of strings in C
-- * C strings
CString, -- = Ptr CChar
CStringLen, -- = (CString, Int)
CStringLen, -- = (Ptr CChar, Int)
-- ** Using a locale-dependent encoding
-- | Currently these functions are identical to their @CAString@ counterparts;
-- eventually they will use an encoding determined by the current locale.
-- conversion of C strings into Haskell strings
--
......@@ -33,11 +48,44 @@ module Foreign.C.String ( -- representation of strings in C
withCString, -- :: String -> (CString -> IO a) -> IO a
withCStringLen, -- :: String -> (CStringLen -> IO a) -> IO a
-- conversion between Haskell and C characters *ignoring* the encoding
--
charIsRepresentable, -- :: Char -> IO Bool
-- ** Using 8-bit characters
-- | These variants of the above functions are for use with C libraries
-- that are ignorant of Unicode. These functions should be used with
-- care, as a loss of information can occur.
castCharToCChar, -- :: Char -> CChar
castCCharToChar, -- :: CChar -> Char
peekCAString, -- :: CString -> IO String
peekCAStringLen, -- :: CStringLen -> IO String
newCAString, -- :: String -> IO CString
newCAStringLen, -- :: String -> IO CStringLen
withCAString, -- :: String -> (CString -> IO a) -> IO a
withCAStringLen, -- :: String -> (CStringLen -> IO a) -> IO a
-- * C wide strings
-- | These variants of the above functions are for use with C libraries
-- that encode Unicode using the C @wchar_t@ type in a system-dependent
-- way. The only encodings supported are
--
-- * UTF-32 (the C compiler defines @__STDC_ISO_10646__@), or
--
-- * UTF-16 (as used on Windows systems).
CWString, -- = Ptr CWchar
CWStringLen, -- = (Ptr CWchar, Int)
peekCWString, -- :: CWString -> IO String
peekCWStringLen, -- :: CWStringLen -> IO String
newCWString, -- :: String -> IO CWString
newCWStringLen, -- :: String -> IO CWStringLen
withCWString, -- :: String -> (CWString -> IO a) -> IO a
withCWStringLen, -- :: String -> (CWStringLen -> IO a) -> IO a
) where
import Foreign.Marshal.Array
......@@ -64,29 +112,97 @@ import Data.Char ( chr, ord )
-- representation of strings in C
-- ------------------------------
type CString = Ptr CChar -- conventional NUL terminates strings
type CStringLen = (CString, Int) -- strings with explicit length
-- | A C string is a reference to an array of C characters terminated by NUL.
type CString = Ptr CChar
-- | A string with explicit length information in bytes instead of a
-- terminating NUL (allowing NUL characters in the middle of the string).
type CStringLen = (Ptr CChar, Int)
-- exported functions
-- ------------------
--
-- * the following routines apply the default conversion when converting the
-- C-land character encoding into the Haskell-land character encoding
-- | Marshal a NUL terminated C string into a Haskell string.
--
peekCString :: CString -> IO String
peekCString = peekCAString
-- | Marshal a C string with explicit length into a Haskell string.
--
-- ** NOTE: The current implementation doesn't handle conversions yet! **
peekCStringLen :: CStringLen -> IO String
peekCStringLen = peekCAStringLen
-- | Marshal a Haskell string into a NUL terminated C string.
--
-- * the Haskell string may /not/ contain any NUL characters
--
-- * the routines using an explicit length tolerate NUL characters in the
-- middle of a string
-- * new storage is allocated for the C string and must be explicitly freed
--
newCString :: String -> IO CString
newCString = newCAString
-- marshal a NUL terminated C string into a Haskell string
-- | Marshal a Haskell string into a C string (ie, character array) with
-- explicit length information.
--
peekCString :: CString -> IO String
-- * new storage is allocated for the C string and must be explicitly freed
--
newCStringLen :: String -> IO CStringLen
newCStringLen = newCAStringLen
-- | Marshal a Haskell string into a NUL terminated C string using temporary
-- storage.
--
-- * the Haskell string may /not/ contain any NUL characters
--
-- * see the lifetime constraints of 'Foreign.Marshal.Alloc.alloca'
--
withCString :: String -> (CString -> IO a) -> IO a
withCString = withCAString
-- | Marshal a Haskell string into a C string (ie, character array)
-- in temporary storage, with explicit length information.
--
-- * the Haskell string may contain NUL characters, since the length is
--   passed explicitly rather than being marked by a terminator
--
-- * see the lifetime constraints of 'Foreign.Marshal.Alloc.alloca'
--
withCStringLen :: String -> (CStringLen -> IO a) -> IO a
withCStringLen = withCAStringLen
-- | Determines whether a character can be accurately encoded in a 'CString'.
-- Unrepresentable characters are meant to be converted to @\'?\'@, but the
-- current implementation marshals every character with 'castCharToCChar',
-- which performs no such substitution.
--
-- Currently only Latin-1 characters are representable.
charIsRepresentable :: Char -> IO Bool
charIsRepresentable c = return (ord c < 256)
-- single byte characters
-- ----------------------
--
-- ** NOTE: These routines don't handle conversions! **
-- | Convert a C byte, representing a Latin-1 character, to the corresponding
-- Haskell character.
castCCharToChar :: CChar -> Char
castCCharToChar ch = unsafeChr (fromIntegral (fromIntegral ch :: Word8))
-- | Convert a Haskell character to a C character.
-- This function is only safe on the first 256 characters.
castCharToCChar :: Char -> CChar
castCharToCChar ch = fromIntegral (ord ch)
-- | Marshal a NUL terminated C string into a Haskell string.
--
peekCAString :: CString -> IO String
#ifndef __GLASGOW_HASKELL__
peekCString cp = do cs <- peekArray0 nUL cp; return (cCharsToChars cs)
peekCAString cp = do
cs <- peekArray0 nUL cp
return (cCharsToChars cs)
#else
peekCString cp = do
peekCAString cp = do
l <- lengthArray0 nUL cp
if l <= 0 then return "" else loop "" (l-1)
where
......@@ -96,13 +212,15 @@ peekCString cp = do
val `seq` if i <= 0 then return (val:s) else loop (val:s) (i-1)
#endif
-- marshal a C string with explicit length into a Haskell string
-- | Marshal a C string with explicit length into a Haskell string.
--
peekCStringLen :: CStringLen -> IO String
peekCAStringLen :: CStringLen -> IO String
#ifndef __GLASGOW_HASKELL__
peekCStringLen (cp, len) = do cs <- peekArray len cp; return (cCharsToChars cs)
peekCAStringLen (cp, len) = do
cs <- peekArray len cp
return (cCharsToChars cs)
#else
peekCStringLen (cp, len)
peekCAStringLen (cp, len)
| len <= 0 = return "" -- being (too?) nice.
| otherwise = loop [] (len-1)
where
......@@ -115,91 +233,92 @@ peekCStringLen (cp, len)
else loop (val:acc) (i-1)
#endif
-- marshal a Haskell string into a NUL terminated C strings
-- | Marshal a Haskell string into a NUL terminated C string.
--
-- * the Haskell string may *not* contain any NUL characters
-- * the Haskell string may /not/ contain any NUL characters
--
-- * new storage is allocated for the C string and must be explicitly freed
--
newCString :: String -> IO CString
newCAString :: String -> IO CString
#ifndef __GLASGOW_HASKELL__
newCString = newArray0 nUL . charsToCChars
newCAString = newArray0 nUL . charsToCChars
#else
newCString str = do
newCAString str = do
ptr <- mallocArray0 (length str)
let
go [] n# = pokeElemOff ptr (I# n#) nUL
go (c:cs) n# = do pokeElemOff ptr (I# n#) (castCharToCChar c); go cs (n# +# 1#)
go str 0#
go [] n = pokeElemOff ptr n nUL
go (c:cs) n = do pokeElemOff ptr n (castCharToCChar c); go cs (n+1)
go str 0
return ptr
#endif
-- marshal a Haskell string into a C string (ie, character array) with
-- explicit length information
-- | Marshal a Haskell string into a C string (ie, character array) with
-- explicit length information.
--
-- * new storage is allocated for the C string and must be explicitly freed
--
newCStringLen :: String -> IO CStringLen
newCAStringLen :: String -> IO CStringLen
#ifndef __GLASGOW_HASKELL__
newCStringLen str = do a <- newArray (charsToCChars str)
return (pairLength str a)
newCAStringLen str = do
a <- newArray (charsToCChars str)
return (pairLength str a)
#else
newCStringLen str = do
newCAStringLen str = do
ptr <- mallocArray0 len
let
go [] n# = return ()
go (c:cs) n# = do pokeElemOff ptr (I# n#) (castCharToCChar c); go cs (n# +# 1#)
go str 0#
go [] n = n `seq` return () -- make it strict in n
go (c:cs) n = do pokeElemOff ptr n (castCharToCChar c); go cs (n+1)
go str 0
return (ptr, len)
where
len = length str
#endif
-- marshal a Haskell string into a NUL terminated C strings using temporary
-- storage
-- | Marshal a Haskell string into a NUL terminated C string using temporary
-- storage.
--
-- * the Haskell string may *not* contain any NUL characters
-- * the Haskell string may /not/ contain any NUL characters
--
-- * see the lifetime constraints of `MarshalAlloc.alloca'
-- * see the lifetime constraints of 'Foreign.Marshal.Alloc.alloca'
--
withCString :: String -> (CString -> IO a) -> IO a
withCAString :: String -> (CString -> IO a) -> IO a
#ifndef __GLASGOW_HASKELL__
withCString = withArray0 nUL . charsToCChars
withCAString = withArray0 nUL . charsToCChars
#else
withCString str f =
withCAString str f =
allocaArray0 (length str) $ \ptr ->
let
go [] n# = pokeElemOff ptr (I# n#) nUL
go (c:cs) n# = do pokeElemOff ptr (I# n#) (castCharToCChar c); go cs (n# +# 1#)
go [] n = pokeElemOff ptr n nUL
go (c:cs) n = do pokeElemOff ptr n (castCharToCChar c); go cs (n+1)
in do
go str 0#
go str 0
f ptr
#endif
-- marshal a Haskell string into a NUL terminated C strings using temporary
-- storage
-- | Marshal a Haskell string into a C string (ie, character array)
-- in temporary storage, with explicit length information.
--
-- * the Haskell string may *not* contain any NUL characters
-- * the Haskell string may contain NUL characters, as the length is explicit
--
-- * see the lifetime constraints of `MarshalAlloc.alloca'
-- * see the lifetime constraints of 'Foreign.Marshal.Alloc.alloca'
--
withCStringLen :: String -> (CStringLen -> IO a) -> IO a
withCAStringLen :: String -> (CStringLen -> IO a) -> IO a
#ifndef __GLASGOW_HASKELL__
withCStringLen str act = withArray (charsToCChars str) $ act . pairLength str
withCAStringLen str act = withArray (charsToCChars str) $ act . pairLength str
#else
withCStringLen str f =
withCAStringLen str f =
allocaArray len $ \ptr ->
let
go [] n# = return ()
go (c:cs) n# = do pokeElemOff ptr (I# n#) (castCharToCChar c); go cs (n# +# 1#)
go [] n = n `seq` return () -- make it strict in n
go (c:cs) n = do pokeElemOff ptr n (castCharToCChar c); go cs (n+1)
in do
go str 0#
go str 0
f (ptr,len)
where
len = length str
#endif
-- auxiliary definitions
-- ----------------------
-- C's end of string character
......@@ -209,9 +328,10 @@ nUL = 0
-- pair a C string with the length of the given Haskell string
--
pairLength :: String -> CString -> CStringLen
pairLength :: String -> a -> (a, Int)
pairLength = flip (,) . length
#ifndef __GLASGOW_HASKELL__
-- cast [CChar] to [Char]
--
cCharsToChars :: [CChar] -> [Char]
......@@ -221,9 +341,118 @@ cCharsToChars xs = map castCCharToChar xs
--
charsToCChars :: [Char] -> [CChar]
charsToCChars xs = map castCharToCChar xs
#endif
castCCharToChar :: CChar -> Char
castCCharToChar ch = unsafeChr (fromIntegral (fromIntegral ch :: Word8))
-----------------------------------------------------------------------------
-- Wide strings
castCharToCChar :: Char -> CChar
castCharToCChar ch = fromIntegral (ord ch)
-- representation of wide strings in C
-- -----------------------------------
-- | A C wide string is a reference to an array of C wide characters
-- terminated by NUL.
type CWString = Ptr CWchar
-- | A wide character string with explicit length information in 'CWchar's
-- instead of a terminating NUL (allowing NUL characters in the middle
-- of the string).
type CWStringLen = (Ptr CWchar, Int)
-- | Read a NUL terminated C wide string and decode it into a Haskell string.
--
peekCWString :: CWString -> IO String
peekCWString cp = fmap cWcharsToChars (peekArray0 wNUL cp)
-- | Read a C wide string of explicit length and decode it into a Haskell
-- string.  The length is the number of 'CWchar' elements to read.
--
peekCWStringLen :: CWStringLen -> IO String
peekCWStringLen (cp, len) = fmap cWcharsToChars (peekArray len cp)
-- | Encode a Haskell string as a freshly allocated, NUL terminated C wide
-- string.
--
-- * the Haskell string may /not/ contain any NUL characters
--
-- * new storage is allocated for the C string and must be explicitly freed
--
newCWString :: String -> IO CWString
newCWString str = newArray0 wNUL (charsToCWchars str)
-- | Marshal a Haskell string into a C wide string (ie, wide character array)
-- with explicit length information.
--
-- * the returned length counts 'CWchar' elements, not Haskell characters;
--   under UTF-16 a character outside the BMP occupies two elements
--
-- * new storage is allocated for the C string and must be explicitly freed
--
newCWStringLen :: String -> IO CWStringLen
newCWStringLen str = do
  -- Measure the encoded list rather than the input string: the two differ
  -- whenever the encoding emits a surrogate pair.
  let wcs = charsToCWchars str
  a <- newArray wcs
  return (a, length wcs)
-- | Encode a Haskell string as a NUL terminated C wide string held in
-- temporary storage, and run the given action on it.
--
-- * the Haskell string may /not/ contain any NUL characters
--
-- * see the lifetime constraints of 'Foreign.Marshal.Alloc.alloca'
--
withCWString :: String -> (CWString -> IO a) -> IO a
withCWString str act = withArray0 wNUL (charsToCWchars str) act
-- | Marshal a Haskell string into a C wide string (ie, wide character array)
-- in temporary storage, with explicit length information.
--
-- * the length passed to the action counts 'CWchar' elements, not Haskell
--   characters; under UTF-16 a character outside the BMP occupies two
--   elements
--
-- * the Haskell string may contain NUL characters, as no terminator is used
--
-- * see the lifetime constraints of 'Foreign.Marshal.Alloc.alloca'
--
withCWStringLen :: String -> (CWStringLen -> IO a) -> IO a
withCWStringLen str act =
  -- withArrayLen reports the size of the encoded array, which is the length
  -- the C side needs; the length of the Haskell string can be smaller.
  withArrayLen (charsToCWchars str) $ \len ptr -> act (ptr, len)
-- auxiliary definitions
-- ----------------------
wNUL :: CWchar
wNUL = 0
cWcharsToChars :: [CWchar] -> [Char]
charsToCWchars :: [Char] -> [CWchar]
#ifdef mingw32_TARGET_OS
-- On Windows, wchar_t is 16 bits wide and CWString uses the UTF-16 encoding.
-- coding errors generate Chars in the surrogate range
cWcharsToChars = map chr . decodeUnits . map fromIntegral
  where
    -- Walk the code units left to right, fusing each well-formed surrogate
    -- pair into a single code point.  Any other unit is passed through
    -- unchanged, so an unpaired surrogate yields a Char in the surrogate
    -- range.
    decodeUnits (hi:lo:rest)
      | isLead hi && isTrail lo = combine hi lo : decodeUnits rest
    decodeUnits (unit:rest) = unit : decodeUnits rest
    decodeUnits []          = []

    isLead  u = 0xd800 <= u && u <= 0xdbff
    isTrail u = 0xdc00 <= u && u <= 0xdfff

    combine hi lo = (hi - 0xd800) * 0x400 + (lo - 0xdc00) + 0x10000
charsToCWchars = concatMap (encodeUnit . ord)
  where
    -- Code points below 0x10000 fit in one unit; anything above is split
    -- into a lead/trail surrogate pair.
    encodeUnit c
      | c < 0x10000 = [fromIntegral c]
      | otherwise   = [fromIntegral (hi + 0xd800), fromIntegral (lo + 0xdc00)]
      where
        (hi, lo) = (c - 0x10000) `divMod` 0x400
#else /* !mingw32_TARGET_OS */
-- Without UTF-16, each wide character corresponds to exactly one Haskell
-- character, so both directions are element-wise casts.
cWcharsToChars = map castCWcharToChar
charsToCWchars = map castCharToCWchar
-- These conversions only make sense if __STDC_ISO_10646__ is defined
-- (meaning that wchar_t is ISO 10646, aka Unicode)
-- Interpret the numeric value of a C wide character as a Haskell character
-- code.
castCWcharToChar :: CWchar -> Char
castCWcharToChar = chr . fromIntegral
-- Convert a Haskell character to the C wide character holding its numeric
-- code.
castCharToCWchar :: Char -> CWchar
castCharToCWchar = fromIntegral . ord
#endif /* !mingw32_TARGET_OS */
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment