Commit 6bc00b29 authored by Simon Marlow

Export Unicode and newline functionality from System.IO; update Haddock docs

parent 3d5794a8
......@@ -43,45 +43,57 @@ import GHC.IO.Exception
-- -----------------------------------------------------------------------------
latin1, utf8, utf16, utf16le, utf16be, utf32, utf32le, utf32be, localeEncoding
:: TextEncoding
-- | The Latin1 (ISO8859-1) encoding. This encoding maps bytes
-- directly to the first 256 Unicode code points, and is thus not a
-- complete Unicode encoding.
-- complete Unicode encoding. An attempt to write a character greater than
-- '\255' to a 'Handle' using the 'latin1' encoding will result in an error.
latin1 :: TextEncoding
latin1 = Latin1.latin1_checked
-- | The UTF-8 unicode encoding
-- | The UTF-8 Unicode encoding
utf8 :: TextEncoding
utf8 = UTF8.utf8
-- | The UTF-16 unicode encoding (a byte-order-mark should be used to
-- | The UTF-16 Unicode encoding (a byte-order-mark should be used to
-- indicate endianness).
utf16 :: TextEncoding
utf16 = UTF16.utf16
-- | The UTF-16 unicode encoding (little-endian)
-- | The UTF-16 Unicode encoding (little-endian)
utf16le :: TextEncoding
utf16le = UTF16.utf16le
-- | The UTF-16 unicode encoding (big-endian)
-- | The UTF-16 Unicode encoding (big-endian)
utf16be :: TextEncoding
utf16be = UTF16.utf16be
-- | The UTF-32 unicode encoding (a byte-order-mark should be used to
-- | The UTF-32 Unicode encoding (a byte-order-mark should be used to
-- indicate endianness).
utf32 :: TextEncoding
utf32 = UTF32.utf32
-- | The UTF-32 unicode encoding (little-endian)
-- | The UTF-32 Unicode encoding (little-endian)
utf32le :: TextEncoding
utf32le = UTF32.utf32le
-- | The UTF-32 unicode encoding (big-endian)
-- | The UTF-32 Unicode encoding (big-endian)
utf32be :: TextEncoding
utf32be = UTF32.utf32be
-- | The text encoding of the current locale
-- | The Unicode encoding of the current locale
localeEncoding :: TextEncoding
#if !defined(mingw32_HOST_OS)
localeEncoding = Iconv.localeEncoding
#else
localeEncoding = Latin1.latin1
#endif
-- | Acquire the named text encoding
-- | Look up the named Unicode encoding. May fail with
--
-- * 'isDoesNotExistError' if the encoding is unknown
--
-- The set of known encodings is system-dependent.
--
mkTextEncoding :: String -> IO TextEncoding
#if !defined(mingw32_HOST_OS)
mkTextEncoding = Iconv.mkTextEncoding
......@@ -94,7 +106,7 @@ mkTextEncoding "UTF-32" = return utf32
mkTextEncoding "UTF-32LE" = return utf32le
mkTextEncoding "UTF-32BE" = return utf32be
mkTextEncoding e = ioException
(IOError Nothing InvalidArgument "mkTextEncoding"
(IOError Nothing NoSuchThing "mkTextEncoding"
("unknown encoding:" ++ e) Nothing Nothing)
#endif
......
......@@ -246,8 +246,7 @@ hSetBuffering handle mode =
-- hSetEncoding
-- | The action 'hSetEncoding' @hdl@ @encoding@ changes the text encoding
-- for the handle @hdl@ to @encoding@. Encodings are available from the
-- module "GHC.IO.Encoding". The default encoding when a 'Handle' is
-- for the handle @hdl@ to @encoding@. The default encoding when a 'Handle' is
-- created is 'localeEncoding', namely the default encoding for the current
-- locale.
--
......@@ -255,6 +254,9 @@ hSetBuffering handle mode =
-- stop further encoding or decoding on an existing 'Handle', use
-- 'hSetBinaryMode'.
--
-- 'hSetEncoding' may need to flush buffered data in order to change
-- the encoding.
--
hSetEncoding :: Handle -> TextEncoding -> IO ()
hSetEncoding hdl encoding = do
withHandle "hSetEncoding" hdl $ \h_@Handle__{..} -> do
......
......@@ -680,6 +680,9 @@ commitBuffer' raw sz@(I# _) count@(I# _) flush release
-- 'hPutBuf' ignores any text encoding that applies to the 'Handle',
-- writing the bytes directly to the underlying file or device.
--
-- 'hPutBuf' ignores the prevailing 'TextEncoding' and
-- 'NewlineMode' on the 'Handle', and writes bytes directly.
--
-- This operation may fail with:
--
-- * 'ResourceVanished' if the handle is a pipe or socket, and the
......@@ -784,6 +787,8 @@ writeChunkNonBlocking h_@Handle__{..} ptr bytes
-- If the handle is a pipe or socket, and the writing end
-- is closed, 'hGetBuf' will behave as if EOF was reached.
--
-- 'hGetBuf' ignores the prevailing 'TextEncoding' and 'NewlineMode'
-- on the 'Handle', and reads bytes directly.
hGetBuf :: Handle -> Ptr a -> Int -> IO Int
hGetBuf h ptr count
......@@ -868,6 +873,9 @@ readChunk h_@Handle__{..} ptr bytes
-- If the handle is a pipe or socket, and the writing end
-- is closed, 'hGetBufNonBlocking' will behave as if EOF was reached.
--
-- 'hGetBufNonBlocking' ignores the prevailing 'TextEncoding' and
-- 'NewlineMode' on the 'Handle', and reads bytes directly.
hGetBufNonBlocking :: Handle -> Ptr a -> Int -> IO Int
hGetBufNonBlocking h ptr count
| count == 0 = return 0
......
......@@ -322,8 +322,8 @@ and hence it is only possible on a seekable Handle.
-- Newline translation
-- | The representation of a newline in the external file or stream.
data Newline = LF -- ^ "\n"
| CRLF -- ^ "\r\n"
data Newline = LF -- ^ '\n'
| CRLF -- ^ '\r\n'
deriving Eq
-- | Specifies the translation, if any, of newline characters between
......@@ -339,7 +339,8 @@ data NewlineMode
}
deriving Eq
-- | The native newline representation for the current platform
-- | The native newline representation for the current platform: 'LF'
-- on Unix systems, 'CRLF' on Windows.
nativeNewline :: Newline
#ifdef mingw32_HOST_OS
nativeNewline = CRLF
......@@ -347,7 +348,7 @@ nativeNewline = CRLF
nativeNewline = LF
#endif
-- | Map "\r\n" into "\n" on input, and "\n" to the native newline
-- | Map '\r\n' into '\n' on input, and '\n' to the native newline
-- representation on output. This mode can be used on any platform, and
-- works with text files using any newline convention. The downside is
-- that @readFile >>= writeFile@ might yield a different file.
......
......@@ -159,6 +159,62 @@ module System.IO (
openTempFile,
openBinaryTempFile,
#if !defined(__NHC__) && !defined(__HUGS__)
-- * Unicode encoding\/decoding
-- | A text-mode 'Handle' has an associated 'TextEncoding', which
-- is used to decode bytes into Unicode characters when reading,
-- and encode Unicode characters into bytes when writing.
--
-- The default 'TextEncoding' is the same as the default encoding
-- on your system, which is also available as 'localeEncoding'.
-- (GHC note: on Windows, currently 'localeEncoding' is always
-- 'latin1'; there is no support for encoding and decoding using
-- the ANSI code page).
--
-- Encoding and decoding errors are always detected and reported,
-- except during lazy I/O ('hGetContents', 'getContents', and
-- 'readFile'), where a decoding error merely results in
-- termination of the character stream, as with other I/O errors.
hSetEncoding,
-- ** Unicode encodings
TextEncoding,
latin1,
utf8,
utf16, utf16le, utf16be,
utf32, utf32le, utf32be,
localeEncoding,
mkTextEncoding,
#endif
#if !defined(__NHC__) && !defined(__HUGS__)
-- * Newline conversion
-- | In Haskell, a newline is always represented by the character
-- '\n'. However, in files and external character streams, a
-- newline may be represented by another character sequence, such
-- as '\r\n'.
--
-- A text-mode 'Handle' has an associated 'NewlineMode' that
-- specifies how to translate newline characters. The
-- 'NewlineMode' specifies the input and output translation
-- separately, so that for instance you can translate '\r\n'
-- to '\n' on input, but leave newlines as '\n' on output.
--
-- The default 'NewlineMode' for a 'Handle' is
-- 'nativeNewlineMode', which does no translation on Unix systems,
-- but translates '\r\n' to '\n' and back on Windows.
--
-- Binary-mode 'Handle's do no newline translation at all.
--
hSetNewlineMode,
Newline(..), nativeNewline,
NewlineMode(..),
noNewlineTranslation, universalNewlineMode, nativeNewlineMode,
#endif
) where
import Control.Exception.Base
......@@ -180,7 +236,8 @@ import GHC.IO.Handle.FD
import GHC.IO.Handle
import GHC.IORef
import GHC.IO.Exception ( userError )
-- import GHC.Exception
import GHC.IO.Encoding
import GHC.Exception
import GHC.Num
import Text.Read
import GHC.Show
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment