Commit 469bef21 authored by Duncan Coutts
Browse files

Ignore a byte order mark (BOM) when reading UTF8 text files

Yes, of course UTF8 text files should not use a BOM, but
notepad.exe writes one anyway. Fixes ticket #533.
parent 247cd70a
...@@ -911,12 +911,19 @@ toUTF8 (c:cs) ...@@ -911,12 +911,19 @@ toUTF8 (c:cs)
: toUTF8 cs : toUTF8 cs
where w = ord c where w = ord c
-- | Drop a single leading Unicode byte order mark (BOM, U+FEFF) from the
-- input, if one is present.
--
-- Only the very first character is inspected; input that does not start
-- with the BOM (including the empty string) is returned unchanged.
--
ignoreBOM :: String -> String
ignoreBOM input = case input of
  '\xFEFF' : rest -> rest
  _               -> input
-- | Reads a UTF8 encoded text file as a Unicode String -- | Reads a UTF8 encoded text file as a Unicode String
-- --
-- Reads lazily using ordinary 'readFile'. -- Reads lazily using ordinary 'readFile'.
-- --
readUTF8File :: FilePath -> IO String readUTF8File :: FilePath -> IO String
readUTF8File f = fmap fromUTF8 . hGetContents =<< openBinaryFile f ReadMode readUTF8File f = fmap (ignoreBOM . fromUTF8)
. hGetContents =<< openBinaryFile f ReadMode
-- | Reads a UTF8 encoded text file as a Unicode String -- | Reads a UTF8 encoded text file as a Unicode String
-- --
...@@ -924,8 +931,10 @@ readUTF8File f = fmap fromUTF8 . hGetContents =<< openBinaryFile f ReadMode ...@@ -924,8 +931,10 @@ readUTF8File f = fmap fromUTF8 . hGetContents =<< openBinaryFile f ReadMode
-- --
withUTF8FileContents :: FilePath -> (String -> IO a) -> IO a withUTF8FileContents :: FilePath -> (String -> IO a) -> IO a
withUTF8FileContents name action = withUTF8FileContents name action =
Exception.bracket (openBinaryFile name ReadMode) hClose Exception.bracket
(\hnd -> hGetContents hnd >>= action . fromUTF8) (openBinaryFile name ReadMode)
hClose
(\hnd -> hGetContents hnd >>= action . ignoreBOM . fromUTF8)
-- | Writes a Unicode String as a UTF8 encoded text file. -- | Writes a Unicode String as a UTF8 encoded text file.
-- --
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment