Commit f7fd864c authored by Joachim Breitner

Skip a possible BOM in utf8 encoding

The BOM is decoded as UTF-8 rather than with the system locale's encoding,
which might be something else. This fixes bug #10907. A test is added, but
it is less useful than it could be until task #10909 is done.

Differential Revision: D1274
parent a0b1f414
......@@ -53,6 +53,8 @@ import Data.Maybe
import Control.Exception
import System.IO
import System.IO.Unsafe ( unsafePerformIO )
import GHC.IO.Encoding.UTF8 ( mkUTF8 )
import GHC.IO.Encoding.Failure ( CodingFailureMode(IgnoreCodingFailure) )
import GHC.Exts
......@@ -131,14 +133,16 @@ skipBOM h size offset =
then do
-- Validate assumption that handle is in binary mode.
ASSERTM( hGetEncoding h >>= return . isNothing )
-- Temporarily select text mode to make `hLookAhead` and
-- `hGetChar` return full Unicode characters.
bracket_ (hSetBinaryMode h False) (hSetBinaryMode h True) $ do
-- Temporarily select utf8 encoding with error ignoring,
-- to make `hLookAhead` and `hGetChar` return full Unicode characters.
bracket_ (hSetEncoding h safeEncoding) (hSetBinaryMode h True) $ do
c <- hLookAhead h
if c == '\xfeff'
then hGetChar h >> hTell h
else return offset
else return offset
where
safeEncoding = mkUTF8 IgnoreCodingFailure
newUTF8StringBuffer :: ForeignPtr Word8 -> Ptr Word8 -> Int -> IO StringBuffer
newUTF8StringBuffer buf ptr size = do
......
module ByteOrderMark () where
......@@ -22,3 +22,6 @@ test('T2302', only_ways(['normal']), compile_fail, [''])
test('T4373', normal, compile, [''])
test('T6016', extra_clean(['T6016-twoBOMs']), compile_and_run, ['-package ghc'])
test('T7671', normal, compile, [''])
# TODO: This test ought to be run in a non-UTF8 locale, but this is not yet
# supported by the test suite (see #10909; this test covers #10907)
test('T10907', normal, compile, [''])
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment