Commit 1319363f authored by thomie
Browse files

Always use native-Haskell de/encoders for ASCII and latin1

This fixes test encoding005 on Windows (#10623).

Reviewed by: austin, bgamari

Differential Revision: https://phabricator.haskell.org/D2262
parent 31f18043
......@@ -245,8 +245,16 @@ mkTextEncoding' cfm enc =
"UTF32" -> return $ UTF32.mkUTF32 cfm
"UTF32LE" -> return $ UTF32.mkUTF32le cfm
"UTF32BE" -> return $ UTF32.mkUTF32be cfm
-- ISO8859-1 we can handle ourselves as well
"ISO88591" -> return $ Latin1.mkLatin1 cfm
-- On AIX, we want to avoid iconv, because it is either
-- a) totally broken, or b) non-reentrant, or c) actually works.
-- Detecting b) is difficult as you'd have to trigger the reentrancy
-- corruption.
-- Therefore, on AIX, we handle the popular ASCII and latin1 encodings
-- ourselves. For consistency, we do the same on other platforms.
-- We use `mkLatin1_checked` instead of `mkLatin1`, since the latter
-- completely ignores the CodingFailureMode (TEST=encoding005).
_ | isAscii -> return (Latin1.mkAscii cfm)
_ | isLatin1 -> return (Latin1.mkLatin1_checked cfm)
#if defined(mingw32_HOST_OS)
'C':'P':n | [(cp,"")] <- reads n -> return $ CodePage.mkCodePageEncoding cfm cp
_ -> unknownEncodingErr (enc ++ codingFailureModeSuffix cfm)
......@@ -256,25 +264,21 @@ mkTextEncoding' cfm enc =
-- Unfortunately there is no good way to determine whether iconv is actually
-- functional without telling it to do something.
_ -> do res <- Iconv.mkIconvEncoding cfm enc
let isAscii = any (== enc) ansiEncNames
case res of
Just e -> return e
-- At this point we know that we can't count on iconv to work
-- (see, for instance, Trac #10298). However, we still want to do
-- what we can to work with what we have. For instance, ASCII is
-- easy. We match on ASCII encodings directly using several
-- possible aliases (specified by RFC 1345 & Co) and for this use
-- the 'ascii' encoding
Nothing
| isAscii -> return (Latin1.mkAscii cfm)
| otherwise ->
unknownEncodingErr (enc ++ codingFailureModeSuffix cfm)
Nothing -> unknownEncodingErr (enc ++ codingFailureModeSuffix cfm)
#endif
where
ansiEncNames = -- ASCII aliases
isAscii = enc `elem` asciiEncNames
isLatin1 = enc `elem` latin1EncNames
asciiEncNames = -- ASCII aliases specified by RFC 1345 and RFC 3808.
[ "ANSI_X3.4-1968", "iso-ir-6", "ANSI_X3.4-1986", "ISO_646.irv:1991"
, "US-ASCII", "us", "IBM367", "cp367", "csASCII", "ASCII", "ISO646-US"
]
#endif
latin1EncNames = -- latin1 aliases specified by RFC 1345 and RFC 3808.
[ "ISO_8859-1:1987", "iso-ir-100", "ISO_8859-1", "ISO-8859-1", "latin1",
"l1", "IBM819", "CP819", "csISOLatin1"
]
latin1_encode :: CharBuffer -> Buffer Word8 -> IO (CharBuffer, Buffer Word8)
......
......@@ -138,8 +138,7 @@ test('encoding001',
test('encoding002', normal, compile_and_run, [''])
test('encoding003', normal, compile_and_run, [''])
test('encoding004', normal, compile_and_run, [''])
test('encoding005', when(opsys('mingw32'), expect_broken(10623)),
compile_and_run, [''])
test('encoding005', normal, compile_and_run, [''])
test('environment001',
[extra_clean(['environment001'])],
......
......@@ -44,9 +44,9 @@ test_latin1 cfm enc = do
ErrorOnCodingFailure -> Nothing
IgnoreCodingFailure -> Just [0xfe,0xff,0xff,0xfe]
TransliterateCodingFailure -> Just [0xfe,0xff,0x3f,0x3f,0x3f,0xff,0xfe]
-- N.B. The argument "LATIN1//TRANSLIT" to mkTextEncoding does not
-- correspond to "LATIN1//TRANSLIT" in iconv! Instead GHC asks iconv
-- to encode to "LATIN1" and uses its own "evil hack" to insert '?'
-- N.B. The argument "latin1//TRANSLIT" to mkTextEncoding does not
-- correspond to "latin1//TRANSLIT" in iconv! Instead GHC asks iconv
-- to encode to "latin1" and uses its own "evil hack" to insert '?'
-- (ASCII 0x3f) in place of failures. See GHC.IO.Encoding.recoverEncode.
--
-- U+0100 is LATIN CAPITAL LETTER A WITH MACRON, which iconv would
......@@ -108,8 +108,8 @@ main = do
test_ascii TransliterateCodingFailure =<< mkTextEncoding "ASCII//TRANSLIT"
test_ascii RoundtripFailure =<< mkTextEncoding "ASCII//ROUNDTRIP"
putStrLn "mkTextEncoding LATIN1 tests"
test_latin1 ErrorOnCodingFailure =<< mkTextEncoding "LATIN1"
test_latin1 IgnoreCodingFailure =<< mkTextEncoding "LATIN1//IGNORE"
test_latin1 TransliterateCodingFailure =<< mkTextEncoding "LATIN1//TRANSLIT"
test_latin1 RoundtripFailure =<< mkTextEncoding "LATIN1//ROUNDTRIP"
putStrLn "mkTextEncoding latin1 tests"
test_latin1 ErrorOnCodingFailure =<< mkTextEncoding "latin1"
test_latin1 IgnoreCodingFailure =<< mkTextEncoding "latin1//IGNORE"
test_latin1 TransliterateCodingFailure =<< mkTextEncoding "latin1//TRANSLIT"
test_latin1 RoundtripFailure =<< mkTextEncoding "latin1//ROUNDTRIP"
......@@ -2,4 +2,4 @@ char8 tests
Latin1.ascii tests
Latin1.latin1_checked tests
mkTextEncoding ASCII tests
mkTextEncoding LATIN1 tests
mkTextEncoding latin1 tests
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment