diff --git a/libraries/base/GHC/IO/Encoding.hs b/libraries/base/GHC/IO/Encoding.hs index 014b61b8b02671f0ac1e7b981afef94445c34e47..108b0fcf117d5aa280fbf13ee20650bd87676b28 100644 --- a/libraries/base/GHC/IO/Encoding.hs +++ b/libraries/base/GHC/IO/Encoding.hs @@ -30,6 +30,7 @@ module GHC.IO.Encoding ( ) where import GHC.Base +import GHC.Foreign (charIsRepresentable) import GHC.IO.Exception import GHC.IO.Buffer import GHC.IO.Encoding.Failure @@ -235,14 +236,9 @@ mkTextEncoding e = case mb_coding_failure_mode of _ -> Nothing mkTextEncoding' :: CodingFailureMode -> String -> IO TextEncoding -mkTextEncoding' cfm enc - -- First, specifically match on ASCII encodings directly using - -- several possible aliases (specified by RFC 1345 & co), which - -- allows us to handle ASCII conversions without iconv at all (see - -- trac #10298). - | any (== enc) ansiEncNames = return (UTF8.mkUTF8 cfm) - -- Otherwise, handle other encoding needs via iconv. - | otherwise = case [toUpper c | c <- enc, c /= '-'] of +mkTextEncoding' cfm enc = + case [toUpper c | c <- enc, c /= '-'] of + -- UTF-8 and friends we can handle ourselves "UTF8" -> return $ UTF8.mkUTF8 cfm "UTF16" -> return $ UTF16.mkUTF16 cfm "UTF16LE" -> return $ UTF16.mkUTF16le cfm @@ -254,13 +250,32 @@ mkTextEncoding' cfm enc 'C':'P':n | [(cp,"")] <- reads n -> return $ CodePage.mkCodePageEncoding cfm cp _ -> unknownEncodingErr (enc ++ codingFailureModeSuffix cfm) #else - _ -> Iconv.mkIconvEncoding cfm enc -#endif + -- Otherwise, handle other encoding needs via iconv. + + -- Unfortunately there is no good way to determine whether iconv is actually + -- functional without telling it to do something. + _ -> do res <- Iconv.mkIconvEncoding cfm enc + good <- charIsRepresentable res 'a' + let isAscii = any (== enc) ansiEncNames + case good of + True -> return res + -- At this point we know that we can't count on iconv to work + -- (see, for instance, Trac #10298). However, we still want to do + -- what can to work with what we have. For instance, ASCII is + -- easy. We match on ASCII encodings directly using several + -- possible aliases (specified by RFC 1345 & Co) and for this use + -- the 'char8' encodeing + False + | isAscii -> return char8 + | otherwise -> + unknownEncodingErr (enc ++ codingFailureModeSuffix cfm) where ansiEncNames = -- ASCII aliases [ "ANSI_X3.4-1968", "iso-ir-6", "ANSI_X3.4-1986", "ISO_646.irv:1991" , "US-ASCII", "us", "IBM367", "cp367", "csASCII", "ASCII", "ISO646-US" ] +#endif + latin1_encode :: CharBuffer -> Buffer Word8 -> IO (CharBuffer, Buffer Word8) latin1_encode input output = fmap (\(_why,input',output') -> (input',output')) $ Latin1.latin1_encode input output -- unchecked, used for char8 diff --git a/libraries/base/GHC/IO/Encoding/Iconv.hs b/libraries/base/GHC/IO/Encoding/Iconv.hs index 89ca71e084add820e5b33359b65ae970f2f53c7e..f64d2451cf3f443c5e3340140be9bc10d16613c2 100644 --- a/libraries/base/GHC/IO/Encoding/Iconv.hs +++ b/libraries/base/GHC/IO/Encoding/Iconv.hs @@ -99,6 +99,8 @@ char_shift | charSize == 2 = 1 iconvEncoding :: String -> IO TextEncoding iconvEncoding = mkIconvEncoding ErrorOnCodingFailure +-- | Construct an iconv-based 'TextEncoding' for the given character set and +-- 'CodingFailureMode'. mkIconvEncoding :: CodingFailureMode -> String -> IO TextEncoding mkIconvEncoding cfm charset = do return (TextEncoding { diff --git a/libraries/base/GHC/TopHandler.hs b/libraries/base/GHC/TopHandler.hs index e725196cdff1eea3a0ec1bb59c0cad731618c713..d9010697b387b9278dbd5e572dcd8a38ffc4620c 100644 --- a/libraries/base/GHC/TopHandler.hs +++ b/libraries/base/GHC/TopHandler.hs @@ -176,7 +176,11 @@ disasterHandler exit _ = withCAString "%s" $ \fmt -> withCAString msgStr $ \msg -> errorBelch fmt msg >> exit 1 - where msgStr = "encountered an exception while trying to report an exception" + where + msgStr = + "encountered an exception while trying to report an exception." ++ + "One possible reason for this is that we failed while trying to " ++ + "encode an error message. Check that your locale configured properly." {- Note [Disaster with iconv] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/testsuite/tests/driver/Makefile b/testsuite/tests/driver/Makefile index 4418ba44ac3982c47591f5b99872bb5997056bd1..e12f3a5a989a0777f8f466fa64c664ac0b716440 100644 --- a/testsuite/tests/driver/Makefile +++ b/testsuite/tests/driver/Makefile @@ -551,7 +551,7 @@ T7563: -"$(TEST_HC)" $(TEST_HC_OPTS_NO_RECOMP) -C T7563.hs # Below we set LC_ALL=C to request standard ASCII output in the resulting error -# messagse. Unfortunately, Mac OS X still uses a Unicode encoding even with +# messages. Unfortunately, Mac OS X still uses a Unicode encoding even with # LC_ALL=C, so we expect these tests to fail there. .PHONY: T6037