diff --git a/libraries/base/GHC/IO/Encoding.hs b/libraries/base/GHC/IO/Encoding.hs index 014b61b8b02671f0ac1e7b981afef94445c34e47..76c7f55e02f66364b523c378e73a841c5812c5ec 100644 --- a/libraries/base/GHC/IO/Encoding.hs +++ b/libraries/base/GHC/IO/Encoding.hs @@ -235,14 +235,9 @@ mkTextEncoding e = case mb_coding_failure_mode of _ -> Nothing mkTextEncoding' :: CodingFailureMode -> String -> IO TextEncoding -mkTextEncoding' cfm enc - -- First, specifically match on ASCII encodings directly using - -- several possible aliases (specified by RFC 1345 & co), which - -- allows us to handle ASCII conversions without iconv at all (see - -- trac #10298). - | any (== enc) ansiEncNames = return (UTF8.mkUTF8 cfm) - -- Otherwise, handle other encoding needs via iconv. - | otherwise = case [toUpper c | c <- enc, c /= '-'] of +mkTextEncoding' cfm enc = + case [toUpper c | c <- enc, c /= '-'] of + -- UTF-8 and friends we can handle ourselves "UTF8" -> return $ UTF8.mkUTF8 cfm "UTF16" -> return $ UTF16.mkUTF16 cfm "UTF16LE" -> return $ UTF16.mkUTF16le cfm @@ -254,13 +249,31 @@ mkTextEncoding' cfm enc 'C':'P':n | [(cp,"")] <- reads n -> return $ CodePage.mkCodePageEncoding cfm cp _ -> unknownEncodingErr (enc ++ codingFailureModeSuffix cfm) #else - _ -> Iconv.mkIconvEncoding cfm enc -#endif + -- Otherwise, handle other encoding needs via iconv. + + -- Unfortunately there is no good way to determine whether iconv is actually + -- functional without telling it to do something. + _ -> do res <- Iconv.mkIconvEncoding cfm enc + let isAscii = any (== enc) ansiEncNames + case res of + Just e -> return e + -- At this point we know that we can't count on iconv to work + -- (see, for instance, Trac #10298). However, we still want to do + -- what we can to work with what we have. For instance, ASCII is + -- easy. We match on ASCII encodings directly using several + -- possible aliases (specified by RFC 1345 & Co) and for this use + -- the 'char8' encoding + Nothing + | isAscii -> return char8 + | otherwise -> + unknownEncodingErr (enc ++ codingFailureModeSuffix cfm) where ansiEncNames = -- ASCII aliases [ "ANSI_X3.4-1968", "iso-ir-6", "ANSI_X3.4-1986", "ISO_646.irv:1991" , "US-ASCII", "us", "IBM367", "cp367", "csASCII", "ASCII", "ISO646-US" ] +#endif + latin1_encode :: CharBuffer -> Buffer Word8 -> IO (CharBuffer, Buffer Word8) latin1_encode input output = fmap (\(_why,input',output') -> (input',output')) $ Latin1.latin1_encode input output -- unchecked, used for char8 diff --git a/libraries/base/GHC/IO/Encoding/Iconv.hs b/libraries/base/GHC/IO/Encoding/Iconv.hs index 89ca71e084add820e5b33359b65ae970f2f53c7e..061bd60c209a9868a846f0497160c243e7ee12dc 100644 --- a/libraries/base/GHC/IO/Encoding/Iconv.hs +++ b/libraries/base/GHC/IO/Encoding/Iconv.hs @@ -34,9 +34,10 @@ import GHC.Base () -- For build ordering #else import Foreign -import Foreign.C +import Foreign.C hiding (charIsRepresentable) import Data.Maybe import GHC.Base +import GHC.Foreign (charIsRepresentable) import GHC.IO.Buffer import GHC.IO.Encoding.Failure import GHC.IO.Encoding.Types @@ -96,15 +97,27 @@ char_shift :: Int char_shift | charSize == 2 = 1 | otherwise = 2 -iconvEncoding :: String -> IO TextEncoding +iconvEncoding :: String -> IO (Maybe TextEncoding) iconvEncoding = mkIconvEncoding ErrorOnCodingFailure -mkIconvEncoding :: CodingFailureMode -> String -> IO TextEncoding +-- | Construct an iconv-based 'TextEncoding' for the given character set and +-- 'CodingFailureMode'. +-- +-- As iconv is missing in some minimal environments (e.g. #10298), this +-- checks to ensure that iconv is working properly before returning the +-- encoding, returning 'Nothing' if not. +mkIconvEncoding :: CodingFailureMode -> String -> IO (Maybe TextEncoding) mkIconvEncoding cfm charset = do - return (TextEncoding { - textEncodingName = charset, - mkTextDecoder = newIConv raw_charset (haskellChar ++ suffix) (recoverDecode cfm) iconvDecode, - mkTextEncoder = newIConv haskellChar charset (recoverEncode cfm) iconvEncode}) + let enc = TextEncoding { + textEncodingName = charset, + mkTextDecoder = newIConv raw_charset (haskellChar ++ suffix) + (recoverDecode cfm) iconvDecode, + mkTextEncoder = newIConv haskellChar charset + (recoverEncode cfm) iconvEncode} + good <- charIsRepresentable enc 'a' + return $ if good + then Just enc + else Nothing where -- An annoying feature of GNU iconv is that the //PREFIXES only take -- effect when they appear on the tocode parameter to iconv_open: diff --git a/libraries/base/GHC/TopHandler.hs b/libraries/base/GHC/TopHandler.hs index e725196cdff1eea3a0ec1bb59c0cad731618c713..05c905f7c10bb7ec9001c90c1b17b9cd5a9ec52c 100644 --- a/libraries/base/GHC/TopHandler.hs +++ b/libraries/base/GHC/TopHandler.hs @@ -176,7 +176,12 @@ disasterHandler exit _ = withCAString "%s" $ \fmt -> withCAString msgStr $ \msg -> errorBelch fmt msg >> exit 1 - where msgStr = "encountered an exception while trying to report an exception" + where + msgStr = + "encountered an exception while trying to report an exception." ++ + "One possible reason for this is that we failed while trying to " ++ + "encode an error message. Check that your locale is configured " ++ + "properly." {- Note [Disaster with iconv] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/testsuite/tests/driver/Makefile b/testsuite/tests/driver/Makefile index d5ce683842de1fbfdf3e8e83f1965e14872bd6ab..dc1238c005ae1e8bd1a1916d1c915e65f31b9783 100644 --- a/testsuite/tests/driver/Makefile +++ b/testsuite/tests/driver/Makefile @@ -550,6 +550,10 @@ T7130: T7563: -"$(TEST_HC)" $(TEST_HC_OPTS_NO_RECOMP) -C T7563.hs +# Below we set LC_ALL=C to request standard ASCII output in the resulting error +# messages. Unfortunately, Mac OS X still uses a Unicode encoding even with +# LC_ALL=C, so we expect these tests to fail there. + .PHONY: T6037 T6037: -LC_ALL=C "$(TEST_HC)" $(TEST_HC_OPTS_NO_RECOMP) -c T6037.hs diff --git a/testsuite/tests/driver/T2507.stderr b/testsuite/tests/driver/T2507.stderr index eb0878f682d78e790f5bea23c5ccb73ab2587000..1a6e6f38f12301d81af6cbf12441e9fad0cc611a 100644 --- a/testsuite/tests/driver/T2507.stderr +++ b/testsuite/tests/driver/T2507.stderr @@ -1,5 +1,5 @@ T2507.hs:5:7: error: - Couldn't match expected type ‘Int’ with actual type ‘()’ + Couldn't match expected type `Int' with actual type `()' In the expression: () - In an equation for ‘foo’: foo = () + In an equation for `foo': foo = () diff --git a/testsuite/tests/driver/T8959a.stderr b/testsuite/tests/driver/T8959a.stderr index defb34b853e15c49bb179ef86f00bcdc52f0d1c4..476b9ee0b1865263e0b0c38fe398c899985ae885 100644 --- a/testsuite/tests/driver/T8959a.stderr +++ b/testsuite/tests/driver/T8959a.stderr @@ -1,5 +1,5 @@ T8959a.hs:5:7: error: - Couldn't match expected type ‘Int -> Int’ with actual type ‘()’ + Couldn't match expected type `Int -> Int' with actual type `()' In the expression: () - In an equation for ‘foo’: foo = () + In an equation for `foo': foo = ()