diff --git a/Distribution/ParseUtils.hs b/Distribution/ParseUtils.hs index dafb7e1e08e54c14d179f1f601e1086295fa8db5..f63f8ffea5a1d2ec9c37be2b1446dbb5f33b2910 100644 --- a/Distribution/ParseUtils.hs +++ b/Distribution/ParseUtils.hs @@ -69,13 +69,12 @@ import Distribution.License import Distribution.Version import Distribution.Package ( parsePackageName ) import Distribution.Compat.ReadP as ReadP hiding (get) -import Distribution.Simple.Utils (intercalate) +import Distribution.Simple.Utils (intercalate, fromUTF8) import Language.Haskell.Extension (Extension) import Text.PrettyPrint.HughesPJ hiding (braces) import Data.Char (isSpace, isUpper, toLower, isAlphaNum, isSymbol, isDigit) -import Data.Maybe ( fromMaybe) -import Data.List (intersperse) +import Data.Maybe (fromMaybe) #ifdef DEBUG import Test.HUnit (Test(..), assertBool, Assertion, runTestTT, Counts, assertEqual) @@ -273,7 +272,7 @@ readFields input = . trimLines . lines . normaliseLineEndings - -- TODO: should decode UTF8 + . fromUTF8 -- attach line number and determine indentation trimLines :: [String] -> [(LineNo, Indent, HasTabs, String)] @@ -516,7 +515,7 @@ parseBuildToolNameQ = parseQuoted parseBuildToolName <++ parseBuildToolName -- like parsePackageName but accepts symbols in components parseBuildToolName :: ReadP r String parseBuildToolName = do ns <- sepBy1 component (ReadP.char '-') - return (concat (intersperse "-" ns)) + return (intercalate "-" ns) where component = do cs <- munch1 (\c -> isAlphaNum c || isSymbol c && c /= '-') if all isDigit cs then pfail else return cs diff --git a/Distribution/Simple/Haddock.hs b/Distribution/Simple/Haddock.hs index 3ac1d4e4a7726c084b9b5b3891ecae44ef8e6866..20c348e8d622fa115f8fb24436f24a69c461b821 100644 --- a/Distribution/Simple/Haddock.hs +++ b/Distribution/Simple/Haddock.hs @@ -190,6 +190,7 @@ haddock pkg_descr lbi suffixes flags = do subtitle | null (synopsis pkg_descr) = "" | otherwise = ": " ++ synopsis pkg_descr withTempFile distPref template $ \prologFileName prologFileHandle -> do + --TODO: what format is this? utf8 or ascii? hPutStrLn prologFileHandle prolog hClose prologFileHandle let targets diff --git a/Distribution/Simple/Register.hs b/Distribution/Simple/Register.hs index 043e735bf979d4854df16c450c5a7cfcf858c4fb..51d5e723f51f3b790d9c622c2fd8f106193e137b 100644 --- a/Distribution/Simple/Register.hs +++ b/Distribution/Simple/Register.hs @@ -157,9 +157,9 @@ register pkg_descr lbi regFlags createDirectoryIfMissingVerbose verbosity True (libdir installDirs) copyFileVerbose verbosity installedPkgConfigFile (libdir installDirs </> "package.conf") - JHC -> when (verbosity >= normal) $ putStrLn "registering for JHC (nothing to do)" - NHC -> when (verbosity >= normal) $ putStrLn "registering nhc98 (nothing to do)" - _ -> die ("only registering with GHC/Hugs/jhc/nhc98 is implemented") + JHC -> notice verbosity "registering for JHC (nothing to do)" + NHC -> notice verbosity "registering nhc98 (nothing to do)" + _ -> die "only registering with GHC/Hugs/jhc/nhc98 is implemented" -- ----------------------------------------------------------------------------- -- The installed package config diff --git a/Distribution/Simple/Utils.hs b/Distribution/Simple/Utils.hs index 30ed5ac96aaace7fc859963c3da70a35dd6d3a33..a0017e5a38e4eced323ddb332eeefccf0ea65ead 100644 --- a/Distribution/Simple/Utils.hs +++ b/Distribution/Simple/Utils.hs @@ -85,6 +85,10 @@ module Distribution.Simple.Utils ( defaultHookedPackageDesc, findHookedPackageDesc, + -- * Unicode + fromUTF8, + toUTF8, + -- * generic utils equating, comparing, @@ -99,7 +103,9 @@ import Control.Monad import Data.List ( nub, unfoldr, isPrefixOf, tails, intersperse ) import Data.Char as Char - ( toLower ) + ( toLower, chr, ord ) +import Data.Bits + ( (.|.), (.&.), shiftL, shiftR ) import System.Directory ( getDirectoryContents, getCurrentDirectory, doesDirectoryExist @@ -158,7 +164,7 @@ die :: String -> IO a die msg = do hFlush stdout pname <- getProgName - hPutStrLn stderr (pname ++ ": " ++ msg) + hPutStrLn stderr $ toUTF8 (pname ++ ": " ++ msg) exitWith (ExitFailure 1) -- | Non fatal conditions that may be indicative of an error or problem. @@ -169,7 +175,7 @@ warn :: Verbosity -> String -> IO () warn verbosity msg = when (verbosity >= normal) $ do hFlush stdout - hPutStrLn stderr ("Warning: " ++ msg) + hPutStrLn stderr ("Warning: " ++ toUTF8 msg) -- | Useful status messages. -- @@ -181,7 +187,7 @@ warn verbosity msg = notice :: Verbosity -> String -> IO () notice verbosity msg = when (verbosity >= normal) $ - putStrLn msg + putStrLn (toUTF8 msg) setupMessage :: Verbosity -> String -> PackageIdentifier -> IO () setupMessage verbosity msg pkgid = @@ -194,7 +200,7 @@ setupMessage verbosity msg pkgid = info :: Verbosity -> String -> IO () info verbosity msg = when (verbosity >= verbose) $ - putStrLn msg + putStrLn (toUTF8 msg) -- | Detailed internal debugging information -- @@ -203,7 +209,7 @@ info verbosity msg = debug :: Verbosity -> String -> IO () debug verbosity msg = when (verbosity >= deafening) $ - putStrLn msg + putStrLn (toUTF8 msg) -- | Perform an IO action, catching any IO exceptions and printing an error -- if one occurs. @@ -212,7 +218,7 @@ chattyTry :: String -- ^ a description of the action we were attempting -> IO () chattyTry desc action = Exception.catch action $ \exception -> - putStrLn $ "Error while " ++ desc ++ ": " ++ show exception + putStrLn $ toUTF8 $ "Error while " ++ desc ++ ": " ++ show exception -- ----------------------------------------------------------------------------- -- Helper functions @@ -587,6 +593,46 @@ findHookedPackageDesc dir = do [f] -> return (Just f) _ -> die ("Multiple files with extension " ++ buildInfoExt) +-- ------------------------------------------------------------ +-- * UTF8 <-> Unicode String Conversions +-- ------------------------------------------------------------ + +-- This is a modification of the UTF8 code from gtk2hs + +fromUTF8 :: String -> String +fromUTF8 [] = [] +fromUTF8 (c:cs) + | c <= '\x7F' = c : fromUTF8 cs + | c <= '\xBF' = replacementChar : fromUTF8 cs + | c <= '\xDF' = twoBytes c cs + | c <= '\xEF' = threeBytes c cs + | otherwise = replacementChar : fromUTF8 cs + where + twoBytes c1 (c2:cs') = chr (((ord c1 .&. 0x1F) `shiftL` 6) .|. + (ord c2 .&. 0x3F)) : fromUTF8 cs' + twoBytes _ _ = replacementChar : [] + + threeBytes c1 (c2:c3:cs') = chr (((ord c1 .&. 0x0F) `shiftL` 12) .|. + ((ord c2 .&. 0x3F) `shiftL` 6) .|. + (ord c3 .&. 0x3F)) : fromUTF8 cs' + threeBytes _ _ = replacementChar : [] + + replacementChar = '\xfffd' + +toUTF8 :: String -> String +toUTF8 [] = [] +toUTF8 (c:cs) + | c <= '\x07F' = c + : toUTF8 cs + | c <= '\x7FF' = chr (0xC0 .|. (w `shiftR` 6)) + : chr (0x80 .|. (w .&. 0x3F)) + : toUTF8 cs + | otherwise = chr (0xE0 .|. (w `shiftR` 12)) + : chr (0x80 .|. ((w `shiftR` 6) .&. 0x3F)) + : chr (0x80 .|. (w .&. 0x3F)) + : toUTF8 cs + where w = ord c + -- ------------------------------------------------------------ -- * Common utils -- ------------------------------------------------------------