diff --git a/libraries/ghc-internal/tools/ucd2haskell/README.md b/libraries/ghc-internal/tools/ucd2haskell/README.md index d5a7e720d4cbb58721a6241b0efd1ec81178dceb..292b143b4aa1a2457a29feb882db00669db57df4 100644 --- a/libraries/ghc-internal/tools/ucd2haskell/README.md +++ b/libraries/ghc-internal/tools/ucd2haskell/README.md @@ -1,6 +1,6 @@ # Generating GHC’s Unicode modules -`GHC.Unicode.Internal.*` modules are generated with the internal tool `ucd2haskell`. +`GHC.Internal.Unicode.*` modules are generated with the internal tool `ucd2haskell`. ```bash cd ucd2haskell @@ -13,7 +13,7 @@ cd ucd2haskell 2. _Comment_ the line in `ucd.sh` with `VERIFY_CHECKSUM=y`. 3. Run `./ucd.sh download`. 4. Update the checksums in `ucd.sh` and _uncomment_ `VERIFY_CHECKSUM=y`. -5. Run `./ucd.sh generate`. This will generate the `GHC.Unicode.Internal.*` +5. Run `./ucd.sh generate`. This will generate the `GHC.Internal.Unicode.*` modules. 6. Check and update the output of the tests `base/tests/unicodeXXX.hs`. 7. Compare with Python (see hereinafter) and fix any error. @@ -32,7 +32,7 @@ __Warning:__ A Python version with the _exact same Unicode version_ is required. Check the properties of all the characters. ```bash -ghc -O2 tests/export_all_chars.hs +ghc -O2 tests/export_all_chars.hs ./tests/export_all_chars > tests/all_chars.csv python3 tests/check_all_chars.py tests/all_chars.csv ``` diff --git a/libraries/ghc-internal/tools/ucd2haskell/exe/Parser/Text.hs b/libraries/ghc-internal/tools/ucd2haskell/exe/Parser/Text.hs index 9e5e3f104060d4b754828b5c8caede7278fe65bb..7407d00015c392d1c091a07817c35640fb9e77fe 100644 --- a/libraries/ghc-internal/tools/ucd2haskell/exe/Parser/Text.hs +++ b/libraries/ghc-internal/tools/ucd2haskell/exe/Parser/Text.hs @@ -21,7 +21,6 @@ module Parser.Text (genModules) where import Control.Exception (catch, IOException) import Control.Monad (void) -import Control.Monad.IO.Class (MonadIO(liftIO)) import Data.Bits (Bits(..)) import Data.Word (Word8) import Data.Char (chr, ord, isSpace) @@ -31,19 +30,20 @@ import Data.List (intersperse, unfoldr) import Data.List.Split (splitWhen) import Numeric (showHex) import Streamly.Data.Fold (Fold) -import Streamly.Prelude (IsStream, SerialT) import System.Directory (createDirectoryIfMissing) import System.Environment (getEnv) import System.FilePath ((</>), (<.>)) -- import qualified Data.Set as Set -import qualified Streamly.Prelude as Stream +import Streamly.Data.Stream (Stream) +import qualified Streamly.Data.Stream.Prelude as Stream import qualified Streamly.Data.Fold as Fold import qualified Streamly.Internal.Data.Fold as Fold import qualified Streamly.Data.Unfold as Unfold import qualified Streamly.FileSystem.Handle as Handle -import qualified System.IO as Sys import qualified Streamly.Unicode.Stream as Unicode +import qualified Streamly.Internal.Unicode.Stream as Unicode +import qualified System.IO as Sys import Prelude hiding (pred) @@ -271,7 +271,7 @@ genUnicodeVersion outdir = do (\(_ :: IOException) -> return "<unknown>") Stream.fold f (Stream.fromList (body version)) where - moduleName = "GHC.Unicode.Internal.Version" + moduleName = "GHC.Internal.Unicode.Version" f = moduleFileEmitter Nothing outdir (moduleName, \_ -> Fold.foldMap (<> "\n")) body :: String -> [String] @@ -284,12 +284,12 @@ genUnicodeVersion outdir = do , "(unicodeVersion)" , "where" , "" - , "import {-# SOURCE #-} Data.Version" + , "import {-# SOURCE #-} GHC.Internal.Data.Version" , "" , "-- | Version of Unicode standard used by @base@:" , "-- [" <> version <> "](https://www.unicode.org/versions/Unicode" <> version <> "/)." , "--" - , "-- @since 4.15.0.0" + , "-- @since base-4.15.0.0" , "unicodeVersion :: Version" , "unicodeVersion = makeVersion [" <> mkVersion version <> "]" ] mkVersion = foldr (\c acc -> case c of {'.' -> ',':' ':acc; _ -> c:acc}) mempty @@ -331,8 +331,8 @@ genGeneralCategoryModule moduleName = , "(generalCategory)" , "where" , "" - , "import GHC.Base (Char, Int, Ord(..), ord)" - , "import GHC.Unicode.Internal.Bits (lookupIntN)" + , "import GHC.Internal.Base (Char, Int, Ord(..), ord)" + , "import GHC.Internal.Unicode.Bits (lookupIntN)" , "" , genEnumBitmap "generalCategory" Cn (reverse acc) ] @@ -415,7 +415,7 @@ genDecomposableModule moduleName dtype = , "where" , "" , "import Data.Char (ord)" - , "import GHC.Unicode.Internal.Bits (lookupBit64)" + , "import GHC.Internal.Unicode.Bits (lookupBit64)" , "" , genBitmap "isDecomposable" (reverse st) ] @@ -443,7 +443,7 @@ genCombiningClassModule moduleName = , "where" , "" , "import Data.Char (ord)" - , "import GHC.Unicode.Internal.Bits (lookupBit64)" + , "import GHC.Internal.Unicode.Bits (lookupBit64)" , "" , "combiningClass :: Char -> Int" , unlines (reverse st1) @@ -566,8 +566,8 @@ genCompositionsModule moduleName compExclu non0CC = , "(compose, composeStarters, isSecondStarter)" , "where" , "" - , "import GHC.Base (Char, ord)" - , "import GHC.Unicode.Internal.Bits (lookupBit64)" + , "import GHC.Internal.Base (Char, ord)" + , "import GHC.Internal.Unicode.Bits (lookupBit64)" , "" ] @@ -616,7 +616,7 @@ genSimpleCaseMappingModule moduleName funcName field = , "(" <> funcName <> ")" , "where" , "" - , "import GHC.Base (Char)" + , "import GHC.Internal.Base (Char)" , "" ] genSign = @@ -670,8 +670,8 @@ genCorePropertiesModule moduleName isProp = , "(" <> unwords (intersperse "," (map prop2FuncName exports)) <> ")" , "where" , "" - , "import GHC.Base (Bool, Char, Ord(..), (&&), ord)" - , "import GHC.Unicode.Internal.Bits (lookupBit64)" + , "import GHC.Internal.Base (Bool, Char, Ord(..), (&&), ord)" + , "import GHC.Internal.Unicode.Bits (lookupBit64)" , "" ] @@ -818,7 +818,7 @@ parsePropertyLine ln isDivider :: String -> Bool isDivider x = x == "# ================================================" -parsePropertyLines :: (IsStream t, Monad m) => t m String -> t m PropertyLine +parsePropertyLines :: (Monad m) => Stream m String -> Stream m PropertyLine parsePropertyLines = Stream.splitOn isDivider $ Fold.lmap parsePropertyLine @@ -843,11 +843,11 @@ Parse ranges according to https://www.unicode.org/reports/tr44/#Code_Point_Range __Note:__ this does /not/ fill missing char entries, i.e. entries with no explicit entry nor within a range. -} -parseUnicodeDataLines :: forall t m. (IsStream t, Monad m) => t m String -> t m DetailedChar +parseUnicodeDataLines :: forall m. (Monad m) => Stream m String -> Stream m DetailedChar parseUnicodeDataLines = Stream.unfoldMany (Unfold.unfoldr unitToRange) . Stream.foldMany ( Fold.lmap parseDetailedChar - $ Fold.mkFold_ step initial ) + $ Fold.foldt' step initial id) where @@ -913,19 +913,14 @@ parseDetailedChar line = case splitWhen (== ';') line of -- Generation ------------------------------------------------------------------------------- -readLinesFromFile :: String -> SerialT IO String +readLinesFromFile :: String -> Stream IO String readLinesFromFile file = withFile file Sys.ReadMode - $ \h -> - Stream.unfold Handle.read h & Unicode.decodeUtf8 - & unicodeLines Fold.toList + $ \h -> Handle.read h & Unicode.decodeUtf8 & Unicode.lines Fold.toList where - - unicodeLines = Stream.splitOnSuffix (== '\n') - withFile file_ mode = - Stream.bracket (liftIO $ Sys.openFile file_ mode) (liftIO . Sys.hClose) + Stream.bracketIO (Sys.openFile file_ mode) (Sys.hClose) moduleToFileName :: String -> String @@ -995,7 +990,7 @@ testOutputFileEmitter outdir (name, fldGen) = Fold.rmapM action fldGen runGenerator :: FilePath -> FilePath - -> (SerialT IO String -> SerialT IO a) + -> (Stream IO String -> Stream IO a) -> FilePath -> GeneratorRecipe a -> IO () @@ -1067,64 +1062,64 @@ genModules indir outdir props = do -- [NOTE] Disabled generator -- propList = - -- ("GHC.Unicode.Internal.Char.PropList" + -- ("GHC.Internal.Unicode.Char.PropList" -- , (`genCorePropertiesModule` (`elem` props))) derivedCoreProperties = - ("GHC.Unicode.Internal.Char.DerivedCoreProperties" + ("GHC.Internal.Unicode.Char.DerivedCoreProperties" , (`genCorePropertiesModule` (`elem` props))) -- [NOTE] Disabled generator -- compositions exc non0 = - -- ( "GHC.Unicode.Internal.Char.UnicodeData.Compositions" + -- ( "GHC.Internal.Unicode.Char.UnicodeData.Compositions" -- , \m -> genCompositionsModule m exc non0) -- [NOTE] Disabled generator -- combiningClass = - -- ( "GHC.Unicode.Internal.Char.UnicodeData.CombiningClass" + -- ( "GHC.Internal.Unicode.Char.UnicodeData.CombiningClass" -- , genCombiningClassModule) -- [NOTE] Disabled generator -- decomposable = - -- ( "GHC.Unicode.Internal.Char.UnicodeData.Decomposable" + -- ( "GHC.Internal.Unicode.Char.UnicodeData.Decomposable" -- , (`genDecomposableModule` Canonical)) -- [NOTE] Disabled generator -- decomposableK = - -- ( "GHC.Unicode.Internal.Char.UnicodeData.DecomposableK" + -- ( "GHC.Internal.Unicode.Char.UnicodeData.DecomposableK" -- , (`genDecomposableModule` Kompat)) -- [NOTE] Disabled generator -- decompositions = - -- ( "GHC.Unicode.Internal.Char.UnicodeData.Decompositions" + -- ( "GHC.Internal.Unicode.Char.UnicodeData.Decompositions" -- , \m -> genDecomposeDefModule m [] [] Canonical (const True)) -- [NOTE] Disabled generator -- decompositionsK2 = - -- ( "GHC.Unicode.Internal.Char.UnicodeData.DecompositionsK2" + -- ( "GHC.Internal.Unicode.Char.UnicodeData.DecompositionsK2" -- , \m -> genDecomposeDefModule m [] [] Kompat (>= 60000)) -- [NOTE] Disabled generator -- decompositionsK = -- let pre = ["import qualified " <> fst decompositionsK2 <> " as DK2", ""] -- post = ["decompose c = DK2.decompose c"] - -- in ( "GHC.Unicode.Internal.Char.UnicodeData.DecompositionsK" + -- in ( "GHC.Internal.Unicode.Char.UnicodeData.DecompositionsK" -- , \m -> genDecomposeDefModule m pre post Kompat (< 60000)) generalCategory = - ( "GHC.Unicode.Internal.Char.UnicodeData.GeneralCategory" + ( "GHC.Internal.Unicode.Char.UnicodeData.GeneralCategory" , genGeneralCategoryModule) simpleUpperCaseMapping = - ( "GHC.Unicode.Internal.Char.UnicodeData.SimpleUpperCaseMapping" + ( "GHC.Internal.Unicode.Char.UnicodeData.SimpleUpperCaseMapping" , \m -> genSimpleCaseMappingModule m "toSimpleUpperCase" _simpleUppercaseMapping) simpleLowerCaseMapping = - ( "GHC.Unicode.Internal.Char.UnicodeData.SimpleLowerCaseMapping" + ( "GHC.Internal.Unicode.Char.UnicodeData.SimpleLowerCaseMapping" , \m -> genSimpleCaseMappingModule m "toSimpleLowerCase" _simpleLowercaseMapping) simpleTitleCaseMapping = - ( "GHC.Unicode.Internal.Char.UnicodeData.SimpleTitleCaseMapping" + ( "GHC.Internal.Unicode.Char.UnicodeData.SimpleTitleCaseMapping" , \m -> genSimpleCaseMappingModule m "toSimpleTitleCase" _simpleTitlecaseMapping) -- unicode002Test = diff --git a/libraries/ghc-internal/tools/ucd2haskell/ucd.sh b/libraries/ghc-internal/tools/ucd2haskell/ucd.sh index 8b49a998cb11af7b62666659e1988d1b8c059d2b..504ba8d97f5f200e1553500e9db9a18a1f8c7d1b 100755 --- a/libraries/ghc-internal/tools/ucd2haskell/ucd.sh +++ b/libraries/ghc-internal/tools/ucd2haskell/ucd.sh @@ -52,7 +52,7 @@ download_files() { done } -GHC_MODULE_PATH=$(realpath "$SCRIPT_DIR/../../") +GHC_MODULE_PATH=$(realpath "$SCRIPT_DIR/../../src") # Generate the Haskell files. run_generator() { @@ -63,7 +63,7 @@ run_generator() { --core-prop Uppercase \ --core-prop Lowercase # [NOTE] disabled generator - # --core-prop Alphabetic + # --core-prop Alphabetic # --core-prop White_Space \ # --core-prop ID_Start \ # --core-prop ID_Continue \ diff --git a/libraries/ghc-internal/tools/ucd2haskell/ucd2haskell.cabal b/libraries/ghc-internal/tools/ucd2haskell/ucd2haskell.cabal index eb5600f49a714fe427eb62f05b282225aa6a7aa1..8b189480aed48f92030c5d1d39a253e673c6e3ec 100644 --- a/libraries/ghc-internal/tools/ucd2haskell/ucd2haskell.cabal +++ b/libraries/ghc-internal/tools/ucd2haskell/ucd2haskell.cabal @@ -52,10 +52,11 @@ executable ucd2haskell main-is: UCD2Haskell.hs other-modules: Parser.Text build-depends: - base >= 4.7 && < 4.18 - , streamly >= 0.8 && < 0.9 + base >= 4.7 && < 4.20 + , streamly-core >= 0.2.2 && < 0.3 + , streamly >= 0.10 && < 0.11 , split >= 0.2.3 && < 0.3 , getopt-generics >= 0.13 && < 0.14 , containers >= 0.5 && < 0.7 - , directory >= 1.3.6 && < 1.3.7 + , directory >= 1.3.6 && < 1.3.8 , filepath >= 1.4.2 && < 1.5