From adb1fe42c00ceeddf6a4412550d8e34ac1b49ce9 Mon Sep 17 00:00:00 2001
From: "Serge S. Gulin" <gulin.serge@gmail.com>
Date: Sat, 25 May 2024 22:57:39 +0300
Subject: [PATCH] Unicode: make ucd2haskell build-able again

ucd2haskell tool used streamly library which version in cabal was out of date. It is updated to the latest version at hackage with deprecated parts rewritten.

Also following fixes were applied to existing code in suppose that from its last run the code structure was changed and now it was required to be up to date with actual folder structures:
1. Ghc module path environment got a suffix with `src`.
2. Generated code got
2.1 `GHC.Internal` prefix for `Data.*`.
2.2 `GHC.Unicode.Internal` swapped on `GHC.Internal.Unicode` according to actual structure.
---
 .../ghc-internal/tools/ucd2haskell/README.md  |  6 +-
 .../tools/ucd2haskell/exe/Parser/Text.hs      | 77 +++++++++----------
 .../ghc-internal/tools/ucd2haskell/ucd.sh     |  4 +-
 .../tools/ucd2haskell/ucd2haskell.cabal       |  7 +-
 4 files changed, 45 insertions(+), 49 deletions(-)

diff --git a/libraries/ghc-internal/tools/ucd2haskell/README.md b/libraries/ghc-internal/tools/ucd2haskell/README.md
index d5a7e720d4cb..292b143b4aa1 100644
--- a/libraries/ghc-internal/tools/ucd2haskell/README.md
+++ b/libraries/ghc-internal/tools/ucd2haskell/README.md
@@ -1,6 +1,6 @@
 # Generating GHC’s Unicode modules
 
-`GHC.Unicode.Internal.*` modules are generated with the internal tool `ucd2haskell`.
+`GHC.Internal.Unicode.*` modules are generated with the internal tool `ucd2haskell`.
 
 ```bash
 cd ucd2haskell
@@ -13,7 +13,7 @@ cd ucd2haskell
 2. _Comment_ the line in `ucd.sh` with `VERIFY_CHECKSUM=y`.
 3. Run `./ucd.sh download`.
 4. Update the checksums in `ucd.sh` and _uncomment_ `VERIFY_CHECKSUM=y`.
-5. Run `./ucd.sh generate`. This will generate the `GHC.Unicode.Internal.*` 
+5. Run `./ucd.sh generate`. This will generate the `GHC.Internal.Unicode.*`
    modules.
 6. Check and update the output of the tests `base/tests/unicodeXXX.hs`.
 7. Compare with Python (see hereinafter) and fix any error.
@@ -32,7 +32,7 @@ __Warning:__ A Python version with the _exact same Unicode version_ is required.
 Check the properties of all the characters.
 
 ```bash
-ghc -O2 tests/export_all_chars.hs 
+ghc -O2 tests/export_all_chars.hs
 ./tests/export_all_chars > tests/all_chars.csv
 python3 tests/check_all_chars.py tests/all_chars.csv
 ```
diff --git a/libraries/ghc-internal/tools/ucd2haskell/exe/Parser/Text.hs b/libraries/ghc-internal/tools/ucd2haskell/exe/Parser/Text.hs
index 9e5e3f104060..7407d00015c3 100644
--- a/libraries/ghc-internal/tools/ucd2haskell/exe/Parser/Text.hs
+++ b/libraries/ghc-internal/tools/ucd2haskell/exe/Parser/Text.hs
@@ -21,7 +21,6 @@ module Parser.Text (genModules) where
 
 import Control.Exception (catch, IOException)
 import Control.Monad (void)
-import Control.Monad.IO.Class (MonadIO(liftIO))
 import Data.Bits (Bits(..))
 import Data.Word (Word8)
 import Data.Char (chr, ord, isSpace)
@@ -31,19 +30,20 @@ import Data.List (intersperse, unfoldr)
 import Data.List.Split (splitWhen)
 import Numeric (showHex)
 import Streamly.Data.Fold (Fold)
-import Streamly.Prelude (IsStream, SerialT)
 import System.Directory (createDirectoryIfMissing)
 import System.Environment (getEnv)
 import System.FilePath ((</>), (<.>))
 
 -- import qualified Data.Set as Set
-import qualified Streamly.Prelude as Stream
+import Streamly.Data.Stream (Stream)
+import qualified Streamly.Data.Stream.Prelude as Stream
 import qualified Streamly.Data.Fold as Fold
 import qualified Streamly.Internal.Data.Fold as Fold
 import qualified Streamly.Data.Unfold as Unfold
 import qualified Streamly.FileSystem.Handle as Handle
-import qualified System.IO as Sys
 import qualified Streamly.Unicode.Stream as Unicode
+import qualified Streamly.Internal.Unicode.Stream as Unicode
+import qualified System.IO as Sys
 
 import Prelude hiding (pred)
 
@@ -271,7 +271,7 @@ genUnicodeVersion outdir = do
               (\(_ :: IOException) -> return "<unknown>")
   Stream.fold f (Stream.fromList (body version))
   where
-    moduleName = "GHC.Unicode.Internal.Version"
+    moduleName = "GHC.Internal.Unicode.Version"
     f = moduleFileEmitter Nothing outdir
           (moduleName, \_ -> Fold.foldMap (<> "\n"))
     body :: String -> [String]
@@ -284,12 +284,12 @@ genUnicodeVersion outdir = do
       , "(unicodeVersion)"
       , "where"
       , ""
-      , "import {-# SOURCE #-} Data.Version"
+      , "import {-# SOURCE #-} GHC.Internal.Data.Version"
       , ""
       , "-- | Version of Unicode standard used by @base@:"
       , "-- [" <> version <> "](https://www.unicode.org/versions/Unicode" <> version <> "/)."
       , "--"
-      , "-- @since 4.15.0.0"
+      , "-- @since base-4.15.0.0"
       , "unicodeVersion :: Version"
       , "unicodeVersion = makeVersion [" <> mkVersion version <> "]" ]
     mkVersion = foldr (\c acc -> case c of {'.' -> ',':' ':acc; _ -> c:acc}) mempty
@@ -331,8 +331,8 @@ genGeneralCategoryModule moduleName =
         , "(generalCategory)"
         , "where"
         , ""
-        , "import GHC.Base (Char, Int, Ord(..), ord)"
-        , "import GHC.Unicode.Internal.Bits (lookupIntN)"
+        , "import GHC.Internal.Base (Char, Int, Ord(..), ord)"
+        , "import GHC.Internal.Unicode.Bits (lookupIntN)"
         , ""
         , genEnumBitmap "generalCategory" Cn (reverse acc)
         ]
@@ -415,7 +415,7 @@ genDecomposableModule moduleName dtype =
             , "where"
             , ""
             , "import Data.Char (ord)"
-            , "import GHC.Unicode.Internal.Bits (lookupBit64)"
+            , "import GHC.Internal.Unicode.Bits (lookupBit64)"
             , ""
             , genBitmap "isDecomposable" (reverse st)
             ]
@@ -443,7 +443,7 @@ genCombiningClassModule moduleName =
             , "where"
             , ""
             , "import Data.Char (ord)"
-            , "import GHC.Unicode.Internal.Bits (lookupBit64)"
+            , "import GHC.Internal.Unicode.Bits (lookupBit64)"
             , ""
             , "combiningClass :: Char -> Int"
             , unlines (reverse st1)
@@ -566,8 +566,8 @@ genCompositionsModule moduleName compExclu non0CC =
         , "(compose, composeStarters, isSecondStarter)"
         , "where"
         , ""
-        , "import GHC.Base (Char, ord)"
-        , "import GHC.Unicode.Internal.Bits (lookupBit64)"
+        , "import GHC.Internal.Base (Char, ord)"
+        , "import GHC.Internal.Unicode.Bits (lookupBit64)"
         , ""
         ]
 
@@ -616,7 +616,7 @@ genSimpleCaseMappingModule moduleName funcName field =
         , "(" <> funcName <> ")"
         , "where"
         , ""
-        , "import GHC.Base (Char)"
+        , "import GHC.Internal.Base (Char)"
         , ""
         ]
     genSign =
@@ -670,8 +670,8 @@ genCorePropertiesModule moduleName isProp =
         , "(" <> unwords (intersperse "," (map prop2FuncName exports)) <> ")"
         , "where"
         , ""
-        , "import GHC.Base (Bool, Char, Ord(..), (&&), ord)"
-        , "import GHC.Unicode.Internal.Bits (lookupBit64)"
+        , "import GHC.Internal.Base (Bool, Char, Ord(..), (&&), ord)"
+        , "import GHC.Internal.Unicode.Bits (lookupBit64)"
         , ""
         ]
 
@@ -818,7 +818,7 @@ parsePropertyLine ln
 isDivider :: String -> Bool
 isDivider x = x == "# ================================================"
 
-parsePropertyLines :: (IsStream t, Monad m) => t m String -> t m PropertyLine
+parsePropertyLines :: (Monad m) => Stream m String -> Stream m PropertyLine
 parsePropertyLines =
     Stream.splitOn isDivider
         $ Fold.lmap parsePropertyLine
@@ -843,11 +843,11 @@ Parse ranges according to https://www.unicode.org/reports/tr44/#Code_Point_Range
 __Note:__ this does /not/ fill missing char entries,
 i.e. entries with no explicit entry nor within a range.
 -}
-parseUnicodeDataLines :: forall t m. (IsStream t, Monad m) => t m String -> t m DetailedChar
+parseUnicodeDataLines :: forall m. (Monad m) => Stream m String -> Stream m DetailedChar
 parseUnicodeDataLines
     = Stream.unfoldMany (Unfold.unfoldr unitToRange)
     . Stream.foldMany ( Fold.lmap parseDetailedChar
-                      $ Fold.mkFold_ step initial )
+                      $ Fold.foldt' step initial id)
 
     where
 
@@ -913,19 +913,14 @@ parseDetailedChar line = case splitWhen (== ';') line of
 -- Generation
 -------------------------------------------------------------------------------
 
-readLinesFromFile :: String -> SerialT IO String
+readLinesFromFile :: String -> Stream IO String
 readLinesFromFile file =
     withFile file Sys.ReadMode
-        $ \h ->
-              Stream.unfold Handle.read h & Unicode.decodeUtf8
-                  & unicodeLines Fold.toList
+        $ \h -> Handle.read h & Unicode.decodeUtf8 & Unicode.lines Fold.toList
 
     where
-
-    unicodeLines = Stream.splitOnSuffix (== '\n')
-
     withFile file_ mode =
-        Stream.bracket (liftIO $ Sys.openFile file_ mode) (liftIO . Sys.hClose)
+        Stream.bracketIO (Sys.openFile file_ mode) (Sys.hClose)
 
 
 moduleToFileName :: String -> String
@@ -995,7 +990,7 @@ testOutputFileEmitter outdir (name, fldGen) = Fold.rmapM action fldGen
 runGenerator ::
        FilePath
     -> FilePath
-    -> (SerialT IO String -> SerialT IO a)
+    -> (Stream IO String -> Stream IO a)
     -> FilePath
     -> GeneratorRecipe a
     -> IO ()
@@ -1067,64 +1062,64 @@ genModules indir outdir props = do
 
     -- [NOTE] Disabled generator
     -- propList =
-    --     ("GHC.Unicode.Internal.Char.PropList"
+    --     ("GHC.Internal.Unicode.Char.PropList"
     --     , (`genCorePropertiesModule` (`elem` props)))
 
     derivedCoreProperties =
-        ("GHC.Unicode.Internal.Char.DerivedCoreProperties"
+        ("GHC.Internal.Unicode.Char.DerivedCoreProperties"
         , (`genCorePropertiesModule` (`elem` props)))
 
     -- [NOTE] Disabled generator
     -- compositions exc non0 =
-    --     ( "GHC.Unicode.Internal.Char.UnicodeData.Compositions"
+    --     ( "GHC.Internal.Unicode.Char.UnicodeData.Compositions"
     --     , \m -> genCompositionsModule m exc non0)
 
     -- [NOTE] Disabled generator
     -- combiningClass =
-    --     ( "GHC.Unicode.Internal.Char.UnicodeData.CombiningClass"
+    --     ( "GHC.Internal.Unicode.Char.UnicodeData.CombiningClass"
     --     , genCombiningClassModule)
 
     -- [NOTE] Disabled generator
     -- decomposable =
-    --     ( "GHC.Unicode.Internal.Char.UnicodeData.Decomposable"
+    --     ( "GHC.Internal.Unicode.Char.UnicodeData.Decomposable"
     --     , (`genDecomposableModule` Canonical))
 
     -- [NOTE] Disabled generator
     -- decomposableK =
-    --     ( "GHC.Unicode.Internal.Char.UnicodeData.DecomposableK"
+    --     ( "GHC.Internal.Unicode.Char.UnicodeData.DecomposableK"
     --     , (`genDecomposableModule` Kompat))
 
     -- [NOTE] Disabled generator
     -- decompositions =
-    --     ( "GHC.Unicode.Internal.Char.UnicodeData.Decompositions"
+    --     ( "GHC.Internal.Unicode.Char.UnicodeData.Decompositions"
     --     , \m -> genDecomposeDefModule m [] [] Canonical (const True))
 
     -- [NOTE] Disabled generator
     -- decompositionsK2 =
-    --     ( "GHC.Unicode.Internal.Char.UnicodeData.DecompositionsK2"
+    --     ( "GHC.Internal.Unicode.Char.UnicodeData.DecompositionsK2"
     --     , \m -> genDecomposeDefModule m [] [] Kompat (>= 60000))
 
     -- [NOTE] Disabled generator
     -- decompositionsK =
     --     let pre = ["import qualified " <> fst decompositionsK2 <> " as DK2", ""]
     --         post = ["decompose c = DK2.decompose c"]
-    --      in ( "GHC.Unicode.Internal.Char.UnicodeData.DecompositionsK"
+    --      in ( "GHC.Internal.Unicode.Char.UnicodeData.DecompositionsK"
     --         , \m -> genDecomposeDefModule m pre post Kompat (< 60000))
 
     generalCategory =
-         ( "GHC.Unicode.Internal.Char.UnicodeData.GeneralCategory"
+         ( "GHC.Internal.Unicode.Char.UnicodeData.GeneralCategory"
          , genGeneralCategoryModule)
 
     simpleUpperCaseMapping =
-         ( "GHC.Unicode.Internal.Char.UnicodeData.SimpleUpperCaseMapping"
+         ( "GHC.Internal.Unicode.Char.UnicodeData.SimpleUpperCaseMapping"
          , \m -> genSimpleCaseMappingModule m "toSimpleUpperCase" _simpleUppercaseMapping)
 
     simpleLowerCaseMapping =
-         ( "GHC.Unicode.Internal.Char.UnicodeData.SimpleLowerCaseMapping"
+         ( "GHC.Internal.Unicode.Char.UnicodeData.SimpleLowerCaseMapping"
          , \m -> genSimpleCaseMappingModule m "toSimpleLowerCase" _simpleLowercaseMapping)
 
     simpleTitleCaseMapping =
-         ( "GHC.Unicode.Internal.Char.UnicodeData.SimpleTitleCaseMapping"
+         ( "GHC.Internal.Unicode.Char.UnicodeData.SimpleTitleCaseMapping"
          , \m -> genSimpleCaseMappingModule m "toSimpleTitleCase" _simpleTitlecaseMapping)
 
     -- unicode002Test =
diff --git a/libraries/ghc-internal/tools/ucd2haskell/ucd.sh b/libraries/ghc-internal/tools/ucd2haskell/ucd.sh
index 8b49a998cb11..504ba8d97f5f 100755
--- a/libraries/ghc-internal/tools/ucd2haskell/ucd.sh
+++ b/libraries/ghc-internal/tools/ucd2haskell/ucd.sh
@@ -52,7 +52,7 @@ download_files() {
     done
 }
 
-GHC_MODULE_PATH=$(realpath "$SCRIPT_DIR/../../")
+GHC_MODULE_PATH=$(realpath "$SCRIPT_DIR/../../src")
 
 # Generate the Haskell files.
 run_generator() {
@@ -63,7 +63,7 @@ run_generator() {
           --core-prop Uppercase \
           --core-prop Lowercase
         # [NOTE] disabled generator
-        #   --core-prop Alphabetic 
+        #   --core-prop Alphabetic
         #   --core-prop White_Space \
         #   --core-prop ID_Start \
         #   --core-prop ID_Continue \
diff --git a/libraries/ghc-internal/tools/ucd2haskell/ucd2haskell.cabal b/libraries/ghc-internal/tools/ucd2haskell/ucd2haskell.cabal
index eb5600f49a71..8b189480aed4 100644
--- a/libraries/ghc-internal/tools/ucd2haskell/ucd2haskell.cabal
+++ b/libraries/ghc-internal/tools/ucd2haskell/ucd2haskell.cabal
@@ -52,10 +52,11 @@ executable ucd2haskell
   main-is: UCD2Haskell.hs
   other-modules: Parser.Text
   build-depends:
-      base             >= 4.7   && < 4.18
-    , streamly         >= 0.8   && < 0.9
+      base             >= 4.7   && < 4.20
+    , streamly-core    >= 0.2.2 && < 0.3
+    , streamly         >= 0.10   && < 0.11
     , split            >= 0.2.3 && < 0.3
     , getopt-generics  >= 0.13  && < 0.14
     , containers       >= 0.5   && < 0.7
-    , directory        >= 1.3.6 && < 1.3.7
+    , directory        >= 1.3.6 && < 1.3.8
     , filepath         >= 1.4.2 && < 1.5
-- 
GitLab