From 52c0fc691e6501e99a96693ec1fc02e3c93a4fbc Mon Sep 17 00:00:00 2001
From: PHO <pho@cielonegro.org>
Date: Thu, 2 Nov 2023 22:59:39 +0900
Subject: [PATCH] Don't assume the current locale is *.UTF-8, set the encoding
 explicitly

primops.txt contains non-ASCII characters encoded in UTF-8, so decoding
it fails when the current locale does not use UTF-8:
> LC_ALL=C ./genprimopcode --data-decl < ./primops.txt
> genprimopcode: <stdin>: hGetContents: invalid argument (cannot decode byte sequence starting from 226)

Hadrian must also avoid using readFile' to read primops.txt, because
readFile' tries to decode the file with a locale-specific encoding.
Instead, pass the file to genprimopcode directly as its stdin.
---
 hadrian/src/Builder.hs      | 4 ++--
 utils/genprimopcode/Main.hs | 5 ++++-
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/hadrian/src/Builder.hs b/hadrian/src/Builder.hs
index 87171c960977..dbe6f4b735f4 100644
--- a/hadrian/src/Builder.hs
+++ b/hadrian/src/Builder.hs
@@ -333,8 +333,8 @@ instance H.Builder Builder where
                 GenApply -> captureStdout
 
                 GenPrimopCode -> do
-                    stdin <- readFile' input
-                    Stdout stdout <- cmd' (Stdin stdin) [path] buildArgs buildOptions
+                    need [input]
+                    Stdout stdout <- cmd' (FileStdin input) [path] buildArgs buildOptions
                     -- see Note [Capture stdout as a ByteString]
                     writeFileChangedBS output stdout
 
diff --git a/utils/genprimopcode/Main.hs b/utils/genprimopcode/Main.hs
index fd4d4ec25e4c..0f5fa29acfad 100644
--- a/utils/genprimopcode/Main.hs
+++ b/utils/genprimopcode/Main.hs
@@ -13,6 +13,7 @@ import Data.Char
 import Data.List (union, intersperse, intercalate, nub)
 import Data.Maybe ( catMaybes )
 import System.Environment ( getArgs )
+import System.IO ( hSetEncoding, stdin, stdout, utf8 )
 
 vecOptions :: Entry -> [(String,String,Int)]
 vecOptions i =
@@ -116,7 +117,9 @@ main = getArgs >>= \args ->
                    ++ unlines (map ("            "++) known_args)
                   )
        else
-       do s <- getContents
+       do hSetEncoding stdin  utf8 -- The input file is in UTF-8. Set the encoding explicitly.
+          hSetEncoding stdout utf8
+          s <- getContents
           case parse s of
              Left err -> error ("parse error at " ++ (show err))
              Right p_o_specs@(Info _ _)
-- 
GitLab