From 0255d03c302c1b83367edae169fee0b4a4306965 Mon Sep 17 00:00:00 2001
From: Oleg Grenrus <oleg.grenrus@iki.fi>
Date: Fri, 12 Apr 2024 00:13:21 +0300
Subject: [PATCH] FastString is a __Modified__ UTF-8

---
 compiler/GHC/Data/FastString.hs               | 4 ++--
 libraries/ghc-boot/GHC/Data/ShortText.hs      | 4 ++++
 libraries/ghc-boot/GHC/Utils/Encoding/UTF8.hs | 2 +-
 3 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/compiler/GHC/Data/FastString.hs b/compiler/GHC/Data/FastString.hs
index 26be5c250b78..05af8fdca87e 100644
--- a/compiler/GHC/Data/FastString.hs
+++ b/compiler/GHC/Data/FastString.hs
@@ -198,8 +198,8 @@ mkFastZStringString str = FastZString (BSC.pack str)
 
 -- -----------------------------------------------------------------------------
 
-{-| A 'FastString' is a UTF-8 encoded string together with a unique ID. All
-'FastString's are stored in a global hashtable to support fast O(1)
+{-| A 'FastString' is a Modified UTF-8 encoded string together with a unique ID.
+All 'FastString's are stored in a global hashtable to support fast O(1)
 comparison.
 
 It is also associated with a lazy reference to the Z-encoding
diff --git a/libraries/ghc-boot/GHC/Data/ShortText.hs b/libraries/ghc-boot/GHC/Data/ShortText.hs
index aa5a50ce7fef..6fcd8afee36a 100644
--- a/libraries/ghc-boot/GHC/Data/ShortText.hs
+++ b/libraries/ghc-boot/GHC/Data/ShortText.hs
@@ -24,6 +24,10 @@
 
 -- Very similar to FastString, but not hash-consed and with some extra instances and
 -- functions for serialisation and I/O. Should be imported qualified.
+--
+-- /Note:/ This string is stored in Modified UTF-8 format, so it is not
+-- byte-compatible with the @ShortText@ type from the @text-short@
+-- package.
 
 module GHC.Data.ShortText (
         -- * ShortText
diff --git a/libraries/ghc-boot/GHC/Utils/Encoding/UTF8.hs b/libraries/ghc-boot/GHC/Utils/Encoding/UTF8.hs
index 5088e40dc286..1060c631b562 100644
--- a/libraries/ghc-boot/GHC/Utils/Encoding/UTF8.hs
+++ b/libraries/ghc-boot/GHC/Utils/Encoding/UTF8.hs
@@ -7,7 +7,7 @@
 -- of the package database (needed in both ghc and in ghc-pkg) lives in
 -- `ghc-boot` and uses ShortText, which in turn depends on this module.
 
--- | Simple, non-streaming UTF-8 codecs.
+-- | Simple, non-streaming Modified UTF-8 codecs.
 --
 -- This is one of several UTF-8 implementations provided by GHC; see Note
 -- [GHC's many UTF-8 implementations] in "GHC.Encoding.UTF8" for an
-- 
GitLab