From 0255d03c302c1b83367edae169fee0b4a4306965 Mon Sep 17 00:00:00 2001 From: Oleg Grenrus <oleg.grenrus@iki.fi> Date: Fri, 12 Apr 2024 00:13:21 +0300 Subject: [PATCH] FastString is a __Modified__ UTF-8 --- compiler/GHC/Data/FastString.hs | 4 ++-- libraries/ghc-boot/GHC/Data/ShortText.hs | 4 ++++ libraries/ghc-boot/GHC/Utils/Encoding/UTF8.hs | 2 +- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/compiler/GHC/Data/FastString.hs b/compiler/GHC/Data/FastString.hs index 26be5c250b78..05af8fdca87e 100644 --- a/compiler/GHC/Data/FastString.hs +++ b/compiler/GHC/Data/FastString.hs @@ -198,8 +198,8 @@ mkFastZStringString str = FastZString (BSC.pack str) -- ----------------------------------------------------------------------------- -{-| A 'FastString' is a UTF-8 encoded string together with a unique ID. All -'FastString's are stored in a global hashtable to support fast O(1) +{-| A 'FastString' is a Modified UTF-8 encoded string together with a unique ID. +All 'FastString's are stored in a global hashtable to support fast O(1) comparison. It is also associated with a lazy reference to the Z-encoding diff --git a/libraries/ghc-boot/GHC/Data/ShortText.hs b/libraries/ghc-boot/GHC/Data/ShortText.hs index aa5a50ce7fef..6fcd8afee36a 100644 --- a/libraries/ghc-boot/GHC/Data/ShortText.hs +++ b/libraries/ghc-boot/GHC/Data/ShortText.hs @@ -24,6 +24,10 @@ -- Very similar to FastString, but not hash-consed and with some extra instances and -- functions for serialisation and I/O. Should be imported qualified. +-- +-- /Note:/ This string is stored in Modified UTF-8 format, +-- thus it's not byte-compatible with the @ShortText@ type in the @text-short@ +-- package. 
module GHC.Data.ShortText ( -- * ShortText diff --git a/libraries/ghc-boot/GHC/Utils/Encoding/UTF8.hs b/libraries/ghc-boot/GHC/Utils/Encoding/UTF8.hs index 5088e40dc286..1060c631b562 100644 --- a/libraries/ghc-boot/GHC/Utils/Encoding/UTF8.hs +++ b/libraries/ghc-boot/GHC/Utils/Encoding/UTF8.hs @@ -7,7 +7,7 @@ -- of the package database (needed in both ghc and in ghc-pkg) lives in -- `ghc-boot` and uses ShortText, which in turn depends on this module. --- | Simple, non-streaming UTF-8 codecs. +-- | Simple, non-streaming Modified UTF-8 codecs. -- -- This is one of several UTF-8 implementations provided by GHC; see Note -- [GHC's many UTF-8 implementations] in "GHC.Encoding.UTF8" for an -- GitLab