Commit d4d34a73 authored by niteria's avatar niteria Committed by Austin Seipp

Make derived names deterministic

The names of auxiliary bindings end up in the interface file, and since uniques
are nondeterministic, we end up with nondeterministic interface files.

This uses the package and module name in the generated name, so I believe it
should avoid problems from #7947 and be deterministic as well.

The generated names look like this now:

  `$cLrlbmVwI3gpI8G2E6Hg3mO`

and with `-ppr-debug`:

  `$c$aeson_70dylHtv1FFGeai1IoxcQr$Data.Aeson.Types.Internal$String`.

Reviewed By: simonmar, austin, ezyang

Differential Revision: https://phabricator.haskell.org/D1133

GHC Trac Issues: #4012
parent 0b852fcf
......@@ -17,6 +17,7 @@ module ShPackageKey(
import Module
import Packages
import Encoding
import FastString
import UniqFM
import UniqSet
......@@ -26,11 +27,8 @@ import DynFlags
import System.IO.Unsafe ( unsafePerformIO )
import Control.Monad
import Numeric
import Data.IORef
import GHC.Fingerprint
import Data.Word
import qualified Data.Char as Char
import Data.List
import Data.Function
......@@ -237,44 +235,7 @@ canonicalizeModule dflags m = do
| Just m' <- lookup (moduleName m) insts -> m'
_ -> m
{-
************************************************************************
* *
Base 62
* *
************************************************************************
-}
--------------------------------------------------------------------------
-- Base 62
-- The base-62 code is based off of 'locators'
-- ((c) Operational Dynamics Consulting, BSD3 licensed)
-- Note: Instead of base-62 encoding a single 128-bit integer
-- (ceil(21.49) characters), we'll base-62 a pair of 64-bit integers
-- (2 * ceil(10.75) characters). Luckily for us, it's the same number of
-- characters! In the long term, this should go in GHC.Fingerprint,
-- but not now...
-- | Size of a 64-bit word when written as a base-62 string
word64Base62Len :: Int
word64Base62Len = 11
-- | Converts a 64-bit word into a base-62 string
toBase62 :: Word64 -> String
toBase62 w = pad ++ str
where
pad = replicate len '0'
len = word64Base62Len - length str -- 11 == ceil(64 / lg 62)
str = showIntAtBase 62 represent w ""
represent :: Int -> Char
represent x
| x < 10 = Char.chr (48 + x)
| x < 36 = Char.chr (65 + x - 10)
| x < 62 = Char.chr (97 + x - 36)
| otherwise = error ("represent (base 62): impossible!")
fingerprintPackageKey :: Fingerprint -> PackageKey
fingerprintPackageKey (Fingerprint a b)
= stringToPackageKey (toBase62 a ++ toBase62 b)
= stringToPackageKey (toBase62Padded a ++ toBase62Padded b)
-- See Note [Base 62 encoding 128-bit integers]
......@@ -10,6 +10,7 @@ the keys.
-}
{-# LANGUAGE DeriveDataTypeable #-}
{-# LANGUAGE RecordWildCards #-}
module Module
(
......@@ -19,6 +20,7 @@ module Module
moduleNameFS,
moduleNameString,
moduleNameSlashes, moduleNameColons,
moduleStableString,
mkModuleName,
mkModuleNameFS,
stableModuleNameCmp,
......@@ -209,6 +211,13 @@ moduleNameFS (ModuleName mod) = mod
moduleNameString :: ModuleName -> String
moduleNameString (ModuleName mod) = unpackFS mod
-- | Get a string representation of a 'Module' that's unique and stable
-- across recompilations.
-- eg. "$aeson_70dylHtv1FFGeai1IoxcQr$Data.Aeson.Types.Internal"
moduleStableString :: Module -> String
moduleStableString Module{..} =
"$" ++ packageKeyString modulePackageKey ++ "$" ++ moduleNameString moduleName
mkModuleName :: String -> ModuleName
mkModuleName s = ModuleName (mkFastString s)
......
......@@ -6,6 +6,7 @@
-}
{-# LANGUAGE DeriveDataTypeable #-}
{-# LANGUAGE RecordWildCards #-}
-- |
-- #name_types#
......@@ -70,6 +71,7 @@ module Name (
getSrcLoc, getSrcSpan, getOccString,
pprInfixName, pprPrefixName, pprModulePrefix,
nameStableString,
-- Re-export the OccName stuff
module OccName
......@@ -598,6 +600,21 @@ pprNameDefnLoc name
| otherwise
-> ptext (sLit "in") <+> quotes (ppr (nameModule name))
-- | Get a string representation of a 'Name' that's unique and stable
-- across recompilations. Used for deterministic generation of binds for
-- derived instances.
-- eg. "$aeson_70dylHtv1FFGeai1IoxcQr$Data.Aeson.Types.Internal$String"
nameStableString :: Name -> String
nameStableString Name{..} =
nameSortStableString n_sort ++ "$" ++ occNameString n_occ
nameSortStableString :: NameSort -> String
nameSortStableString System = "$_sys"
nameSortStableString Internal = "$_in"
nameSortStableString (External mod) = moduleStableString mod
nameSortStableString (WiredIn mod _ _) = moduleStableString mod
{-
************************************************************************
* *
......
......@@ -36,6 +36,8 @@ import RdrName
import BasicTypes
import DataCon
import Name
import Fingerprint
import Encoding
import DynFlags
import PrelInfo
......@@ -2295,20 +2297,20 @@ mkAuxBinderName :: Name -> (OccName -> OccName) -> RdrName
-- ^ Make a top-level binder name for an auxiliary binding for a parent name
-- See Note [Auxiliary binders]
mkAuxBinderName parent occ_fun
= mkRdrUnqual (occ_fun uniq_parent_occ)
= mkRdrUnqual (occ_fun stable_parent_occ)
where
uniq_parent_occ = mkOccName (occNameSpace parent_occ) uniq_string
uniq_string
| opt_PprStyle_Debug
= showSDocUnsafe (ppr parent_occ <> underscore <> ppr parent_uniq)
| otherwise
= show parent_uniq
-- The debug thing is just to generate longer, but perhaps more perspicuous, names
parent_uniq = nameUnique parent
stable_parent_occ = mkOccName (occNameSpace parent_occ) stable_string
stable_string
| opt_PprStyle_Debug = parent_stable
| otherwise = parent_stable_hash
parent_stable = nameStableString parent
parent_stable_hash =
let Fingerprint high low = fingerprintString parent_stable
in toBase62 high ++ toBase62Padded low
-- See Note [Base 62 encoding 128-bit integers]
parent_occ = nameOccName parent
{-
Note [Auxiliary binders]
~~~~~~~~~~~~~~~~~~~~~~~~
......@@ -2325,12 +2327,12 @@ generating RdrNames here. We can't just use the TyCon or DataCon to distinguish
because with standalone deriving two imported TyCons might both be called T!
(See Trac #7947.)
So we use the *unique* from the parent name (T in this example) as part of the
OccName we generate for the new binding.
So we use package name, module name and the name of the parent
(T in this example) as part of the OccName we generate for the new binding.
To make the symbol names short we take a base62 hash of the full name.
In the past we used mkDerivedRdrName name occ_fun, which made an original name
But: (a) that does not work well for standalone-deriving either
(b) an unqualified name is just fine, provided it can't clash with user code
In the past we used the *unique* from the parent, but that's not stable across
recompilations as uniques are nondeterministic.
Note [DeriveFoldable with ExistentialQuantification]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
......
......@@ -25,11 +25,16 @@ module Encoding (
-- * Z-encoding
zEncodeString,
zDecodeString
zDecodeString,
-- * Base62-encoding
toBase62,
toBase62Padded
) where
import Foreign
import Data.Char
import qualified Data.Char as Char
import Numeric
import GHC.Exts
......@@ -385,3 +390,47 @@ maybe_tuple _ = Nothing
count_commas :: Int -> String -> (Int, String)
count_commas n (',' : cs) = count_commas (n+1) cs
count_commas n cs = (n,cs)
{-
************************************************************************
* *
Base 62
* *
************************************************************************
Note [Base 62 encoding 128-bit integers]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Instead of base-62 encoding a single 128-bit integer
(ceil(21.49) characters), we'll base-62 a pair of 64-bit integers
(2 * ceil(10.75) characters). Luckily for us, it's the same number of
characters!
-}
--------------------------------------------------------------------------
-- Base 62
-- The base-62 code is based off of 'locators'
-- ((c) Operational Dynamics Consulting, BSD3 licensed)
-- | Size of a 64-bit word when written as a base-62 string
word64Base62Len :: Int
word64Base62Len = 11
-- | Converts a 64-bit word into a base-62 string
toBase62Padded :: Word64 -> String
toBase62Padded w = pad ++ str
where
pad = replicate len '0'
len = word64Base62Len - length str -- 11 == ceil(64 / lg 62)
str = toBase62 w
toBase62 :: Word64 -> String
toBase62 w = showIntAtBase 62 represent w ""
where
represent :: Int -> Char
represent x
| x < 10 = Char.chr (48 + x)
| x < 36 = Char.chr (65 + x - 10)
| x < 62 = Char.chr (97 + x - 36)
| otherwise = error "represent (base 62): impossible!"
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment