diff --git a/compiler/GHC/Iface/Binary.hs b/compiler/GHC/Iface/Binary.hs
index 44a60587afb0bda508feb67db8abe2afcbc21578..9abf76ce2c745891957bca1f8f06d879cf711da3 100644
--- a/compiler/GHC/Iface/Binary.hs
+++ b/compiler/GHC/Iface/Binary.hs
@@ -25,6 +25,8 @@ module GHC.Iface.Binary (
         putName,
         putSymbolTable,
         BinSymbolTable(..),
+        initWriteIfaceType, initReadIfaceTypeTable,
+        putAllTables,
     ) where
 
 import GHC.Prelude
@@ -46,14 +48,19 @@ import GHC.Types.SrcLoc
 import GHC.Platform
 import GHC.Settings.Constants
 import GHC.Utils.Fingerprint
+import GHC.Iface.Type (IfaceType, getIfaceType, putIfaceType)
 
+import Control.Monad
 import Data.Array
 import Data.Array.IO
 import Data.Array.Unsafe
 import Data.Char
-import Data.Word
 import Data.IORef
-import Control.Monad
+import Data.Map.Strict (Map)
+import Data.Word
+import System.IO.Unsafe
+import Data.Typeable (Typeable)
+
 
 -- ---------------------------------------------------------------------------
 -- Reading and writing binary interface files
@@ -158,24 +165,37 @@ getWithUserData name_cache bh = do
 -- Reading names has the side effect of adding them into the given NameCache.
 getTables :: NameCache -> ReadBinHandle -> IO ReadBinHandle
 getTables name_cache bh = do
+    bhRef <- newIORef (error "used too soon")
+    -- It is important this is passed to 'getTable'
+    -- See Note [Lazy ReaderUserData during IfaceType serialisation]
+    ud <- unsafeInterleaveIO (readIORef bhRef)
+
     fsReaderTable <- initFastStringReaderTable
     nameReaderTable <- initNameReaderTable name_cache
-
-
-    -- The order of these deserialisation matters!
-    --
-    -- See Note [Order of deduplication tables during iface binary serialisation] for details.
-    fsTable <- Binary.forwardGet bh (getTable fsReaderTable bh)
-    let
-      fsReader = mkReaderFromTable fsReaderTable fsTable
-      bhFs = addReaderToUserData (mkSomeBinaryReader fsReader) bh
-
-    nameTable <- Binary.forwardGet bh (getTable nameReaderTable bhFs)
-    let
-      nameReader = mkReaderFromTable nameReaderTable nameTable
-      bhName = addReaderToUserData (mkSomeBinaryReader nameReader) bhFs
-
-    pure bhName
+    ifaceTypeReaderTable <- initReadIfaceTypeTable ud
+
+    let -- For any 'ReaderTable', we decode the table that is found at the location
+        -- the forward reference points to.
+        -- After decoding the table, we create a 'BinaryReader' and immediately
+        -- add it to the 'ReaderUserData' of 'ReadBinHandle'.
+        decodeReaderTable :: Typeable a => ReaderTable a -> ReadBinHandle -> IO ReadBinHandle
+        decodeReaderTable tbl bh0 = do
+          table <- Binary.forwardGet bh (getTable tbl bh0)
+          let binaryReader = mkReaderFromTable tbl table
+          pure $ addReaderToUserData binaryReader bh0
+
+    -- Decode all the tables and populate the 'ReaderUserData'.
+    bhFinal <- foldM (\bh0 act -> act bh0) bh
+      -- The order of these deserialisation matters!
+      --
+      -- See Note [Order of deduplication tables during iface binary serialisation] for details.
+      [ decodeReaderTable fsReaderTable
+      , decodeReaderTable nameReaderTable
+      , decodeReaderTable ifaceTypeReaderTable
+      ]
+
+    writeIORef bhRef (getReaderUserData bhFinal)
+    pure bhFinal
 
 -- | Write an interface file.
 --
@@ -239,6 +259,7 @@ putWithTables bh' put_payload = do
   -- Initialise deduplicating tables.
   (fast_wt, fsWriter) <- initFastStringWriterTable
   (name_wt, nameWriter) <- initNameWriterTable
+  (ifaceType_wt, ifaceTypeWriter) <- initWriteIfaceType
 
   -- Initialise the 'WriterUserData'.
   let writerUserData = mkWriterUserData
@@ -250,6 +271,7 @@ putWithTables bh' put_payload = do
         --
         -- See Note [Binary UserData]
         , mkSomeBinaryWriter @BindingName  $ mkWriter (\bh name -> putEntry nameWriter bh (getBindingName name))
+        , mkSomeBinaryWriter @IfaceType ifaceTypeWriter
         ]
   let bh = setWriterUserData bh' writerUserData
 
@@ -257,18 +279,24 @@ putWithTables bh' put_payload = do
     -- The order of these entries matters!
     --
     -- See Note [Order of deduplication tables during iface binary serialisation] for details.
-    putAllTables bh [fast_wt, name_wt] $ do
+    putAllTables bh [fast_wt, name_wt, ifaceType_wt] $ do
       put_payload bh
 
   return (name_count, fs_count, r)
- where
-  putAllTables _ [] act = do
-    a <- act
-    pure ([], a)
-  putAllTables bh (x : xs) act = do
-    (r, (res, a)) <- forwardPut bh (const $ putTable x bh) $ do
-      putAllTables bh xs act
-    pure (r : res, a)
+
+-- | Write all deduplication tables to disk after serialising the
+-- main payload.
+--
+-- Writes forward pointers to the deduplication tables before writing the payload
+-- to allow deserialisation *before* the payload is read again.
+putAllTables :: WriteBinHandle -> [WriterTable] -> IO b -> IO ([Int], b)
+putAllTables _ [] act = do
+  a <- act
+  pure ([], a)
+putAllTables bh (x : xs) act = do
+  (r, (res, a)) <- forwardPut bh (const $ putTable x bh) $ do
+    putAllTables bh xs act
+  pure (r : res, a)
 
 -- | Initial ram buffer to allocate for writing interface files
 initBinMemSize :: Int
@@ -445,11 +473,69 @@ Here, a visualisation of the table structure we currently have (ignoring 'Extens
 
 -}
 
+{-
+Note [Lazy ReaderUserData during IfaceType serialisation]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Serialising recursive data types, such as 'IfaceType', requires some trickery
+to inject the deduplication table at the right moment.
+
+When we serialise a value of 'IfaceType', we might encounter new 'IfaceType' values.
+For example, 'IfaceAppTy' has an 'IfaceType' field, which we want to deduplicate as well.
+Thus, when we serialise an 'IfaceType', we might add new 'IfaceType's to the 'GenericSymbolTable'
+(i.e., the deduplication table). These 'IfaceType's are then subsequently also serialised to disk,
+and uncover new 'IfaceType' values, etc...
+In other words, when we serialise an 'IfaceType' we write it out using a post-order traversal.
+See 'putGenericSymbolTable' for the implementation.
+
+Now, when we deserialise the deduplication table, reading the first element of the deduplication table
+will fail, as deserialisation requires that we read the child elements first. Remember, we wrote them to disk
+using a post-order traversal.
+To make this work, we therefore use 'lazyGet'' to lazily read the parent 'IfaceType', but delay the actual
+deserialisation. We just assume that once you need to force a value, the deduplication table for 'IfaceType'
+will be available.
+
+That's where 'bhRef' comes into play:
+
+@
+    bhRef <- newIORef (error "used too soon")
+    ud <- unsafeInterleaveIO (readIORef bhRef)
+    ...
+    ifaceTypeReaderTable <- initReadIfaceTypeTable ud
+    ...
+    writeIORef bhRef (getReaderUserData bhFinal)
+@
+
+'ud' is the 'ReaderUserData' that will eventually contain the deduplication table for 'IfaceType'.
+As deserialisation of the 'IfaceType' needs the deduplication table, we provide a
+promise that it will exist in the future (represented by @unsafeInterleaveIO (readIORef bhRef)@).
+We pass 'ud' to 'initReadIfaceTypeTable', so the deserialisation will use the promised deduplication table.
+
+Once we have "read" the deduplication table, it will be available in 'bhFinal', and we fulfill the promise
+that the deduplication table for 'IfaceType' exists when forced.
+-}
 
 -- -----------------------------------------------------------------------------
 -- The symbol table
 --
 
+initReadIfaceTypeTable :: ReaderUserData -> IO (ReaderTable IfaceType)
+initReadIfaceTypeTable ud = do
+  pure $
+    ReaderTable
+      { getTable = getGenericSymbolTable (\bh -> lazyGet' getIfaceType (setReaderUserData bh ud))
+      , mkReaderFromTable = \tbl -> mkReader (getGenericSymtab tbl)
+      }
+
+initWriteIfaceType :: IO (WriterTable, BinaryWriter IfaceType)
+initWriteIfaceType = do
+  sym_tab <- initGenericSymbolTable @(Map IfaceType)
+  pure
+    ( WriterTable
+        { putTable = putGenericSymbolTable sym_tab (lazyPut' putIfaceType)
+        }
+    , mkWriter $ putGenericSymTab sym_tab
+    )
+
 
 initNameReaderTable :: NameCache -> IO (ReaderTable Name)
 initNameReaderTable cache = do
diff --git a/compiler/GHC/Iface/Ext/Binary.hs b/compiler/GHC/Iface/Ext/Binary.hs
index c0cc3b2b30076e7f69160fadb001326316e2b99f..3b117b4d9a977d39a04e8aa11aa8baa8db2b8cc4 100644
--- a/compiler/GHC/Iface/Ext/Binary.hs
+++ b/compiler/GHC/Iface/Ext/Binary.hs
@@ -38,22 +38,21 @@ import Data.ByteString            ( ByteString )
 import qualified Data.ByteString  as BS
 import qualified Data.ByteString.Char8 as BSC
 import Data.Word                  ( Word8, Word32 )
-import Control.Monad              ( replicateM, when, forM_ )
+import Control.Monad              ( replicateM, when, forM_, foldM )
 import System.Directory           ( createDirectoryIfMissing )
 import System.FilePath            ( takeDirectory )
 
 import GHC.Iface.Ext.Types
+import GHC.Iface.Binary (initWriteIfaceType, putAllTables, initReadIfaceTypeTable)
+import GHC.Iface.Type (IfaceType)
+import System.IO.Unsafe (unsafeInterleaveIO)
+import qualified GHC.Utils.Binary as Binary
 
 data HieSymbolTable = HieSymbolTable
   { hie_symtab_next :: !FastMutInt
   , hie_symtab_map  :: !(IORef (UniqFM Name (Int, HieName)))
   }
 
-data HieDictionary = HieDictionary
-  { hie_dict_next :: !FastMutInt -- The next index to use
-  , hie_dict_map  :: !(IORef (UniqFM FastString (Int,FastString))) -- indexed by FastString
-  }
-
 initBinMemSize :: Int
 initBinMemSize = 1024*1024
 
@@ -84,58 +83,58 @@ writeHieFile hie_file_path hiefile = do
   putBinLine bh0 $ BSC.pack $ show hieVersion
   putBinLine bh0 $ ghcVersion
 
-  -- remember where the dictionary pointer will go
-  dict_p_p <- tellBinWriter bh0
-  put_ bh0 dict_p_p
+  (fs_tbl, fs_w) <- initFastStringWriterTable
+  (name_tbl, name_w) <- initWriteNameTable
+  (iface_tbl, iface_w) <- initWriteIfaceType
 
-  -- remember where the symbol table pointer will go
-  symtab_p_p <- tellBinWriter bh0
-  put_ bh0 symtab_p_p
+  let bh = setWriterUserData bh0 $ mkWriterUserData
+        [ mkSomeBinaryWriter @IfaceType iface_w
+        , mkSomeBinaryWriter @Name name_w
+        , mkSomeBinaryWriter @BindingName (simpleBindingNameWriter name_w)
+        , mkSomeBinaryWriter @FastString fs_w
+        ]
 
-  -- Make some initial state
-  symtab_next <- newFastMutInt 0
-  symtab_map <- newIORef emptyUFM :: IO (IORef (UniqFM Name (Int, HieName)))
-  let hie_symtab = HieSymbolTable {
-                      hie_symtab_next = symtab_next,
-                      hie_symtab_map  = symtab_map }
-  dict_next_ref <- newFastMutInt 0
-  dict_map_ref <- newIORef emptyUFM
-  let hie_dict = HieDictionary {
-                      hie_dict_next = dict_next_ref,
-                      hie_dict_map  = dict_map_ref }
-
-  -- put the main thing
-  let bh = setWriterUserData bh0
-          $ newWriteState (putName hie_symtab)
-                          (putName hie_symtab)
-                          (putFastString hie_dict)
-  put_ bh hiefile
-
-  -- write the symtab pointer at the front of the file
-  symtab_p <- tellBinWriter bh
-  putAt bh symtab_p_p symtab_p
-  seekBinWriter bh symtab_p
-
-  -- write the symbol table itself
-  symtab_next' <- readFastMutInt symtab_next
-  symtab_map'  <- readIORef symtab_map
-  putSymbolTable bh symtab_next' symtab_map'
-
-  -- write the dictionary pointer at the front of the file
-  dict_p <- tellBinWriter bh
-  putAt bh dict_p_p dict_p
-  seekBinWriter bh dict_p
-
-  -- write the dictionary itself
-  dict_next <- readFastMutInt dict_next_ref
-  dict_map  <- readIORef dict_map_ref
-  putDictionary bh dict_next dict_map
+  -- Discard number of written elements
+  -- Order matters! See Note [Order of deduplication tables during iface binary serialisation]
+  _ <- putAllTables bh [fs_tbl, name_tbl, iface_tbl] $ do
+    put_ bh hiefile
 
   -- and send the result to the file
   createDirectoryIfMissing True (takeDirectory hie_file_path)
   writeBinMem bh hie_file_path
   return ()
 
+initWriteNameTable :: IO (WriterTable, BinaryWriter Name)
+initWriteNameTable = do
+  symtab_next <- newFastMutInt 0
+  symtab_map <- newIORef emptyUFM
+  let bin_symtab =
+        HieSymbolTable
+          { hie_symtab_next = symtab_next
+          , hie_symtab_map = symtab_map
+          }
+
+  let put_symtab bh = do
+        name_count <- readFastMutInt symtab_next
+        symtab_map <- readIORef symtab_map
+        putSymbolTable bh name_count symtab_map
+        pure name_count
+
+  return
+    ( WriterTable
+        { putTable = put_symtab
+        }
+    , mkWriter $ putName bin_symtab
+    )
+
+initReadNameTable :: NameCache -> IO (ReaderTable Name)
+initReadNameTable cache = do
+  return $
+    ReaderTable
+      { getTable = \bh -> getSymbolTable bh cache
+      , mkReaderFromTable = \tbl -> mkReader (getSymTabName tbl)
+      }
+
 data HieFileResult
   = HieFileResult
   { hie_file_result_version :: Integer
@@ -216,50 +215,37 @@ readHieFileHeader file bh0 = do
 
 readHieFileContents :: ReadBinHandle -> NameCache -> IO HieFile
 readHieFileContents bh0 name_cache = do
-  dict <- get_dictionary bh0
-  -- read the symbol table so we are capable of reading the actual data
-  bh1 <- do
-      let bh1 = setReaderUserData bh0
-              $ newReadState (error "getSymtabName")
-                             (getDictFastString dict)
-      symtab <- get_symbol_table bh1
-      let bh1' = setReaderUserData bh1
-               $ newReadState (getSymTabName symtab)
-                              (getDictFastString dict)
-      return bh1'
+  bhRef <- newIORef (error "used too soon")
+  -- It is important this is passed to 'getTable'
+  -- See Note [Lazy ReaderUserData during IfaceType serialisation]
+  ud <- unsafeInterleaveIO (readIORef bhRef)
+
+  fsReaderTable <- initFastStringReaderTable
+  nameReaderTable <- initReadNameTable name_cache
+  ifaceTypeReaderTable <- initReadIfaceTypeTable ud
 
+  -- read the symbol table so we are capable of reading the actual data
+  bh1 <-
+    foldM (\bh tblReader -> tblReader bh) bh0
+      -- The order of these deserialisation matters!
+      --
+      -- See Note [Order of deduplication tables during iface binary serialisation] for details.
+      [ get_dictionary fsReaderTable
+      , get_dictionary nameReaderTable
+      , get_dictionary ifaceTypeReaderTable
+      ]
+
+  writeIORef bhRef (getReaderUserData bh1)
   -- load the actual data
   get bh1
   where
-    get_dictionary bin_handle = do
-      dict_p <- get bin_handle
-      data_p <- tellBinReader bin_handle
-      seekBinReader bin_handle dict_p
-      dict <- getDictionary bin_handle
-      seekBinReader bin_handle data_p
-      return dict
-
-    get_symbol_table bh1 = do
-      symtab_p <- get bh1
-      data_p'  <- tellBinReader bh1
-      seekBinReader bh1 symtab_p
-      symtab <- getSymbolTable bh1 name_cache
-      seekBinReader bh1 data_p'
-      return symtab
-
-putFastString :: HieDictionary -> WriteBinHandle -> FastString -> IO ()
-putFastString HieDictionary { hie_dict_next = j_r,
-                              hie_dict_map  = out_r}  bh f
-  = do
-    out <- readIORef out_r
-    let !unique = getUnique f
-    case lookupUFM_Directly out unique of
-        Just (j, _)  -> put_ bh (fromIntegral j :: Word32)
-        Nothing -> do
-           j <- readFastMutInt j_r
-           put_ bh (fromIntegral j :: Word32)
-           writeFastMutInt j_r (j + 1)
-           writeIORef out_r $! addToUFM_Directly out unique (j, f)
+    get_dictionary tbl bin_handle = do
+      fsTable <- Binary.forwardGet bin_handle (getTable tbl bin_handle)
+      let
+        fsReader = mkReaderFromTable tbl fsTable
+        bhFs = addReaderToUserData fsReader bin_handle
+      pure bhFs
+
 
 putSymbolTable :: WriteBinHandle -> Int -> UniqFM Name (Int,HieName) -> IO ()
 putSymbolTable bh next_off symtab = do
diff --git a/compiler/GHC/Iface/Recomp/Binary.hs b/compiler/GHC/Iface/Recomp/Binary.hs
index 1f83e578667ad338c6d10bb226eadd89422b4632..3dc55e68296a5a86b65711a83b4da11c3ad30144 100644
--- a/compiler/GHC/Iface/Recomp/Binary.hs
+++ b/compiler/GHC/Iface/Recomp/Binary.hs
@@ -14,6 +14,7 @@ import GHC.Utils.Fingerprint
 import GHC.Utils.Binary
 import GHC.Types.Name
 import GHC.Utils.Panic.Plain
+import GHC.Iface.Type (putIfaceType)
 
 fingerprintBinMem :: WriteBinHandle -> IO Fingerprint
 fingerprintBinMem bh = withBinBuffer bh f
@@ -34,8 +35,12 @@ computeFingerprint put_nonbinding_name a = do
     put_ bh a
     fingerprintBinMem bh
   where
-    set_user_data bh =
-      setWriterUserData bh $ newWriteState put_nonbinding_name putNameLiterally putFS
+    set_user_data bh = setWriterUserData bh $ mkWriterUserData
+      [ mkSomeBinaryWriter $ mkWriter putIfaceType
+      , mkSomeBinaryWriter $ mkWriter put_nonbinding_name
+      , mkSomeBinaryWriter $ simpleBindingNameWriter $ mkWriter putNameLiterally
+      , mkSomeBinaryWriter $ mkWriter putFS
+      ]
 
 -- | Used when we want to fingerprint a structure without depending on the
 -- fingerprints of external Names that it refers to.
diff --git a/compiler/GHC/Iface/Type.hs b/compiler/GHC/Iface/Type.hs
index 163bc581f7fe46f2e5e46811f26523da88db1a3a..9605e712cfba9e250f4a1a7293033847a3e1a30b 100644
--- a/compiler/GHC/Iface/Type.hs
+++ b/compiler/GHC/Iface/Type.hs
@@ -33,6 +33,8 @@ module GHC.Iface.Type (
         ifForAllBndrVar, ifForAllBndrName, ifaceBndrName,
         ifTyConBinderVar, ifTyConBinderName,
 
+        -- Binary utilities
+        putIfaceType, getIfaceType,
         -- Equality testing
         isIfaceLiftedTypeKind,
 
@@ -91,10 +93,11 @@ import GHC.Utils.Panic
 import {-# SOURCE #-} GHC.Tc.Utils.TcType ( isMetaTyVar, isTyConableTyVar )
 
 import Control.DeepSeq
+import Data.Proxy
 import Control.Monad ((<$!>))
 import Control.Arrow (first)
 import qualified Data.Semigroup as Semi
-import Data.Maybe( isJust )
+import Data.Maybe (isJust)
 
 {-
 ************************************************************************
@@ -192,6 +195,34 @@ data IfaceType
           -- in interface file size (in GHC's boot libraries).
           -- See !3987.
   deriving (Eq, Ord)
+  -- See Note [Ord instance of IfaceType]
+
+{-
+Note [Ord instance of IfaceType]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+We need an 'Ord' instance to have a 'Map' keyed by 'IfaceType'. This 'Map' is
+required for implementing the deduplication table during interface file
+serialisation.
+See Note [Deduplication during iface binary serialisation] for the implementation details.
+
+We experimented with a 'TrieMap' based implementation, but it seems to be
+slower than using a straight-forward 'Map IfaceType'.
+The experiments loaded the full agda library into a ghci session with the
+following scenarios:
+
+* normal: a plain ghci session.
+* cold: a ghci session that uses '-fwrite-if-simplified-core -fforce-recomp',
+  forcing a cold-cache.
+* warm: a subsequent ghci session that uses a warm cache for
+  '-fwrite-if-simplified-core', e.g. nothing needs to be recompiled.
+
+The implementation was up to 5% slower in some execution runs. However, on
+'lib:Cabal', the performance difference between 'Map IfaceType' and
+'TrieMap IfaceType' was negligible.
+
+We share our implementation of the 'TrieMap' in the ticket #24816, so that
+further performance analysis and improvements don't need to start from scratch.
+-}
 
 type IfaceMult = IfaceType
 
@@ -2194,39 +2225,56 @@ ppr_parend_preds :: [IfacePredType] -> SDoc
 ppr_parend_preds preds = parens (fsep (punctuate comma (map ppr preds)))
 
 instance Binary IfaceType where
-    put_ _ (IfaceFreeTyVar tv)
-       = pprPanic "Can't serialise IfaceFreeTyVar" (ppr tv)
-           -- See Note [Free TyVars and CoVars in IfaceType]
+   put_ bh tyCon = case findUserDataWriter Proxy bh of
+    tbl -> putEntry tbl bh tyCon
 
-    put_ bh (IfaceForAllTy aa ab) = do
-            putByte bh 0
-            put_ bh aa
-            put_ bh ab
-    put_ bh (IfaceTyVar ad) = do
-            putByte bh 1
-            put_ bh ad
-    put_ bh (IfaceAppTy ae af) = do
-            putByte bh 2
-            put_ bh ae
-            put_ bh af
-    put_ bh (IfaceFunTy af aw ag ah) = do
-            putByte bh 3
-            put_ bh af
-            put_ bh aw
-            put_ bh ag
-            put_ bh ah
-    put_ bh (IfaceTyConApp tc tys)
-      = do { putByte bh 5; put_ bh tc; put_ bh tys }
-    put_ bh (IfaceCastTy a b)
-      = do { putByte bh 6; put_ bh a; put_ bh b }
-    put_ bh (IfaceCoercionTy a)
-      = do { putByte bh 7; put_ bh a }
-    put_ bh (IfaceTupleTy s i tys)
-      = do { putByte bh 8; put_ bh s; put_ bh i; put_ bh tys }
-    put_ bh (IfaceLitTy n)
-      = do { putByte bh 9; put_ bh n }
+   get bh = case findUserDataReader Proxy bh of
+    tbl -> getEntry tbl bh
 
-    get bh = do
+
+-- | Serialises an 'IfaceType' to the given 'WriteBinHandle'.
+--
+-- Serialising inner 'IfaceType''s uses the 'Binary.put' of 'IfaceType' which may be using
+-- a deduplication table. See Note [Deduplication during iface binary serialisation].
+putIfaceType :: WriteBinHandle -> IfaceType -> IO ()
+putIfaceType _ (IfaceFreeTyVar tv)
+  = pprPanic "Can't serialise IfaceFreeTyVar" (ppr tv)
+  -- See Note [Free TyVars and CoVars in IfaceType]
+
+putIfaceType bh (IfaceForAllTy aa ab) = do
+        putByte bh 0
+        put_ bh aa
+        put_ bh ab
+putIfaceType bh (IfaceTyVar ad) = do
+        putByte bh 1
+        put_ bh ad
+putIfaceType bh (IfaceAppTy ae af) = do
+        putByte bh 2
+        put_ bh ae
+        put_ bh af
+putIfaceType bh (IfaceFunTy af aw ag ah) = do
+        putByte bh 3
+        put_ bh af
+        put_ bh aw
+        put_ bh ag
+        put_ bh ah
+putIfaceType bh (IfaceTyConApp tc tys)
+  = do { putByte bh 5; put_ bh tc; put_ bh tys }
+putIfaceType bh (IfaceCastTy a b)
+  = do { putByte bh 6; put_ bh a; put_ bh b }
+putIfaceType bh (IfaceCoercionTy a)
+  = do { putByte bh 7; put_ bh a }
+putIfaceType bh (IfaceTupleTy s i tys)
+  = do { putByte bh 8; put_ bh s; put_ bh i; put_ bh tys }
+putIfaceType bh (IfaceLitTy n)
+  = do { putByte bh 9; put_ bh n }
+
+-- | Deserialises an 'IfaceType' from the given 'ReadBinHandle'.
+--
+-- Reading inner 'IfaceType''s uses the 'Binary.get' of 'IfaceType' which may be using
+-- a deduplication table. See Note [Deduplication during iface binary serialisation].
+getIfaceType :: HasCallStack => ReadBinHandle -> IO IfaceType
+getIfaceType bh = do
             h <- getByte bh
             case h of
               0 -> do aa <- get bh
diff --git a/compiler/GHC/StgToJS/Object.hs b/compiler/GHC/StgToJS/Object.hs
index 210c1a2803418fd0e817516e119d559b4eebcd69..f5319abbfca2478c40a772bd7ea8ec9c57032db3 100644
--- a/compiler/GHC/StgToJS/Object.hs
+++ b/compiler/GHC/StgToJS/Object.hs
@@ -314,7 +314,7 @@ putObject bh mod_name deps os = do
   put_ bh (moduleNameString mod_name)
 
   (fs_tbl, fs_writer) <- initFastStringWriterTable
-  let bh_fs = addWriterToUserData (mkSomeBinaryWriter fs_writer) bh
+  let bh_fs = addWriterToUserData fs_writer bh
 
   forwardPut_ bh (const (putTable fs_tbl bh_fs)) $ do
     put_ bh_fs deps
diff --git a/compiler/GHC/Utils/Binary.hs b/compiler/GHC/Utils/Binary.hs
index 4b33525a5eafb89218e2618feb24074a4838650f..dd206011d5ab9ff87d2c6d7560ebc7eddc2fd200 100644
--- a/compiler/GHC/Utils/Binary.hs
+++ b/compiler/GHC/Utils/Binary.hs
@@ -64,6 +64,8 @@ module GHC.Utils.Binary
    -- * Lazy Binary I/O
    lazyGet,
    lazyPut,
+   lazyGet',
+   lazyPut',
    lazyGetMaybe,
    lazyPutMaybe,
 
@@ -86,10 +88,17 @@ module GHC.Utils.Binary
    initFastStringReaderTable, initFastStringWriterTable,
    putDictionary, getDictionary, putFS,
    FSTable(..), getDictFastString, putDictFastString,
+   -- * Generic deduplication table
+   GenericSymbolTable(..),
+   initGenericSymbolTable,
+   getGenericSymtab, putGenericSymTab,
+   getGenericSymbolTable, putGenericSymbolTable,
    -- * Newtype wrappers
    BinSpan(..), BinSrcSpan(..), BinLocated(..),
    -- * Newtypes for types that have canonically more than one valid encoding
    BindingName(..),
+   simpleBindingNameWriter,
+   simpleBindingNameReader,
   ) where
 
 import GHC.Prelude
@@ -102,11 +111,11 @@ import GHC.Utils.Panic.Plain
 import GHC.Types.Unique.FM
 import GHC.Data.FastMutInt
 import GHC.Utils.Fingerprint
-import GHC.Utils.Misc (HasCallStack)
 import GHC.Types.SrcLoc
 import GHC.Types.Unique
 import qualified GHC.Data.Strict as Strict
 import GHC.Utils.Outputable( JoinPointHood(..) )
+import GHC.Utils.Misc ( HasCallStack, HasDebugCallStack )
 
 import Control.DeepSeq
 import Control.Monad            ( when, (<$!>), unless, forM_, void )
@@ -132,6 +141,7 @@ import Data.List (unfoldr)
 import System.IO as IO
 import System.IO.Unsafe         ( unsafeInterleaveIO )
 import System.IO.Error          ( mkIOError, eofErrorType )
+import Type.Reflection          ( Typeable, SomeTypeRep(..) )
 import qualified Type.Reflection as Refl
 import GHC.Real                 ( Ratio(..) )
 import Data.IntMap (IntMap)
@@ -142,6 +152,8 @@ import GHC.ForeignPtr           ( unsafeWithForeignPtr )
 
 import Unsafe.Coerce (unsafeCoerce)
 
+import GHC.Data.TrieMap
+
 type BinArray = ForeignPtr Word8
 
 #if !MIN_VERSION_base(4,15,0)
@@ -230,20 +242,28 @@ setReaderUserData bh us = bh { rbm_userData = us }
 -- | Add 'SomeBinaryReader' as a known binary decoder.
 -- If a 'BinaryReader' for the associated type already exists in 'ReaderUserData',
 -- it is overwritten.
-addReaderToUserData :: SomeBinaryReader -> ReadBinHandle -> ReadBinHandle
-addReaderToUserData cache@(SomeBinaryReader typRep _) bh = bh
+addReaderToUserData :: forall a. Typeable a => BinaryReader a -> ReadBinHandle -> ReadBinHandle
+addReaderToUserData reader bh = bh
   { rbm_userData = (rbm_userData bh)
-      { ud_reader_data = Map.insert (Refl.SomeTypeRep typRep) cache (ud_reader_data (rbm_userData bh))
+      { ud_reader_data =
+          let
+            typRep = Refl.typeRep @a
+          in
+            Map.insert (SomeTypeRep typRep) (SomeBinaryReader typRep reader) (ud_reader_data (rbm_userData bh))
       }
   }
 
 -- | Add 'SomeBinaryWriter' as a known binary encoder.
 -- If a 'BinaryWriter' for the associated type already exists in 'WriterUserData',
 -- it is overwritten.
-addWriterToUserData :: SomeBinaryWriter -> WriteBinHandle -> WriteBinHandle
-addWriterToUserData cache@(SomeBinaryWriter typRep _) bh = bh
+addWriterToUserData :: forall a . Typeable a => BinaryWriter a -> WriteBinHandle -> WriteBinHandle
+addWriterToUserData writer bh = bh
   { wbm_userData = (wbm_userData bh)
-      { ud_writer_data = Map.insert (Refl.SomeTypeRep typRep) cache (ud_writer_data (wbm_userData bh))
+      { ud_writer_data =
+          let
+            typRep = Refl.typeRep @a
+          in
+            Map.insert (SomeTypeRep typRep) (SomeBinaryWriter typRep writer) (ud_writer_data (wbm_userData bh))
       }
   }
 
@@ -1102,24 +1122,32 @@ forwardGet bh get_A = do
 -- Lazy reading/writing
 
 lazyPut :: Binary a => WriteBinHandle -> a -> IO ()
-lazyPut bh a = do
+lazyPut = lazyPut' put_
+
+lazyGet :: Binary a => ReadBinHandle -> IO a
+lazyGet = lazyGet' get
+
+lazyPut' :: HasDebugCallStack => (WriteBinHandle -> a -> IO ()) -> WriteBinHandle -> a -> IO ()
+lazyPut' f bh a = do
     -- output the obj with a ptr to skip over it:
     pre_a <- tellBinWriter bh
     put_ bh pre_a       -- save a slot for the ptr
-    put_ bh a           -- dump the object
+    f bh a           -- dump the object
     q <- tellBinWriter bh     -- q = ptr to after object
     putAt bh pre_a q    -- fill in slot before a with ptr to q
     seekBinWriter bh q        -- finally carry on writing at q
 
-lazyGet :: Binary a => ReadBinHandle -> IO a
-lazyGet bh = do
+lazyGet' :: HasDebugCallStack => (ReadBinHandle -> IO a) -> ReadBinHandle -> IO a
+lazyGet' f bh = do
     p <- get bh -- a BinPtr
     p_a <- tellBinReader bh
     a <- unsafeInterleaveIO $ do
-        -- NB: Use a fresh off_r variable in the child thread, for thread
+        -- NB: Use a fresh rbm_off_r variable in the child thread, for thread
         -- safety.
         off_r <- newFastMutInt 0
-        getAt bh { rbm_off_r = off_r } p_a
+        let bh' = bh { rbm_off_r = off_r }
+        seekBinReader bh' p_a
+        f bh'
     seekBinReader bh p -- skip over the object for now
     return a
 
@@ -1173,6 +1201,12 @@ lazyGetMaybe bh = do
 newtype BindingName = BindingName { getBindingName :: Name }
   deriving ( Eq )
 
+simpleBindingNameWriter :: BinaryWriter Name -> BinaryWriter BindingName
+simpleBindingNameWriter = coerce
+
+simpleBindingNameReader :: BinaryReader Name -> BinaryReader BindingName
+simpleBindingNameReader = coerce
+
 -- | Existential for 'BinaryWriter' with a type witness.
 data SomeBinaryWriter = forall a . SomeBinaryWriter (Refl.TypeRep a) (BinaryWriter a)
 
@@ -1184,7 +1218,7 @@ data SomeBinaryReader = forall a . SomeBinaryReader (Refl.TypeRep a) (BinaryRead
 -- See Note [Binary UserData]
 data WriterUserData =
    WriterUserData {
-      ud_writer_data :: Map Refl.SomeTypeRep SomeBinaryWriter
+      ud_writer_data :: Map SomeTypeRep SomeBinaryWriter
       -- ^ A mapping from a type witness to the 'Writer' for the associated type.
       -- This is a 'Map' because microbenchmarks indicated this is more efficient
       -- than other representations for less than ten elements.
@@ -1201,7 +1235,7 @@ data WriterUserData =
 -- See Note [Binary UserData]
 data ReaderUserData =
    ReaderUserData {
-      ud_reader_data :: Map Refl.SomeTypeRep SomeBinaryReader
+      ud_reader_data :: Map SomeTypeRep SomeBinaryReader
       -- ^ A mapping from a type witness to the 'Reader' for the associated type.
       -- This is a 'Map' because microbenchmarks indicated this is more efficient
       -- than other representations for less than ten elements.
@@ -1215,12 +1249,12 @@ data ReaderUserData =
 
 mkWriterUserData :: [SomeBinaryWriter] -> WriterUserData
 mkWriterUserData caches = noWriterUserData
-  { ud_writer_data = Map.fromList $ map (\cache@(SomeBinaryWriter typRep _) -> (Refl.SomeTypeRep typRep, cache)) caches
+  { ud_writer_data = Map.fromList $ map (\cache@(SomeBinaryWriter typRep _) -> (SomeTypeRep typRep, cache)) caches
   }
 
 mkReaderUserData :: [SomeBinaryReader] -> ReaderUserData
 mkReaderUserData caches = noReaderUserData
-  { ud_reader_data = Map.fromList $ map (\cache@(SomeBinaryReader typRep _) -> (Refl.SomeTypeRep typRep, cache)) caches
+  { ud_reader_data = Map.fromList $ map (\cache@(SomeBinaryReader typRep _) -> (SomeTypeRep typRep, cache)) caches
   }
 
 mkSomeBinaryWriter :: forall a . Refl.Typeable a => BinaryWriter a -> SomeBinaryWriter
@@ -1346,6 +1380,110 @@ newtype WriterTable = WriterTable
   -- ^ Serialise a table to disk. Returns the number of written elements.
   }
 
+-- ----------------------------------------------------------------------------
+-- Common data structures for constructing and maintaining lookup tables for
+-- binary serialisation and deserialisation.
+-- ----------------------------------------------------------------------------
+
+-- | The 'GenericSymbolTable' stores a mapping from already seen elements to an index.
+-- If an element wasn't seen before, it is added to the mapping together with a fresh
+-- index.
+--
+-- 'GenericSymbolTable' is a variant of a 'BinSymbolTable' that is polymorphic in the table implementation.
+-- As such it can be used with any container that implements the 'TrieMap' type class.
+--
+-- While 'GenericSymbolTable' is similar to the 'BinSymbolTable', it supports storing tree-like
+-- structures such as 'Type' and 'IfaceType' more efficiently.
+--
+data GenericSymbolTable m = GenericSymbolTable
+  { gen_symtab_next :: !FastMutInt
+  -- ^ The next index to use.
+  , gen_symtab_map  :: !(IORef (m Int))
+  -- ^ Given a symbol, find the symbol and return its index.
+  , gen_symtab_to_write :: !(IORef [Key m])
+  -- ^ Reversed list of values to write into the buffer.
+  -- This is an optimisation, as it allows us to write out quickly all
+  -- newly discovered values that are discovered when serialising 'Key m'
+  -- to disk.
+  }
+
+-- | Initialise a 'GenericSymbolTable', initialising the index to '0'.
+initGenericSymbolTable :: TrieMap m => IO (GenericSymbolTable m)
+initGenericSymbolTable = do
+  symtab_next <- newFastMutInt 0
+  symtab_map <- newIORef emptyTM
+  symtab_todo <- newIORef []
+  pure $ GenericSymbolTable
+        { gen_symtab_next = symtab_next
+        , gen_symtab_map  = symtab_map
+        , gen_symtab_to_write = symtab_todo
+        }
+
+-- | Serialise the 'GenericSymbolTable' to disk.
+--
+-- Since 'GenericSymbolTable' stores tree-like structures, such as 'IfaceType',
+-- serialising an element can add new elements to the mapping.
+-- Thus, 'putGenericSymbolTable' first serialises all values, and then checks whether any
+-- new elements have been discovered. If so, repeat the loop.
+putGenericSymbolTable :: forall m. (TrieMap m) => GenericSymbolTable m -> (WriteBinHandle -> Key m -> IO ()) -> WriteBinHandle -> IO Int
+{-# INLINE putGenericSymbolTable #-}
+putGenericSymbolTable gen_sym_tab serialiser bh = do
+  putGenericSymbolTable bh
+  where
+    symtab_next = gen_symtab_next gen_sym_tab
+    symtab_to_write = gen_symtab_to_write gen_sym_tab
+    putGenericSymbolTable :: WriteBinHandle -> IO Int
+    putGenericSymbolTable bh  = do
+      let loop = do
+            vs <- atomicModifyIORef' symtab_to_write (\a -> ([], a))
+            case vs of
+              [] -> readFastMutInt symtab_next
+              todo -> do
+                mapM_ (\n -> serialiser bh n) (reverse todo)
+                loop
+      snd <$>
+        (forwardPut bh (const $ readFastMutInt symtab_next >>= put_ bh) $
+          loop)
+
+-- | Read the elements of a 'GenericSymbolTable' from disk into a 'SymbolTable'.
+getGenericSymbolTable :: forall a . (ReadBinHandle -> IO a) -> ReadBinHandle -> IO (SymbolTable a)
+getGenericSymbolTable deserialiser bh = do
+  sz <- forwardGet bh (get bh) :: IO Int
+  mut_arr <- newArray_ (0, sz-1) :: IO (IOArray Int a)
+  forM_ [0..(sz-1)] $ \i -> do
+    f <- deserialiser bh
+    writeArray mut_arr i f
+  unsafeFreeze mut_arr
+
+-- | Write an element 'Key m' to the given 'WriteBinHandle'.
+--
+-- If the element was seen before, we simply write the index of that element to the
+-- 'WriteBinHandle'. If we haven't seen it before, we add the element to
+-- the 'GenericSymbolTable', increment the index, and return this new index.
+putGenericSymTab :: (TrieMap m) => GenericSymbolTable m -> WriteBinHandle -> Key m -> IO ()
+{-# INLINE putGenericSymTab #-}
+putGenericSymTab GenericSymbolTable{
+               gen_symtab_map = symtab_map_ref,
+               gen_symtab_next = symtab_next,
+               gen_symtab_to_write = symtab_todo }
+        bh val = do
+  symtab_map <- readIORef symtab_map_ref
+  case lookupTM val symtab_map of
+    Just off -> put_ bh (fromIntegral off :: Word32)
+    Nothing -> do
+      off <- readFastMutInt symtab_next
+      writeFastMutInt symtab_next (off+1)
+      writeIORef symtab_map_ref
+          $! insertTM val off symtab_map
+      atomicModifyIORef symtab_todo (\todo -> (val : todo, ()))
+      put_ bh (fromIntegral off :: Word32)
+
+-- | Read a value from a 'SymbolTable'.
+getGenericSymtab :: Binary a => SymbolTable a -> ReadBinHandle -> IO a
+getGenericSymtab symtab bh = do
+  i :: Word32 <- get bh
+  return $! symtab ! fromIntegral i
+
 ---------------------------------------------------------
 -- The Dictionary
 ---------------------------------------------------------
diff --git a/utils/haddock b/utils/haddock
index a711607e29b925f3d69e27c5fde4ba655c711ff1..c9bc29c6a708483d2abc3d8ec9262510ce87ca61 160000
--- a/utils/haddock
+++ b/utils/haddock
@@ -1 +1 @@
-Subproject commit a711607e29b925f3d69e27c5fde4ba655c711ff1
+Subproject commit c9bc29c6a708483d2abc3d8ec9262510ce87ca61