Compare revisions

08e33edf · 08e33edf · 08e33edf · 08e33edf · 08e33edf · 08e33edf
--- a/compiler/supercompile/Supercompile/Drive/Process2.hs
+++ b/compiler/supercompile/Supercompile/Drive/Process2.hs
+{-# LANGUAGE GADTs, RankNTypes, GeneralizedNewtypeDeriving #-}
+module Supercompile.Drive.Process2 (supercompile) where
+
+import Supercompile.Drive.Match
+import Supercompile.Drive.Split
+import Supercompile.Drive.Process
+
+import Supercompile.Core.FreeVars
+import Supercompile.Core.Renaming
+import Supercompile.Core.Syntax
+import Supercompile.Core.Tag
+
+import Supercompile.Evaluator.Deeds
+import Supercompile.Evaluator.Residualise
+import Supercompile.Evaluator.Syntax
+
+import Supercompile.Termination.TagBag (stateTags)
+import Supercompile.Termination.Combinators
+
+import Supercompile.Utilities
+
+import Id         (mkLocalId)
+import Name       (Name, mkSystemVarName)
+import FastString (mkFastString)
+import qualified State as State
+
+import qualified Data.Map as M
+import Data.Monoid (mempty)
+
+
+data LeafTy a
+
+data DelayStructure sh f where
+    Leaf   :: f a -> DelayStructure (LeafTy a) f
+    Branch :: DelayStructure sh1 f -> DelayStructure sh2 f -> DelayStructure (sh1, sh2) f
+
+
+--newtype I a = I { unI :: a }
+newtype QM m a = QM { unQM :: m (DelayM m a) }
+
+-- If you don't want DelayM to have Monad structure, you can nuke the nested use of DelayM,
+-- and make some of the consumers simpler. I actually want this generalisation, though.
+data DelayM m r = Done r
+                | forall sh. Delayed (DelayStructure sh (QM m)) (DelayStructure sh Identity -> DelayM m r)
+
+instance Functor (DelayM m) where
+    fmap f x = pure f <*> x
+
+instance Applicative (DelayM m) where
+    pure = return
+    Done f         <*> Done x         = Done (f x)
+    Delayed qs k   <*> Done x         = Delayed qs (\as -> k as <*> Done x)
+    Done f         <*> Delayed qs k   = Delayed qs (\as -> Done f <*> k as)
+    Delayed qs1 k1 <*> Delayed qs2 k2 = Delayed (Branch qs1 qs2) (\(Branch as1 as2) -> k1 as1 <*> k2 as2)
+
+instance Monad (DelayM m) where
+    return = Done
+    Done x       >>= fxmy = fxmy x
+    Delayed qs k >>= fxmy = Delayed qs (\as -> k as >>= fxmy)
+
+delay :: m (DelayM m a) -> DelayM m a
+delay q = Delayed (Leaf (QM q)) (\(Leaf (I a)) -> pure a)
+
+runDelayM :: (Applicative m, Monad m)
+          => (DelayM m r -> DelayM m r) -- ^ Chooses the evaluation strategy
+          -> DelayM m r -> m r
+runDelayM choose_some = go
+  where
+    go = go' . choose_some
+    
+    go' (Done x)       = pure x
+    go' (Delayed qs k) = mungeDS qs >>= \mx -> go (mx >>= k)
+
+fmapNT :: Applicative m
+       => (forall a. f a -> m (g a))
+       -> DelayStructure sh f
+       -> m (DelayStructure sh g)
+fmapNT f (Leaf x)         = fmap Leaf (f x)
+fmapNT f (Branch qs1 qs2) = liftA2 Branch (fmapNT f qs1) (fmapNT f qs2)
+
+mungeDS :: Applicative n
+        => DelayStructure sh (QM n)
+        -> n (DelayM n (DelayStructure sh Identity))
+mungeDS = unComp . fmapNT (Comp . fmap (fmap I) . unQM)
+{-
+mungeDS (Leaf (QM mx))   = fmap (fmap (Leaf . I)) mx
+mungeDS (Branch qs1 qs2) = liftA2 (liftA2 Branch) (mungeDS qs1) (mungeDS qs2)
+-}
+
+delayDS :: DelayStructure sh (QM n)
+        -> DelayM n (DelayStructure sh Identity)
+delayDS = fmapNT (fmap I . delay . unQM)
+{-
+delayDS (Leaf (QM mx))   = fmap (Leaf . I) (delay mx)
+delayDS (Branch qs1 qs2) = liftA2 Branch (delayDS qs1) (delayDS qs2)
+-}
+
+depthFirst :: DelayM m r -> DelayM m r
+depthFirst (Done x)       = Done x
+depthFirst (Delayed qs k) = delayTail qs >>= k
+  where
+    delayTail :: DelayStructure sh (QM m) -> DelayM m (DelayStructure sh Identity)
+    delayTail (Leaf (QM q))    = fmap (Leaf . I) (delay q)
+    delayTail (Branch qs1 qs2) = liftM2 Branch (delayTail qs1) (delayDS qs2)
+
+breadthFirst :: DelayM m r -> DelayM m r
+breadthFirst = id
+
+
+delayStateT :: Functor m
+            => (forall a. m (n a) -> n a)
+            -> m (StateT s n a) -> StateT s n a
+delayStateT delay mx = StateT $ \s -> delay (fmap (($ s) . unStateT) mx)
+
+{-
+-- NB: you can't implement this for all monad transformers
+-- (in particular the continuation monad transformer).
+-- But if you can, we can derive a delayStateT equivalent from it:
+fiddle :: (forall b. m b -> n b)
+       -> StateT s m a -> StateT s n a
+fiddle f mx = ST $ \s -> f (unStateT mx s)
+
+
+lifty :: Monad m => m a -> DelayM m a
+lifty = delay . liftM return
+
+mx                                  :: m (StateT s (DelayM m) a)
+liftStateT mx                       :: StateT s m (StateT s (DelayM m) a)
+fiddle lifty                        :: forall a. Monad m => StateT s m a -> StateT s (DelayM m) a
+fiddle lifty (liftStateT mx)        :: Monad m => StateT s (DelayM m) (StateT s (DelayM m) a)
+join (fiddle lifty (liftStateT mx)) :: Monad m => StateT s (DelayM m) a
+
+Therefore:
+
+delayStateT :: Monad m => m (StateT s (DelayM m) a) -> StateT s (DelayM m) a
+delayStateT = join . fiddle lifty . liftStateT
+-}
+
+
+delayContT :: Functor m
+           => (forall a. m (n a) -> n a)
+           -> m (ContT r n b) -> ContT r n b
+delayContT delay mx = ContT $ \k -> delay (fmap (flip unContT k) mx)
+
+callCCish :: (Applicative n, Applicative m) => ((b -> a -> ContT (n r) m b) -> ContT (n r) m a) -> ContT (n r) m a
+callCCish f = ContT $ \k -> unContT (f (\b a -> ContT $ \k' -> liftA2 (*>) (k' b) (k a))) k
+
+
+delayReaderT :: Functor m
+             => (forall a. m (n a) -> n a)
+            -> m (ReaderT r n a) -> ReaderT r n a
+delayReaderT delay mx = ReaderT $ \r -> delay (fmap (($ r) . unReaderT) mx)
+
+liftCallCCReaderT :: (((forall b. a -> m b)           -> m a)           -> m a)
+                  ->  ((forall b. a -> ReaderT r m b) -> ReaderT r m a) -> ReaderT r m a
+liftCallCCReaderT call_cc f = ReaderT $ \r -> call_cc $ \c -> runReaderT r (f (ReaderT . const . c))
+
+
+newtype RollbackScpM = RB { doRB :: LevelM (Deeds, Out FVedTerm) -> LevelM (Deeds, Out FVedTerm) -> ProcessM (LevelM (Deeds, Out FVedTerm)) }
+
+
+type ProcessHistory = GraphicalHistory (NodeKey, (State, RollbackScpM)) -- TODO: GraphicalHistory
+
+pROCESS_HISTORY :: ProcessHistory
+pROCESS_HISTORY = mkGraphicalHistory (cofmap fst wQO)
+
+type HistoryEnvM = (->) ProcessHistory
+
+runHistoryEnvM :: HistoryEnvM a -> a
+runHistoryEnvM = flip ($) pROCESS_HISTORY
+
+type HistoryThreadM = State.State ProcessHistory
+
+withHistory :: (ProcessHistory -> (ProcessHistory, a)) -> HistoryThreadM a
+withHistory f = State.state (swap . f)
+  where swap = uncurry (flip (,))
+
+runHistoryThreadM :: HistoryThreadM a -> a
+runHistoryThreadM = flip State.evalState pROCESS_HISTORY
+
+
+type Parent = NodeKey
+
+terminateM :: Parent -> State -> RollbackScpM -> (Parent -> a) -> (Parent -> State -> RollbackScpM -> ProcessM a) -> ProcessM a
+terminateM parent state rb k_continue k_stop = withHistory' $ \hist -> trace (show hist) $ case terminate hist (parent, (state, rb)) of
+    Continue hist'                                     -> return (hist', k_continue (generatedKey hist'))
+    Stop (shallow_parent, (shallow_state, shallow_rb)) -> liftM ((,) hist) $ k_stop shallow_parent shallow_state shallow_rb
+  where
+    withHistory' :: (ProcessHistory -> ProcessM (ProcessHistory, a)) -> ProcessM a
+    withHistory' act = lift (lift State.get) >>= \hist -> act hist >>= \(hist', x) -> lift (lift (State.put hist')) >> return x
+
+
+data Promise = P {
+    fun        :: Var,      -- Name assigned in output program
+    abstracted :: [AbsVar], -- Abstracted over these variables
+    meaning    :: State     -- Minimum adequate term
+  }
+
+data MemoState = MS {
+    promises :: [Promise],
+    hNames   :: Stream Name
+  }
+
+type MemoT = StateT MemoState
+
+runMemoT :: Functor m => MemoT m a -> m a
+runMemoT mx = fmap fst $ unStateT mx MS { promises = [], hNames = h_names }
+  where h_names = listToStream $ zipWith (\i uniq -> mkSystemVarName uniq (mkFastString ('h' : show (i :: Int))))
+                                         [1..] (uniqsFromSupply hFunctionsUniqSupply)
+
+
+newtype FulfilmentState = FS {
+    fulfilments :: M.Map Var FVedTerm
+  }
+
+type FulfilmentT = StateT FulfilmentState
+
+fulfill :: Monad m => Promise -> (Deeds, FVedTerm) -> FulfilmentT m (Deeds, FVedTerm)
+fulfill p (deeds, e_body) = StateT $ \fs ->
+  let fs' | fun p `M.member` fulfilments fs = fs
+          | otherwise                       = FS { fulfilments = M.insert (fun p) (absVarLambdas (abstracted p) e_body) (fulfilments fs) }
+  in return ((deeds, applyAbsVars (fun p) Nothing (abstracted p)), fs')
+
+runFulfilmentT :: Monad m => FulfilmentT m FVedTerm -> m FVedTerm
+runFulfilmentT mx = liftM (\(e, fs) -> letRec (M.toList (fulfilments fs)) e) $ unStateT mx (FS { fulfilments = M.empty })
+
+
+promise :: State -> MemoState -> (Promise, MemoState)
+promise state ms = (p, ms')
+  where (vs_list, h_ty) = stateAbsVars Nothing state
+        h_name :< h_names' = hNames ms
+        x = mkLocalId h_name h_ty
+        p = P {
+            fun        = x,
+            abstracted = vs_list,
+            meaning    = state
+          }
+        ms' = MS {
+            promises = p : promises ms,
+            hNames   = h_names'
+          }
+
+instance MonadStatics LevelM where
+    --bindCapturedFloats fvs mx | isEmptyVarSet fvs = liftM ((,) []) mx
+    --                          | otherwise         = pprPanic "bindCapturedFloats: does not support statics" (ppr fvs)
+    bindCapturedFloats _fvs mx = liftM ((,) []) mx -- FIXME: do something other than hope for the best
+    monitorFVs = liftM ((,) emptyVarSet)
+
+memo :: (MonadTrans t1, Monad (t1 (MemoT t2)), Applicative t2, Monad t2)
+     => (State -> t1 (MemoT t2) (LevelM (Deeds, Out FVedTerm)))
+     ->  State -> t1 (MemoT t2) (LevelM (Deeds, Out FVedTerm))
+memo opt state = do
+  mb_res <- lift $ StateT $ \ms ->
+    -- NB: If tb contains a dead PureHeap binding (hopefully impossible) then it may have a free variable that
+     -- I can't rename, so "rename" will cause an error. Not observed in practice yet.
+    case [ (p, (releaseStateDeed state, applyAbsVars (fun p) (Just (mkRenaming rn_lr)) (abstracted p)))
+         | p <- promises ms
+         , Just rn_lr <- [(\res -> if isNothing res then pprTraceSC "no match:" (ppr (fun p)) res else res) $
+                          match (meaning p) state]
+         ] of (p, res):_ -> pure (Right (do { traceRenderScpM "=sc" (fun p, PrettyDoc (pPrintFullState fullStatePrettiness state), res)
+                                            ; return res }), ms)
+              _          -> pure (Left p, ms')
+                where (p, ms') = promise state ms
+  case mb_res of
+    Right res -> return res
+    Left p  -> flip liftM (opt state) $ \mres ->
+                   (do { traceRenderScpM ">sc" (fun p, PrettyDoc (pPrintFullState quietStatePrettiness state))
+                       ; res <- mres
+                       ; traceRenderScpM "<sc" (fun p, PrettyDoc (pPrintFullState quietStatePrettiness state), res)
+                       ; fulfill p res })
+
+type SpecT = ReaderT AlreadySpeculated
+
+runSpecT :: SpecT m a -> m a
+runSpecT = runReaderT nothingSpeculated
+
+speculated :: State -> (State -> SpecT m a) -> SpecT m a
+speculated s k = ReaderT $ \already -> case speculate already (mempty, s) of (already, (_stats, s')) -> unReaderT (k s') already
+
+liftSpeculatedStateT :: (forall a. State -> (State -> m a)        -> m a)
+                     ->  State -> (State -> StateT s m a) -> StateT s m a
+liftSpeculatedStateT speculated state k = StateT $ \s -> speculated state (\state' -> unStateT (k state') s)
+
+
+type LevelM = FulfilmentT (SpecT ScpM)
+
+-- NB: monads *within* the ContT are persistent over a rollback. Ones outside get reset.
+type ProcessM = ContT (FulfilmentT Identity (Out FVedTerm)) (MemoT HistoryThreadM)
+type ScpM = DelayM ProcessM
+
+traceRenderScpM :: (Outputable a, Applicative t) => String -> a -> t ()
+traceRenderScpM msg x = pprTraceSC msg (pPrint x) $ pure () -- TODO: include depth, refine to ScpM monad only
+
+runScpM :: (Applicative m, Monad m) => m (DelayM m a) -> m a
+runScpM mx = mx >>= runDelayM eval_strat
+  where
+    -- Doing things this way prevents GHC bleating about depthFirst being unused
+    eval_strat | False     = depthFirst
+               | otherwise = breadthFirst
+
+
+-- callCC :: ((forall b. a -> ContT r m b) -> ContT r m a) -> ContT r m a
+-- callCC f = ContT $ \k -> unContT (f (\a -> ContT $ \_k -> k a)) k
+-- newtype ContT r m a = ContT { unContT :: (a -> m r) -> m r }
+
+
+sc' :: Parent -> State -> ProcessM (LevelM (Deeds, Out FVedTerm))
+sc' parent state = callCCish (\k -> try (RB k))
+  where
+    trce how shallow_state = pprTraceSC ("sc-stop(" ++ how ++ ")") (ppr (stateTags shallow_state) <+> text "<|" <+> ppr (stateTags state) $$
+                                                                    pPrintFullState fullStatePrettiness shallow_state $$ pPrintFullState fullStatePrettiness state)
+    try :: RollbackScpM -> ProcessM (LevelM (Deeds, Out FVedTerm))
+    try rb = terminateM parent state rb
+               (\parent -> liftSpeculatedStateT speculated state $ \state' -> my_split (reduce state') (sc'' parent))
+               -- (\_ shallow_state _ -> return $ maybe (trce "split" shallow_state $ split state) (trce "gen" shallow_state) (generalise (mK_GENERALISER shallow_state state) state) (delayStateT (delayReaderT delay) . sc parent))
+               (\shallow_parent shallow_state shallow_rb -> trace "rb" $ doRB shallow_rb (my_split state (sc'' parent))
+                                                                                         (maybe (trce "split" shallow_state $ my_split shallow_state) (trce "gen" shallow_state) (my_generalise (mK_GENERALISER shallow_state state) shallow_state) (sc'' shallow_parent)))
+    
+    sc'' :: Parent -> State -> LevelM (Deeds, Out FVedTerm)
+    sc'' parent = delayStateT (delayReaderT delay) . sc parent
+
+    my_generalise gen = liftM (\splt -> liftM (\(_, deeds, e') -> (deeds, e')) . splt) . generalise gen
+    my_split      opt =                 liftM (\(_, deeds, e') -> (deeds, e')) . split opt
+
+sc :: Parent -> State -> ProcessM (LevelM (Deeds, Out FVedTerm))
+sc parent = memo (sc' parent) . gc -- Garbage collection necessary because normalisation might have made some stuff dead
+
+foo :: LevelM (Out FVedTerm) -> ScpM (FulfilmentT Identity (Out FVedTerm))
+foo = undefined
+
+supercompile :: M.Map Var Term -> Term -> Term
+supercompile unfoldings e = fVedTermToTerm $ bindManyMixedLiftedness fvedTermFreeVars to_bind $ unI $ runFulfilmentT $ runHistoryThreadM $ runMemoT $ runContT $ runScpM $ liftM (foo . fmap snd) $ sc 0 state
+  where (_, (to_bind, _preinit_with, state), _) = prepareTerm unfoldings e
--- a/compiler/supercompile/Supercompile/Drive/Process3.hs
+++ b/compiler/supercompile/Supercompile/Drive/Process3.hs
+{-# LANGUAGE GeneralizedNewtypeDeriving, RankNTypes, ImpredicativeTypes #-}
+module Supercompile.Drive.Process3 (supercompile) where
+
+--import Supercompile.Drive.Match
+import Supercompile.Drive.MSG
+import Supercompile.Drive.Split2
+import Supercompile.Drive.Process
+
+import Supercompile.Core.FreeVars
+import Supercompile.Core.Renaming
+import Supercompile.Core.Size (fvedTermSize)
+import Supercompile.Core.Syntax
+import Supercompile.Core.Tag
+
+import Supercompile.Evaluator.Deeds
+import Supercompile.Evaluator.Residualise
+import Supercompile.Evaluator.Syntax
+import Supercompile.Evaluator.FreeVars
+
+--import Supercompile.Termination.Generaliser (Generaliser)
+import Supercompile.Termination.TagBag (stateTags)
+import Supercompile.Termination.Combinators hiding (generatedKey)
+import qualified Supercompile.Termination.Combinators as Combinators
+
+import Supercompile.StaticFlags
+import Supercompile.Utilities
+
+import Type       (typeSize, isTyVarTy)
+import Coercion   (coercionSize, getCoVar_maybe)
+import Var        (varName)
+import Id         (Id, mkLocalId)
+import MkId       (nullAddrId)
+import Name       (Name, mkSystemVarName, getOccString)
+import FastString (mkFastString)
+import Util       (sndOf3)
+import Pair
+import VarEnv     (varEnvElts)
+
+import Control.Monad (join)
+
+import Data.Function (on)
+import qualified Data.Set as S
+import qualified Data.Map as M
+import Data.Monoid (mempty)
+
+
+{--}
+type RollbackState = ScpM (ResidTags, Deeds, Out FVedTerm)
+type ProcessHistory = GraphicalHistory (NodeKey, (String, State, forall b. RollbackState -> ScpM b))
+
+pROCESS_HISTORY :: ProcessHistory
+pROCESS_HISTORY = mkGraphicalHistory (cofmap sndOf3 wQO)
+
+generatedKey :: ProcessHistory -> NodeKey
+generatedKey = Combinators.generatedKey
+{--}
+{-
+type ProcessHistory = LinearHistory (NodeKey, State)
+
+pROCESS_HISTORY :: ProcessHistory
+pROCESS_HISTORY = mkLinearHistory (cofmap snd wQO)
+
+generatedKey :: ProcessHistory -> NodeKey
+generatedKey _ = 0
+-}
+
+data Promise = P {
+    fun        :: Var,      -- Name assigned in output program
+    abstracted :: [AbsVar], -- Abstracted over these variables
+    meaning    :: State,    -- Minimum adequate term
+    dumped     :: Bool      -- Already rolled back, and hence inaccessible?
+  }
+
+
+appendHead :: [b] -> Train (a, [b]) [b] -> Train (a, [b]) [b]
+appendHead ys1 (Car (x, ys2) zs) = (x, ys1 ++ ys2) `Car` zs
+appendHead ys1 (Loco ys2)        = Loco (ys1 ++ ys2)
+
+leftExtension :: Train (Promise, a) b -- ^ Longer list
+              -> Train (Promise, a) b -- ^ Shorter list
+              -> Maybe ([(Promise, a)], Train (Promise, a) b) -- Pair of the prefix present in the longer list and the common suffix (== shorter list)
+leftExtension = trainLeftExtensionBy (\orig@(p1, _) (p2, _) -> if fun p1 == fun p2 then Just orig else Nothing) (\b1 _b2 -> b1)
+    -- We can only roll back to direct ancestors, or we risk loops/other madness
+
+
+data MemoState = MS {
+    promises :: Train (Promise, [Promise]) [Promise], -- (parent, siblings) pairs, with those closest to current level first
+    hNames   :: Stream Name
+  }
+
+promise :: MemoState -> (State, State) -> (MemoState, Promise)
+promise ms (state, reduced_state) = (ms', p)
+  where -- NB: because we stopped garbage-collecting in reduceForMatch, we need to garbage
+        -- collect here to ensure we mark as dead any lambda binders we won't be able to
+        -- determine a renaming for because they are dead.
+        --
+        -- If we don't do this then renameAbsVar will panic when it tries to lookup the renamed
+        -- version of a live variable.
+        (vs_list, h_ty) = stateAbsVars (Just (stateLambdaBounders (gc reduced_state))) state
+        h_name :< h_names' = hNames ms
+        x = mkLocalId h_name h_ty
+        p = P {
+            fun        = x,
+            -- We mark as dead any of those variables that are not in the stateLambdaBounders of
+            -- the *reduced* state. This serves two purposes:
+            --   1. The tieback we do right here can supply dummy values to those parameters rather
+            --      than applying the free variables. This may make some bindings above us dead.
+            --
+            --   2. We can get rid of the code in renameAbsVar that downgrades live AbsVars to dead
+            --      ones if they are not present in the renaming: only dead AbsVars are allowed to
+            --      be absent in the renaming.
+            abstracted = vs_list,
+            meaning    = reduced_state,
+            dumped     = False
+          }
+        ms' = MS {
+            promises = (p, []) `Car` promises ms, -- Establishes a new level in the process tree
+            hNames   = h_names'
+          }
+
+
+newtype FulfilmentState = FS {
+    fulfilments :: [(Var, FVedTerm)]
+  }
+
+fulfill :: (Deeds, FVedTerm) -> FulfilmentState -> MemoState -> ((Deeds, FVedTerm), FulfilmentState, MemoState)
+fulfill (deeds, e_body) fs ms
+  = ((deeds, applyAbsVars (fun p) Nothing (abstracted p)),
+     refulfill e_body fs p,
+     ms { promises = appendHead (p:children) promises' })
+  where (p, children) `Car` promises' = promises ms
+
+-- NB: we filter out from the existing fulfilments because if we are doing type generalisation during
+-- matching then we will fulfill the same promise twice. Either:
+--  1. We fulfil an on-stack promise from the type generalisation, and then later when unrolling the stack
+--  2. OR we fulfil a promise from supercompilation, and then later overwrite it when we find a type generalisation
+--
+-- FIXME: should prefer the existing promise in case 1 for slightly better code.
+refulfill :: FVedTerm -> FulfilmentState -> Promise -> FulfilmentState
+refulfill e_body fs p = FS { fulfilments = (fun p, absVarLambdas (abstracted p) e_body) : filter ((/= fun p) . fst) (fulfilments fs) }
+
+
+type StopCount = Int
+
+data ScpState = ScpState {
+    scpMemoState :: MemoState,
+    scpProcessHistoryState :: ProcessHistory,
+    scpFulfilmentState :: FulfilmentState,
+    -- Debugging aids below this line:
+    scpResidTags :: ResidTags,
+    scpParentChildren :: ParentChildren
+  }
+
+data ScpEnv = ScpEnv {
+    scpProcessHistoryEnv :: ProcessHistory,
+    scpStopCount :: StopCount,
+    scpNodeKey :: NodeKey,
+    scpParents :: [Var],
+    scpAlreadySpeculated :: AlreadySpeculated,
+    -- Debugging aids below this line:
+    scpTagAnnotations :: TagAnnotations
+  }
+
+type ScpResType = (FVedTerm, ScpState)
+
+newtype ScpM a = ScpM { unScpM :: StateT ScpState
+                                         (ReaderT ScpEnv (ContT ScpResType Identity)) a }
+               deriving (Functor, Applicative, Monad)
+
+{-
+instance MonadStatics ScpM where
+    bindCapturedFloats _fvs mx = liftM ((,) []) mx -- FIXME: do something other than hope for the best
+    monitorFVs = liftM ((,) emptyVarSet)
+-}
+
+withScpEnv :: (ScpEnv -> ScpEnv) -> ScpM a -> ScpM a
+withScpEnv f mx = ScpM $ StateT $ \s -> ReaderT $ \env -> unReaderT (unStateT (unScpM mx) s) (f env)
+
+runScpM :: TagAnnotations -> ScpM FVedTerm -> FVedTerm
+runScpM tag_anns me = fvedTermSize e' `seq` trace ("Deepest path:\n" ++ showSDoc (deepestPath fulfils (scpParentChildren s')) ++
+                                                   "\nDepth histogram:\n" ++ showSDoc (depthHistogram (scpParentChildren s'))) e'
+  where h_names = listToStream $ zipWith (\i uniq -> mkSystemVarName uniq (mkFastString ('h' : show (i :: Int))))
+                                         [1..] (uniqsFromSupply hFunctionsUniqSupply)
+        ms = MS { promises = Loco [], hNames = h_names }
+        hist = pROCESS_HISTORY
+        fs = FS { fulfilments = [] }
+        parent = generatedKey hist
+        (e, s') = unI $ runContT $ unReaderT (unStateT (unScpM me) (ScpState ms hist fs emptyResidTags emptyParentChildren)) (ScpEnv hist 0 parent [] nothingSpeculated tag_anns)
+        fulfils = fulfilments (scpFulfilmentState s')
+        e' = letRec fulfils e
+
+
+outputFreeVars :: ScpM [Id]
+outputFreeVars = ScpM $ StateT $ \s -> let (pss, ps) = trainToList (promises (scpMemoState s))
+                                       in return (varSetElems extraOutputFvs ++ concatMap (\(p, ps) -> fun p : map fun ps) pss ++ map fun ps, s)
+
+callCCM :: ((forall b. a -> ScpM b) -> ScpM a) -> ScpM a
+callCCM act = ScpM $ StateT $ \s -> ReaderT $ \env -> callCC (\jump_back -> unReaderT (unStateT (unScpM (act (\a -> ScpM $ StateT $ \s' -> ReaderT $ \_ -> case s' `rolledBackTo` s of Just s'' -> jump_back (a, s''); Nothing -> error "callCCM: rolledBackTo failed"))) s) env)
+
+
+-- Thinking about a rollback operator suitable for type gen in "memo":
+--callCCM' :: (forall r. (a -> ScpM ()) -> ScpM' r b)
+--         -> (b -> ScpM a) -> ScpM a
+--callCCM' mx act = ScpM $ StateT $ \s -> ReaderT $ \env -> callCC (\jump_back -> let mk_rb s_upd = (\a -> ScpM $ StateT $ \s' -> ReaderT $ \_ -> case s' `rolledBackTo` s_upd of Just s'' -> jump_back (a, s''); Nothing -> return ((), s'))
+--                                                                                in unReaderT (unStateT (unScpM (fmap snd (mfix (\(~(rb, _b) -> do { b <- mx rb; s_upd <- get; return (mk_rb s_upd, b) ))) >>= act)) (s { scpRollbackUniques = scpRollbackUniques s + 1 })) env)
+
+catchM :: ((forall b. c -> ScpM b) -> ScpM a) -- ^ Action to try: supplies a function than can be called to "raise an exception". Raising an exception restores the original ScpEnv and ScpState
+       -> (c -> ScpM a)                       -- ^ Handler deferred to if an exception is raised
+       -> ScpM a                              -- ^ Result from either the main action or the handler
+catchM try handler = do
+    ei_exc_res <- callCCM $ \jump_back -> fmap Right (try (jump_back . Left))
+    case ei_exc_res of
+      Left exc  -> handler exc
+      Right res -> return res
+
+rolledBackTo :: ScpState -> ScpState -> Maybe ScpState
+rolledBackTo s' s = case on leftExtension (promises . scpMemoState) s' s of
+  -- NB: we check scpRolledBack to ensure that rollback is *one-shot*, or else sc-rollback is dangerous for termination
+  Just (dangerous_promises, ok_promises) -> Just $
+   let -- We have to roll back any promise on the "stack" above us:
+       (spine_rolled_back, possibly_rolled_back) = (second concat) $ unzip dangerous_promises
+       -- NB: rolled_back includes names of both unfulfilled promises rolled back from the stack and fulfilled promises that have to be dumped as a result
+       (rolled_fulfilments, rolled_back) = pruneFulfilments (scpFulfilmentState s') (mkVarSet (map fun spine_rolled_back))
+ 
+       pruneFulfilments :: FulfilmentState -> VarSet -> (FulfilmentState, VarSet)
+       pruneFulfilments (FS fulfilments) rolled_back
+         | null dump = (if isEmptyVarSet rolled_back then id else pprTraceSC ("dumping " ++ show (sizeVarSet rolled_back) ++ " promises/fulfilments:") (ppr (map fun spine_rolled_back, rolled_back)))
+                       (FS fulfilments, rolled_back)
+         | otherwise = pruneFulfilments (FS keep) (rolled_back `unionVarSet` mkVarSet (map fst dump))
+         where (dump, keep) = partition (\(_, e) -> fvedTermFreeVars e `intersectsVarSet` rolled_back) fulfilments
+   in trace (replicate (length spine_rolled_back) '}') $ ScpState {
+       scpMemoState = MS {
+           -- The most recent promise in s' always has no work done on it, so don't report dumping for it
+           promises = appendHead [if fun p `elemVarSet` rolled_back then p { dumped = True } else p | p <- safeTail spine_rolled_back ++ possibly_rolled_back] ok_promises,
+           hNames   = hNames (scpMemoState s')
+         },
+       scpProcessHistoryState = scpProcessHistoryState s,
+       scpFulfilmentState     = rolled_fulfilments,
+       scpResidTags           = scpResidTags s', -- FIXME: not totally accurate
+       scpParentChildren      = scpParentChildren s'
+     }
+  _ -> pprTrace "rollback failed" (on (curry ppr) (fmapTrain (map fun . uncurry (:)) (map fun) . promises . scpMemoState) s' s) Nothing
+
+scpDepth :: ScpEnv -> Int
+scpDepth = length . scpParents
+
+traceRenderM :: Outputable a => String -> a -> ScpM ()
+traceRenderM msg x
+  | tRACE     = ScpM $ StateT $ \s -> ReaderT $ \env -> pprTraceSC (replicate (scpDepth env) ' ' ++ msg) (pPrint x) $ pure ((), s)
+  | otherwise = return ()
+
+addParentM :: Promise -> (State -> ScpM (Bool, (Deeds, FVedTerm))) -> State -> ScpM (Deeds, FVedTerm)
+addParentM p opt state = ScpM $ StateT $ \s -> ReaderT $ add_parent s
+  where
+    add_parent s env
+      | maybe False (scpDepth env >=) dEPTH_LIIMT
+      , let (deeds, _statics, e, _gen) = residualiseState state
+      = return ((deeds, e), s)
+      | otherwise
+      = trace ("depth: " ++ show (scpDepth env) ++ ' ' : showSDoc (parens (hsep (map ppr (scpParents env))))) $
+        unReaderT (unStateT (unScpM (opt state)) s)
+                  (env { scpParents = fun p : scpParents env }) >>= \((gen, res), s') -> return (res, s' { scpParentChildren = addChild (safeHead (scpParents env)) (fun p) (meaning p) gen (scpParentChildren s') })
+
+fulfillM :: (Deeds, FVedTerm) -> ScpM (Deeds, FVedTerm)
+fulfillM res = ScpM $ StateT $ \s -> case fulfill res (scpFulfilmentState s) (scpMemoState s) of (res', fs', ms') -> return (res', s { scpFulfilmentState = fs', scpMemoState = ms' })
+
+refulfillM :: Promise -> FVedTerm -> ScpM ()
+refulfillM p e' = ScpM $ StateT $ \s -> return ((), s { scpFulfilmentState = refulfill e' (scpFulfilmentState s) p })
+
+terminateM :: String -> State -> (forall b. RollbackState -> ScpM b) -> ScpM a -> (String -> State -> (forall b. RollbackState -> ScpM b) -> ScpM a) -> ScpM a
+terminateM h state rb mcont mstop = ScpM $ StateT $ \s -> ReaderT $ \env -> case ({-# SCC "terminate" #-} terminate (if hISTORY_TREE then scpProcessHistoryEnv env else scpProcessHistoryState s) (scpNodeKey env, (h, state, rb))) of
+        Stop (_, (shallow_h, shallow_state, shallow_rb))
+          -> trace ("stops: " ++ show (scpStopCount env)) $
+             unReaderT (unStateT (unScpM (mstop shallow_h shallow_state (\x -> trace ("back to " ++ shallow_h) (shallow_rb x)))) s)                                      (env { scpStopCount = scpStopCount env + 1})
+        Continue hist'
+          -> unReaderT (unStateT (unScpM mcont)                                      (s { scpProcessHistoryState = hist' })) (env { scpNodeKey = generatedKey hist', scpProcessHistoryEnv = hist' })
+  -- TODO: record the names of the h-functions on the way to the current one instead of a Int depth
+
+speculateM :: State -> (State -> ScpM a) -> ScpM a
+speculateM state mcont = ScpM $ StateT $ \s -> ReaderT $ \env -> case speculate (scpAlreadySpeculated env) (mempty, state) of (already', (_stats, state')) -> unReaderT (unStateT (unScpM (mcont state')) s) (env { scpAlreadySpeculated = already' })
+
+
+sc :: State -> ScpM (Deeds, FVedTerm)
+sc = memo sc' . gc -- Garbage collection necessary because normalisation might have made some stuff dead
+
+sc' :: Maybe String -> State -> ScpM (Bool, (Deeds, FVedTerm)) -- Bool records whether generalisation occurred, for debug printing
+sc' mb_h state = {- pprTrace "sc'" (trce1 state) $ -} {-# SCC "sc'" #-} case mb_h of
+  Nothing -> speculateM (reduce state) $ \state -> -- traceRenderM "!sc" (PrettyDoc (pPrintFullState quietStatePrettiness state)) >>
+                                                   my_split state
+  Just h  -> flip catchM my_generalise $ \rb ->
+               terminateM h state rb
+                 (speculateM (reduce state) $ \state -> my_split state)
+                 (\shallow_h shallow_state shallow_rb -> trce shallow_h shallow_state $ do
+                                                           let (mb_shallow_gen, mb_gen) | not gENERALISATION = (Nothing, Nothing)
+                                                                                        | otherwise = zipPair mplus mplus (tryMSG sc shallow_state state)
+                                                                                                                          (tryTaG sc shallow_state state)
+                                                           case mb_shallow_gen of
+                                                             Just shallow_gen | sC_ROLLBACK           -> trace "sc-stop(rb,gen)"   $ shallow_rb shallow_gen
+                                                             Nothing | sC_ROLLBACK, Nothing <- mb_gen -> trace "sc-stop(rb,split)" $ shallow_rb (split sc shallow_state)
+                                                             _ -> case mb_gen of Just gen             -> trace "sc-stop(gen)"      $ my_generalise gen
+                                                                                 Nothing              -> trace "sc-stop(split)"    $ my_generalise (split sc state))
+  where
+    -- FIXME: the "could have tied back" case is reachable (e.g. exp3_8 with unfoldings as internal bindings), and it doesn't appear to be
+    -- because of dumped promises (no "dumped" in output). I'm reasonably sure this should not happen :(
+    trce shallow_h shallow_state = pprTraceSC ("Embedding:" ++ shallow_h)
+                                              ({- ppr (stateTags shallow_state) <+> text "<|" <+> ppr (stateTags state) $$ -}
+                                               hang (text "Before:") 2 (trce1 shallow_state) $$
+                                               hang (text "After:")  2 (trce1 state) $$
+                                               (case msg (MSGMode { msgCommonHeapVars = emptyInScopeSet }) (snd (reduceForMatch shallow_state)) (snd (reduceForMatch state)) of Left why -> text why; Right res -> case msgMatch AllInstances res of Nothing -> text "msg, not instance"; Just _ -> text "!!! could have tied back?"))
+    trce1 state = pPrintFullState quietStatePrettiness state $$ pPrintFullState quietStatePrettiness (snd (reduceForMatch state))
+
+    -- NB: we could try to generalise against all embedded things in the history, not just one. This might make a difference in rare cases.
+    my_generalise splt  = liftM ((,) True)  $ splt           >>= insertTagsM
+    my_split      state = --pprTrace "my_split" (pPrintFullState quietStatePrettiness state) $
+                          liftM ((,) False) $ split sc state >>= insertTagsM
+
+tryTaG, tryMSG :: (State -> ScpM (Deeds, Out FVedTerm))
+               -> State -> State
+               -> (Maybe (ScpM (ResidTags, Deeds, Out FVedTerm)),
+                   Maybe (ScpM (ResidTags, Deeds, Out FVedTerm)))
+
+-- NB: this actually returns either (Nothing, Nothing) or (Just, Just)
+tryTaG opt shallow_state state = bothWays (\_ -> generaliseSplit opt gen) shallow_state state
+  where gen = mK_GENERALISER shallow_state state
+
+-- NB: this cannot return (Just, Nothing)
+tryMSG opt shallow_state state
+  | not mSG_GENERALISATION = (Nothing, Nothing)
+  | otherwise = case msgMaybe mode shallow_state state of
+    -- If we fail this way round, we should certainly fail the other way round too
+    Nothing -> (Nothing, Nothing)
+    Just msg_result@(Pair l r, _)
+      | let Just msg_result_sym = msgMaybe mode state shallow_state -- Will certainly succeed, but with tags of shallow_state
+      -> pprTrace "MSG success" (pprMSGResult msg_result) $ -- NB: pretty print MSG the "correct" way around even if we roll back
+         case (trivialMSG l, trivialMSG r) of
+           -- Both trivial: we have certainly discovered a variable generalisation (not an instance match, or we would have tied back)
+           -- Perhaps ideally we would just SC our deep state normally, but that is awkward. Instead we will rollback and generalise,
+           -- but it might be unsafe to generalise without rolling back (we might not be throwing any info away)
+           (True, True)   -> (Just (genFrom msg_result_sym), Nothing)
+           -- Trivial on the LHS only: probably an instance match. Unsafe to roll back because we might not throw any info away.
+           (True, False)  -> (Nothing,                       Just (genFrom msg_result))
+           -- Trivial on the RHS only: kind of weird. Perhaps ideally we would just reduce+split our deep state normally, but it's a bit
+           -- awkward to arrange that. Instead we will accept generalising the earlier state.
+           (False, True)  -> (Just (genFrom msg_result_sym), Nothing)
+           -- Non-trivial on both sides: can either rollback or not, doesn't matter. We throw away info either way.
+           (False, False) -> (Just (genFrom msg_result_sym), Just (genFrom msg_result))
+
+  where
+    trivialMSG (_, Heap h_lr _, _, k_lr) = isPureHeapEmpty h_lr && isStackEmpty k_lr
+
+    genFrom (Pair _ (deeds_r, heap_r@(Heap h_r ids_r), rn_r, k_r), (heap@(Heap _ ids), k, qa)) = do
+      let [deeds, deeds_r'] = splitDeeds deeds_r [heapSize heap + stackSize k + annedSize qa, heapSize heap_r + stackSize k_r]
+      (deeds', e) <- sc (deeds, heap, k, qa)
+      -- Just to suppress warnings from renameId (since output term may mention h functions). Alternatively, I could rename the State I pass to "sc"
+      -- NB: adding some new bindings to h_r for the h functions is a bit of a hack because:
+      --  1. It only serves to suppress errors from "split" which occur when e' refers to some variables not bound in the heap
+      --  2. These new dummy bindings will never be passed down to any recursive invocation of opt
+      (h_hs, e') <- renameSCResult ids (rn_r, e)
+      instanceSplit opt (deeds' `plusDeeds` deeds_r', Heap (h_r `M.union` h_hs) ids_r, k_r, e')
+
+    mode = MSGMode { msgCommonHeapVars = case shallow_state of (_, Heap _ ids, _, _) -> ids }
+
+{-
+tryMSG opt = bothWays $ \shallow_state state -> do
+  msg_result@(Pair _ (deeds_r, heap_r@(Heap h_r ids_r), rn_r, k_r), (heap@(Heap _ ids), k, qa)) <- msgMaybe (MSGMode { msgCommonHeapVars = case shallow_state of (_, Heap _ ids, _, _) -> ids }) shallow_state state
+  -- NB: have to check that we throw away *some* info via MSG or else we can get a loop where we
+  -- MSG back to the same state and thus create a loop (i.e. if previous state is (a, a)^t and new state is (b, c)^t)
+  guard (not (isPureHeapEmpty h_r) || not (isStackEmpty k_r))
+  let [deeds, deeds_r'] = splitDeeds deeds_r [heapSize heap + stackSize k + annedSize qa, heapSize heap_r + stackSize k_r]
+  pprTrace "MSG success" (pprMSGResult msg_result) $ Just $ do
+    (deeds', e) <- sc (deeds, heap, k, qa)
+    -- Just to suppress warnings from renameId (since output term may mention h functions). Alternatively, I could rename the State I pass to "sc"
+    -- NB: adding some new bindings to h_r for the h functions is a bit of a hack because:
+    --  1. It only serves to suppress errors from "split" which occur when e' refers to some variables not bound in the heap
+    --  2. These new dummy bindings will never be passed down to any recursive invocation of opt
+    (h_hs, e') <- renameSCResult ids (rn_r, e)
+    instanceSplit opt (deeds' `plusDeeds` deeds_r', Heap (h_r `M.union` h_hs) ids_r, k_r, e')
+-}
+
+pprMSGResult :: MSGResult -> SDoc
+pprMSGResult (Pair (deeds_l, heap_l, rn_l, k_l) (deeds_r, heap_r, rn_r, k_r), (heap, k, qa))
+  = {- ppr (case heap   of Heap h   _ -> M.keysSet h)   $$ -} pPrintFullState quietStatePrettiness (emptyDeeds, heap, k, qa) $$
+    {- ppr (case heap_l of Heap h_l _ -> M.keysSet h_l) $$ -} ppr rn_l $$ pPrintFullState quietStatePrettiness (deeds_l, heap_l, k_l, fmap Question (annedVar (mkTag 0) nullAddrId)) $$
+    {- ppr (case heap_r of Heap h_r _ -> M.keysSet h_r) $$ -} ppr rn_r $$ pPrintFullState quietStatePrettiness (deeds_r, heap_r, k_r, fmap Question (annedVar (mkTag 0) nullAddrId))
+
+renameSCResult :: InScopeSet -> In FVedTerm -> ScpM (PureHeap, FVedTerm)
+renameSCResult ids (rn_r, e) = do
+    hs <- outputFreeVars
+    let rn_r' = foldr (\x rn -> insertIdRenaming rn x x) rn_r hs
+        h_r'  = foldr (\x h  -> M.insert x lambdaBound h) M.empty hs
+    return (h_r', renameFVedTerm ids rn_r' e)
+
+bothWays :: (a -> a -> b)
+         -> a -> a -> (b, b)
+bothWays f shallow_state state = (f state shallow_state, f shallow_state state)
+
+insertTagsM :: (ResidTags, a, b) -> ScpM (a, b)
+insertTagsM (resid_tags, deeds, e') =
+  ScpM $ StateT $ \s -> ReaderT $ \env -> let resid_tags' = scpResidTags s `plusResidTags` resid_tags
+                                          in trace (tagSummary (scpTagAnnotations env) 1 30 resid_tags' ++ "\n" ++ childrenSummary (scpParentChildren s)) $
+                                             return ((deeds, e'), s { scpResidTags = resid_tags' })
+
+-- Note [Prevent rollback loops by only rolling back when generalising]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+--
+-- I tried to think about another way to fix rollback.
+--  1. If we split <x |-> v[x] | \underbar{x} | > to < | v[x] | update x >
+--     .. then we have the invariant that the children of a split are <= size of the parent
+--        (we presently don't have this because we duplicate the bodies of heap-bound lambdas)
+--  2. Then we can say that the children of any *generalise* have size strictly < that of the parent
+--  3. As a result we can recover the termination argument by saying that:
+--     a) For any potentially infinite final chain of states all related by "split"/"generalise..
+--     b) We can chop it into segments of consecutive "split"s
+--       i) Each segment must be of finite length (because of the alpha-renaming-tieback property and
+--          the fact that each state in the chain is a syntactic subset of the initial one)
+--       ii) There must be a finite number of segments, because each time we generalise we reduce size
+--           by at least one, and the intervening splits don't increase it
+--       iii) QED (The final chain is finite)
+--
+-- This is a really beautiful plan. The problem I've found with it is that we can't do 1) because of the
+-- problem mentioned in the section about Arjan's idea in the splitter --- i.e. we can do:
+--   1. SC (let x = v in x) 
+--   2. Reduce to (let x = v in \underbar{x})
+--   3. Split to (let x = v in x)
+--   4. Tieback to 1), building a worthless loop
+--
+-- So we should probably work out why the existing supercompiler never builds dumb loops like this, so
+-- we can carefully preserve that property when making the Arjan modification.
+
+
+-- Are rollback-loops really a problem? If we have:
+--   h0 x = let f = \x -> f x in f x
+-- And we split to:
+--   h1 x = let f = \x -> f x in f x
+-- Then we will still have the promise for h0 in the environment, so we tie back!
+--
+-- NB: if a state is in the history then we are guaranteed to have done reduce+split on it.
+-- We might still want to prevent rollback if we can generalise the current state but not the older state,
+-- but if we can't generalise either then we can roll back to split just fine.
+
+
+memo :: (Maybe String -> State -> ScpM (Bool, (Deeds, FVedTerm)))
+     ->  State -> ScpM (Deeds, FVedTerm)
+memo opt init_state = {-# SCC "memo'" #-} memo_opt init_state
+  where
+    memo_opt state
+      | Skip <- memo_how = liftM snd $ opt Nothing state
+      | otherwise = join $ ScpM $ StateT $ \s ->
+        -- NB: If tb contains a dead PureHeap binding (hopefully impossible) then it may have a free variable that
+        -- I can't rename, so "rename" will cause an error. Not observed in practice yet.
+    
+        -- Note [Matching after reduction]
+        -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+        --
+        -- If we match on States *after* reduction, we might get the following problem. Our initial state could be:
+        --
+        --  let y = 1; p = (x, y) in snd p
+        --
+        -- This state has the free variables {x}. However, after reduction+GC it does not have any free variables.
+        -- Which set of free variables should we lambda-abstract the h-function over? Well, clearly we have to 
+        -- lambda-abstract over the pre-reduction FVs in case that "opt" does not do any reduction and leaves x as a
+        -- free variable.
+        --
+        -- A consequence of this decision is that we have to do something a bit weird at *tieback* site like this one:
+        --
+        --  let y = 1 in y
+        --
+        -- To tieback to the h-function for the initial state, we need to supply an x. Luckily, the act of reduction
+        -- proves that x is a dead variable and hence we should just be able to supply "undefined".
+        --
+        -- Note that:
+        --  1. Terms that match *after* reduction may not match *before* reduction. This is obvious, and the reason
+        --     that we match to reduce before matching in the first place
+        --  2. Suprisingly, terms that match *before* reduction may not match *after* reduction! This occurs because
+        --     two terms with different distributions of tag may match, but may roll back in different ways in reduce.
+
+        -- Note [Instance matching and loops]
+        -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+        --
+        -- If we aren't careful, instance matching can easily cause the supercompiler to loop. What if we have this tieback:
+        --
+        --   \_ -> f x                   f x
+        --
+        -- And we are asked to supercompile:
+        --
+        --   let a = \_ -> f a           let a = \_ -> f a
+        --   in \_ -> f a                in f a
+        --
+        -- If we aren't careful, we will detect an instance match, and hence recursively supercompile the "remaining" term:
+        --
+        --   let a = \_ -> f a           let a = \_ -> f a
+        --   in \_ -> f a                in f a
+        --
+        -- We ran into a very similar situation in practice with exp3_8, where the recursive structure was actually an instance
+        -- dictionary which contained several methods (lambdas), each of which selected methods from that same dictionary. i.e.
+        -- we had something like this in the memo table:
+        --
+        --  let $cnegate = \x y -> ...[$fNumNat]
+        --      $c+ = \x y -> ...[$fNumNat]
+        --      $fNumNat = D:Num Nat $cnegate $c+
+        --  in $c+ x y
+        --
+        -- And we were compiling:
+        --
+        --  let $cnegate = \x y -> ...[$fNumNat]
+        --      $c+ = \x y -> ...[$fNumNat]
+        --      $fNumNat = D:Num Nat $cnegate $c+
+        --      a = fromInteger Nat $fNumNat c
+        --      b = negate Nat $fNumNat d
+        --      c = __integer 0
+        --  in $c+ a b
+        --
+        -- So one of the "remaining" terms was:
+        --
+        --  let $cnegate = \x y -> ...[$fNumNat]
+        --      $c+ = \x y -> ...[$fNumNat]
+        --      $fNumNat = D:Num Nat $cnegate $c+
+        --  in negate Nat $fNumNat d
+        --
+        -- And after inlining of the negate selector+body we just get a call to ($c+) which is identical to what we began with,
+        -- since in the class defaults (negate x) is defined as (0 - x) and (a - b) is defined as (a + negate b).
+        --
+        -- Of course, this particular program will most likely cause a runtime loop, but the compiler shouldn't diverge too!
+        --
+        -- I think a reasonable fix is twofold:
+        --  1. Record instance matches in the memotable (!). Still no need to do it for truly *exact* matches.
+        --  2. When tying back, always prefer exact matches over instance matches.
+        --     This is something I want to do anyway because we should also be able to detect instance matches that are strictly
+        --     better than other possible instance matches, and we should prefer those as well with the same code.
+        --
+        -- BUT we have to be so so careful if we do this that we don't introduce accidental extra loops.
+
+        -- Note [Type generalisation and rollback]
+        -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+        --
+        -- When doing type generalisation, we can end up in the situation where a fulfilled promise in another part of the tree
+        -- refers to an as-yet unfulfilled promise we place newly onto the stack. If we then rollback that promise, what do we
+        -- do with that overwritten fulfilment?
+        --
+        -- This problem doesn't occur without type generalisation because the h functions mentioned freely by the fulfilments/
+        -- partial fulfilments (latent on the stack) at the time we made the promise we are rolling back to can *only* mention
+        -- promises that we already made at that time!
+        --
+        -- WITH type generalisation we go and modify those existing fulfilments to point into the portion of the stack which
+        -- is in danger of rollback.
+        -- (A similar problem would occur if we messed about with previous fulfilments when we detected a MSG opportunity.)
+        --
+        -- A formalisation of this idea is below:
+        --
+        -- Imagine there is a quantity "time" which ticks forward by at least 1 unit whenever a new promise{-/fulfilment-} occurs. Call:
+        --  * tp(h) the time at which h was promised
+        -- {-
+        --  * tf(h) the time at which h was fulfilled
+        -- -}
+        --  * fvs(h)(t) the h-function FVs for the fulfilment for h as of time t
+        --
+        -- Then we have:
+        --  1. P-before-F-use: if h' \elem fvs(h)(t) then tp(h') <= t
+        -- {-
+        --  2. Monotonicity: fvs(h)(t) `subVarSet` fvs(h)(t+1)
+        --  3. Fix: if t >= tf(h) then fvs(h)(t) = fvs(h)(tf(h))
+        --
+        -- Due to monotonicity and fix, we can usefully define fvs(h) = fvs(h)(tf(h))
+        -- where if h' \elem fvs(h)(t) then h' \elem fvs(h)
+        --
+        --  3. P-before-F: tp(h) < tf(h)
+        --  4. Depth-first: if h' \elem fvs(h) and tf(h') > tf(h) then tp(h') < tp(h)
+        -- 
+        -- We can derive a simple lemma immediately:
+        --  3. Dependent-P-before-F: if h' \elem fvs(h) then tp(h') < tf(h)
+        --     Proof: by cases on tf(h') `compare` tf(h):
+        --      Case (==): then h == h' and by P-before-F, (tp(h') = tp(h)) < tf(h)
+        --      Case (<): using P-before-F, tp(h') < tf(h') < tf(h)
+        --      Case (>): tf(h') > tf(h) so using depth-first and P-before-F, tp(h') < tp(h) < tf(h)
+        -- -}
+        --
+        -- Rollback to tp(h) from time t (tp(h) < t, tf(h) > t) relies on:
+        --  forall h'. tp(h') < tp(h) ==> if h'' \elem fvs(h')(t) then tp(h'') <= tp(h)
+        -- Because h' is *not* on the top of the stack at t, we know that we don't *add* any new FVs to it:
+        --  fvs(h')(t) `subVarSet` fvs(h')(tp(h))
+        -- (In fact it will be completely invariant, but it is more general to insist only on shrinkage). It follows that since:
+        --  h'' \elem fvs(h')(t)
+        -- Then by the subset relation:
+        --  h'' \elem fvs(h')(tp(h))
+        -- And so by P-before-F-use:
+        --  tp(h'') <= tp(h)
+        -- Which is what we wanted.
+        --
+        --
+        -- All that said, what are we going to do about this problem?
+        --  * It would be safe to only do type generalisation if the older promise is on the stack, because in that case
+        --    we could easily code it to only add normal fulfilments to the state if one isn't already present, and it wouldn't
+        --    cause any rollback issues
+        --  * But if it's on the stack anyway, we would get better results by just rolling back immediately with the normal MSG
+        --    doing the work of dumping the type information for us (albeit this will likely immediately terminate b/c types aren't tagged)
+        --
+        -- I think the simplest thing to do is just treat it as a normal instance match, and not worry about overwriting the older code!
+
+        -- FIXME: this code is all super-horrible how
+        let remember what = (do { traceRenderM ">sc {" (fun p, stateTags state, PrettyDoc (pPrintFullState quietStatePrettiness state))
+                                ; res <- addParentM p (what (Just (getOccString (varName (fun p))))) state
+                                ; traceRenderM "<sc }" (fun p, PrettyDoc (pPrintFullState quietStatePrettiness state), res)
+                                ; fulfillM res }, s { scpMemoState = ms' })
+              where (ms', p) = promise (scpMemoState s) (state, reduced_state)
+        in case fmap (\(exact, ((p, is_ancestor), mr)) -> case mr of
+                       RightIsInstance (Heap h_inst ids_inst) rn_lr k_inst -> (exact, do { traceRenderM ("=sc" ++ if exact then "" else "(inst)")
+                                                                                                        (fun p
+                                                                                                        , PrettyDoc (pPrintFullState quietStatePrettiness state)
+                                                                                                        --, PrettyDoc (pPrintFullState quietStatePrettiness reduced_state)
+                                                                                                        , PrettyDoc (pPrintFullState quietStatePrettiness (meaning p))
+                                                                                                        --, case msgMaybe (MSGMode { msgCommonHeapVars = emptyInScopeSet }) (meaning p) reduced_state of Just result -> PrettyDoc (pprMSGResult result)
+                                                                                                        --, res
+                                                                                                        )
+                                                                                         ; stuff <- instanceSplit memo_opt (remaining_deeds, Heap (foldr (\x -> M.insert x lambdaBound) h_inst (fun p:varSetElems extraOutputFvs)) ids_inst, k_inst, applyAbsVars (fun p) (Just rn_lr) (abstracted p))
+                                                                                         ; insertTagsM stuff })
+                         where
+                          -- This will always succeed because the state had deeds for everything in its heap/stack anyway:
+                          Just remaining_deeds = claimDeeds (releaseStateDeed state) (pureHeapSize h_inst + stackSize k_inst)
+                        -- NB: when the state we are type-genning against is on the stack OR we are not rolling back
+                        -- then this codepath can also overwrite the fulfilment for the old state to call into the generalised version,
+                        -- otherwise we have to leave it in place
+                        --
+                        -- NB: don't record a promise for type generalisation! This is OK for termination because all type gens
+                        -- are non-trivial so we will eventually have to stop genning. Furthermore, it means that we can't end
+                        -- up with a FIXME: continue
+                       RightGivesTypeGen rn_l s rn_r -> -- pprTrace "typegen" (pPrintFullState fullStatePrettiness state $$ pPrintFullState fullStatePrettiness s) $
+                                                        trace "typegen" $
+                                                                          (True, do { (deeds, e') <- memo_opt s
+                                                                                    ; (_, e'_r) <- renameSCResult (case s of (_, Heap _ ids, _, _) -> ids) (rn_r, e')
+                                                                                      -- OH MY GOD:
+                                                                                      --  - If we do memo-rollback or sc-rollback then we CAN'T overwrite old fulfilments
+                                                                                      --    because they might end up pointing to a promise which gets rolled back
+                                                                                      --  - So we can *either* overwrite old fulfilments, or not RB to ancestors (e.g. upon type gen)
+                                                                                      --  - But overwriting old fulfilments is the main thing we wanted to achieve, so we better make that choice :(
+                                                                                    ; when (not sC_ROLLBACK && not is_ancestor) $ do
+                                                                                        (_, e'_l) <- renameSCResult (case s of (_, Heap _ ids, _, _) -> ids) (rn_l, e')
+                                                                                        refulfillM p e'_l
+                                                                                    ; return (deeds, e'_r) })) $
+                     listToMaybe $
+                     sortBest (\(p, _) -> if dumped p then Just (fun p) else Nothing)
+                              [ ((p, is_ancestor), mr)
+                                | let (parented_ps, unparented_ps) = trainToList (promises (scpMemoState s))
+                                , (p, is_ancestor, common_h_vars) <- [ (p_sibling, fun p_parent == fun p_sibling, common_h_vars)
+                                                                     | (p_parent, p_siblings) <- parented_ps
+                                                                     , let common_h_vars = case meaning p_parent of (_, Heap _ ids, _, _) -> ids
+                                                                     , p_sibling <- p_parent:p_siblings ] ++
+                                                                     [ (p_root,    False,                         emptyInScopeSet)
+                                                                     | p_root <- unparented_ps ]
+                                , let inst_mtch = case iNSTANCE_MATCHING of
+                                                    NoInstances            -> NoInstances
+                                                    InstancesOfGeneralised -> InstancesOfGeneralised
+                                                    AllInstances           -> if is_ancestor then AllInstances else InstancesOfGeneralised
+                                      mm = MSGMode { msgCommonHeapVars = common_h_vars }
+                                --      mm = MM { matchInstanceMatching = inst_mtch, matchCommonHeapVars = common_h_vars }
+                                --, Just (heap_inst, k_inst, rn_lr) <- [-- (\res -> if isNothing res then pprTraceSC "no match:" (ppr (fun p)) res   else   pprTraceSC "match!" (ppr (fun p)) res) $
+                                --                                      match' mm (meaning p) reduced_state]
+                                , Just mr <- [{- trace "match" $ (\res -> trace "match'" res) -} (msgMaybe mm (meaning p) reduced_state >>= msgMatch inst_mtch)]
+                                ] of Just (skip, res) -> pure $ if skip then (res, s) else remember (\_ _ -> liftM ((,) False) res)
+                                     Nothing          | CheckOnly <- memo_how
+                                                      -> pure (liftM snd $ opt Nothing state, s)
+                                                      | otherwise
+                                                      -> pure (remember opt)
+      where (_state_did_reduce, reduced_state) = reduceForMatch state
+
+            -- Prefer more exact matches (see Note [Instance matching and loops]: it is essential to choose exact matches over instances)
+            sortBest :: forall promise.
+                        (promise -> Maybe Var)
+                     -> [(promise, MSGMatchResult)]
+                     -> [(Bool, (promise, MSGMatchResult))]
+            sortBest dumped ress = filter suitable $ map ((,) True) best_ress ++ map ((,) False) (sortBy ((\x y -> if x `moreSpecific` y then LT else GT) `on` snd) other_ress)
+              where suitable (_, (p, mr))
+                      | Just fun <- dumped p                     = pprTraceSC "tieback-to-dumped" (ppr fun) False
+                      | not tYPE_GEN, RightGivesTypeGen {} <- mr = False
+                      | otherwise                                = True
+
+                    -- Stop early upon exact match (as an optimisation)
+                    (best_ress, other_ress) = partition (mostSpecific . snd) ress
+
+                    mostSpecific :: MSGMatchResult -> Bool
+                    mostSpecific (RightIsInstance (Heap h _) rn k) = isPureHeapEmpty h && isStackEmpty k && isEmptyRenaming rn
+                    mostSpecific (RightGivesTypeGen _ _ _)         = False
+
+                    moreSpecific :: MSGMatchResult -> MSGMatchResult -> Bool
+                    -- Just a heuristic to decide between different instance matches: try to discard *least* stuff
+                    moreSpecific (RightIsInstance (Heap h_l _) _ k_l) (RightIsInstance (Heap h_r _) _ k_r)
+                      = (pureHeapSize h_l + stackSize k_l) <= (pureHeapSize h_r + stackSize k_r)
+                    -- Another heuristic: try to discard *most* type information (this is only more specific in the sense that it is more specific about what can be thrown away)
+                    -- Something to think about is what would happen if we did rollback to implement type gen. i.e. if we were driving:
+                    --   h1 = f (G Int)
+                    -- And we had a prior promise for:
+                    --   h0 = f (G Bool)
+                    -- We should rollback to just after the h0 promise and drive
+                    --   h2 = f (G a)
+                    -- (using an instantiated verison of h2 to fulfill h0). Then if we later drive:
+                    --   h3 = f (T a)
+                    -- We should rollback to just after the h2 promise (*) and drive:
+                    --   h4 = f (b a)
+                    -- (using an instantiated version of h4 to fulfill h3).
+                    --
+                    -- Note that at (*) we rolled back from h3 (T a) to h2 (G a) and NOT to h0 (G Int) (which is still on the stack, unlike h1, which was rolled back)
+                    -- This means that we preferred to roll back to something which gives an MSG with the *smallest possible* renaming (i.e. is more specific).
+                    -- This is the opposite of what I've implemented below (FIXME).
+                    -- TODO: maybe it actually would be OK (for SC termination) to roll back to h0 at (*)? Though it would mean dumping any tiebacks to h2 unnecessarily.
+                    moreSpecific (RightGivesTypeGen _ _s_l rn_l) (RightGivesTypeGen _ _s_r rn_r)
+                      -- OK, here is what I have concluded after long thought.
+                      -- As long as we DON'T CREATE A PROMISE when type generalising, then there will be a always be a maximum of one
+                      -- possible type generalisation. A simple example demonstrates the principal behind this. Imagine that there were two
+                      -- possible generalisations e.g. the promises held states for:
+                      --   f Int   Char
+                      --   f Float Bool
+                      -- And we were driving
+                      --   f Int   Bool
+                      -- Looking at the promises, we could generalise to either
+                      --   f Int   alpha
+                      --   f alpha Bool
+                      -- But in fact if you think about it if we aren't recording promises when we type generalise then there shouldn't be two
+                      -- such promises to begin with! The reason is when we get to driving the later of the two (say f Float Bool), we would
+                      -- type generalise it against the earlier promise (i.e. f Int Char) and hence would have driven (f alpha beta) without
+                      -- recording (f Float Bool). Now when we come to supercompile (f Int Bool) we can just tie back to (f alpha beta),
+                      -- which is absolutely what we want.
+                      | pREINITALIZE_MEMO_TABLE
+                      = renamingSize rn_r <= renamingSize rn_l -- NB: although the argument above *is* true, we can have two type-genable promises due to memoiser preinit!
+                      | otherwise
+                      = pprPanic "moreSpecific: two possible type gens, this should not happen!" (ppr rn_l $$ ppr rn_r $$ pPrintFullState quietStatePrettiness _s_l $$ pPrintFullState quietStatePrettiness _s_r)
+                    -- Prefer instance matches to type generalisation (don't have a good sense about what is best here):
+                    moreSpecific (RightIsInstance _ _ _) _ = True
+                    moreSpecific _ (RightIsInstance _ _ _) = False
+
+                    renamingSize (_, tv_subst, co_subst) = sumMap typeSize (varEnvElts tv_subst) + sumMap coercionSize (varEnvElts co_subst)
+
+                    -- TODO: it might be OK to insist the incoming renaming is invertible, but this should definitely work:
+                    isEmptyRenaming (_, tv_subst, co_subst) = all isTyVarTy (varEnvElts tv_subst) && all isCoVarCo (varEnvElts co_subst)
+                    isCoVarCo = isJust . getCoVar_maybe
+
+            -- I do not currently know how to prevent the supercompiler from building loops when doing float-to-match
+            -- (i.e. instance matching where we match var-to-term, not just var-to-var) is on. With it, we tend to do things like:
+            --
+            --  [a] h0 = D[let f = v in f] = let f = h1 in f
+            --      h1 = D[let f = v in v] = h0
+            --
+            -- In standard supercompilation, we still need to skip some tieback attempts for reasons of correctness:
+            -- our correctness proof does not work if we try to memo values. Luckily, this doesn't hurt us at all because
+            -- it doesn't sacrifice termination of the supercompiler. See the thesis for details.
+            --
+            -- The main example we are worried about when we discuss correctness is this:
+            --
+            --  [a] h0 = D[let x = True in x] = let x = h1 in x
+            --      h1 = D[True] = h0
+            --
+            -- (Note that the meaning of h1 is cost equivalent to the meaning of h0, so superficially this tieback is justified!
+            --  If x bound a lambda instead then we would be able to make an argument that the tieback was not cost equivalent.)
+            memo_how -- If eager value splitting is on, we'll always check, which
+                     -- is certainly terminating (even without EVS) and is also safe (this is due to EVS).
+                     --
+                     -- Even with EVS, it is not safe to skip memoisation if the state in the focus is a value, an indirection, or stuck
+                     -- on a free variable. Examples that prove each case incorrect are:
+                     --  1. let f = \x -> C f in C f
+                     --     (a value state, eager-splits to itself)
+                     --  2. let f = \x -> f |> co in f |> co
+                     --     (an indirection, eager-splits to itself)
+                     --  3. let f = \x -> g f x in g f x
+                     --     (blocked on FV, eager-splits to itself)
+                     | eAGER_SPLIT_VALUES
+                     = CheckAndRemember
+
+                     -- If eager value splitting is off, we do this, which is sufficient to avoid non-correct output
+                     -- but not enough to avoid non-termination (consider the input let xs = 1:xs in 1:xs, which splits to
+                     -- itself and is always irreducible)
+                     | isStateIrreducible state
+                     = Skip
+
+                     | otherwise
+                     = CheckAndRemember
+
+data MemoHow = Skip | CheckOnly | CheckAndRemember
+
+-- NB: don't garbage collect when reducing for a match!
+--
+-- If you do then you can start with this term:
+--   [1] let $dNum = ww3 in * a $dNum
+--
+-- Looks like this after reduction+GC (the update for $dNum is dead):
+--   [2] case ww3 of Num ...
+--
+-- And if we reduce+split [1] instead we get (the update for $dNum is residualised):
+--   [3] case $dNum of Num ...
+--
+-- Reducing+GCing [3] gives us [3] again, and that is alpha equivalent to [2],
+-- so we tie back to it rather than continuing. But that means our code is:
+--   let h @a ww3 = let $dNum = ww3
+--                  in h a $dNum
+--
+-- Which is a loop. So we need to do one of:
+--  1. Not GC before matching
+--  2. GC *after* reduction in the main codepath.
+--  3. Not eliminate dead update frames when GCing
+reduceForMatch :: State -> (Bool, State)
+reduceForMatch state | rEDUCE_BEFORE_MATCH = {- second gc $ -} reduceWithFlag (case state of (_, h, k, e) -> (maxBound, h, k, e)) -- Reduce ignoring deeds for better normalisation
+                     | otherwise           = (False, state)
+
+supercompile :: M.Map Var Term -> Term -> Term
+supercompile unfoldings e = fVedTermToTerm $ start (liftM snd . sc)
+  where (bvs_unfoldings, no_preinit, preinit) = prepareTerm unfoldings e
+        (to_bind, letty_preinit_with, state) = no_preinit -- Delay forcing these to suppress
+        (preinit_with, preinit_state)        = preinit    -- prepareTerm debug prints
+        start k | uSE_LET_BINDINGS = bindManyMixedLiftedness fvedTermFreeVars to_bind $ run state $ preinitalise letty_preinit_with >> k state
+                | otherwise        = run preinit_state $ preinitalise preinit_with >> withScpEnv (\e -> e { scpAlreadySpeculated = bvs_unfoldings `S.union` scpAlreadySpeculated e }) (k preinit_state)
+        run tags_state = runScpM (tagAnnotations tags_state)
+
+preinitalise :: [(State, FVedTerm)] -> ScpM ()
+preinitalise states_fulfils
+  | not pREINITALIZE_MEMO_TABLE = return () -- If you do this, expect your output code to grow a lot!
+  | otherwise = forM_ states_fulfils $ \(state, e') -> do
+    ScpM $ StateT $ \s -> do
+        --unless (isEmptyVarSet (stateUncoveredVars state)) $ pprPanic "preinitalise" (pPrintFullState fullStatePrettiness state $$ ppr e')
+        let (ms', _p) = promise (scpMemoState s) (state, snd (reduceForMatch state))
+        return ((), s { scpMemoState = ms' })
+    fulfillM (emptyDeeds, e')
--- a/compiler/supercompile/Supercompile/Drive/Split.hs
+++ b/compiler/supercompile/Supercompile/Drive/Split.hs
+{-# OPTIONS_GHC -fno-warn-name-shadowing #-}
+module Supercompile.Drive.Split (
+    MonadStatics(..), split, instanceSplit, generalise,
+    ResidTags, plusResidTags, emptyResidTags) where
+
+#include "HsVersions.h"
+
+import Supercompile.Core.FreeVars
+import Supercompile.Core.Renaming
+import Supercompile.Core.Syntax
+
+import Supercompile.Evaluator.Deeds
+import Supercompile.Evaluator.Evaluate (normalise)
+import Supercompile.Evaluator.FreeVars
+import Supercompile.Evaluator.Residualise
+import Supercompile.Evaluator.Syntax
+
+import Supercompile.GHC (coreAltConToAltCon, altConToCoreAltCon)
+
+import Supercompile.Termination.Generaliser (Generaliser(..))
+
+import Supercompile.StaticFlags
+import Supercompile.Utilities hiding (tails)
+
+import CoreUtils (filterAlts)
+import Id        (idUnique, idType, isDeadBinder, localiseId, isOneShotBndr)
+import Var       (varUnique)
+import PrelNames (undefinedName, wildCardKey)
+import Util      (zipWithEqual, zipWith3Equal, thirdOf3)
+import Digraph
+import UniqFM    (delListFromUFM_Directly)
+import VarEnv
+
+import Data.Traversable (fmapDefault, foldMapDefault)
+import qualified Data.Map as M
+import qualified Data.IntSet as IS
+import qualified Data.IntMap as IM
+
+
+class Monad m => MonadStatics m where
+    bindCapturedFloats :: FreeVars -> m a -> m (Out [(Var, FVedTerm)], a)
+    -- Free variables of h-functions generated in this context
+    monitorFVs :: m a -> m (FreeVars, a)
+
+
+-- | Implementation of Kleene fixed-point theorem <http://en.wikipedia.org/wiki/Kleene_fixed-point_theorem>
+-- for bounded join semi-lattices. Forces the function to be monotone.
+{-# INLINE lfpFrom #-}
+lfpFrom :: (Eq a) => (a -> a -> a) -> a -> (a -> a) -> a
+lfpFrom join init_x f = lfpFrom' init_x (\x -> f x `join` x)
+  where
+    -- Least point of a partially ordered monotone function. Does not checks that the function is monotone.
+    lfpFrom' init_x f = go init_x
+      where go x | x' == x      = x
+                 | otherwise    = go x'
+              where x' = f x
+
+
+type ResidTags = IM.IntMap Int
+
+emptyResidTags :: ResidTags
+emptyResidTags = IM.empty
+
+oneResidTag :: Tag -> ResidTags
+oneResidTag (TG i _) = IM.singleton (unFin i) 1
+
+plusResidTags :: ResidTags -> ResidTags -> ResidTags
+plusResidTags = IM.unionWith (+)
+
+plusResidTagss :: [ResidTags] -> ResidTags
+plusResidTagss = IM.unionsWith (+)
+
+-- FIXME:
+--
+--   let f = \x -> e
+--   in <f>
+--
+-- Splits to:
+--
+--    let f = \x -> e
+--    in <\x -> e>
+--
+-- For some reason that (\x -> e) actualy has the same tag as <f> (see Accumulator without rollback),
+-- and disaster ensues...
+
+--
+-- == Gathering entry information for the splitter ==
+--
+
+type ContextId = Unique
+
+data Entered = Once ContextId -- ^ If a binding is Entered twice from the same context it's really a single Entrance
+             | Many           -- ^ A result of anything other than Once (or None, represented implicitly) is uninteresting for optimisation purposes
+             deriving (Eq, Show)
+
+instance Outputable Entered where
+    ppr = text . show
+
+isOnce :: Entered -> Bool
+isOnce (Once _) = True
+isOnce _        = False
+
+plusEntered :: Entered -> Entered -> Entered
+plusEntered (Once id1) (Once id2)
+  | id1 == id2 = Once id1
+  | otherwise  = Many
+plusEntered _ _ = Many
+
+
+type EnteredEnv = VarEnv Entered
+
+mkEnteredEnv :: Entered -> FreeVars -> EnteredEnv
+mkEnteredEnv ent = mapVarEnv (const ent)
+
+
+--
+-- == The splitter ==
+
+-- Note [Phantom variables and bindings introduced by scrutinisation]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+--
+-- If we never introduced bindings from scrutinisation, the world of phantom bindings would be relatively
+-- simple. In such a world, we would have this property:
+--
+--   The free variables of h-functions generated while supercompiling some term would never have
+--   more free variables than the term being supercompiled
+--
+-- Unfortunately, this is not true in the real world. What can happen is this. We supercompile:
+--  h1 x = case x of True -> e1; False -> e2
+--
+-- Which leads to the two recursively-supercompiled components:
+--  h2 = let <x = True> in e1
+--  h3 = let <x = False> in e2
+--
+-- Note that x was not static (free) in h1, but it is static (free) in h2. Thus, h-functions generated
+-- during supercompilation (h2, h3) have more free variables than the term from which they were generated (h1).
+--
+--
+-- Note [When to bind captured floats]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+--
+-- Ordinarily, we only need to check to see if we residualise some floating h-functions when we produce
+-- a residual let-binding.  This is because in the normal course of things any binding that was originally
+-- introduced as a lambda/alt-binding will never be made into a free variable of a final h-function.  However,
+-- there are two situations which break this invariant:
+--  1. We might choose to create a LetBound heap binding when driving the branches of a residual case expression
+--     that scrutinises a free variable. This changes a LambdaBound thing into a LetBound one, so we need to be
+--     careful to residualise the resulting h-function under that lambda-binder.
+--
+--     In fact, we used to do this but don't any more - see Note [Phantom variables and bindings introduced by scrutinisation]
+--  2. More relevantly, we might implement an optimisation that prevents h-functions from being lambda-abstracted
+--     over anything lambda-bound above a let-binding that we can see will trap the h-function under a let. For example,
+--     when driving:
+--
+--       \x -> let f = \y -> ...
+--             in D[<x |-> \lambda{}, f |-> l{\y -> ...} | ... f ... x ...>]
+--
+--     There is no point lambda-abstracting over x because we're going to have to drop the h-function under the f
+--     binding anyway. To implement this we might drive with (x |-> l{}) instead, but once again this converts a
+--     lambda-binding to a let-binding.
+--
+-- For this reason, we are careful to use bindCapturedFloats even when driving the arms of case expressions/bodies of lambdas.
+--
+--
+-- Note [Bind captured floats fixed point]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+-- Because bound h-functions (e.g. h2 or h3) may be referred to by other h-functions (e.g. h1) which do not
+-- refer to any of the free variables of the h-functions we are about to bind, we have a fixed point in bindCapturedFloats.
+-- This fixed point ensures we bind those h-functions that have as free variables any h-functions we are about to bind.
+
+
+{-# INLINE split #-}
+split :: MonadStatics m
+      => State
+      -> (State -> m (Deeds, Out FVedTerm))
+      -> m (ResidTags, Deeds, Out FVedTerm)
+split (deeds, Heap h ids, k, qa) opt
+  = generaliseSplit opt ctxt_ids0 (IS.empty, emptyVarSet) deeds (Heap h ids, nameStack k, \ids -> (Just qa, splitQA ctxt_ids1 ids (annedTag qa) (annee qa)))
+  where (ctxt_ids0, ctxt_ids1) = splitUniqSupply splitterUniqSupply
+
+{-# INLINE instanceSplit #-}
+instanceSplit :: MonadStatics m
+              => (Deeds, Heap, Stack, Out FVedTerm)
+              -> (State -> m (Deeds, Out FVedTerm))
+              -> m (ResidTags, Deeds, Out FVedTerm)
+instanceSplit (deeds, heap, k, focus) opt = generaliseSplit opt splitterUniqSupply (IS.empty, emptyVarSet) deeds (heap, nameStack k, \_ -> (Nothing, noneBracketed' IM.empty focus)) -- FIXME: residualised tags? FIXME: scrutinee identity?
+-- TODO: arguably I should try to get a QA for the thing in the focus. This will help in cases like where we MSG together:
+--  < H | v | >
+-- and:
+--  < H, H' | v | update f >
+-- Since ideally instance splitting the second state should allow us to drive H' with the value binding f |-> v. A similar argument applies to questions in focus.
+
+nameStack :: Stack -> NamedStack
+nameStack = snd . trainCarMapAccumL (\i kf -> (i + 1, (i, kf))) 0
+
+{-# INLINE generalise #-}
+generalise :: MonadStatics m
+           => Generaliser
+           -> State
+           -> Maybe ((State -> m (Deeds, Out FVedTerm)) -> m (ResidTags, Deeds, Out FVedTerm))
+generalise gen (deeds, Heap h ids, k, qa) = do
+    let named_k = nameStack k
+    
+    (gen_kfs, gen_xs') <- case sPLIT_GENERALISATION_TYPE of
+        NoGeneralisation -> Nothing
+        AllEligible -> guard (not (IS.null gen_kfs) || not (isEmptyVarSet gen_xs'')) >> return (gen_kfs, gen_xs'')
+          where gen_kfs = IS.fromList [i   | (i, kf) <- trainCars named_k, generaliseStackFrame gen kf]
+                gen_xs'' = mkVarSet [x'' | (x'', hb) <- M.toList h, generaliseHeapBinding gen x'' hb, ASSERT2(not (howBound hb == LambdaBound && isNothing (heapBindingTerm hb)), ppr (x'', hb, heapBindingTag hb)) True]
+        StackFirst -> (guard (not (IS.null gen_kfs)) >> return (gen_kfs, emptyVarSet)) `mplus`
+                      (guard (not (isEmptyVarSet gen_xs''))  >> return (IS.empty, gen_xs''))
+          where gen_kfs = IS.fromList [i   | (i, kf) <- trainCars named_k, generaliseStackFrame gen kf]
+                gen_xs'' = mkVarSet [x'' | (x'', hb) <- M.toList h, generaliseHeapBinding gen x'' hb, ASSERT2(not (howBound hb == LambdaBound && isNothing (heapBindingTerm hb)), ppr (x'', hb, heapBindingTag hb)) True]
+        DependencyOrder want_first -> listToMaybe ((if want_first then id else reverse) possibilities)
+          where -- We consider possibilities starting from the root of the term -- i.e. the bottom of the stack.
+                -- This is motivated by how the interaction with subgraph generalisation for TreeFlip/TreeSum.
+                -- FIXME: explain in more detail if this turns out to be sane.
+                possibilities = findGeneralisable False emptyVarSet (reverse (trainCars named_k)) h
+                
+                findGeneralisable done_qa pending_xs' unreached_kfs unreached_hbs
+                   | done_qa && null pending_kfs && M.null pending_hbs
+                   = []
+                   | otherwise
+                   = [(gen_kf_is, gen_xs'') | not (IS.null gen_kf_is) || not (isEmptyVarSet gen_xs'')] ++
+                     findGeneralisable done_qa' reached_xs' unreached_kfs' unreached_hbs'
+                  where
+                    (done_qa', extra_pending_xs') = if done_qa || not (null unreached_kfs) then (done_qa, emptyVarSet) else (True, annedFreeVars qa)
+                    (pending_kfs, unreached_kfs') = splitAt 1 unreached_kfs
+                    (pending_hbs, unreached_hbs') = M.partitionWithKey (\x' _hb -> x' `elemVarSet` (pending_xs' `unionVarSet` extra_pending_xs')) unreached_hbs
+                    
+                    gen_kf_is = IS.fromList [i  | (i, kf) <- pending_kfs, generaliseStackFrame gen kf]
+                    gen_xs'' = mkVarSet [x'' | (x'', hb) <- M.toList pending_hbs, generaliseHeapBinding gen x'' hb, ASSERT2(not (howBound hb == LambdaBound && isNothing (heapBindingTerm hb)), ppr (x'', hb, heapBindingTag hb)) True]
+                    
+                    reached_xs' = M.foldrWithKey (\_x' hb fvs -> heapBindingFreeVars hb `unionVarSet` fvs)
+                                                 (unionVarSets (map (stackFrameFreeVars . tagee . snd) pending_kfs))
+                                                 pending_hbs
+    
+    -- If we can find some fraction of the stack or heap to drop that looks like it will be admissable, just residualise those parts and continue
+    pprTrace "generalise: (gen_kfs, gen_xs')" (ppr (gen_kfs, gen_xs')) $ return ()
+    
+    let (ctxt_id, ctxt_ids) = takeUniqFromSupply splitterUniqSupply
+    return $ \opt -> generaliseSplit opt ctxt_ids (gen_kfs, gen_xs') deeds (Heap h ids, named_k, \ids -> (Just qa, oneBracketed' (qaType qa) (Once ctxt_id, (emptyDeeds, Heap M.empty ids, Loco False, annedQAToInAnnedTerm ids qa))))
+
+{-# INLINE generaliseSplit #-}
+generaliseSplit :: MonadStatics m
+                => (State -> m (Deeds, Out FVedTerm))
+                -> UniqSupply
+                -> (IS.IntSet, Out VarSet)
+                -> Deeds
+                -> (Heap, NamedStack, InScopeSet -> (Maybe (Anned QA), Bracketed (Entered, UnnormalisedState)))
+                -> m (ResidTags, Deeds, Out FVedTerm)
+generaliseSplit opt ctxt_ids split_from deeds (heap, named_k, focus) = optimiseSplit opt deeds'' bracketeds_heap bracketed_focus
+  where -- After we complete cheapification, the in-scope-set will not change at all, so we can poke it into the focus
+        (deeds', heap'@(Heap _ ids')) = cheapifyHeap deeds heap
+        (deeds'', bracketeds_heap, bracketed_focus) = splitt ctxt_ids split_from deeds' (heap', named_k, focus ids')
+
+-- Discard dead bindings:
+--  let x = ...
+--  in 1
+-- ==>
+--  1
+--
+-- But include transitively needed ones:
+--  let w = ...
+--      x = ...
+--      y = ... x ...
+--      z = ... y ...
+--  in z
+-- ==>
+--  let z = let x = ...
+--              y = ... x ...
+--          in ... y ...
+--  in z
+--
+-- Inline values and linear things into residual bindings:
+--  let x = ... y ...
+--      y = ...
+--  in \_ -> ... x ...
+-- ===>
+--  let x = let y = ...
+--          in ... y ...
+--  in \_ -> ... x ...
+--
+-- Inline values into residual non-linear things:
+--  let x = (y:ys)
+--  in \_ -> ... x ...
+-- ==>
+--  \_ -> let x = (y:ys)
+--        in ... x ...
+--
+-- Do NOT inline linear things into non-linear things:
+--  let x = (y:ys)
+--      y = ...
+--  in \_ -> ... x ...
+-- =/=>
+-- \_ -> let x = let y = ...
+--               in (y:ys)
+--       in ... x ...
+-- ===>
+--  let y = ...
+--  in \_ -> let x = (y:ys)
+--           in ... x ...
+--
+-- Inline things that are (apparently) used non-linearly times into linear things:
+--  let w = ...
+--      x = ... w ...
+--      y = ... w ...
+--      z = (x, y)
+--  in Just z
+-- ===>
+--  let z = let w = ...
+--              x = ... w ...
+--              y = ... w ...
+--          in (x, y)
+--  in Just z
+--
+-- Treat non-linearity due only to |case| branches as linearity:
+--  let x = ...
+--  in case unk of C -> ... x ...; D -> ... x ...
+-- ===>
+--  case unk of C -> let x = ... in ... x ...
+--              D -> let x = ... in ... x ...
+--
+-- Let-float things to trivialise them:
+--  let x = let y = ... in (y:xs)
+--  in \_ -> ... x ...
+-- ===>
+--  let y = ....
+--  \_ -> let x = (y:xs) in ... x ...
+--
+-- Note [EC binds something we need to refer to above]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+--   let z = f x
+--       y = unk + z
+--       x = case y of _ -> 2
+--   in x + 2
+--
+-- After splitting, we might want to drive this child term:
+--   let x = 2
+--   in x + 2
+-- 
+-- That's fine, but how are we going to get a reference to the "x" when residualising the y binding above?
+--  let z = f x
+--      y = unk + z
+--  in case y of _ -> h0
+--
+-- Lacking extra language features, our only option is to under-specialise the floats by inlining less
+-- evaluation context.
+data Shell = Shell {
+    shellExtraTags :: IM.IntMap Int,
+    shellExtraFvs  :: FreeVars,                      -- ^ Maximum free variables added by the residual wrapped around the holes
+    shellWrapper   :: [Out FVedTerm] -> Out FVedTerm -- ^ Wrap the contents of the holes
+  }
+
+
+data Hole a = Hole {
+    holeBvs    :: [Var], -- ^ Maximum bound variables added at each hole by the residual wrapped around the holes, most tightly binding last
+    holeFiller :: a      -- ^ Hole-fillers themselves. Usually State
+  }
+
+instance Functor Hole where fmap = fmapDefault
+instance Foldable Hole where foldMap = foldMapDefault
+
+instance Traversable Hole where
+    traverse f h = Hole (holeBvs h) <$> f (holeFiller h)
+
+instance Accumulatable Hole where
+    mapAccumTM f acc h = liftM (\(acc', filler') -> (acc', h { holeFiller = filler' })) $ f acc (holeFiller h)
+
+
+data TailishHole a = TailishHole {
+    tailishIsTailHole :: Bool,
+    tailishHole :: Hole a
+  }
+
+instance Functor TailishHole where fmap = fmapDefault
+instance Foldable TailishHole where foldMap = foldMapDefault
+
+instance Traversable TailishHole where
+    traverse f h = TailishHole (tailishIsTailHole h) <$> traverse f (tailishHole h)
+
+instance Accumulatable TailishHole where
+    mapAccumTM f acc h = liftM (\(acc', hole') -> (acc', h { tailishHole = hole' })) $ mapAccumTM f acc (tailishHole h)
+
+
+data Bracketed a = TailsKnown Type (Type -> Shell) [TailishHole a]
+                 | TailsUnknown             Shell  [Hole a]
+
+instance Functor Bracketed where fmap = fmapDefault
+instance Foldable Bracketed where foldMap = foldMapDefault
+
+instance Traversable Bracketed where
+    traverse f (TailsKnown ty mk_shell holes) = TailsKnown ty mk_shell <$> traverse (traverse f) holes
+    traverse f (TailsUnknown  shell    holes) = TailsUnknown shell     <$> traverse (traverse f) holes
+
+instance Accumulatable Bracketed where
+    mapAccumTM f acc (TailsKnown ty mk_shell holes) = liftM (second (TailsKnown ty mk_shell)) $ mapAccumTM (mapAccumTM f) acc holes
+    mapAccumTM f acc (TailsUnknown  shell    holes) = liftM (second (TailsUnknown shell))     $ mapAccumTM (mapAccumTM f) acc holes
+
+noneBracketed :: Tag -> Out FVedTerm -> Bracketed a
+noneBracketed = noneBracketed' . oneResidTag
+
+noneBracketed' :: IM.IntMap Int -> Out FVedTerm -> Bracketed a
+noneBracketed' tgs a = TailsUnknown (Shell { shellExtraTags = tgs, shellExtraFvs = freeVars a, shellWrapper = \[] -> a }) []
+
+-- NB: I could use normalise here to make my life easier if transitiveInline didn't treat Bracketed heaps specially
+--
+-- NB: it is VERY IMPORTANT that you use oneBracketed' instead in contexts where you might want to use the tails of the bracketed.
+-- In particular, if you use oneBracketed to prepare the branches of a case expression then map-map fusion won't work!
+oneBracketed :: UniqSupply -> Type -> (Entered, (Heap, Stack, In AnnedTerm)) -> Bracketed (Entered, UnnormalisedState)
+oneBracketed ctxt_ids ty (ent, (Heap h ids, k, in_e))
+  | eAGER_SPLIT_VALUES
+  , Just (cast_by, mb_update) <- isTrivialStack_maybe k -- NB: this might find a cast even when we have an answer in the context since the state is unnormalised
+  , Just anned_a0 <- termToCastAnswer ids in_e -- NB: I could push extra heap into the bracketed_a1 using the mb_update if it is Just, but I don't think I usually need to
+  , let anned_a1 = castByAnswer ids cast_by (annedToTagged anned_a0)
+        bracketed_a1 = fmap (\(ent', (deeds, Heap h' ids', k', in_e')) -> (if isOnce ent then ent' else Many, (deeds, Heap (h' `M.union` h) ids', k', in_e'))) $ -- Push heap of positive information/new lambda-bounds down + fix hole Entereds
+                       modifyShell (\shell -> shell { shellExtraFvs = shellExtraFvs shell `minusVarSet` fst (pureHeapVars h) LambdaBound }) $                    -- Fix bracket FVs by removing anything lambda-bound above
+                       splitCoerced (splitAnswer ctxt_ids ids) anned_a1
+  = case mb_update of
+      Nothing                          -> bracketed_a1
+      Just (Tagged tg_x' x', cast_by') -> zipBracketeds (TailsUnknown shell [Hole { holeBvs = [x'], holeFiller = bracketed_a1 }])
+        where shell = case cast_by' of
+                CastBy co co_tg -> Shell { shellExtraTags = oneResidTag tg_x' `plusResidTags` oneResidTag co_tg, shellExtraFvs = tyCoVarsOfCo co, shellWrapper = \[e'] -> letRec [(x', e')] (var x' `cast` co) }
+                Uncast          -> Shell { shellExtraTags = oneResidTag tg_x',                                   shellExtraFvs = emptyVarSet,     shellWrapper = \[e'] -> letRec [(x', e')] (var x') }
+  | otherwise
+  = oneBracketed' ty (ent, (emptyDeeds, Heap h ids, k, in_e))
+
+oneBracketed' :: Type -> a -> Bracketed a
+oneBracketed' ty x = TailsKnown ty (\_ -> Shell { shellExtraTags = IM.empty, shellExtraFvs = emptyVarSet, shellWrapper = \[e] -> e }) [TailishHole True (Hole [] x)]
+
+zipBracketeds :: Bracketed (Bracketed a)
+              -> Bracketed a
+zipBracketeds (TailsUnknown bshell bholes) = TailsUnknown (Shell shell_tags shell_fvs (\es -> shell_wrapper es [])) holes
+  where (shell_tags, shell_fvs, shell_wrapper, holes) = foldr go (shellExtraTags bshell, shellExtraFvs bshell, \[] rev_es' -> shellWrapper bshell (reverse rev_es'), []) bholes
+        go (Hole bvs bracketed) (shell_extra_tags, shell_extra_fvs, shell_wrapper, holes)
+          = (plusResidTags shell_extra_tags (bracketedExtraTags rbracketed),
+             shell_extra_fvs `unionVarSet` nonRecBindersFreeVars bvs (bracketedExtraFvs rbracketed),
+             \es rev_es' -> case splitBy (bracketedHoles rbracketed) es of
+                              (es_here, Right es_later) -> shell_wrapper es_later (shellWrapper (bracketedShell rbracketed) es_here:rev_es'),
+             bracketedHoles rbracketed ++ holes)
+          where rbracketed = rigidizeBracketed bracketed
+zipBracketeds (TailsKnown bty mk_bshell bholes) = case ei_holes of
+    Left holes  -> TailsUnknown          (Shell (mk_shell_tags bty) (mk_shell_fvs bty) (\es -> mk_shell_wrapper bty es [])) holes
+    Right holes -> TailsKnown bty (\ty -> Shell (mk_shell_tags ty)  (mk_shell_fvs ty)  (\es -> mk_shell_wrapper ty  es [])) holes
+  where (mk_shell_tags, mk_shell_fvs, mk_shell_wrapper, ei_holes) = foldr go (\ty -> shellExtraTags (mk_bshell ty),
+                                                                              \ty -> shellExtraFvs (mk_bshell ty),
+                                                                              \ty [] rev_es' -> shellWrapper (mk_bshell ty) (reverse rev_es'), Right []) bholes
+        go (TailishHole is_tail (Hole bvs bracketed)) (shell_extra_tags, shell_extra_fvs, shell_wrapper, ei_holes) = case bracketed of
+          TailsKnown _ty mk_shell holes'
+            | is_tail, Right holes <- ei_holes
+            -> (\ty -> plusResidTags (shell_extra_tags ty) (shellExtraTags (mk_shell ty)),
+                \ty -> shell_extra_fvs ty `unionVarSet` nonRecBindersFreeVars bvs (shellExtraFvs (mk_shell ty)),
+                \ty es rev_es' -> case splitBy holes' es of
+                                    (es_here, Right es_later) -> shell_wrapper ty es_later (shellWrapper (mk_shell ty) es_here:rev_es'),
+                Right (holes' ++ holes))
+          _ -> (\ty -> plusResidTags (shell_extra_tags ty) (bracketedExtraTags rbracketed),
+                \ty -> shell_extra_fvs ty `unionVarSet` nonRecBindersFreeVars bvs (bracketedExtraFvs rbracketed),
+                \ty es rev_es' -> case splitBy (bracketedHoles rbracketed) es of
+                                    (es_here, Right es_later) -> shell_wrapper ty es_later (shellWrapper (bracketedShell rbracketed) es_here:rev_es'),
+                case ei_holes of Left holes -> Left (bracketedHoles rbracketed ++ holes)
+                                 Right holes | is_tail   -> Left (bracketedHoles rbracketed ++ map tailishHole holes)
+                                             | otherwise -> Right (map (TailishHole False) (bracketedHoles rbracketed) ++ holes))
+            where rbracketed = rigidizeBracketed bracketed
+
+modifyShell :: (Shell -> Shell) -> Bracketed a -> Bracketed a
+modifyShell f (TailsKnown ty mk_shell holes) = TailsKnown ty (f . mk_shell) holes
+modifyShell f (TailsUnknown  shell    holes) = TailsUnknown  (f shell)      holes
+
+modifyTails_ :: (Type -> Type) -> ([a] -> [a]) -> Bracketed a -> Maybe (Bracketed a)
+modifyTails_ mk_ty f = fmap snd . modifyTails mk_ty (\as -> ((), f as))
+
+modifyTails :: forall a b. (Type -> Type) -> ([a] -> (b, [a])) -> Bracketed a -> Maybe (b, Bracketed a)
+modifyTails _     _ (TailsUnknown _ _)             = Nothing
+modifyTails mk_ty f (TailsKnown ty mk_shell holes) = Just (b, TailsKnown (mk_ty ty) mk_shell holes')
+  where (b, holes') = traverseSome (tailishIsTailHole :: TailishHole a -> Bool) (second unComp . traverseAll f . Comp) (holes :: [TailishHole a])
+
+
+data RBracketed a = RBracketed { bracketedShell :: Shell, bracketedHoles :: [Hole a] }
+
+instance Functor RBracketed where fmap = fmapDefault
+instance Foldable RBracketed where foldMap = foldMapDefault
+
+instance Traversable RBracketed where traverse f (RBracketed shell holes) = RBracketed shell <$> traverse (traverse f) holes
+
+bracketedExtraTags :: RBracketed a -> IM.IntMap Int
+bracketedExtraTags = shellExtraTags . bracketedShell
+
+bracketedExtraFvs :: RBracketed a -> FreeVars
+bracketedExtraFvs = shellExtraFvs . bracketedShell
+
+bracketedFreeVars :: (a -> FreeVars) -> RBracketed a -> FreeVars
+bracketedFreeVars fvs rbracketed = bracketedExtraFvs rbracketed `unionVarSet` unionVarSets [nonRecBindersFreeVars (holeBvs hole) (fvs (holeFiller hole)) | hole <- bracketedHoles rbracketed]
+
+rigidizeBracketed :: Bracketed a -> RBracketed a
+rigidizeBracketed (TailsKnown ty mk_shell holes) = RBracketed (mk_shell ty) (map tailishHole holes)
+rigidizeBracketed (TailsUnknown  shell    holes) = RBracketed shell         holes
+
+
+optimiseMany :: Monad m
+             => ((Deeds, a) -> m (ResidTags, Deeds, b))
+             -> (Deeds, [a])
+             -> m (ResidTags, Deeds, [b])
+optimiseMany opt (deeds, xs) = liftM fixup $ mapAccumLM opt' deeds xs
+  where opt' x y = liftM (\(resid_tags, deeds, b) -> (deeds, (resid_tags, b))) $ opt (x, y)
+        fixup (deeds, resid_tagss_bs) = (plusResidTagss resid_tagss, deeds, bs)
+          where (resid_tagss, bs) = unzip resid_tagss_bs
+
+optimiseBracketed :: MonadStatics m
+                  => (State -> m (Deeds, Out FVedTerm))
+                  -> (Deeds, RBracketed State)
+                  -> m (ResidTags, Deeds, Out FVedTerm)
+optimiseBracketed opt (deeds, rbracketed) = liftM (\(resid_tags, deeds, es) -> (shellExtraTags shell `plusResidTags` resid_tags, deeds, shellWrapper shell es)) $ optimiseMany optimise_one (deeds, bracketedHoles rbracketed)
+  where shell = bracketedShell rbracketed
+        optimise_one (deeds, (Hole extra_bvs (s_deeds, s_heap, s_k, s_e))) = liftM (\(xes, (deeds, e)) -> (IM.empty, deeds, bindManyMixedLiftedness fvedTermFreeVars xes e)) $ bindCapturedFloats (mkVarSet extra_bvs) $ opt (deeds `plusDeeds` s_deeds, s_heap, s_k, s_e)
+        -- Because h-functions might potentially refer to the lambda/case-alt bound variables around this hole,
+        -- we use bindCapturedFloats to residualise such bindings within exactly this context.
+        -- See Note [When to bind captured floats]
+
+
+-- TODO: when driving a residual binding:
+--   let x = D[e]
+--   in ..
+--
+-- Arjan Boeijink suggested driving the following instead of D[e]:
+--   D[< | e | update x>]
+--
+-- This can help propagate more positive information, e.g. if e contains an occurrence of x itself
+--
+-- I'm not doing this right now because I'm wary about the termination issues. We should also be careful that we
+-- don't create loops as a result...
+
+data BracketedStuff a = BracketedStuff (RBracketed a) (M.Map (Out Var) (RBracketed a))
+
+instance Functor BracketedStuff where fmap = fmapDefault
+instance Foldable BracketedStuff where foldMap = foldMapDefault
+
+instance Traversable BracketedStuff where
+    traverse f (BracketedStuff a b) = BracketedStuff <$> traverse f a <*> traverse (traverse f) b
+
+optimiseSplit :: MonadStatics m
+              => (State -> m (Deeds, Out FVedTerm))
+              -> Deeds
+              -> M.Map (Out Var) (RBracketed State)
+              -> RBracketed State
+              -> m (ResidTags, Deeds, Out FVedTerm)
+optimiseSplit opt deeds bracketeds_heap bracketed_focus = {-# SCC "optimiseSplit'" #-}do
+    -- 0) The "process tree" splits at this point. We can choose to distribute the deeds between the children in a number of ways
+    let (deeds_initial, BracketedStuff bracketed_deeded_focus bracketeds_deeded_heap) = flip traverseAll (BracketedStuff bracketed_focus bracketeds_heap) $
+          \states -> let (deeds_initial:deedss) = splitDeeds deeds (1:map stateSize states)
+                     in (deeds_initial, zipWithEqual "optimiseSplit" addStateDeeds deedss states)
+    
+    MASSERT2(noChange (sumMapMonoid (sumMapMonoid releaseStateDeed) bracketeds_heap        `plusDeeds` sumMapMonoid releaseStateDeed bracketed_focus        `plusDeeds` deeds)
+                      (sumMapMonoid (sumMapMonoid releaseStateDeed) bracketeds_deeded_heap `plusDeeds` sumMapMonoid releaseStateDeed bracketed_deeded_focus `plusDeeds` deeds_initial),
+             ppr deeds)
+    
+    (hes, (resid_tags, deeds, xes, e_focus)) <- bindCapturedFloats (dataSetToVarSet (M.keysSet bracketeds_heap)) $ do
+        -- 1) Recursively drive the focus itself
+        (fvs_focus_hs, (resid_tags0, leftover_deeds, e_focus)) <- monitorFVs $ optimiseBracketed opt (deeds_initial, bracketed_deeded_focus)
+        
+        -- 2) We now need to think about how we are going to residualise the letrec. In fact, we need to loop adding
+        -- stuff to the letrec because it might be the case that:
+        --  * One of the hes from above refers to some heap binding that is not referred to by the let body
+        --  * So after we do withStatics above we need to drive some element of the bracketeds_heap
+        --  * And after driving that we find in our new hes a new h function referring to a new free variable
+        --    that refers to some binding that is as yet unbound...
+        let resid_fvs = fvs_focus_hs `unionVarSet` fvedTermFreeVars e_focus
+        (resid_tags1, leftover_deeds, bracketeds_deeded_heap, _fvs, xes) <- optimiseLetBinds opt leftover_deeds bracketeds_deeded_heap resid_fvs
+    
+        return (resid_tags0 `plusResidTags` resid_tags1, sumMapMonoid (sumMapMonoid releaseStateDeed) bracketeds_deeded_heap `plusDeeds` leftover_deeds, xes, e_focus)
+    
+    -- 3) Combine the residualised let bindings with the let body
+    return (resid_tags, deeds, bindManyMixedLiftedness fvedTermFreeVars (xes ++ hes) e_focus)
+
+
+-- We only want to drive (and residualise) as much as we actually refer to. This loop does this: it starts
+-- by residualising the free variables of the focus residualisation (or whatever is in the let body),
+-- and then transitively inlines any bindings whose corresponding binders become free.
+optimiseLetBinds :: MonadStatics m
+                 => (State -> m (Deeds, Out FVedTerm))
+                 -> Deeds
+                 -> M.Map (Out Var) (RBracketed State)
+                 -> FreeVars
+                 -> m (ResidTags, Deeds, M.Map (Out Var) (RBracketed State), FreeVars, Out [(Var, FVedTerm)])
+optimiseLetBinds opt leftover_deeds bracketeds_heap fvs' = -- traceRender ("optimiseLetBinds", M.keysSet bracketeds_heap, fvs') $
+                                                           go IM.empty leftover_deeds bracketeds_heap [] fvs'
+  where
+    go resid_tags leftover_deeds bracketeds_deeded_heap_not_resid xes_resid resid_fvs
+      | M.null h_resid = return (resid_tags, leftover_deeds, bracketeds_deeded_heap_not_resid, resid_fvs, xes_resid)
+      | otherwise = {- traceRender ("optimiseSplit", xs_resid') $ -} do
+        -- Recursively drive the new residuals arising from the need to bind the resid_fvs
+        (fvs_es_hs, (extra_resid_tags, leftover_deeds, es_resid')) <- monitorFVs $ optimiseMany (optimiseBracketed opt) (leftover_deeds, bracks_resid)
+        -- Recurse, because we might now need to residualise and drive even more stuff (as we have added some more FVs)
+        let resid_fvs_delta = fvs_es_hs `unionVarSet` unionVarSets (map fvedTermFreeVars es_resid')
+        go (resid_tags `plusResidTags` extra_resid_tags)
+           leftover_deeds
+           bracketeds_deeded_heap_not_resid'
+           (xes_resid ++ zip xs_resid' es_resid')
+           (resid_fvs `unionVarSet` resid_fvs_delta)
+      where
+        -- When assembling the final list of things to drive, ensure that we exclude already-driven things
+        (h_resid, bracketeds_deeded_heap_not_resid') = M.partitionWithKey (\x _br -> x `elemVarSet` resid_fvs) bracketeds_deeded_heap_not_resid
+        (xs_resid', bracks_resid) = unzip $ M.toList h_resid
+
+-- TODO: I could use the improved entered info that comes from the final FVs to adjust the split and float more stuff inwards..
+
+type NamedStack = Train (Int, Tagged StackFrame) Generalised
+
+splitt :: UniqSupply
+       -> (IS.IntSet, Out VarSet)
+       -> Deeds
+       -> (Heap, NamedStack, (Maybe (Anned QA), Bracketed (Entered, UnnormalisedState))) -- ^ The thing to split, and the Deeds we have available to do it
+       -> (Deeds,                              -- The Deeds still available after splitting
+           M.Map (Out Var) (RBracketed State), -- The residual "let" bindings
+           RBracketed State)                   -- The residual "let" body
+splitt ctxt_ids (gen_kfs, gen_xs) deeds (Heap h ids, named_k, (mb_anned_qa, bracketed_qa))
+    = {-# SCC "splitt'" #-} snd $ split_step split_fp
+      -- Once we have the correct fixed point, go back and grab the associated information computed in the process
+      -- of obtaining the fixed point. That is what we are interested in, not the fixed point itselF!
+      -- TODO: eliminate redundant recomputation here?
+  where    
+    (ctxt_ids0, ctxt_ids1) = splitUniqSupply ctxt_ids
+
+    -- We compute the correct way to split as a least fixed point, slowly building up a set of variables
+    -- (bound by heap bindings and update frames) that it is safe *not* to residualise.
+    --
+    -- Note that as an optimisation, optimiseSplit will only actually creates those residual bindings if the
+    -- corresponding variables are free *after driving*. Of course, we have no way of knowing which bindings
+    -- will get this treatment here, so just treat resid_xs as being exactly the set of residualised stuff.
+    split_fp = lfpFrom (\(xs1, ys1) (xs2, ys2) -> (xs1 `unionVarSet` xs2, ys1 `unionVarSet` ys2)) (emptyVarSet, emptyVarSet) (fst . split_step)
+    
+    -- Simultaneously computes the next fixed-point step and some artifacts computed along the way,
+    -- which happen to correspond to exactly what I need to return from splitt.
+    split_step (safe_not_resid_xs, deeds_resid_xs) = -- let pPrintBracketedState = map pPrintFullState . fillers in traceRender ("split_step", (not_resid_xs, bound_xs S.\\ not_resid_xs), pureHeapBoundVars h_not_residualised, pureHeapBoundVars h_residualised, M.map pPrintBracketedState bracketeds_heap', pPrintBracketedState bracketed_focus') $
+                                                     ((safe_not_resid_xs', deeds_resid_xs'), (deeds4, bracketeds_heap', bracketed_focus'))
+      where
+        -- 0) Compute the set of variables that I can *actually* get away without residualising, once deeds are accounted for
+        -- See Note [Deeds and splitting] for further details on this.
+        not_resid_xs = safe_not_resid_xs `minusVarSet` deeds_resid_xs
+        
+        -- 1) Build a candidate splitting for the Stack and QA components
+        -- When creating the candidate stack split, we ensure that we create a residual binding
+        -- for any variable in the resid_xs set, as we're not going to inline it to continue.
+        --
+        -- NB: do NOT normalise at this stage because in transitiveInline we assume that State heaps are droppable!
+        scruts = case fmap annee mb_anned_qa of Just (Question x') -> [x']; _ -> []
+        (deeds1a, bracketeds_updated, bracketed_focus)
+          = pushStack ctxt_ids0 ids deeds scruts (fmapCars (\(i, kf) -> (need_not_resid_kf i kf, kf)) named_k) bracketed_qa
+            
+        need_not_resid_kf i kf
+          | i `IS.member` gen_kfs
+          = False
+          | Update x' <- tagee kf -- We infer the stack frames we're not residualising based on the *variables* we're not residualising
+          = x' `elemVarSet` not_resid_xs
+          | otherwise
+          = True
+        
+        -- 2) Build a splitting for those elements of the heap we propose to residualise not in not_resid_xs
+        -- TODO: I should residualise those Unfoldings whose free variables have become interesting due to intervening scrutinisation
+        (h_not_residualised, h_residualised) = M.partitionWithKey (\x' _ -> x' `elemVarSet` not_resid_xs) h
+        bracketeds_nonupdated0 = M.mapMaybeWithKey (\x' hb -> do { guard (howBound hb == InternallyBound); return $ case heapBindingTerm hb of Nothing -> (error "Unimplemented: no tag for undefined", undefined "FIXME FIXME" (noneBracketed (error "No tag for undefined") (fvedTerm (Var (undefined "tcLookupId" undefinedName))))); Just in_e@(_, e) -> (annedTag e, oneBracketed ctxt_ids1 (idType x') (Once (idUnique x'), (Heap M.empty ids, Loco False, in_e))) }) h_residualised
+        -- An idea from Arjan, which is sort of the dual of positive information propagation:
+        -- TODO: this is too dangerous presently: we often end up adding an Update at the end just after we generalised it away, building ourselves a nice little loop :(
+        -- I have tried to work around this by only introducing Update frames for those things that don't presently have one... but that also doesn't work because if we start
+        -- with (let x = v in x) then we reduce to (let x = v in \underbar{x}) and then split to (let x = v in x)
+        --(deeds1, bracketeds_nonupdated) = M.mapAccumWithKey (\deeds x' (update_tg, brack) -> modifyTails (\states -> case claimDeeds deeds (length states) of Nothing -> (deeds, states); Just deeds -> (deeds, map (\(entered, (deeds, heap, k, in_e)) -> (entered, (deeds, heap, k ++ [Tagged update_tg (Update x')], in_e))) states)) brack `orElse` (deeds, brack)) deeds1a bracketeds_nonupdated0
+        (deeds1, bracketeds_nonupdated) = (deeds1a, M.map snd bracketeds_nonupdated0)
+        -- For every heap binding we ever need to deal with, contains a version of that heap binding as a concrete Bracketed thing
+        bracketeds_heap = bracketeds_updated `M.union` bracketeds_nonupdated
+        
+        -- 3) Inline as much of the Heap as possible into the candidate splitting
+        
+        -- 3a) Release deeds
+        -- In order to make the Deeds-based stuff less conservative, my first action here is to release our claims to those deeds
+        -- which we do *not* intend to create a residual let binding for here and now. This will let us always inline a heap-bound
+        -- thing into *at least one* context (unless it really is referred to by the residual code).
+        --
+        -- The equivalent process is done for the stack in splitStack itself: we just subtract 1 from the number of deeds we need to
+        -- claim when duplicating a stack frame.
+        deeds2 = releasePureHeapDeeds deeds1 h_not_residualised
+        
+        -- 3b) Work out which part of the heap is admissable for inlining
+        -- * We are allowed to inline concrete things which are duplicatable or are not residualised right here and now
+        -- * Non-concrete stuff should be inlined if and only if it is not explicitly residualised by the caller. The motivation that
+        --   if we generalise away a term, we want to generalise away the staticness as well. Furthermore, it is clear that if we are
+        --   just generalising away staticness itself we certainly should not push the corresponding non-concrete binding down.
+        -- * We take this opportunity to mark all residualised things as static (being careful to not override actual definitions in h_cheap).
+        --   It important that we do not mark residualised things as phantoms just because they are in bracketeds_heap. If we did, it would mean
+        --   that *concrete residualised stuff* is recorded as a phantom even if it was explicitly residualised in the initial iteration (since
+        --   anything residualised in the first iteration is certainly in bracketeds_heap).
+        -- * If we are inlining a value (into a non-linear context), we are careful to only inline an *indirection* to that value. That
+        --   allows us to prevent duplicating the allocation of such values. NB: we still duplicate allocation of cheap non-values, but never mind...
+        --
+        -- Inlineable things are either:
+        --  1) Heap bindings from the input (i.e from the heap and update frames) that have not been residualised for work duplication reasons
+        --  2) Concrete values and cheap expressions from the input, in a form that is suitable for pushing down (i.e. values have been turned into indirections).
+        --  3) Phantom versions of phantom input heap bindings (just copied verbatim).
+        --  4) Phantom versions of concrete input heap bindings
+        -- The range of this heap is lte that of bracketeds_heap. We explicitly EXCLUDE those bindings that we are residualising based on the generalisation heuristic.
+        -- We prefer input heap bindings to everything else, and concrete values/cheap expressions to phantoms. For example, even if a value is residualised, we would
+        -- like to push down *some* version of it, hence the h_cheap full of indirections. And even if a concrete term is residualised we'd like a phantom version of it.
+        --
+        -- Basically the idea of this heap is "stuff we want to make available to push down"
+        h_updated_phantoms = M.fromDistinctAscList [(x', lambdaBound) | x' <- M.keys bracketeds_updated] -- TODO: move this into h_cheap_and_phantoms?
+        h_inlineable = varSetToDataMap generalisedLambdaBound gen_xs `M.union` -- The exclusion just makes sure we don't inline explicitly generalised bindings (even phantom ones)
+                       (h_not_residualised `M.union`                           -- Take any non-residualised bindings from the input heap/stack...
+                        h_cheap_and_phantom `M.union`                          -- ...failing which, take concrete definitions for cheap heap bindings (even if they are also residualised) or phantom definitions for expensive ones...
+                        h_updated_phantoms)                                    -- ...failing which, take phantoms for things bound by update frames (if supercompilation couldn't turn these into values, GHC is unlikely to get anything good from seeing defs)
+        
+        -- Generalising the final proposed floats may cause some bindings that we *thought* were going to be inlined to instead be
+        -- residualised. We need to account for this in the Entered information (for work-duplication purposes), and in that we will
+        -- also require any FVs of the new residualised things that are bound in the stack to residualise more frames.
+        inlineHeapT :: Accumulatable t
+                    => (Deeds -> a   -> (Deeds, EnteredEnv, b))
+                    ->  Deeds -> t a -> (Deeds, EnteredEnv, t b)
+        inlineHeapT f deeds b = (deeds', entered', b')
+          where ((deeds', entered'), b') = mapAccumT (\(deeds, entered) s -> case f deeds s of (deeds, entered', s) -> ((deeds, plusVarEnv_C plusEntered entered entered'), s)) (deeds, emptyVarEnv) b
+
+        -- Like inlineHeapT, but removes from the EnteredEnv any mention of the actual binder being analysed, so we push more stuff down
+        -- NB: this would be (partially?) subsumed if we found a way to push an Update frame for such a thing into its Bracketed, since then it wouldn't even be a FV
+        --
+        -- This is required to prevent self-recursive heap bindings from being unconditionally residualised:
+        --  let xs = x : xs
+        --      z = head xs
+        --  in Just z
+        -- ==>
+        --  let z = let xs = x : xs
+        --          in head xs
+        --  in Just z
+        --
+        -- Without this hack, xs is Once from both z and xs, so it is Many overall and can't be inlined.
+        --
+        -- More generally, we might have mutual recursion:
+        --  let xs = x : ys
+        --      ys = y : xs
+        --      z = head xs + head ys
+        --  in Just z
+        -- ==>
+        --  let z = let xs = x : xs
+        --              ys = y : ys
+        --          in head xs + head ys
+        --  in Just z
+        --
+        -- To deal with the more general case, we have to identify strongly-connected-components in the
+        -- graph of heap Bracketed things. This is well-motivated because SCCs must be either pushed
+        -- or residualised as a group.
+        --
+        -- Note [Greatest fixed point]
+        -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~
+        -- You are probably thinking that this is all an awful workaround for the fact that I'm not using a greatest fixed point. This is true.
+        -- However, although GFP would work perfectly in the example above, it is difficult to get right in general because we don't know exactly how
+        -- to grow the set of "always residualise" bindings/frames on each iteration. Consider:
+        --
+        --  let x = e1
+        --      y = e2[x]
+        --  in \z -> e3[y]
+        --
+        -- After optimistic GFP inlining we get:
+        --
+        --  \z -> let x = e1
+        --            y = e2[x]
+        --        in e3[y]
+        --
+        -- But now we've duplicated computation! We can see this because the post-inlining entered info for the non-value bindings x and y is
+        -- both \infty. However, we can't on the next iteration just mark both x and y as "always residualise" because then we won't get the optimal code i.e.
+        --
+        --  let y = let x = e1
+        --          in e2[x]
+        --  in \z -> e3[y]
+        --
+        -- It seems really hard to work out a minimal set of things to add to the "always residualise" set every time. One promising approach is to:
+        --  1. Construct a graph from the heap/stack/context to split with these nodes:
+        --     a) x for (x |-> d) in heap: outgoing edges go to the update frames/heap bindings binding the free variables of d
+        --     b) i for i in [1..length(stack)]: outgoing edges go to the update frames/heap bindings binding the free variables of stack_i
+        --  2. Mark all the nodes in the graph with \infty occurrence that are *not* heap-bound value nodes
+        --  3. Remove any marks that are strictly dominated by other marks along ALL of their incoming edges
+        --
+        -- The problem with this scheme is that the mark-removal can lead to ambiguity if more than one thing in a SCC is marked.  Consider:
+        --
+        --  let x = e1[y]
+        --      y = e2[x]
+        --      a = e3[x]
+        --      b = e4[x]
+        --  in (a, b)
+        --
+        -- In the first iteration, both a and b will be marked "always residualise" since the marks on x and y will be dominated by those on a and b.
+        -- On the next iteration, both x and y will be marked again (they will both be trial-inlined into both the a and b bindings) and we have the
+        -- choice of unmarking either of them but not both.
+        --
+        -- In this case the right thing to do is unmark "y" and leave "x" marked since unmarking "x" will just lead to it being residualised on the
+        -- next iteration anyway, but unmarking "y" will allow it to be inlined into the x binding.
+        inlineHeapWithKey :: (Deeds -> a                 -> (Deeds, EnteredEnv, b))
+                          ->  Deeds -> M.Map (Out Var) a -> (Deeds, EnteredEnv, M.Map (Out Var) b)
+        inlineHeapWithKey f deeds b = (deeds', overall_entered', b')
+          where
+            ((deeds', heap_entered'), b') = M.mapAccumWithKey (\(deeds, heap_entered) x' brack -> case f deeds brack of (deeds, entered', brack) -> ((deeds, M.insert x' entered' heap_entered), brack)) (deeds, M.empty) b
+
+            overall_entered' = -- pprTrace "overall_entered'" (ppr (M.toList heap_entered')) $
+                               foldr go emptyVarEnv $ stronglyConnCompG $ graphFromEdgedVertices [(entered', varUnique x', varEnvKeys entered') | (x', entered') <- M.toList heap_entered']
+
+            go (AcyclicSCC (entered', _, _)) overall_entered = plusVarEnv_C plusEntered entered' overall_entered
+            go (CyclicSCC  nodes)            overall_entered = foldr (\entered' overall_entered' -> plusVarEnv_C plusEntered (entered' `delListFromUFM_Directly` xs') overall_entered') overall_entered entereds'
+              where (entereds', xs', _) = unzip3 nodes
+
+        -- Inline what we can of the heap, and compute the Entered information for the resulting thing.
+        -- See Note [transitiveInline and entered information] for the story about Entered information.
+        --
+        -- TODO: I (probably) need to transfer the EnteredEnv safely out of Bracketed things, taking account of bound variables
+        -- over the holes. However, I think it's probably safe to ignore that for now because those bound variables will have been
+        -- renamed so as not to coincide with any of the heap/stack bindings above that we actually care about the entered information for.
+        -- So the outgoing entered envs will have a bit of junk in them, but who cares?
+        inlineBracketHeap :: Deeds -> Bracketed (Entered, UnnormalisedState) -> (Deeds, EnteredEnv, RBracketed State)
+        inlineBracketHeap init_deeds = third3 rigidizeBracketed . inlineHeapT inline_one init_deeds
+          where
+            inline_one deeds (ent, state) = -- pprTrace "inline_one" (ppr (ent, stateFreeVars state', state)) $
+                                            (deeds', mkEnteredEnv ent $ stateFreeVars state', (emptyDeeds, heap', k', in_e'))
+              where
+                -- The elements of the Bracketed may contain proposed heap bindings gathered from Case frames.
+                -- However, we haven't yet claimed deeds for them :-(.
+                --
+                -- This is OK, because transitiveInline treats the heap of its state "specially". NB: the correctness
+                -- of this relies on the fact that only "optional" bindings that shadow something bound above are ever
+                -- present in this heap.
+                --
+                -- We do the normalisation immediately afterwards - we can't do it before transitiveInline, precisely
+                -- because of the above hack (normalisation might add bindings to the heap).
+                state'@(deeds', heap', k', in_e') = normalise $ transitiveInline h_inlineable (deeds `addStateDeeds` state)
+        
+        -- 3c) Actually do the inlining of as much of the heap as possible into the proposed floats
+        -- We also take this opportunity to strip out the Entered information from each context.
+        (deeds3, entered_focus, bracketed_focus') =                   inlineBracketHeap deeds2 bracketed_focus
+        (deeds4, entered_heap,  bracketeds_heap') = inlineHeapWithKey inlineBracketHeap deeds3 bracketeds_heap
+
+        -- 4) Construct the next element of the fixed point process:
+        --  a) We should residualise:
+        --     * Any x in the extraFvs of a bracketed thing, because we need to be able to refer to it right here, whatever happens
+        --     * Anything explicitly generalised
+        must_resid_xs = bracketedExtraFvs bracketed_focus' `unionVarSet` unionVarSets (map bracketedExtraFvs (M.elems bracketeds_heap'))
+                          `unionVarSet` gen_xs
+        --  b) We should *stop* residualising bindings that got Entered only once in the proposal.
+        --     I once thought that we should only add a single variable to non_resid_xs' every time around the loop, because I worried
+        --     that choosing not to residualise some binding would cause some other bindings to stop being candiates (i.e. would increase
+        --     the number of times they were entered).
+        --
+        --     However, I've revised my opinion and decided to add all candidate variables every time. This is because if we inline a binding
+        --     into a context where it is still evaluated Once, anything it refers to is still evaluated Once. So the existing Entered information
+        --     does not appear to be invalidated when we decide not to residualise an additional binding.
+        entered    = plusVarEnv_C plusEntered entered_focus entered_heap
+        safe_not_resid_xs' = -- pprTrace "candidates" (ppr (safe_not_resid_xs, entered, onces, must_resid_xs)) $
+                             safe_not_resid_xs `unionVarSet` (onces `minusVarSet` must_resid_xs)
+          where onces = filterVarSet (\x' -> maybe True isOnce (lookupVarEnv entered x')) bound_xs
+        --   c) We should *start* residualising those bindings we thought were safe to inline but we actually couldn't inline because
+        --      deeds issues prevented us from inlining them into *all* contexts that needed them. See also Note [Deeds and splitting]
+        --
+        --      This should also deal with residualising any InternallyBound stuff that we decided to instead let/lambda bound to e.g. prevent
+        --      value duplication, because the names of such bound things will be free in the proposed states.
+        deeds_resid_xs' = deeds_resid_xs `unionVarSet` (safe_not_resid_xs `intersectVarSet` (bracketedFreeVars stateFreeVars bracketed_focus' `unionVarSet`
+                                                                                             unionVarSets (map (bracketedFreeVars stateFreeVars) (M.elems bracketeds_heap'))))
+    
+    -- Bound variables: those variables that I am interested in making a decision about whether to residualise or not
+    bound_xs = pureHeapBoundVars h `unionVarSet` stackBoundVars (fmapCars snd named_k)
+    
+    -- Heap full of cheap expressions and any phantom stuff from the input heap but NOT from update frames
+    -- Used within the main loop in the process of computing h_inlineable -- see comments there for the full meaning of this stuff.
+    extract_cheap_hb hb
+       -- We better not try to push down any bindings that would introduce work-duplication issues
+      | InternallyBound <- howBound hb
+      , Just (_, e) <- heapBindingTerm hb
+      = if termIsCheap e
+        then hb {                                         howBound = howToBindCheap e } -- Use binding heuristics to determine how to refer to the cheap thing
+        else hb { heapBindingMeaning = Left (Left False), howBound = LambdaBound }      -- GHC is unlikely to get any benefit from seeing the binding sites for non-cheap things
+       -- Inline phantom/unfolding stuff verbatim: there is no work duplication issue (the caller would not have created the bindings unless they were safe-for-duplication)
+      | otherwise
+      = hb
+    h_cheap_and_phantom0 = M.map extract_cheap_hb h
+    h_cheap_and_phantom | (_, Tagged _ (Update x')) `Car` _ <- named_k -- NB: by normalisation, there can't be a cast before the update
+                        , Just anned_qa <- mb_anned_qa
+                        , Right anned_a <- caseAnnedQA anned_qa -- FIXME: having a question here might also be legit
+                        , let in_e@(_, e) = annedAnswerToInAnnedTerm anned_a
+                        = M.insert x' ((internallyBound in_e) { howBound = howToBindCheap e }) h_cheap_and_phantom0
+                        | otherwise
+                        = h_cheap_and_phantom0
+
+howToBindCheap :: AnnedTerm -> HowBound
+howToBindCheap e
+  | not lOCAL_TIEBACKS = InternallyBound
+  | dUPLICATE_VALUES_SPLITTER = InternallyBound
+  | Value v <- annee e = case v of
+    TyLambda _ _ -> LetBound -- Heuristic: GHC would lose too much if we cut the
+    Lambda   _ _ -> LetBound -- connection between the definition and use sites
+    Data _ as cos xs | null as, null cos, null xs -> InternallyBound -- Heuristic: GHC will actually statically allocate data with no arguments (this also has the side effect of preventing tons of type errors due to [] getting shared)
+                     | otherwise                  -> LambdaBound
+    Literal _  -> InternallyBound -- No allocation duplication since GHC will float them (and common them up, if necessary)
+    Coercion _ -> InternallyBound -- Not allocated at all
+   -- GHC is unlikely to get anything useful from seeing the definition of cheap non-values, so we'll have them as unfoldings
+  | otherwise = LambdaBound
+
+-- Note [Deeds and splitting]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~
+-- Some heap bindings are safe to inline (from a work-duplication perspective), but bad to inline from a deeds perspective
+-- because it can prove impossible to get enough deeds to actually inline them. We apply a rather unsubtle (but safe)
+-- heuristic to deal with this situation, by monotonically growing a set of variables that we should *not* attempt
+-- to inline even though they appear in the safe_not_resid_xs set.
+-- 
+-- This really is extremely conservative, but if we're running out of deeds bad things will happen anyway, so who cares?
+--
+-- If we did not do this, then the bracketed_heap outgoing from splitt may not bind some of the variables free in what
+-- it intends to drive, because bracketeds_heap only contains those bindings that splitt decided should be residualised.
+
+
+-- Note [Residualisation of things referred to in extraFvs]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+-- We need to residualise stuff like the a and b in this:
+--  <a |-> 1, b |-> 2 | (a, b) | >
+--
+-- But *not* the x in this:
+--  < | foo | case foo of \Delta, update x, [_] + 1 >
+--
+-- How the hell do we accomplish that? The trick is to change how update frames get split. After splitting an
+-- update frame for x, I continue splitting the rest of the stack with a oneBracketed rather than a noneBracketed in
+-- the focus.
+--
+-- If I didn't do this, the extraFvs of the bracket would indicate that x was free in a created residual term, so I
+-- would be forced to residualise the binding just as if e.g. "Just x" had been in the focus of the state. Since I don't,
+-- x doesn't appear in the extraFvs, and I can compute Entered information for it with transitiveInline. If this says x
+-- was entered Once in aggregate I can stop residualising the update frame! Beautiful!
+--
+-- FIXME: this buggered up when I started splitting terms like < | I# a# | update a > because they split to *themselves*
+-- by pushing the recovered value heap { a |-> I# a# } into the oneBracketed "a" from the update frame. I only really need
+-- this trick because I start by assuming that x is non-pushable, anyway, so I should probably rewrite the splitter
+-- to use that new more agressive algorithm.. (I can remove the eager value splittin for Update frames too since that wasn't
+-- causing the loops I was seeing - this was)
+--
+-- Note [Entered information]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~
+-- Consider:
+--   expensive |-> fact 100
+--   a |-> Just expensive
+--   b |-> Just expensive
+--   (a, b)
+--
+-- We need to residualise expensive, but what is the mechanism for doing so? Both a and b will get residualised
+-- by the rule above because they are FVs of the focus.
+--
+-- We gather Entered information from each proposed Bracketed to collect Entered information for each free variable.
+-- This reports how many times a variable would be (in the worst case) get reevaluated if the binding was made available
+-- for inlining and thus pushed into that context. So in this case, Entered information for the a and b bindings report
+-- that expensive would get evaluated Once in each context, which joins together to make Many times.
+--
+-- This is the basis on which we choose to residualise expensive.
+
+-- We are going to use this helper function to inline any eligible inlinings to produce the expressions for driving.
+--
+-- WARNING! We treat bindings in the incoming Heap very specially we assume that we haven't yet claimed any deeds for them
+--
+-- This is a consequence of the fact that this heap is only non-empty in the splitter for states originating from the
+-- branch of some residual case expression.
+transitiveInline :: PureHeap          -- ^ What to inline. We have not claimed deeds for any of this.
+                 -> UnnormalisedState -- ^ What to inline into
+                 -> UnnormalisedState
+transitiveInline init_h_inlineable _state@(deeds, Heap h ids, k, in_e)
+    = -- (if not (S.null not_inlined_vs') then traceRender ("transitiveInline: generalise", not_inlined_vs') else id) $
+      -- traceRender ("transitiveInline", "had bindings for", pureHeapBoundVars init_h_inlineable, "FVs were", state_fvs, "so inlining", pureHeapBoundVars h') $
+      ASSERT2(isEmptyVarSet (unnormalisedStateUncoveredVars final_state), ppr (M.keysSet h_inlineable, PrettyDoc $ pPrintFullUnnormalisedState quietStatePrettiness _state, PrettyDoc $ pPrintFullUnnormalisedState quietStatePrettiness final_state, unnormalisedStateUncoveredVars final_state, M.keysSet h', live'))
+      final_state
+  where
+    final_state = (deeds', Heap h' ids, k, in_e)
+    
+    (live', deeds', h') = heap_worker 0 deeds M.empty (unnormalisedStateFreeVars (deeds, Heap M.empty ids, k, in_e)) emptyVarSet
+    
+    -- NB: we prefer bindings from h to those from init_h_inlineable if there is any conflict. This is motivated by
+    -- the fact that bindings from case branches are usually more informative than e.g. a phantom binding for the scrutinee.
+    h_inlineable = h `M.union` init_h_inlineable
+    
+    -- This function is rather performance critical: I originally benchmarked transitiveInline as taking 59.2% of runtime for DigitsOfE2!
+    heap_worker :: Int -> Deeds -> PureHeap -> FreeVars -> FreeVars -> (FreeVars, Deeds, PureHeap)
+    heap_worker n deeds h_output live live_in_let
+      = -- traceRender ("go", n, M.keysSet h_inlineable, M.keysSet h_output, fvs) $
+        if live == live'
+        then (live', deeds', neutraliseLetLives live_in_let' h_output') -- NB: it's important we use the NEW versions of h_output/deeds, because we might have inlined extra stuff even though live hasn't changed!
+        else heap_worker (n + 1) deeds' h_output' live' live_in_let'
+      where 
+        (deeds', h_output', live', live_in_let') = M.foldrWithKey consider_inlining (deeds, h_output, live, live_in_let) ((h_inlineable `restrictDataMapVarSet` live) M.\\ h_output)
+        
+        -- NB: we rely here on the fact that our caller will still be able to fill in bindings for stuff from h_inlineable
+        -- even if we choose not to inline it into the State, and that such bindings will not be evaluated until they are
+        -- actually demanded (or we could get work duplication by inlining into only *some* Once contexts).
+        --
+        -- NB: we also rely here on the fact that the original h contains "optional" bindings in the sense that they are shadowed
+        -- by something bound above - i.e. it just tells us how to unfold case scrutinees within a case branch.
+        --
+        -- NB: It's important that type variables become live after inlining a binding, or we won't
+        -- necessarily lambda-abstract over all the free type variables of a h-function
+        consider_inlining x' hb (deeds, h_output, live, live_in_let)
+          = (deeds', M.insert x' inline_hb h_output, live `unionVarSet` fvs, if howBound inline_hb == LetBound then live_in_let `unionVarSet` fvs else live_in_let)
+          where fvs = heapBindingFreeVars inline_hb `unionVarSet` varBndrFreeVars x'
+                (deeds', inline_hb) = case claimDeeds deeds (heapBindingSize hb) of -- Do we have enough deeds to inline an unmodified version?
+                  Just deeds' ->                                                       (deeds', hb)
+                  Nothing     -> trace (showSDoc $ text "inline-deeds:" <+> pPrint x') (deeds,  makeFreeForDeeds hb)
+
+    -- Given a HeapBinding that costs some deeds, return one that costs no deeds (and so can be inlined unconditionally)
+    makeFreeForDeeds (HB InternallyBound (Right in_e))
+      | not lOCAL_TIEBACKS     = lambdaBound         -- Without local tiebacks, we just lose information here
+      | termIsCheap (snd in_e) = HB how (Right in_e) -- With local tiebacks, we can keep the RHS (perhaps we can use it in the future?) but have to make it be able to pass it in from the caller somehow
+      | otherwise              = lambdaBound         -- All non-cheap things
+      where how | termIsValue (snd in_e) = LetBound    -- Heuristic: only refer to *values* via a free variable, as those are the ones GHC will get some benefit from. TODO: make data/function distinction here?
+                | otherwise              = LambdaBound
+    makeFreeForDeeds hb = panic "howToBind: should only be needed for internally bound things with a term" (pPrint hb)
+    
+    -- Enforce the invariant that anything referred to by a LetBound thing cannot be LambdaBound
+    neutraliseLetLives live_in_let = M.mapWithKey (\x' hb -> if howBound hb == LambdaBound && x' `elemVarSet` live_in_let then hb { howBound = LetBound } else hb)
+
+-- TODO: replace with a genuine evaluator. However, think VERY hard about the termination implications of this!
+-- I think we can only do it when the splitter is being invoked by a non-whistling invocation of sc.
+cheapifyHeap :: Deeds -> Heap -> (Deeds, Heap)
+cheapifyHeap deeds heap | sPECULATION = (deeds, heap)
+cheapifyHeap deeds (Heap h ids) = (deeds', Heap (M.fromList [(x', internallyBound in_e) | (x', in_e) <- floats] `M.union` h') ids')
+  where
+    ((deeds', ids', floats), h') = M.mapAccum (\(deeds, ids, floats0) hb -> case hb of HB InternallyBound (Right in_e) -> (case cheapify deeds ids in_e of (deeds, ids, floats1, in_e') -> ((deeds, ids, floats0 ++ floats1), HB InternallyBound (Right in_e'))); _ -> ((deeds, ids, floats0), hb)) (deeds, ids, []) h
+    
+    -- TODO: make cheapification more powerful (i.e. deal with case bindings)
+    cheapify :: Deeds -> InScopeSet -> In AnnedTerm -> (Deeds, InScopeSet, [(Out Var, In AnnedTerm)], In AnnedTerm)
+    cheapify deeds0 ids0 (rn, anned_e)
+      | LetRec xes e <- annee anned_e
+      , let deeds1 = deeds0 `releaseDeeds` 1
+            (        ids1, rn', in_xes) = renameBounds ids0 rn xes
+            (in_xs, in_es) = unzip in_xes
+            (deeds2, ids2, floats0, in_es') = cheapifyMany deeds1 ids1 in_es
+            (deeds3, ids3, floats1, in_e')  = cheapify deeds2 ids2 (rn', e)
+      = (deeds3, ids3, zip in_xs in_es' ++ floats0 ++ floats1, in_e')
+      | Let x e1 e2 <- annee anned_e
+      , let deeds1 = deeds0 `releaseDeeds` 1
+            (ids1, rn', (x', in_e1)) = renameNonRecBound ids0 rn (x, e1)
+            (deeds2, ids2, floats0, in_e1')  = cheapify deeds1 ids1 in_e1
+            (deeds3, ids3, floats1, in_e2')  = cheapify deeds2 ids2 (rn', e2)
+      = (deeds3, ids3, (x', in_e1') : floats0 ++ floats1, in_e2')
+    cheapify deeds ids in_e = (deeds, ids, [], in_e)
+
+    cheapifyMany :: Deeds -> InScopeSet -> [In AnnedTerm] -> (Deeds, InScopeSet, [(Out Var, In AnnedTerm)], [In AnnedTerm])
+    cheapifyMany deeds ids = reassociate . mapAccumL ((associate .) . uncurry cheapify) (deeds, ids)
+      where associate (deeds, ids, floats, in_e) = ((deeds, ids), (floats, in_e))
+            reassociate ((deeds, ids), floatss_in_es) = (deeds, ids, concat floatss, in_es)
+                where (floatss, in_es) = unzip floatss_in_es
+
+
+-- TODO: I have a clever idea. Currently, if we supercompile:
+--  D[ < H | if x then y else z | K > ]
+--
+-- And we don't know anything about y or z we get:
+--  if x
+--   then K(True/x)[y]
+--   else K(False/x)[z]
+--
+-- This is not too bad, but I suspect that it is common that K doesn't actually depend on x, in which case we could
+-- instead produce:
+--  let $j it = K[it]
+--  in if x then $j y else $j z
+--
+-- This is an improvement because we get code sharing. Furthermore, $j won't be causing extra allocation because it's
+-- pretty much guaranteed to be a let-no-escape.
+--
+-- The best part is that making this happen isn't really much much work (I think). One option would be to actually add
+-- a (JoinPoint Var) stack frame, and introduce them (along with their corresponding bindings) in the splitter. The reduction
+-- rule would be:
+--   < H | v | $j [_], K > --> < H, x |-> v | e | K >
+--      \x.e = deref(H, $j)
+--
+-- If we said join points were LetBound this would also let us delay inlining them (and hence consuming deeds) until we
+-- were sure we could get some benefit from it.
+--
+-- The major issue is exactly what *should* be bound up into a join point. We could create one per stack frame, but that
+-- might lead to quite a lot of code bloat. I think that ideally we want to create one per shared stack suffix: there is no
+-- point creating join points that are only used in one place! But how to detect that?? After all, because h-functions can
+-- be tied back to at any later point it looks like we should create one for every possible prefix as they might be useful
+-- for guys in the future.
+pushStack :: UniqSupply
+          -> InScopeSet
+          -> Deeds
+          -> [Out Var]
+          -> Train (Bool, Tagged StackFrame) Generalised
+          -> Bracketed (Entered, UnnormalisedState)
+          -> (Deeds,
+              M.Map (Out Var) (Bracketed (Entered, UnnormalisedState)),
+              Bracketed (Entered, UnnormalisedState))
+pushStack _        _   deeds _      (Loco gen)               bracketed_hole = (deeds, M.empty, setStackGeneralised gen bracketed_hole)
+pushStack ctxt_ids ids deeds scruts ((may_push, kf) `Car` k) bracketed_hole = second3 (`M.union` bracketed_heap') $ pushStack ctxt_ids2 ids deeds' scruts' k bracketed_hole'
+  where
+    (ctxt_ids1, ctxt_ids2) = splitUniqSupply ctxt_ids
+
+    -- If we have access to hole tail positions, we should try to inline this stack frame into that tail position.
+    -- If we do not have access to the tail positions of the hole, all we can do is rebuild a bit of residual syntax around the hole.
+    (deeds', (scruts', bracketed_heap', bracketed_hole'))
+      | may_push  = fmap (\(deeds', bracketed_hole') -> (deeds', ([], M.empty, bracketed_hole'))) (pushStackFrame kf deeds bracketed_hole) `orElse`
+                    (deeds, splitStackFrame ctxt_ids1 ids kf scruts                           bracketed_hole)
+      | otherwise = (deeds, splitStackFrame ctxt_ids1 ids kf scruts (setStackGeneralised True bracketed_hole))
+
+setStackGeneralised :: Generalised -> Bracketed (Entered, UnnormalisedState) -> Bracketed (Entered, UnnormalisedState)
+setStackGeneralised gen bracketed_hole = modifyTails_ id (map (second (third4 (fmapLoco (\_old_gen -> gen))))) bracketed_hole `orElse` bracketed_hole
+
+pushStackFrame :: Tagged StackFrame
+               -> Deeds
+               -> Bracketed (Entered, UnnormalisedState)
+               -> Maybe (Deeds, Bracketed (Entered, UnnormalisedState))
+pushStackFrame kf deeds bracketed_hole = do
+    (Just deeds', bracketed_hole') <- modifyTails (stackFrameType kf) push bracketed_hole
+    return (deeds', bracketed_hole')
+  where
+    -- Inline parts of the evaluation context into each branch only if we can get that many deeds for duplication
+    push :: [(Entered, UnnormalisedState)] -> (Maybe Deeds, [(Entered, UnnormalisedState)])
+    push fillers = case claimDeeds deeds (stackFrameSize (tagee kf) * (branch_factor - 1)) of -- NB: subtract one because one occurrence is already "paid for". It is OK if the result is negative (i.e. branch_factor 0)!
+            Nothing    -> trace (showSDoc $ text "pushStack-deeds" <+> pPrint branch_factor) (Nothing, fillers)
+            Just deeds ->                                                                    (Just deeds, map (\(ent, state) -> (ent, third4 (`trainAppend` \_gen -> kf `Car` Loco False) state)) fillers)
+      where branch_factor = length fillers
+
+{-
+data Scrutinee = Scrut {
+    scrutAliases    :: [Out Var],
+    scrutBottomness :: Maybe Int -- Number of value arguments remaining until scrutinee is _|_
+  }
+
+-- The scrutinee bottomness check was introduced to deal with the fact that genregexps was
+-- compiling lots of specialisations of the *error case* of "succ" and "expand".
+--
+-- You might think that this wouldn't do much harm, because in the error case we have just
+-- as much information about the scrutinee as when it was totally unknown (e.g. when it is
+-- succ of an unknown quantity). In this case we would expect the unspecialised supercompiled
+-- context in which the errors occur would be reusable at other sites where the scrutinee was unknown.
+--
+-- Unfortunately, this is *not* the case because the normal case of "succ @Char" is to return a C#
+-- box, but the error case returns an error outside of a C# box. FIXME: this is still not much change,
+-- just need one extra residual stack frame to unpack the box before we reach shared code.
+--
+-- Note that this check only really helps functions for which we do not know the RHS (e.g. primops).
+-}
+
+splitStackFrame :: UniqSupply
+                -> InScopeSet
+                -> Tagged StackFrame
+                -> [Out Var]
+                -> Bracketed (Entered, UnnormalisedState)
+                -> ([Out Var],
+                    M.Map (Out Var) (Bracketed (Entered, UnnormalisedState)),
+                    Bracketed (Entered, UnnormalisedState))
+splitStackFrame ctxt_ids ids kf scruts bracketed_hole
+  | Update x' <- tagee kf = splitUpdate ctxt_ids ids tg scruts x' bracketed_hole
+  | otherwise = ([], M.empty, case tagee kf of
+    Update x' -> pprPanic "splitStackFrame" (text "Encountered update frame for" <+> pPrint x' <+> text "that was handled above")
+    TyApply ty' -> zipBracketeds $ TailsUnknown (shell (tyVarsOfType ty') $ \[e] -> e `tyApp` ty') [Hole [] bracketed_hole]
+    CoApply co' -> zipBracketeds $ TailsUnknown (shell (tyCoVarsOfCo co') $ \[e] -> e `coApp` co') [Hole [] bracketed_hole]
+    Apply x2'   -> zipBracketeds $ TailsUnknown (shell (unitVarSet x2')   $ \[e] -> e `app` x2')   [Hole [] bracketed_hole]
+    CastIt co'  -> zipBracketeds $ TailsUnknown (shell (tyCoVarsOfCo co') $ \[e] -> e `cast` co')  [Hole [] bracketed_hole]
+    Scrutinise x' ty' (rn, unfiltered_alts)
+      -> -- (if null k_remaining then id else traceRender ("splitStack: FORCED SPLIT", M.keysSet entered_hole, [x' | Tagged _ (Update x') <- k_remaining])) $
+         -- (if not (null k_not_inlined) then traceRender ("splitStack: generalise", k_not_inlined) else id) $
+         zipBracketeds $ TailsKnown ty' (\final_ty' -> shell (tyVarsOfType final_ty') $ \(e_hole:es_alts) -> case_ e_hole x' final_ty' (alt_cons' `zip` es_alts)) (TailishHole False (Hole [] bracketed_hole) : zipWithEqual "Scrutinise" (\alt_bvs -> TailishHole True . Hole (x':alt_bvs)) alt_bvss bracketed_alts)
+      where -- These lines achieve two things:
+            --   1. Filter out any branches of the case which we know are impossible due to type refinement
+            --   2. Turn any remaining default cases into explicit constructors if possible (helps positive information propagation)
+            alts = [ (coreAltConToAltCon altcon xs, e)
+                   | (altcon, xs, e) <- (if rEFINE_ALTS then thirdOf3 . filterAlts (repeat wildCardKey) (idType x') [] else id)
+                                            [(altcon', xs, e) | (altcon, e) <- unfiltered_alts, let (altcon', xs) = altConToCoreAltCon altcon]
+                   ]
+
+            (alt_cons, alt_es) = unzip alts
+            scruts' = x':scruts
+
+            -- 0) Manufacture context identifier
+            ctxt_id = uniqFromSupply ctxt_ids
+            
+            -- 1) Construct the floats for each case alternative
+            -- We have to carefully zap OccInfo here because one of the case binders might be marked as dead,
+            -- yet could become live due to positive information propagation!    
+            (alt_idss, alt_rns, alt_cons') = unzip3 $ map (renameAltCon ids rn) $ if any (not . isDeadBinder) scruts'
+                                                                                   then map zapAltConIdOccInfo alt_cons
+                                                                                   else alt_cons
+            -- Bind something to the case scrutinee (if possible). This means that:
+            --  let y = (\z -> case z of C -> ...) unk
+            --  in y
+            -- ===>
+            --  case x of C -> let unk = C; z = C in ...
+            alt_in_es = alt_rns `zip` alt_es
+            alt_hs = zipWithEqual "alt_hs" (\alt_con' alt_bvs -> M.fromList (do guard pOSITIVE_INFORMATION
+                                                                                Just scrut_v <- [altConToValue (idType x') alt_con']
+                                                                                let in_scrut_e@(_, scrut_e) = renamedTerm (fmap Value scrut_v)
+                                                                                scrut <- scruts'
+                                                                                -- Localise the Id just in case this is the occurrence of a lambda-bound variable.
+                                                                                -- We don't really want a Let-bound external name in the output!
+                                                                                return (localiseId scrut, HB (howToBindCheap scrut_e) (Right in_scrut_e)))
+                                                                 `M.union` M.fromList [(x, lambdaBound) | x <- x':alt_bvs]) -- NB: x' might be in scruts and union is left-biased
+                                           alt_cons' alt_bvss -- NB: don't need to grab deeds for these just yet, due to the funny contract for transitiveInline
+            alt_bvss = map altConBoundVars alt_cons'
+            bracketed_alts = zipWith3Equal "bracketed_alts" (\alt_h alt_ids alt_in_e -> oneBracketed' ty' (Once ctxt_id, (emptyDeeds, Heap alt_h alt_ids, Loco False, alt_in_e))) alt_hs alt_idss alt_in_es
+    StrictLet x' in_e -> zipBracketeds $ TailsKnown ty' (\_final_ty' -> shell emptyVarSet $ \[e_hole, e_body] -> let_ x' e_hole e_body) [TailishHole False $ Hole [] bracketed_hole, TailishHole True $ Hole [x'] $ oneBracketed' ty' (Once ctxt_id, (emptyDeeds, Heap (M.singleton x' lambdaBound) ids, Loco False, in_e))]
+      where ctxt_id = uniqFromSupply ctxt_ids
+            ty' = inTermType ids in_e
+    PrimApply pop tys' in_vs in_es -> zipBracketeds $ TailsUnknown (shell emptyVarSet $ primOp pop tys') (zipWith Hole (repeat []) $ bracketed_vs ++ bracketed_hole : bracketed_es)
+      where -- 0) Manufacture context identifier (actually, an infinite number of them)
+            (ctxt_ids0, ctxt_ids1) = splitUniqSupply ctxt_ids
+            ctxt_idss0 = listSplitUniqSupply ctxt_ids0
+            ctxt_idss1 = listSplitUniqSupply ctxt_ids1
+            
+            -- 1) Split every value and expression remaining apart
+            bracketed_vs = zipWith (\ctxt_ids in_v -> splitCoerced (splitAnswer ctxt_ids ids) (annedToTagged in_v)) ctxt_idss0 in_vs
+            bracketed_es  = zipWith (\ctxt_ids in_e -> let (ctxt_id, ctxt_ids0) = takeUniqFromSupply ctxt_ids in oneBracketed ctxt_ids0 (inTermType ids in_e) (Once ctxt_id, (Heap M.empty ids, Loco False, in_e))) ctxt_idss1 in_es)
+  where
+    tg = tag kf
+    shell = Shell (oneResidTag tg)
+
+-- I'm making use of a clever trick: after splitting an update frame for x, instead of continuing to split the stack with a
+-- noneBracketed for x in the focus, I split the stack with a oneBracketed for it in the focus.
+--
+-- You might think this is utterly worthless, since by definition the splitter will never be able to push the actual definition of
+-- x into this hole in the bracketed. However, the fact that we do this is *critical* to the algorithm I use to ensure that
+-- we can make variables bound by update frames as non-residualised: see Note [Residualisation of things referred to in extraFvs]
+splitUpdate :: UniqSupply -> InScopeSet -> Tag -> [Out Var] -> Var -> Bracketed (Entered, UnnormalisedState)
+            -> ([Out Var], M.Map (Out Var) (Bracketed (Entered, UnnormalisedState)), Bracketed (Entered, UnnormalisedState))
+splitUpdate ctxt_ids ids tg_kf scruts x' bracketed_hole
+  = (x' : scruts, M.singleton x' bracketed_hole,
+     oneBracketed ctxt_ids (idType x') (Once ctxt_id, (Heap M.empty ids, Loco False, (mkIdentityRenaming (unitVarSet x'), annedTerm tg_kf (Var x')))))
+  where ctxt_id = idUnique x'
+
+splitValue :: UniqSupply -> InScopeSet -> Tag -> In AnnedValue -> Bracketed (Entered, UnnormalisedState)
+splitValue ctxt_ids ids tg (rn, Lambda x e)   = splitLambdaLike Lambda   ent            ctxt_ids0 ids tg (rn, (x, e))
+  where -- This is really necessary if we want to fuse a top-level non-value with some consuming context in the IO monad
+        --
+        -- NB: this is rather interesting. If I have:
+        --   x = e1
+        --   y = v2[x]
+        --   z = \a -> e3[y]
+        --
+        -- If I start supercompiling with y as the root, I might be able to fuse e1 into v2 if the occurrence
+        -- within the RHS of y is Once. However, if I start from z then I may not be able to do this fusion if
+        -- y occurs in a Many context because the value portion v2 will be moved down and the expression portion
+        -- x will be left residualised above the lambda.
+        --
+        -- What I gain from this behaviour, of course, is that v2 may fuse with e3, which is probably more valuable
+        -- in general anyway. 
+        (ent, ctxt_ids0) | isOneShotBndr x = first Once $ takeUniqFromSupply ctxt_ids
+                         | otherwise       = (Many, ctxt_ids)
+splitValue ctxt_ids ids tg (rn, TyLambda a e) = splitLambdaLike TyLambda (Once ctxt_id) ctxt_ids0 ids tg (rn, (a, e))
+  where (ctxt_id, ctxt_ids0) = takeUniqFromSupply ctxt_ids
+splitValue _        ids tg in_v               = noneBracketed tg (value (annedValueToFVedValue' $ renameIn (renameAnnedValue' ids) in_v))
+
+-- We create LambdaBound entries in the Heap for both type and value variables, so we can share the code:
+splitLambdaLike :: (Var -> FVedTerm -> ValueF FVed) -> Entered
+                -> UniqSupply -> InScopeSet -> Tag -> In (Var, AnnedTerm) -> Bracketed (Entered, UnnormalisedState)
+splitLambdaLike rebuild entered ctxt_ids ids tg (rn, (x, e)) = zipBracketeds $ TailsUnknown (Shell (oneResidTag tg) emptyVarSet $ \[e'] -> value (rebuild x' e')) [Hole [x'] $ oneBracketed ctxt_ids (inTermType ids' in_e) (entered, (Heap (M.singleton x' lambdaBound) ids', Loco False, in_e))]
+  where (ids', rn', x') = renameNonRecBinder ids rn x
+        in_e = (rn', e)
+
+splitCoerced :: (Tagged a -> Bracketed (Entered, UnnormalisedState))
+             -> Tagged (Coerced a) -> Bracketed (Entered, UnnormalisedState)
+splitCoerced f (Tagged tg_a  (Uncast,         a)) = f (Tagged tg_a a)
+splitCoerced f (Tagged tg_co (CastBy co tg_a, a)) = zipBracketeds $ TailsUnknown (Shell (oneResidTag tg_co) (tyCoVarsOfCo co) $ \[e'] -> cast e' co) [Hole [] (f (Tagged tg_a a))]
+ -- NB: the tg' in the CastBy is wrapped around the *x*, not the whole cast, so pass it down
+
+splitQA :: UniqSupply -> InScopeSet -> Tag -> QA -> Bracketed (Entered, UnnormalisedState)
+splitQA _        _   tg (Question x') = noneBracketed tg (var x')
+splitQA ctxt_ids ids tg (Answer a)    = splitAnswer ctxt_ids ids (Tagged tg a)
+
+splitAnswer :: UniqSupply -> InScopeSet -> Tagged Answer -> Bracketed (Entered, UnnormalisedState)
+splitAnswer ctxt_ids ids (Tagged tg a) = splitValue ctxt_ids ids tg a
+
+inTermType :: InScopeSet -> In AnnedTerm -> Type
+inTermType ids = renameIn (renameType ids) . fmap termType
--- a/compiler/supercompile/Supercompile/Drive/Split2.hs
+++ b/compiler/supercompile/Supercompile/Drive/Split2.hs
+module Supercompile.Drive.Split2 (
+    ResidTags, plusResidTags, emptyResidTags,
+
+    split, instanceSplit, generaliseSplit
+  ) where
+
+#include "HsVersions.h"
+
+import Supercompile.Core.FreeVars
+import Supercompile.Core.Renaming
+import Supercompile.Core.Syntax
+
+import Supercompile.Evaluator.Deeds
+import Supercompile.Evaluator.Evaluate (normalise)
+import Supercompile.Evaluator.FreeVars
+import Supercompile.Evaluator.Syntax
+
+import Supercompile.GHC (coreAltConToAltCon, altConToCoreAltCon)
+
+import Supercompile.Termination.Generaliser (Generaliser(..))
+
+import Supercompile.StaticFlags
+import Supercompile.Utilities hiding (tails)
+
+import CoreUtils (filterAlts)
+import Id        (idType, isDeadBinder, localiseId, isOneShotBndr)
+import PrelNames (wildCardKey)
+import Util      (thirdOf3)
+import MonadUtils (concatMapM)
+import qualified State
+
+import qualified Data.Map as M
+import qualified Data.Set as S
+import qualified Data.IntSet as IS
+import qualified Data.IntMap as IM
+
+
+type LGraph node edge = M.Map node (M.Map node edge)
+
+filterEdges :: Ord node
+            => (edge -> node -> Bool)
+            -> LGraph node edge
+            -> LGraph node edge
+filterEdges keep_edge = M.map (M.mapMaybeWithKey (\n e -> if keep_edge e n then Just e else Nothing))
+
+trimUnreachable :: Ord node
+                => node
+                -> LGraph node edge
+                -> LGraph node edge
+trimUnreachable root_n g = go (S.singleton root_n) S.empty
+  where go n_todo n_done | S.null n_todo' = M.filterWithKey (\n _ -> n `S.member` n_done') g -- NB: all outgoing edges of retained nodes will still be present by definition
+                         | otherwise      = go n_todo' n_done'
+          where n_done' = n_todo `S.union` n_done
+                n_todo' = S.fold (\n n_todo' -> M.keysSet (M.findWithDefault (error "trimUnreachable") n g) `S.union` n_todo') S.empty n_todo S.\\ n_done'
+
+shortcutEdges :: forall node edge.
+                 Ord node
+              => (node -> Bool)
+              -> (edge -> edge -> edge)         -- Used to join edges if after shortcutting there is more than one path from a node to another one
+              -> (edge -> node -> edge -> edge) -- Used when joining two edges in a contiguous path (the node always satisfys the predicate)
+              -> LGraph node edge
+              -> LGraph node edge
+shortcutEdges should_shortcut combine_edges combine g = State.evalState visit_graph M.empty
+  where
+    visit_graph :: State.State (M.Map node [(node, edge)]) (LGraph node edge)
+    visit_graph = liftM M.fromDistinctAscList $ sequence $ flip mapMaybe (M.toAscList g) $ \(n, ens) -> do
+        guard (not (should_shortcut n))
+        return $ liftM (((,) n) . M.fromListWith combine_edges) $ visit S.empty ens
+
+    visit :: S.Set node -> M.Map node edge -> State.State (M.Map node [(node, edge)]) [(node, edge)]
+    -- Given the outgoing edges for some node, returns all the outgoing edges for that node
+    -- after shortcutting
+    visit path ens = concatMapM (uncurry (visit' path)) (M.toList ens)
+
+    visit' :: S.Set node -> node -> edge -> State.State (M.Map node [(node, edge)]) [(node, edge)]
+    -- Given an edge label and the node reached via that label, returns all the nodes reached
+    -- after shortcutting
+    visit' path n' e | n' `S.member` path       = return []        -- Doesn't contribute any extra paths: all paths will considered by a caller
+                     | not (should_shortcut n') = return [(n', e)] -- Won't be shortcutted away, no need to look further
+                     | otherwise                = do
+                        -- Since n' is not in the path, we can try to memoise
+                        mb_res <- liftM (M.lookup n') State.get
+                        res <- case mb_res of
+                          Just res -> return res
+                          Nothing  -> do
+                            res <- visit (S.insert n' path) (M.findWithDefault (error "shortcutEdges") n' g)
+                            State.modify (M.insert n' res)
+                            return res
+                        return $ map (second (combine e n')) res
+
+-- Given a graph, returns:
+--  1. An acyclic graph of the strongly connected components of the input graph.
+--     Each SCC is identified by a unique Int.
+--  2. A mapping from Ints to the "sub-graph" corresponding to each SCC. Each sub-graph
+--     contains all the nodes in the SCC as well as any edges between those nodes.
+--     Note that in particular the sub-graph for an acyclic SCC will contain exactly one node and no edges.
+--
+-- Uses an adaptation of Tarjan's algorithm <http://en.wikipedia.org/wiki/Tarjan's_strongly_connected_components_algorithm>
+-- Returns SCCs in topological order (i.e. the SCC with no *incoming* edges is first in the output, and that with no *outgoing* edges is last)
+sccs :: forall node edge.
+        (Outputable node, Ord node)
+     => LGraph node edge
+     -> ([(Int, M.Map Int (M.Map (node, node) edge))],
+         IM.IntMap (LGraph node edge))
+sccs g = case State.execState strongconnect_graph (0, M.empty, [], [], IM.empty, M.empty, M.empty) of (_, _, _, sccs, scc_datas, _, _) -> (sccs, scc_datas)
+  where
+    -- Observations about Tarjan's algorithm:
+    --  1. strongconnect(v) is only called if v.index is undefined
+    --  2. Vertex v's lowlink is only mutated by strongconnect(v)
+    --  3. Once index is set it is never changed
+    --  4. Nodes occur in the stack in decreasing order of index
+    --
+    -- We can use these facts to build an implementation that makes minimal use of the state monad
+
+    strongconnect_graph = flip traverseWithKey_ g $ \n ens -> do
+      ix_defined <- liftM (\(_, ixs, _, _, _, _, _) -> n `M.member` ixs) State.get
+      unless ix_defined $ void $ strongconnect n ens
+
+    -- (strongconnect n ens) returns:
+    --  1. Index of a node n' reachable from n such that that index[n'] < index[n],
+    --     if possible. Otherwise returns index[n].
+    --  2. Whether we didn't just create a new SCC containing n. If no new SCC was created then n is guaranteed
+    --     to still be on the stack (which occurs iff we managed to find a suitable index[n'])
+    --
+    -- Precondition: there is no assigned index for n
+    strongconnect :: node -> M.Map node edge
+                  -> State.State (-- Next node index to assign
+                                  Int,
+                                  -- Mapping from nodes to their assigned index (if any)
+                                  -- NB: after the node has been removed from the stack, we update the Int in the mapping
+                                  -- to instead be the lowlink of the SCC it was assigned to. This is OK because we don't
+                                  -- need the raw index of the node after that point: we only need record the fact that
+                                  -- it had some index at a point in the past
+                                  M.Map node Int,
+                                  -- Stack containing expanded nodes that are not presently in a SCC
+                                  [node],
+                                  -- Work-in-progress graph of SCC
+                                  [(Int, M.Map Int (M.Map (node, node) edge))],
+                                  -- Work-in-progress SCC sub-graph mapping
+                                  IM.IntMap (LGraph node edge),
+                                  -- Records all discovered "internal" edges from expanded nodes to somewhere *within* their SCC
+                                  M.Map node (M.Map node edge),
+                                  -- Records all discovered "external" edges from the current SCC-in-progress to some other (already existant) SCC
+                                  -- It might seem more obvious to use a [([(edge, node)], Int)] here, but that makes it awkward to common up multiple
+                                  -- edges from this SCC going to the same external SCC
+                                  M.Map Int (M.Map (node, node) edge))
+                                 (Int, Bool)
+    strongconnect n ens = do
+      ix <- State.state $ \(next_ix, ixs, s, sccs, scc_datas, all_internal_ens, all_external_ens) -> (next_ix, (next_ix + 1, M.insert n next_ix ixs, n:s, sccs, scc_datas, all_internal_ens, all_external_ens))
+      (lowlink, internal_ens, external_ens) <- (\f -> foldlWithKeyM' f (ix, M.empty, M.empty) ens) $ \(lowlink, internal_ens, external_ens) n' e -> do
+        (mb_ix', in_s') <- liftM (\(_, ixs, s, _, _, _, _) -> (M.lookup n' ixs, n' `elem` s)) State.get
+        (lowlink, mb_scc) <- case mb_ix' of
+                                  -- Successor not yet visited: recurse on it
+                                  -- Whether we add an internal or external edge depends on whether the recursive call created an SCC or not.
+                                  -- If it did create an SCC, that SCC will be identified by lowlink'
+          Nothing              -> do (lowlink', in_s') <- strongconnect n' (M.findWithDefault (pprPanic "sccs: unknown successor" (ppr n')) n' g)
+                                     return (lowlink `min` lowlink', if in_s' then Nothing else Just lowlink')
+                                  -- Successor is in the stack and hence the current SCC, so record an internal edge
+          Just ix' | in_s'     -> return (lowlink `min` ix', Nothing)
+                                  -- Successor visited but not in stack: it is already part of another SCC, so record an external edge
+                                  -- NB: this makes use of my hack whereby ix' will actually be a SCC lowlink for such successors
+                   | otherwise -> return (lowlink, Just ix')
+        (internal_ens, external_ens) <- return $ case mb_scc of
+                                          Nothing  -> (M.insert n' e internal_ens, external_ens)
+                                          Just scc -> (internal_ens, M.insertWith (M.unionWith (error "strongconnect: non-distinct")) scc (M.singleton (n, n') e) external_ens)
+        return (lowlink, internal_ens, external_ens)
+      -- Record accumulated internal/external edges. We don't need to record them as we go along because they can only possibly be used by one of our callers, not our callees
+      State.modify $ \(next_ix, ixs, s, sccs, scc_datas, all_internal_ens, all_external_ens) -> (next_ix, ixs, s, sccs, scc_datas, M.insert n internal_ens all_internal_ens, M.unionWith (M.unionWith (error "strongconnect: non-distinct")) external_ens all_external_ens)
+      -- Since lowlink is at most ix, this condition can only be true if we failed to find a node reachable
+      -- from n with a lower index. We use this as our cue to form a new SCC.
+      in_s <- if (lowlink == ix)
+               -- NB: because nodes on the stack are in decreasing order of index, this operation never pops a node with index < ix
+              then do State.modify $ \(next_ix, ixs, s, sccs, scc_datas, all_internal_ens, all_external_ens) -> let (s_scc, _n:s') = span (/= n) s
+                                                                                                                    scc = M.fromList [(n, M.findWithDefault (error "sccs") n all_internal_ens) | n <- n:s_scc]
+                                                                                                                    -- Replace node indexes with the lowlink of the SCC they were assigned to (a small hack to save one map lookup):
+                                                                                                                    ixs' = foldr (\n -> M.insert n lowlink) ixs (n:s_scc)
+                                                                                                                in (next_ix, ixs', s', (lowlink, all_external_ens):sccs, IM.insert lowlink scc scc_datas, all_internal_ens, M.empty)
+                      return False
+              else return True
+      -- Return this nodes final lowlink for use when computing the predecessors lowlink
+      return (lowlink, in_s)
+
+
+fromListDisjoint :: Ord k => [(k, v)] -> M.Map k v
+fromListDisjoint = M.fromListWith (error "fromListDisjoint")
+
+unionDisjoint :: Ord k => M.Map k v -> M.Map k v -> M.Map k v
+unionDisjoint = M.unionWith (error "unionDisjoint")
+
+
+type ResidTags = IM.IntMap Int
+
+emptyResidTags :: ResidTags
+emptyResidTags = IM.empty
+
+oneResidTag :: Tag -> ResidTags
+oneResidTag (TG i _) = IM.singleton (unFin i) 1
+
+plusResidTags :: ResidTags -> ResidTags -> ResidTags
+plusResidTags = IM.unionWith (+)
+
+
+data Context = HeapContext Var
+             | StackContext Int
+             | FocusContext
+             deriving (Eq, Ord)
+
+instance Show Context where
+    show = showPpr
+
+instance Outputable Context where
+    pprPrec prec (HeapContext x') = pprPrec prec x'
+    pprPrec prec (StackContext i) = pprPrec prec i
+    pprPrec _    FocusContext     = text "[_]"
+
+data Entries = OneEntry | ManyEntries
+
+instance Outputable Entries where
+    ppr OneEntry    = text "1"
+    ppr ManyEntries = text "Many"
+
+
+varEdges :: Entries -> FreeVars -> M.Map Context Entries
+varEdges ents xs = M.fromList [(HeapContext x, ents) | x <- varSetElems xs]
+
+plusEntries :: Entries -> Entries -> Entries
+plusEntries OneEntry OneEntry = OneEntry
+plusEntries _        _        = ManyEntries
+
+-- Used when the maps come from the same contexts
+plusEntered :: M.Map Context Entries -> M.Map Context Entries -> M.Map Context Entries
+plusEntered = M.unionWith plusEntries
+
+-- Used when the maps come from two distinct contexts
+multEntered :: M.Map Context Entries -> M.Map Context Entries -> M.Map Context Entries
+multEntered = M.unionWith (\_ _ -> ManyEntries)
+
+
+split :: (Applicative m, Monad m)
+      => (State -> m            (Deeds, Out FVedTerm))
+      -> State  -> m (ResidTags, Deeds, Out FVedTerm)
+split opt (deeds, heap, k, qa) = recurse opt $ push (S.singleton FocusContext) (deeds, heap, k, QAFocus qa)
+
+instanceSplit :: (Applicative m, Monad m)
+              => (State                             -> m            (Deeds, Out FVedTerm))
+              -> (Deeds, Heap, Stack, Out FVedTerm) -> m (ResidTags, Deeds, Out FVedTerm)
+instanceSplit opt (deeds, heap, k, e) = recurse opt $ push (S.singleton FocusContext) (deeds, heap, k, OpaqueFocus e)
+
+applyGeneraliser :: Generaliser -> State -> Maybe (S.Set Context)
+applyGeneraliser gen (_deeds, Heap h _, k, qa) = fmap (\(gen_kfs, gen_xs) -> S.fromList $ map StackContext (IS.elems gen_kfs) ++ map HeapContext (varSetElems gen_xs)) $ case sPLIT_GENERALISATION_TYPE of
+    NoGeneralisation -> Nothing
+    AllEligible -> guard (not (IS.null gen_kfs) || not (isEmptyVarSet gen_xs'')) >> return (gen_kfs, gen_xs'')
+      where gen_kfs = IS.fromList [i   | (i, kf) <- named_k, generaliseStackFrame gen kf]
+            gen_xs'' = mkVarSet [x'' | (x'', hb) <- M.toList h, generaliseHeapBinding gen x'' hb, ASSERT2(not (howBound hb == LambdaBound && isNothing (heapBindingTerm hb)), ppr (x'', hb, heapBindingTag hb)) True]
+    StackFirst -> (guard (not (IS.null gen_kfs)) >> return (gen_kfs, emptyVarSet)) `mplus`
+                  (guard (not (isEmptyVarSet gen_xs''))  >> return (IS.empty, gen_xs''))
+      where gen_kfs = IS.fromList [i   | (i, kf) <- named_k, generaliseStackFrame gen kf]
+            gen_xs'' = mkVarSet [x'' | (x'', hb) <- M.toList h, generaliseHeapBinding gen x'' hb, ASSERT2(not (howBound hb == LambdaBound && isNothing (heapBindingTerm hb)), ppr (x'', hb, heapBindingTag hb)) True]
+    DependencyOrder want_first -> listToMaybe ((if want_first then id else reverse) possibilities)
+      where -- We consider possibilities starting from the root of the term -- i.e. the bottom of the stack.
+            -- This is motivated by how the interaction with subgraph generalisation for TreeFlip/TreeSum.
+            -- FIXME: explain in more detail if this turns out to be sane.
+            possibilities = findGeneralisable False emptyVarSet (reverse named_k) h
+            
+            findGeneralisable done_qa pending_xs' unreached_kfs unreached_hbs
+               | done_qa && null pending_kfs && M.null pending_hbs
+               = []
+               | otherwise
+               = [(gen_kf_is, gen_xs'') | not (IS.null gen_kf_is) || not (isEmptyVarSet gen_xs'')] ++
+                 findGeneralisable done_qa' reached_xs' unreached_kfs' unreached_hbs'
+              where
+                (done_qa', extra_pending_xs') = if done_qa || not (null unreached_kfs) then (done_qa, emptyVarSet) else (True, annedFreeVars qa)
+                (pending_kfs, unreached_kfs') = splitAt 1 unreached_kfs
+                (pending_hbs, unreached_hbs') = M.partitionWithKey (\x' _hb -> x' `elemVarSet` (pending_xs' `unionVarSet` extra_pending_xs')) unreached_hbs
+                
+                gen_kf_is = IS.fromList [i  | (i, kf) <- pending_kfs, generaliseStackFrame gen kf]
+                gen_xs'' = mkVarSet [x'' | (x'', hb) <- M.toList pending_hbs, generaliseHeapBinding gen x'' hb, ASSERT2(not (howBound hb == LambdaBound && isNothing (heapBindingTerm hb)), ppr (x'', hb, heapBindingTag hb)) True]
+                
+                reached_xs' = M.foldrWithKey (\_x' hb fvs -> heapBindingFreeVars hb `unionVarSet` fvs)
+                                             (unionVarSets (map (stackFrameFreeVars . tagee . snd) pending_kfs))
+                                             pending_hbs
+  where named_k = [0..] `zip` trainCars k
+
+generaliseSplit :: (Applicative m, Monad m)
+                => (State -> m (Deeds, Out FVedTerm))
+                -> Generaliser -> State -> Maybe (m (ResidTags, Deeds, Out FVedTerm))
+generaliseSplit opt gen state@(deeds, heap, k, qa) = flip fmap (applyGeneraliser gen state) $ \generalised -> recurse opt $ push generalised (deeds, heap, k, QAFocus qa)
+
+
+recurse :: (Applicative m, Monad m)
+        => (State -> m (Deeds, Out FVedTerm))
+        -> (Deeds, PushedHeap, PushedStack, PushedFocus) -> m (ResidTags, Deeds, Out FVedTerm)
+recurse opt (deeds, h', k', focus') = recurseFocus opt focus' deeds >>= recurseStack opt k' >>= recurseHeap opt h'
+
+recurseFocus :: Applicative m
+             => (State -> m (Deeds, Out FVedTerm))
+             -> PushedFocus -> Deeds -> m (ResidTags, Deeds, Out FVedTerm)
+recurseFocus opt (QAFocus (Tagged tg_qa qa)) deeds = case qa of
+    Question x -> pure (oneResidTag tg_qa, deeds, var x)
+    Answer   a -> recurseAnswer opt deeds (Tagged tg_qa a)
+recurseFocus opt (TermFocus state) deeds = liftA (uncurry ((,,) emptyResidTags)) $ recurseState opt deeds state
+recurseFocus _   (OpaqueFocus e)   deeds = pure (emptyResidTags, deeds, e)
+
+recurseCoerced :: Applicative m
+               => (Tagged a -> m (ResidTags, Deeds, Out FVedTerm))
+               -> Tagged (Coerced a) -> m (ResidTags, Deeds, Out FVedTerm)
+recurseCoerced recurse (Tagged tg_a  (Uncast,         a)) = recurse (Tagged tg_a a)
+recurseCoerced recurse (Tagged tg_co (CastBy co tg_a, a)) = fmap (\(resid, deeds, e') -> (resid `plusResidTags` oneResidTag tg_co, deeds, e' `cast` co)) $ recurse (Tagged tg_a a)
+
+recurseAnswer :: Applicative m
+              => (State -> m (Deeds, Out FVedTerm))
+              -> Deeds -> Tagged (ValueG State) -> m (ResidTags, Deeds, Out FVedTerm)
+recurseAnswer opt deeds (Tagged tg_v v) = liftA (uncurry ((,,) (oneResidTag tg_v))) $ case v of
+    Literal l          -> pure (deeds, value (Literal l))
+    Coercion co        -> pure (deeds, value (Coercion co))
+    Data dc tys cos xs -> pure (deeds, value (Data dc tys cos xs))
+    TyLambda a state   -> liftA (second (value . TyLambda a)) $ recurseState opt deeds state
+    Lambda   x state   -> liftA (second (value . Lambda   x)) $ recurseState opt deeds state
+
+recurseState :: (State -> m (Deeds, Out FVedTerm))
+             -> Deeds -> State -> m (Deeds, Out FVedTerm)
+recurseState opt deeds state = opt (deeds `addStateDeeds` state)
+
+recurseStack :: (Applicative m, Monad m)
+             => (State -> m (Deeds, Out FVedTerm))
+             -> PushedStack -> (ResidTags, Deeds, Out FVedTerm) -> m (ResidTags, Deeds, [(Var, FVedTerm)], FVedTerm)
+recurseStack opt k (init_resid_tgs, init_deeds, init_e) = (\f -> foldM f (init_resid_tgs, init_deeds, [], init_e) k) $ \(resid_tgs, deeds, xes, e) (Tagged tg_kf kf) -> do
+    (resid_tgs, deeds, xes, e) <- case kf of
+      TyApply ty                  -> return (resid_tgs, deeds, xes, e `tyApp` ty)
+      CoApply co                  -> return (resid_tgs, deeds, xes, e `coApp` co)
+      Apply   x                   -> return (resid_tgs, deeds, xes, e `app`   x)
+      Scrutinise x ty alts        -> liftM (\(deeds, alts') -> (resid_tgs, deeds, xes, case_ e x ty alts')) $ (\f -> mapAccumLM f deeds alts) $ \deeds (alt_con, state) ->
+                                        liftM (\(deeds, e) -> (deeds, (alt_con, e))) $ recurseState opt deeds state
+      PrimApply pop tys as states -> do (resid_tgs, deeds, as_es')     <- mapAccumLM' (recurseValue opt) resid_tgs deeds as
+                                        (           deeds, states_es') <- mapAccumLM  (recurseState opt)           deeds states
+                                        return (resid_tgs, deeds, xes, primOp pop tys (as_es' ++ e:states_es'))
+      StrictLet x state           -> liftM (\(extra_deeds, e') -> (resid_tgs, extra_deeds, xes, let_ x e e')) $ recurseState opt deeds state
+      Update x                    -> return (resid_tgs, deeds, (x, e) : xes, var x)
+      CastIt co                   -> return (resid_tgs, deeds, xes, e `cast` co)
+    return (oneResidTag tg_kf `plusResidTags` resid_tgs, deeds, xes, e)
+
+recurseValue :: Applicative m
+             => (State -> m (Deeds, Out FVedTerm))
+             -> Deeds -> PushedValue -> m (ResidTags, Deeds, Out FVedTerm)
+recurseValue opt = recurseCoerced . recurseAnswer opt
+
+recurseHeap :: (Applicative m, Monad m)
+            => (State -> m (Deeds, Out FVedTerm))
+            -> PushedHeap -> (ResidTags, Deeds, [(Var, FVedTerm)], FVedTerm) -> m (ResidTags, Deeds, FVedTerm)
+recurseHeap opt init_h (init_resid_tgs, init_deeds, init_xes, e)
+  -- Unfortunately, it is necessary to remove elements from init_h that already have a residual binding in init_xes.
+  -- The reason for this is that if the stack has an initial update and a value is in focus, we can get a residual
+  -- binding for that from either the "stack" or the "heap" portion. What we must avoid is binding both in a let at the same time!
+  = go (foldr (M.delete . fst) init_h init_xes) init_resid_tgs init_deeds init_xes
+       (foldr (\(x, e) fvs -> varBndrFreeVars x `unionVarSet` fvedTermFreeVars e `unionVarSet` fvs) (fvedTermFreeVars e) init_xes)
+  where go h resid_tgs deeds xes do_fvs
+          -- | pprTrace "go" (ppr do_fvs $$ ppr (M.keysSet h)) False = undefined
+          | M.null h_to_do = return (resid_tgs, deeds, bindManyMixedLiftedness fvedTermFreeVars xes e)
+          | otherwise      = do (resid_tgs, deeds, extra_xes) <- mapAccumLM' (\deeds (x, e) -> {- pprTrace "go1" (ppr x) $ -} liftM (third3 ((,) x)) $ either (recurseValue opt deeds) (liftM (\(deeds, e) -> (emptyResidTags, deeds, e)) . recurseState opt deeds) e) resid_tgs deeds (M.toList h_to_do)
+                                go h' resid_tgs deeds (extra_xes ++ xes)
+                                   (foldr (\(x, e) do_fvs -> varBndrFreeVars x `unionVarSet` fvedTermFreeVars e `unionVarSet` do_fvs) emptyVarSet extra_xes)
+         where (h_to_do, h') = M.partitionWithKey (\x _ -> x `elemVarSet` do_fvs) h
+
+mapAccumLM' :: Monad m => (Deeds -> x -> m (ResidTags, Deeds, y)) -> ResidTags -> Deeds -> [x] -> m (ResidTags, Deeds, [y])
+mapAccumLM' f resid_tgs deeds xs
+  = liftM (\((resid_tgs, deeds), ys) -> (resid_tgs, deeds, ys)) $
+      mapAccumLM (\(resid_tgs, deeds) x -> liftM (\(resid_tgs', deeds', y) -> ((resid_tgs `plusResidTags` resid_tgs', deeds'), y)) $ f deeds x)
+                 (resid_tgs, deeds) xs
+
+{-
+
+-- Push as much stuff as possible transitively into heap bindings
+-- ~~~
+-- Expect z to be residualised, x and y to be pushed in
+test1 = Just z
+  where x = sumseq 100
+        y = sumseq x
+        z = sumseq y
+
+-- Push stuff transitively in even if there are multiple syntactic occurrences
+-- ~~~
+-- Expect z to be residualised, w, x and y to be pushed in
+test2 = Just z
+  where w = sumseq 100
+        x = sumseq w
+        y = sumseq w
+        z = sumseq x + sumseq y
+
+-- Inline values even into non-linear contexts
+-- ~~~
+-- Expect that x is pushed into the lambda
+test3 y ys = \_ -> head x
+  where x = y:ys
+
+-- Do not inline non-values into non-linear contexts
+-- ~~~
+-- Expect that x is residualised outside the lambda
+test4 = \_ -> x+1
+  where x = sumseq 100
+
+-- Treat non-linearity due to cases appropriately
+-- ~~~
+-- Expect that y is pushed down instead of residualised
+test5 x = if x then y + 1 else y + 2
+  where y = sumseq 100
+
+-- Even when the case braches themselves refer to a thing whose value depends on the scrut
+-- ~~~
+-- Expect that the update for ys and scrutinisation on ys is pushed into each branch
+test6 :: Bool -> Int
+test6 unk = case ys of x : _ -> x; [] -> 0
+  where ys = if unk then 1 : ys else 2 : ys -- NB: case branches refer to "ys" bound by an update frame at time of split
+
+-- Deal with cycles through the stack
+-- ~~~
+-- Expect that everything is residualised except z, which can be pushed into y
+test7 :: Int -> Int
+test7 unk = x + 2
+  where z = sumseq x
+        y = unk + z
+        x = case y of 1 -> 2; _ -> 3
+
+-- Allowing pushing of things which are cyclic due to loops
+-- ~~~
+-- Expect that xs is pushed into the z heap binding
+test8 x = Just z
+  where xs = cons x xs
+        z = head xs
+
+-- Push cyclic things even if they are mutually recursive
+-- ~~~
+-- Expect that xs and ys are pushed into the z heap binding
+test9 :: Int -> Maybe Int
+test9 x = Just z
+  where xs = cons x ys
+        ys = cons x xs
+        z = head xs + head ys
+
+-- Do not push cyclic things naively
+-- ~~~
+-- Expect that both xs and ys are residualised (I had a real bug where they weren't)
+test10 x = (a, b)
+  where xs = cons x ys
+        ys = cons x xs
+        a = head xs
+        b = head ys
+
+-- Preferentially push down values when we have a choice
+-- ~~~
+-- Expect that y is pushed into the lambda body and x is residualised around the lambda
+-- (Note that it would also be "valid" to residualise f and push x into the heap binding,
+-- to get resid around the f lambda. If f were a type lambda then we might even get fusion this way,
+-- so this is something of a heuristic)
+test11 = \_ -> f () + 1
+  where x = sumseq 100
+        f = \_ -> x + 1
+
+-}
+
+
+newtype DeedsA a = DeedsA (Deeds, [Size], State.State [Deeds] a)
+
+instance Functor DeedsA where
+    fmap = liftA
+
+instance Applicative DeedsA where
+    pure x = DeedsA (emptyDeeds, [], return x)
+    DeedsA (deeds1, sizes1, mf) <*> DeedsA (deeds2, sizes2, mx) = DeedsA (deeds1 `plusDeeds` deeds2, sizes1 ++ sizes2, mf <*> mx)
+
+yieldDeeds :: Deeds -> DeedsA ()
+yieldDeeds deeds = DeedsA (deeds, [], return ())
+
+askForDeeds :: Size -> DeedsA Deeds
+askForDeeds sz = DeedsA (emptyDeeds, [sz], State.state $ \(deeds:deedss) -> (deeds, deedss))
+
+runDeedsA :: DeedsA a -> a
+runDeedsA (DeedsA (deeds, sizes, mx)) = x
+  where (x, []) = State.runState mx (splitDeeds deeds sizes)
+
+
+data PushFocus qa term = QAFocus qa
+                       | TermFocus term
+                       | OpaqueFocus FVedTerm
+
+type PushedHeapBinding = Either PushedValue State
+type PushedHeap  = M.Map (Out Var) PushedHeapBinding
+type PushedStack = [Tagged (StackFrameG PushedValue State [AltG State])]
+type PushedValue = Tagged (Coerced (ValueG State))
+type PushedQA    = Tagged (QAG (ValueG State))
+type PushedFocus = PushFocus PushedQA State
+
+-- NB: it is not necessary for the traversal order here to match that in "recurse", even when doing FCFS
+traversePushedState :: Applicative t => (State -> t State)
+                    -> (PushedHeap, PushedStack, PushedFocus) -> t (PushedHeap, PushedStack, PushedFocus)
+traversePushedState f (heap, stack, focus) = liftA3 (,,) (traversePushedHeap f heap) (traversePushedStack f stack) (traversePushedFocus f focus)
+
+traversePushedHeap :: Applicative t => (State -> t State)
+                   -> PushedHeap -> t PushedHeap
+traversePushedHeap f = traverse traverse_heap_binding
+  where traverse_heap_binding = either (fmap Left . traversePushedValue f) (fmap Right . f)
+
+traversePushedValue :: Applicative t => (State -> t State)
+                    -> PushedValue -> t PushedValue
+traversePushedValue f = traverse (traverse (traverse f))
+
+traversePushedStack :: Applicative t => (State -> t State)
+                    -> PushedStack -> t PushedStack
+traversePushedStack f = traverse (traverse traverse_stack_frame)
+  where traverse_stack_frame kf = case kf of
+          TyApply ty              -> pure $ TyApply ty
+          CoApply co              -> pure $ CoApply co
+          Apply x                 -> pure $ Apply x
+          Scrutinise x ty alts    -> liftA (Scrutinise x ty) (traverse (traverse f) alts)
+          PrimApply pop tys as es -> liftA2 (PrimApply pop tys) (traverse (traversePushedValue f) as) (traverse f es)
+          StrictLet x e           -> liftA (StrictLet x) (f e)
+          Update x                -> pure $ Update x
+          CastIt co               -> pure $ CastIt co
+
+traversePushedFocus :: Applicative t => (State -> t State)
+                    -> PushedFocus -> t PushedFocus
+traversePushedFocus f (QAFocus qa)    = fmap QAFocus $ traverse (traverse (traverse f)) qa
+traversePushedFocus f (TermFocus e)   = fmap TermFocus $ f e
+traversePushedFocus _ (OpaqueFocus e) = pure $ OpaqueFocus e
+
+push :: S.Set Context
+     -> (Deeds, Heap, Stack, PushFocus (Anned QA) (In AnnedTerm))
+     -> (Deeds, PushedHeap, PushedStack, PushedFocus)
+push generalised (deeds, Heap h ids, k, focus) = -- pprTrace "push" (ppr verts $$ ppr marked)
+                                                 runDeedsA $
+                                                   liftA (\(deeds, (heap, stack, focus)) -> (deeds, heap, stack, focus)) $
+                                                     yieldDeeds deeds *>
+                                                       liftA2 (,) (askForDeeds 1)
+                                                                  (traversePushedState (\state -> liftA (`addStateDeeds` state) $ askForDeeds (stateSize state)) (h', k', focus'))
+  where -- TODO: arguably I should try to get a QA for the thing in the focus. This will help in cases like where we MSG together:
+        --  < H | v | >
+        -- and:
+        --  < H, H' | v | update f >
+        -- Since ideally instance splitting the second state should allow us to drive H' with the value binding f |-> v. A similar argument applies to questions in focus.
+        mb_scrut = case focus of QAFocus qa | Question x' <- annee qa -> Just x'; _ -> Nothing
+
+        (verts_h,     prepare_h, mk_h)     = splitPureHeap ids h
+        (verts_k,     prepare_k, mk_k)     = splitStack    ids k     mb_scrut
+        (verts_focus,            mk_focus) = splitFocus    ids focus (FocusContext `S.member` generalised)
+
+        -- We *always* mark values. This is really a rather interesting choice. If I have:
+        --   x = e1
+        --   y = v2[x]
+        --   z = \a -> e3[y]
+        --
+        -- If I start supercompiling with y as the root, I might be able to fuse e1 into v2 if the occurrence
+        -- within the RHS of y is Once. However, if I start from z then I may not be able to do this fusion if
+        -- y occurs in a Many context because the value portion v2 will be moved down and the expression portion
+        -- x will be left residualised above the lambda.
+        --
+        -- What I gain from this behaviour, of course, is that v2 may fuse with e3, which is probably more valuable
+        -- in general anyway. 
+        --
+        -- NB: must explicitly avoid collapsing away any value nodes if they are marked as generalised
+        --
+        -- We have to remove any unreachable nodes, or they may pessimise my results by acting as extra "roots" and hence
+        -- forcing more things to be unmarked. In particular, we have to watch out for:
+        --   1. Vertices originating from dead heap bindings
+        --   2. Heap verticies originating from on-stack updates that bind dead variables
+        cheap_marked = S.fromDistinctAscList [HeapContext x' | (x', hb) <- M.toAscList h, maybe True (termIsCheap . snd) (heapBindingTerm hb)] S.\\ generalised
+        verts = trimUnreachable FocusContext $
+                shortcutEdges (`S.member` cheap_marked)
+                              plusEntries (\ent1 _ _ent2 -> ent1) -- NB: we discard ent2. Consider inlining binding (x = Just y) [which marks y as Many] into context (case x of Just y -> ...) [which marks x as Once]. We don't want to mark y with Many (i.e. Once+Many) because we can in fact push it down safely.
+                              (verts_h `unionDisjoint` (verts_k `unionDisjoint` verts_focus))
+        extra_marked = solve generalised verts
+        marked = cheap_marked `S.union` extra_marked
+
+        -- Prepare a version of the heap and stack suitable for inlining
+        h_prep_h           = prepare_h generalised marked
+        (h_prep_k, k_prep) = prepare_k generalised marked
+        h_prep             = h_prep_h `M.union` h_prep_k
+
+        -- Produce final syntax with nested States
+        h'     = mk_h     h_prep {- We never inline any stack information into the heap -}
+        k'     = mk_k     h_prep k_prep
+        focus' = mk_focus h_prep k_prep
+
+solve :: S.Set Context
+      -> LGraph Context Entries
+      -> S.Set Context
+solve generalised = M.keysSet . go_graph
+  where
+    go_graph = uncurry (flip $ go M.empty) . sccs
+
+    -- NB: the input list is ascending, so lower indexes come first, so we process all predecessors of a SCC before the SCC itself
+    go :: M.Map Context (Maybe Context)                         -- Successor |-> Just context (iff you end up in a *single* context by inlining into all predecessors, and which context that is).
+       -> IM.IntMap (LGraph Context Entries)                    -- Information about the internal structure of each SCC
+       -> [(Int, M.Map Int (M.Map (Context, Context) Entries))] -- Topologically sorted SCC graph
+       -> M.Map Context Context                                 -- Marked contexts, mapped to the context they will end up in after inlining
+    go _            _       []                             = M.empty
+    go predecessors scc_map ((lowlink, external_ens):rest) = marks' `unionDisjoint` go predecessors' scc_map rest
+      where
+        scc = IM.findWithDefault (error "solveSCCs: no SCC info") lowlink scc_map
+        -- 1. Find all entry points to the SCC
+        -- 2. Check whether *all* the entry points can be marked
+        --   a) If so, mark everything in the SCC
+        --   b) Otherwise, meddle with the subgraph to remove all edges pointing to the entry points
+        --      and recursively solve this subgraph (treating all nodes with no incoming edges as unmarkable roots),
+        --      getting back a set possibly containing extra marks for some of the things in the SCC
+        -- 3. Recurse with the new mark set
+        predecessors_here = M.filterWithKey (\n _ -> n `M.member` scc) predecessors
+
+        marks' = case M.null predecessors_here of
+              -- No predecessors to whole SCC: this must be a root node (which incidentally may never be a self-cycle).
+              -- A root node is either a FocusContext or an element of a SCC forced to be resid to break cycles in the
+              -- induction step. We NEVER want to mark such a node.
+              True -> M.empty
+              -- SCC has predecessors
+              False | Just common_ctxt <- foldr1 plusContext $ M.elems predecessors_here
+                    , S.null (M.keysSet scc `S.intersection` generalised)
+                    -- Inlining along *all* of the predecessors for *all* of the entry points arrives at a
+                    -- common destination, and all of the SCC nodes are ungeneralised, so we can mark the whole SCC.
+                    -- If even one node in the SCC is marked as generalised then we can't do a thing about it,
+                    -- because marking even one entry node in the SCC will require us to mark that generalised
+                    -- node as well, which is not allowed.
+                    -> M.fromDistinctAscList [(scc_node, common_ctxt) | scc_node <- M.keys scc]
+                    | otherwise
+                    -- Inlining failed along at least one path. By a theorem, we can only mark an entry node of the SCC
+                    -- if we can mark *every* entry node. Thus at this point we will force all such nodes to be unmarked
+                    -- and then recursively solve the simplified SCC graph to see if we can inline anything that previously
+                    -- participated in a cycle but was not itself an entry node.
+                    , let force_unmarked = M.keysSet predecessors_here
+                          scc_cut = filterEdges (\_ n' -> n' `S.notMember` force_unmarked) scc
+                          -- NB: no need to specify any predecessors in this recursive call because all nodes with a
+                          -- predecessor in a previous SCC have been force unmarked
+                    -> go_graph scc_cut
+
+        predecessors' = foldr (uncurry $ M.insertWith plusContext) predecessors
+                              [ (ctxt', mb_destination)
+                              | external_ens' <- M.elems external_ens
+                              , ((ctxt, ctxt'), ent) <- M.toList external_ens'
+                              , let mb_destination | Just dest_ctxt <- M.lookup ctxt marks' = Just dest_ctxt -- Marked, inherits final context (NB: in this case the edge annotation is irrelevant)
+                                                   | ManyEntries <- ent                     = Nothing        -- Not marked and inlining would duplicate work, so prevent marking of successors
+                                                   | otherwise                              = Just ctxt      -- Not marked, so any inlining (which would not duplicate work) stops here
+                              ]
+
+plusContext :: Maybe Context -> Maybe Context -> Maybe Context
+plusContext (Just c1) (Just c2) | c1 == c2 = Just c1
+plusContext _ _ = Nothing
+
+splitFocus :: InScopeSet -> PushFocus (Anned QA) (In AnnedTerm) -> Generalised -> (LGraph Context Entries,
+                                                                                   PureHeap -> IM.IntMap Stack -> PushedFocus)
+splitFocus ids (QAFocus qa)     True  = (M.singleton FocusContext $ M.insert (StackContext 0) ManyEntries qa_verts,
+                                         \h_prep k_prep -> QAFocus (mk_qa h_prep k_prep))
+  where (qa_verts, mk_qa) = splitQA ids qa
+splitFocus ids (QAFocus qa)     False = (M.singleton FocusContext $ M.insert (StackContext 0) OneEntry (varEdges OneEntry (annedFreeVars qa)),
+                                         \h_prep k_prep -> TermFocus (emptyDeeds, Heap h_prep ids, lookupStackPrep 0 k_prep, qa))
+splitFocus ids (TermFocus in_e) True  = splitOpaque $ annedTermToFVedTerm $ renameIn (renameAnnedTerm ids) in_e
+splitFocus ids (TermFocus in_e) False = (M.singleton FocusContext $ M.insert (StackContext 0) OneEntry e_verts,
+                                         \h_prep k_prep -> TermFocus (mk_e h_prep k_prep))
+  where (e_verts, mk_e) = splitTailKnownTerm ids 0 in_e
+splitFocus _   (OpaqueFocus e') _     = splitOpaque e'
+
+splitOpaque :: FVedTerm -> (LGraph Context Entries,
+                            PureHeap -> IM.IntMap Stack -> PushedFocus)
+splitOpaque e' = (M.singleton FocusContext $ M.insert (StackContext 0) ManyEntries $ varEdges ManyEntries (fvedTermFreeVars e'), \_ _ -> OpaqueFocus e')
+
+splitQA :: InScopeSet -> Anned QA -> (M.Map Context Entries,
+                                      PureHeap -> IM.IntMap Stack -> PushedQA)
+splitQA ids anned_qa = (qa_verts, \h_prep _k_prep -> Tagged (annedTag anned_qa) (mk_untagged_qa h_prep))
+  where (qa_verts, mk_untagged_qa) = case annee anned_qa of
+          Question x' -> (M.singleton (HeapContext x') ManyEntries, \_ -> Question x')
+          Answer a    -> second (Answer .) $ splitAnswer ids a
+
+splitCoerced :: (a         -> (M.Map Context Entries, PureHeap ->         b))
+              -> Coerced a -> (M.Map Context Entries, PureHeap -> Coerced b)
+splitCoerced split (cast_by, a) = (plusEntered (varEdges ManyEntries (castByFreeVars cast_by)) *** (((,) cast_by) .)) $ split a
+
+splitAnswer :: InScopeSet -> Answer -> (M.Map Context Entries,
+                                        PureHeap -> ValueG State)
+splitAnswer ids (rn, v) = case renameValueG (,,) ids rn v of
+    -- The isOneShotBndr check is really necessary if we want to fuse a top-level non-value with some consuming context in the IO monad.
+    Lambda   x' ids_in_e  -> second (Lambda x' .)   $ split_lambda x' (if isOneShotBndr x' then OneEntry else ManyEntries) ids_in_e
+    TyLambda a' ids_in_e  -> second (TyLambda a' .) $ split_lambda a' OneEntry                                             ids_in_e
+    Literal l'            -> holeless_v $ Literal l'
+    Coercion co'          -> holeless_v $ Coercion co'
+    Data dc tys' cos' xs' -> holeless_v $ Data dc tys' cos' xs'
+  where
+    holeless_v v' = (varEdges ManyEntries $ valueGFreeVars' (const emptyVarSet) v', \_ -> v')
+    split_lambda x' entries (ids', rn', e) = (varBndrEdges x' e_verts, mk_e . M.insert x' lambdaBound)
+      where (e_verts, mk_e) = splitTerm ids' entries (rn', e)
+
+splitTerm :: InScopeSet -> Entries -> In AnnedTerm -> (M.Map Context Entries,
+                                                       PureHeap -> State)
+splitTerm ids entries (rn, e) = (varEdges entries (annedTermFreeVars (renameAnnedTerm ids rn e)),
+                                 \h_prep -> normalise (emptyDeeds, Heap h_prep ids, Loco False, (rn, e)))
+
+splitTailKnownTerm :: InScopeSet -> Int -> In AnnedTerm -> (M.Map Context Entries,
+                                                            PureHeap -> IM.IntMap Stack -> State)
+splitTailKnownTerm ids frame (rn, e) = (varEdges OneEntry (annedTermFreeVars (renameAnnedTerm ids rn e)),
+                                        \h_prep k_prep -> normalise (emptyDeeds, Heap h_prep ids, lookupStackPrep frame k_prep, (rn, e)))
+
+-- NB: when driving a residual binding:
+--   let x = D[e]
+--   in ..
+--
+-- Arjan Boeijink suggested driving the following instead of D[e]:
+--   D[< | e | update x>]
+--
+-- This can help propagate more positive information, e.g. if e contains an occurrence of x itself
+--
+-- I'm not doing this right now because I'm wary about the termination issues. We should also be careful that we
+-- don't create loops as a result...
+
+-- NB: we need to add elements to the graph even for empty lambdaBound bindings to avoid references to nonexistant nodes
+splitPureHeap :: InScopeSet -> PureHeap -> (LGraph Context Entries,
+                                            S.Set Context -> S.Set Context -> PureHeap,
+                                            PureHeap -> PushedHeap)
+splitPureHeap ids h = (M.fromDistinctAscList [ (HeapContext x', fmap fst mb_split_hb `orElse` M.empty)
+                                             | (x', mb_split_hb) <- M.toAscList split_h ],
+                       \generalised marked -> (\f -> M.mapWithKey f h) $ \x' hb -> if HeapContext x' `S.member` marked then hb else if HeapContext x' `S.member` generalised then generalisedLambdaBound else lambdaBound, -- FIXME: bugger around with howToBindCheap?
+                       \h_prep -> (\f -> M.mapMaybe f split_h) $ \mb_split_hb -> do
+                                    -- TODO: we could only include in the output those bindings that are either NOT marked for inlining,
+                                    -- or are cheap (and thus had marking forced regardless of whether they are used in the residual).
+                                    -- Similarly, it would be cool to exclude bindings arising from the first update frame to avoid messiness in recurseHeap
+                                    (_, (how_bound, mk_e)) <- mb_split_hb
+                                    guard (how_bound == InternallyBound)
+                                    return (mk_e h_prep))
+  where
+    split_h :: M.Map Var (Maybe (M.Map Context Entries, (HowBound, PureHeap -> PushedHeapBinding)))
+    split_h = flip M.map h $ \hb -> fmap ((second ((,) (howBound hb))) . splitHeapTerm) (heapBindingTerm hb)
+
+    splitHeapTerm :: In AnnedTerm -> (M.Map Context Entries,
+                                      PureHeap -> PushedHeapBinding)
+    splitHeapTerm (rn, e)
+      | eAGER_SPLIT_VALUES
+      , Just anned_a <- termToCastAnswer ids (rn, e)
+      , let (qa_verts, mk_value) = splitValue ids anned_a
+      = (qa_verts, \h_prep -> Left (mk_value h_prep))
+      | otherwise
+      , let (e_verts, mk_e) = splitTerm ids OneEntry (rn, e)
+      = (e_verts, \h_prep -> Right (mk_e h_prep))
+
+-- NB: we need to add an explicit final frame to prevent the stack and QA graphs from having references to nonexistant nodes
+splitStack :: InScopeSet -> Stack -> Maybe Var -> (LGraph Context Entries,
+                                                   S.Set Context -> S.Set Context -> (PureHeap, IM.IntMap Stack),
+                                                   PureHeap -> IM.IntMap Stack -> PushedStack)
+splitStack ids k mb_scrut = go (fmap (\x' -> ((Uncast, x'), [])) mb_scrut, 0, [], \_ _ -> (M.empty, (Nothing, IM.empty)), \_ _ -> [] {- \_ _ -> ss_unknown_tail [] -}) k
+  where
+    finish_prep_k :: Generalised -> (Maybe (Int, Stack -> Stack), IM.IntMap Stack) -> IM.IntMap Stack
+    finish_prep_k gen (mb_next_run, done_runs) = maybe id (\(next_run_frame, next_run) -> IM.insert next_run_frame (next_run (Loco gen))) mb_next_run done_runs
+
+    go (_,         last_frame, verts, prep_k, mk_k) (Loco gen)                = (fromListDisjoint ((StackContext last_frame, M.empty):verts), (second (finish_prep_k gen) .) . prep_k, (reverse .) . mk_k)
+    go (mb_scruts, frame,      verts, prep_k, mk_k) (Tagged tg_kf kf `Car` k) = go (mb_scruts', next_frame, verts' ++ verts, prep_k', mk_k') k
+      where
+        -- NB: we insert dummies into done_runs so we can signal to mk_k that frame was marked, even if the frame is not the first in a run of marked frames
+        -- NB: this *does* produce the stack in the right order, since:
+        --       xs == foldl (\rest x -> (. (x:)) rest) id xs []
+        prep_k' generalised marked | StackContext frame `S.member` marked = (h_update,                                                   (Just $ second (. (Tagged tg_kf kf `Car`)) $ fromMaybe (frame, id) mb_next_run, IM.insert frame (error "prep_k': dummy") done_runs))
+                                   | otherwise                            = (maybe id (flip M.insert lambdaBound) mb_update_x' h_update, (Nothing, finish_prep_k (StackContext frame `S.member` generalised) (mb_next_run, done_runs)))
+          where (h_update, (mb_next_run, done_runs)) = prep_k generalised marked
+
+        mk_k' h_prep k_prep | frame `IM.member` k_prep = mk_k h_prep k_prep -- If the frame was marked (inlined), we needn't residualise it
+                            | otherwise                = Tagged tg_kf (kf_prep h_prep k_prep) : mk_k h_prep k_prep
+
+        scruts_flat = maybe [] (uncurry (:)) mb_scruts
+
+        next_frame = frame + 1
+        verts' = (StackContext frame, M.insert (StackContext next_frame) (if know_tail then OneEntry else ManyEntries) edges):update_verts
+        update_verts = case mb_update_x' of
+          Just x' -> [(HeapContext x', M.singleton (StackContext frame) ManyEntries)]
+          Nothing -> []
+
+        (mb_scruts', mb_update_x', know_tail, edges, kf_prep) = case kf of
+          TyApply ty'                  -> (Nothing, Nothing, False, M.empty, \_ _ -> TyApply ty')
+          CoApply co'                  -> (Nothing, Nothing, False, varEdges ManyEntries (tyCoVarsOfCo co'), \_ _ -> CoApply co')
+          Apply x'                     -> (Nothing, Nothing, False, M.singleton (HeapContext x') ManyEntries, \_ _ -> Apply x')
+          Scrutinise x' ty' (rn, alts) -> (Nothing, Nothing, True, varBndrEdges x' $ foldr plusEntered M.empty alts_verts,
+                                           \h_prep k_prep -> Scrutinise x' (stackType (lookupStackPrep next_frame k_prep) ty') (map (($ k_prep) . ($ h_prep)) mk_alts))
+            where any_scrut_live = any (not . isDeadBinder . snd) scruts_flat
+                  
+                  -- These lines achieve two things:
+                  --   1. Filter out any branches of the case which we know are impossible due to type refinement
+                  --   2. Turn any remaining default cases into explicit constructors if possible (helps positive information propagation)
+                  refined_alts | not rEFINE_ALTS = alts
+                               | otherwise = [ (coreAltConToAltCon altcon xs, e)
+                                             | (altcon, xs, e) <- thirdOf3 $ filterAlts (repeat wildCardKey) (idType x') []
+                                                                                        [ (altcon', xs, e)
+                                                                                        | (altcon, e) <- alts
+                                                                                        , let (altcon', xs) = altConToCoreAltCon altcon ] ]
+
+                  (alts_verts, mk_alts) = unzip [ (foldr varBndrEdges e_verts alt_bvs',
+                                                   \h_prep k_prep -> let h_pos | pOSITIVE_INFORMATION
+                                                                               , Just anned_v <- altConToValue (idType x') alt_con'
+                                                                               , let anned_e = fmap Value anned_v
+                                                                               -- State:  < | x :: A | [_] |> (co1 :: A ~ B), update (y :: B), [_] |> (co0 :: B ~ C), case ([_] :: C) ... >
+                                                                               -- Scruts: [(co0, y), (co1, x)]
+                                                                               -- Result: x |-> alt |> sym co0 `trans` sym co1
+                                                                               --         y |-> alt |> sym co0
+                                                                               = snd $ (\f -> foldl f (Uncast, M.empty) scruts_flat) $ \(overall_cast_by, h_pos) (cast_by', y') ->
+                                                                                                let overall_cast_by' = mkTransCastBy ids' overall_cast_by (mkSymCastBy ids' cast_by')
+                                                                                                    -- Localise the Id just in case this is the occurrence of a lambda-bound variable.
+                                                                                                    -- We don't really want a Let-bound external name in the output!
+                                                                                                in (overall_cast_by', M.insert (localiseId y') (internallyBound (renamedTerm (castAnnedTerm overall_cast_by' anned_e))) h_pos)
+                                                                               | otherwise
+                                                                               = M.empty
+                                                                     in (alt_con', mk_e (h_pos `M.union` foldr (\y' -> M.insert y' lambdaBound) h_prep (x':alt_bvs')) k_prep))
+                                                | (alt_con, e) <- refined_alts
+                                                  -- We have to carefully zap OccInfo here because one of the case binders might be marked as dead,
+                                                  -- yet could become live due to positive information propagation! 
+                                                , let (ids', rn', alt_con') = renameAltCon ids rn (if any_scrut_live then zapAltConIdOccInfo alt_con else alt_con)
+                                                      (e_verts, mk_e) = splitTailKnownTerm ids' next_frame (rn', e)
+                                                      alt_bvs' = altConBoundVars alt_con' ]
+          PrimApply pop tys' as in_es  -> (Nothing, Nothing, False, foldr multEntered M.empty (as_verts ++ es_verts),
+                                           \h_prep _k_prep -> PrimApply pop tys' (map ($ h_prep) mk_as) (map ($ h_prep) mk_es))
+            where (as_verts, mk_as) = unzip $ map (splitValue ids) as
+                  (es_verts, mk_es) = unzip $ map (splitTerm ids OneEntry) in_es
+          StrictLet x' in_e            -> (Nothing, Nothing, True, varBndrEdges x' e_verts,
+                                           \h_prep k_prep -> StrictLet x' (mk_e (M.insert x' lambdaBound h_prep) k_prep))
+            where (e_verts, mk_e) = splitTailKnownTerm ids next_frame in_e
+          CastIt co'                   -> (fmap (\((cast_by, x'), rest) -> ((castBy (maybe co' (\co'' -> mkTransCo ids co'' co') (castByCo cast_by)) tg_kf, x'), rest)) mb_scruts,
+                                           Nothing, False, varEdges ManyEntries (tyCoVarsOfCo co'), \_ _ -> CastIt co')
+          Update x'                    -> (Just ((Uncast, x'), scruts_flat),
+                                           Just x', False, varEdges ManyEntries (varBndrFreeVars x'), \_ _ -> Update x')
+
+splitValue :: InScopeSet -> Anned (Coerced Answer) -> (M.Map Context Entries,
+                                                       PureHeap -> Tagged (Coerced (ValueG State)))
+splitValue ids anned_a = second (Tagged (annedTag anned_a) .) $ splitCoerced (splitAnswer ids) (annee anned_a)
+
+lookupStackPrep :: Int -> IM.IntMap Stack -> Stack
+lookupStackPrep = IM.findWithDefault (Loco False)
+
+varBndrEdges :: Var -> M.Map Context Entries -> M.Map Context Entries
+varBndrEdges x' verts = varEdges ManyEntries (varBndrFreeVars x') `plusEntered` M.delete (HeapContext x') verts
--- a/compiler/supercompile/Supercompile/Evaluator/Deeds.hs
+++ b/compiler/supercompile/Supercompile/Evaluator/Deeds.hs
+module Supercompile.Evaluator.Deeds where
+
+import Supercompile.StaticFlags
+import Supercompile.Utilities
+
+import Data.Monoid (Monoid(mappend, mempty))
+
+
+-- | Number of unclaimed deeds. Invariant: always greater than or equal to 0
+type Unclaimed = Int
+
+-- | A deed supply shared amongst all expressions
+data Deeds = Deeds {
+    sizeLimit :: {-# UNPACK #-} !Int,
+    stepLimit :: {-# UNPACK #-} !Int
+  } deriving (Eq)
+
+instance Outputable Deeds where
+    ppr d = ppr (sizeLimit d, stepLimit d)
+
+instance Monoid Deeds where
+    mempty = emptyDeeds
+    mappend = plusDeeds
+
+instance Bounded Deeds where
+    maxBound = Deeds { sizeLimit = maxBound `div` 2, stepLimit = maxBound `div` 2 } -- Try to avoid overflow :-)
+    minBound = emptyDeeds
+
+emptyDeeds :: Deeds
+emptyDeeds = Deeds { sizeLimit = 0, stepLimit = 0 }
+
+plusDeeds :: Deeds -> Deeds -> Deeds
+plusDeeds d1 d2 = d1 `seq` d2 `seq` Deeds { sizeLimit = sizeLimit d1 + sizeLimit d2, stepLimit = stepLimit d1 + stepLimit d2 }
+
+plusDeedss :: [Deeds] -> Deeds
+plusDeedss = foldr plusDeeds emptyDeeds
+
+claimStep :: Deeds -> Maybe Deeds
+claimStep deeds = guard (stepLimit deeds > 0) >> return (deeds { stepLimit = stepLimit deeds - 1 })
+
+-- NB: it is OK if the number of deeds to claim is negative -- that just causes some deeds to be released
+claimDeeds :: Deeds -> Int -> Maybe Deeds
+claimDeeds deeds want = guard (not dEEDS || sizeLimit deeds >= want) >> return (deeds { sizeLimit = sizeLimit deeds - want })
+
+releaseDeeds :: Deeds -> Int -> Deeds
+releaseDeeds deeds release = deeds { sizeLimit = sizeLimit deeds + release }
+
+apportionDeeds :: Deeds -> [Int] -> [Deeds]
+apportionDeeds deeds weights = zipWith Deeds (apportion (sizeLimit deeds) weights) (apportion (stepLimit deeds) weights)
+
+splitDeeds :: Deeds -> [Size] -> [Deeds]
+splitDeeds _     []           = error "splitDeeds: no sizes"
+splitDeeds deeds (size:sizes) = case dEEDS_POLICY of
+    Proportional -> apportionDeeds deeds (size:sizes)
+    FCFS         -> deeds : map (const emptyDeeds) sizes
+
+
+noChange, noGain :: Deeds -> Deeds -> Bool
+noChange = (==)
+noGain d1 d2 = (sizeLimit d1 >= sizeLimit d2) && (stepLimit d1 >= stepLimit d2)
--- a/compiler/supercompile/Supercompile/Evaluator/Evaluate.hs
+++ b/compiler/supercompile/Supercompile/Evaluator/Evaluate.hs
+module Supercompile.Evaluator.Evaluate (normalise, step, gc, shouldExposeUnfolding) where
+
+#include "HsVersions.h"
+
+import Supercompile.Evaluator.Deeds
+import Supercompile.Evaluator.FreeVars
+import Supercompile.Evaluator.Residualise
+import Supercompile.Evaluator.Syntax
+
+import Supercompile.Core.Renaming
+import Supercompile.Core.Syntax
+
+import Supercompile.GHC (termToCoreExpr)
+import Supercompile.StaticFlags
+import Supercompile.Utilities
+
+import qualified Data.Map as M
+
+import qualified CoreSyn as CoreSyn
+import CoreUnfold
+import DynFlags (DynFlag(..), defaultDynFlags, dopt_set)
+import Coercion (liftCoSubstWith, coercionKind, isReflCo, mkUnsafeCo)
+import TyCon
+import Type
+import PrelRules
+import Id
+import Module
+import Name (nameModule_maybe)
+import IdInfo (isShortableIdInfo)
+import DataCon
+import Pair
+import BasicTypes
+import Demand (splitStrictSig, isBotRes)
+
+
+-- FIXME: this doesn't really work very well if the Answers are indirections, which is a common case!
+-- However, this is not so serious a problem since I started spotting manifest primops applications and
+-- avoiding going through the wrapper to compile them.
+evaluatePrim :: InScopeSet -> Tag -> PrimOp -> [Type] -> [Coerced Answer] -> Maybe (Anned (Coerced Answer))
+evaluatePrim iss tg pop tys args = do
+    args' <- fmap (map CoreSyn.Type tys ++) $ mapM to args
+    (res:_) <- return [res | CoreSyn.BuiltinRule { CoreSyn.ru_nargs = nargs, CoreSyn.ru_try = f }
+                          <- primOpRules pop (error "evaluatePrim: dummy primop name")
+                      , nargs == length args
+                      , Just res <- [f (const CoreSyn.NoUnfolding) args']]
+    fmap (annedCoercedAnswer tg) $ fro res
+  where
+    to :: Coerced Answer -> Maybe CoreSyn.CoreExpr
+    to (mb_co, (rn, v)) = fmap coerce $ case v of
+        Literal l          -> Just (CoreSyn.Lit l)
+        Coercion co        -> Just (CoreSyn.Coercion co)
+        Data dc tys cos xs -> Just (CoreSyn.Var (dataConWrapId dc) `CoreSyn.mkTyApps` map (renameType iss rn) tys `CoreSyn.mkCoApps` cos `CoreSyn.mkVarApps` map (renameId rn) xs)
+        _                  -> Nothing
+      where
+        -- It is quite important that we don't wrap things with spurious refl coercions when it comes
+        -- to RULEs, because the default constant-folding rules don't trigger if there are too many coercions.
+        -- Luckily, the coercion within a CastBy is guaranteed to be non-refl.
+        coerce | Just co <- castByCo mb_co
+               = (`CoreSyn.Cast` co)
+               | otherwise
+               = id
+
+    fro :: CoreSyn.CoreExpr -> Maybe (Coerced Answer)
+    fro (CoreSyn.Cast e co)   = fmap (\(mb_co', in_v) -> (castBy (maybe co (\co' -> mkTransCo iss co' co) (castByCo mb_co')) tg, in_v)) $ fro e
+    fro (CoreSyn.Lit l)       = Just (Uncast, (emptyRenaming, Literal l))
+    fro (CoreSyn.Coercion co) = Just (Uncast, (mkIdentityRenaming (tyCoVarsOfCo co), Coercion co))
+    fro e = do (dc, univ_tys, e_args0) <- exprIsConApp_maybe (const CoreSyn.NoUnfolding) e
+               case newTyConCo_maybe (dataConTyCon dc) of
+                 Just co_axiom -> let [e_arg0] = e_args0 in fro (e_arg0 `CoreSyn.Cast` mkAxInstCo co_axiom univ_tys)
+                 Nothing -> do
+                   let (ex_tys, e_args1) = takeWhileJust toType_maybe     e_args0
+                       (cos,    e_args2) = takeWhileJust toCoercion_maybe e_args1
+                       (xs,     e_args3) = takeWhileJust toVar_maybe      e_args2
+                   [] <- return e_args3
+                   return (Uncast, (renamedValue (Data dc (univ_tys ++ ex_tys) cos xs)))
+      where toType_maybe (CoreSyn.Type ty) = Just ty
+            toType_maybe _                 = Nothing
+            
+            toCoercion_maybe (CoreSyn.Coercion co) = Just co
+            toCoercion_maybe _                     = Nothing
+            
+            toVar_maybe (CoreSyn.Var x) = Just x
+            toVar_maybe _               = Nothing
+
+type ContextSummary = (Bool, [ArgSummary], CallCtxt)
+
+summariseContext :: PureHeap -> Stack -> ContextSummary
+summariseContext h k = trainCarFoldr go (True, [], BoringCtxt) k
+  where go kf (lone_variable, arg_infos, cont_info) = case tagee kf of
+          TyApply _         -> (lone_variable,           arg_infos, cont_info)
+          CoApply _         -> (False, TrivArg         : arg_infos, ValAppCtxt)
+          Apply x'          -> (False, summariseArg x' : arg_infos, ValAppCtxt)
+          Scrutinise _ _ _  -> (True,                           [], CaseCtxt)
+          PrimApply _ _ _ _ -> (True,                           [], ArgCtxt False)
+          StrictLet _ _     -> (True,                           [], ArgCtxt False)
+          Update _          -> (True,                           [], BoringCtxt)
+          CastIt _          -> (lone_variable,           arg_infos, cont_info)
+
+        summariseArg x' = case M.lookup x' h of
+          Just hb | Just (_, e) <- heapBindingTerm hb -> if termIsValue e then ValueArg else NonTrivArg
+          _ -> TrivArg
+
+-- Can't use callSiteInline because it never inlines stuff with DFunUnfolding!
+ghcHeuristics :: Id -> AnnedTerm {- try not to pull on this, it does a lot of work -}
+              -> ContextSummary -> Bool
+ghcHeuristics x e (lone_variable, arg_infos, cont_info)
+  | isStrongLoopBreaker (idOccInfo x)
+  = False -- NB: have to check this (not just use idUnfolding) because we might consider "e"
+  | otherwise
+  = (try (realIdUnfolding x) `mplus` try answer_unf) `orElse` trce (text "No unfolding") False
+  where
+    try unf = case unf of
+      CoreSyn.CoreUnfolding { CoreSyn.uf_is_top = is_top, CoreSyn.uf_is_work_free = is_work_free, CoreSyn.uf_expandable = expandable
+                            , CoreSyn.uf_arity = arity, CoreSyn.uf_guidance = guidance }
+                               -> trce_fail (ppr (CoreSyn.uf_tmpl unf)) $
+                                  Just $ tryUnfolding dflags1 x lone_variable 
+                                                      arg_infos cont_info is_top 
+                                                      is_work_free expandable
+                                                      arity guidance
+      -- GHC actually only looks through DFunUnfoldings in exprIsConApp_maybe,
+      -- so I'll do this rough heuristic instead:
+      CoreSyn.DFunUnfolding {} -> trce_fail (text "Unsaturated dictionary unfolding") $
+                                  Just $ length arg_infos >= idArity x
+      CoreSyn.NoUnfolding      -> Nothing
+      CoreSyn.OtherCon {}      -> Nothing
+
+    dflags0 = defaultDynFlags (error "ghcHeuristics: Settings in DynFlags used!")
+    -- Set these two flags so that we get information about failed inlinings:
+    dflags1 | tRACE     = dopt_set (dopt_set dflags0 Opt_D_verbose_core2core) Opt_D_dump_inlinings
+            | otherwise = dflags0
+
+    trce_fail :: SDoc -> Maybe Bool -> Maybe Bool
+    --trce_fail doc (Just False) = trce doc (Just False)
+    trce_fail _   mb_x         = mb_x
+
+    trce :: SDoc -> a -> a
+    trce | tRACE     = pprTrace ("Considering inlining: " ++ showSDoc (ppr x))
+         | otherwise = flip const
+
+    -- NB: I'm not particularly happy that I may have to make up a whole new unfolding at ever
+    -- occurrence site, but GHC makes it hard to do otherwise because any binding with a non-stable
+    -- Unfolding pinned to it gets the Unfolding zapped by GHC's renamer
+    answer_unf = mkUnfolding CoreSyn.InlineRhs False False (termToCoreExpr (annedTermToTerm e))
+
+-- | Non-expansive simplification we can do everywhere safely
+--
+-- Normalisation only ever releases deeds: it is *never* a net consumer of deeds. So normalisation
+-- will never be impeded by a lack of deeds.
+normalise :: UnnormalisedState -> State
+normalise = snd . step' True . Right
+
+-- | Possibly non-normalising simplification we can only do if we are allowed to by a termination test
+--
+-- Unlike normalisation, stepping may be a net consumer of deeds and thus be impeded by a lack of them.
+step :: State -> Maybe State
+step s = guard reduced >> return result
+  where (reduced, result) = step' False $ Left s
+
+step' :: Bool -> Either State UnnormalisedState -> (Bool, State) -- The flag indicates whether we managed to reduce any steps *at all*
+step' normalising ei_state = {-# SCC "step'" #-}
+    -- pprTrace "step'" (either (pPrintFullState quietStatePrettiness) (pPrintFullUnnormalisedState quietStatePrettiness) ei_state) $
+    (\res@(_reduced, stepped_state) -> let _deeds = either releaseStateDeed releaseUnnormalisedStateDeed ei_state
+                                           _doc = either (pPrintFullState quietStatePrettiness) (pPrintFullUnnormalisedState quietStatePrettiness) ei_state
+                                           _fvs = either stateFreeVars unnormalisedStateFreeVars ei_state in
+                                       ASSERT2(not dEEDS || noChange _deeds (releaseStateDeed stepped_state),
+                                               hang (text "step': deeds lost or gained:") 2 (_doc $$ pPrintFullState quietStatePrettiness stepped_state))
+                                       ASSERT2(subVarSet (stateFreeVars stepped_state) _fvs,
+                                               text "step': FVs" $$ hang (text "Before:") 2 (pPrint _fvs $$ _doc) $$
+                                                                    hang (text "After:")  2 (pPrint (stateFreeVars stepped_state) $$ pPrintFullState quietStatePrettiness stepped_state))
+                                       -- traceRender (text "normalising" $$ nest 2 (pPrintFullUnnormalisedState state) $$ text "to" $$ nest 2 (pPrintFullState stepped_state)) $
+                                       res) $
+    go_entry ei_state
+  where
+    go_entry :: Either State UnnormalisedState -> (Bool, State)
+    go_entry (Left (deeds, heap, k, anned_qa)) = case annee anned_qa of
+      Question _ -> go_question (deeds, heap, k, fmap (\(Question x') -> x') anned_qa)
+      Answer   _ -> go_answer   (deeds, heap, k, fmap (\(Answer a) -> a)     anned_qa)
+    go_entry (Right state) = go state
+
+    go :: UnnormalisedState -> (Bool, State)
+    go (deeds, heap@(Heap h ids), k, (rn, e)) = case annee e of
+        Var x            -> go_question (deeds, heap, k, fmap (\(rn, Var _)   -> renameId rn x) (renameAnned (rn, e)))
+        Value v          -> go_answer   (deeds, heap, k, fmap (\(rn, Value _) -> (rn, v))       (renameAnned (rn, e)))
+        TyApp e ty       -> go (deeds, heap,        Tagged tg (TyApply (renameType ids rn ty))                                   `Car` k, (rn, e))
+        CoApp e co       -> go (deeds, heap,        Tagged tg (CoApply (renameCoercion ids rn co))                               `Car` k, (rn, e))
+        App e x          -> go (deeds, heap,        Tagged tg (Apply (renameId rn x))                                            `Car` k, (rn, e))
+        PrimOp pop tys es
+          | (e:es) <- es -> go (deeds, heap,        Tagged tg (PrimApply pop (map (renameType ids rn) tys) [] (map ((,) rn) es)) `Car` k, (rn, e))
+          | otherwise    -> pprPanic "step': nullary primops unsupported" (ppr pop)
+        Case e x ty alts -> go (deeds, Heap h ids', Tagged tg (Scrutinise x' (renameType ids rn ty) (rn', alts))                 `Car` k, (rn, e))
+          where (ids', rn', x') = renameNonRecBinder ids rn x
+        Cast e co        -> go (deeds', heap,                                                                                         k', (rn, e))
+          where (deeds', k') = appendCast deeds ids tg (renameCoercion ids rn co) k
+        Let x e1 e2
+          | isUnLiftedType (idType x) -> go (deeds,                  Heap h                                       ids', Tagged tg (StrictLet x' (rn', e2)) `Car` k, in_e1)
+          | otherwise                 -> go (deeds `releaseDeeds` 1, Heap (M.insert x' (internallyBound in_e1) h) ids',                                          k, (rn', e2))
+          where (ids', rn', (x', in_e1)) = renameNonRecBound ids rn (x, e1)
+        LetRec xes e     -> go (deeds `releaseDeeds` 1, Heap (h `M.union` M.fromList [(x', internallyBound in_e) | (x', in_e) <- xes']) ids', k, (rn', e))
+          where (ids', rn', xes') = renameBounds ids rn xes
+      where tg = annedTag e
+
+    appendCast deeds ids tg co' k
+      | isReflCo final_co' = (deeds' `releaseDeeds` 1,                              k')
+      | otherwise          = (deeds',                  Tagged tg (CastIt final_co') `Car` k')
+      where
+        -- Maintain the invariant that there are no adjacent casts in the stack
+        (deeds', final_co', k') = case k of
+          Tagged _old_tg (CastIt old_co') `Car` k' -> (deeds `releaseDeeds` 1, mkTransCo ids co' old_co', k')
+          _                                        -> (deeds,                  co',                       k)
+
+    -- TODO: in all the code below, I'm no longer sure whether we preserve deeds or not. In particular, the CastBy cases
+    -- are a worry. But since the deeds stuff is probably on the way out I'm not trying to fix it right now.
+
+    go_question (deeds, h, k, anned_x) = maybe (False, (deeds, h, k, fmap Question anned_x)) ((,) True) $ force  deeds h k (annedTag anned_x) (annee anned_x)
+    go_answer   (deeds, h, k, anned_a) = maybe (False, (deeds, h, k, fmap Answer anned_a))   ((,) True) $ unwind deeds h k (annedTag anned_a) (annee anned_a)
+
+    -- Deal with a variable at the top of the stack
+    -- Might have to claim deeds if inlining a non-value non-internally-bound thing here
+    -- FIXME: look inside unfoldings
+    force :: Deeds -> Heap -> Stack -> Tag -> Out Var -> Maybe State
+    force deeds (Heap h ids) k tg x'
+      -- Try to trim the stack if the Id is guaranteed to bottom out after a certain number of arguments
+      -- This is really amazingly important because so many case branches bottom out in at least one branch,
+      -- and we can save supercompiling big case nests if we trim them out eagerly.
+      --
+      -- The supercompiled size of Bernouilli decreased from 19193 to 16173 with this change.
+      -- FIXME: do this another way? (with freevars)
+      | Just (ds, res_d) <- fmap splitStrictSig $ idStrictness_maybe x'
+      , isBotRes res_d
+      , Just (h_extra, k) <- trimUnreachable (length ds) (idType x') k
+      = Just (deeds, Heap (h `M.union` h_extra) ids, k, fmap (\(Var x') -> Question x') (annedTerm tg (Var x'))) -- Kind of a hacky way to get an Anned Question!
+      | otherwise = do
+        -- NB: it doesn't matter if the thing we look up is superinlinable because we'll actually only need to check then when we come to
+        -- look it up as a *value*. In any case, the vast majority of SUPERINLINABLE things will be values, so we'll never get here.
+        hb <- M.lookup x' h
+        in_e <- heapBindingTerm hb
+        case termToCastAnswer ids in_e of
+          -- Consider the case of non-internal bindings. We want to get at the values of them, but since we can't create updates we can
+          -- only do so if they are already values. This is perhaps weird, but continuing using unwind' is exactly the right thing to do
+          Just anned_cast_a -> do
+            let deeds0 = releaseDeeds deeds 1 -- We can release the deed for the reference itself
+            deeds1 <- if howBound hb == InternallyBound
+                       then return deeds0
+                       else claimDeeds deeds0 (annedSize anned_cast_a)
+            unwind' deeds1 (Heap h ids) k (Just (x', (mkIdentityRenaming (unitVarSet x'), annedTerm tg (Var x'))))
+                                          (annedToTagged anned_cast_a)
+                                          (if dUPLICATE_VALUES_EVALUATOR then in_e else (mkIdentityRenaming (unitVarSet x'), fmap Var $ annedVar tg x'))
+          Nothing -> do
+            -- NB: we MUST NOT create update frames for non-concrete bindings!! This has bitten me in the past, and it is seriously confusing. 
+            guard (howBound hb == InternallyBound)
+            -- Avoid creating consecutive update frames: implements "stack squeezing" to maintain stack invariants
+            -- NB: suprisingly this is not broken, even if there are cycles in the heap:
+            --     <x |-> y, y |-> x | x |>
+            -- --> <         y |-> x | y | update x>
+            -- --> <         y |-> x | x | update x>
+            --     The reason is that squeezing adds a heap binding that just points into an update frame bound thing,
+            --     and once a binder is on the stack it won't be turned into a heap binder until we have got a value,
+            --     so inlining the squeezed heap binding will either just get us stuck immediately or get to a value
+            --
+            -- NB: squeezing will discard any flag marking x' as superinlinable. However, I think this is totally OK:
+            -- the update frame already on the stack (which is preserved by squeezing) contains the name by which the *user*
+            -- tried to access the function, and it is in keeping with the rest of GHC (where the inlinability you see is
+            -- based on the label you wrote in your own code) that that is the relevant flag.
+            return $ normalise $ case (fst (peelUpdateStack k)) of
+              Nothing                        -> (deeds, Heap (M.delete x' h)                            ids, Tagged tg (Update x') `Car` k, in_e)
+              Just (cast_by, Tagged tg_y y') -> (deeds, Heap (M.insert x' (internallyBound in_e_ref) h) ids,                             k, in_e)
+                where in_e_ref = mkVarCastBy tg_y y' cast_by
+
+    -- TODO: this function totally ignores deeds
+    trimUnreachable :: Int   -- Number of value arguments needed before evaluation bottoms out
+                    -> Type  -- Type of the possibly-bottoming thing in the hole
+                    -> Stack -- Stack consuming the hole
+                    -> Maybe (PureHeap, -- Heap bindings arising from any update frames we trimmed off
+                              Stack)    -- Trimmed stack (strictly "less" than the input one -- not necessarily shorter since we will replace e.g. a trailing Scrutinise with a Cast)
+    trimUnreachable = go
+      where
+        -- Ran out of stack: even if n == 0 we don't want to
+        -- trim the stack in these cases because we musn't return
+        -- Just if the tail of the stack is already trivial: doing
+        -- so would risk non-termination
+        go _ _       (Loco _)                           = Nothing
+        go _ _       (Tagged _ (CastIt _) `Car` Loco _) = Nothing
+        -- Got some non-trivial stack that is unreachable due to bottomness: kill it (remembering to bind any updated stuff)
+        go 0 hole_ty k@(Tagged cast_tg _ `Car` _) = Just $
+          trainFoldl' (\(!hole_ty,    !h) (Tagged tg kf) -> (stackFrameType' kf hole_ty, case kf of Update x' -> M.insert x' (internallyBound (renamedTerm (annedTerm tg (Var x')))) h; _ -> h))
+                      (\(!overall_ty, !h) gen -> (h, (if hole_ty `eqType` overall_ty then id else (Tagged cast_tg (CastIt (mkUnsafeCo hole_ty overall_ty)) `Car`)) $ Loco gen)) (hole_ty, M.empty) k
+        -- Haven't yet reached a bottom, but we might get enough arguments to reach
+        -- one in the future, so keep going
+        go n hole_ty (kf `Car` k) = mb_n' >>= \n' -> liftM (second (kf `Car`)) $ go n' (stackFrameType kf hole_ty) k
+          where mb_n' = case tagee kf of
+                          TyApply _         -> Just n
+                          CoApply _         -> Just (n - 1)
+                          Apply _           -> Just (n - 1)
+                          Scrutinise _ _ _  -> Nothing
+                          PrimApply _ _ _ _ -> Nothing
+                          StrictLet _ _     -> Nothing
+                          Update _          -> Just n
+                          CastIt _          -> Just n
+
+    -- Deal with a value at the top of the stack
+    unwind :: Deeds -> Heap -> Stack -> Tag -> Answer -> Maybe State
+    unwind deeds heap k tg_a a = unwind' deeds heap k Nothing (Tagged tg_a (Uncast, a)) (taggedAnswerToInAnnedTerm (Tagged tg_a a))
+
+    unwind' :: Deeds -> Heap -> Stack
+            -> Maybe (Var,             -- Variable name of thing in focus, for normalisation check/SUPERINLINABLE purposes
+                      In AnnedTerm)    -- Version of the focus that should go into *focus* (useful if we are reducing <x = v | x | update y >, we want x in the focus in next step, not y!)
+            -> Tagged (Coerced Answer) -- "Meaning" of the focus, in value terms
+            -> In AnnedTerm            -- Version of the focus that should go into heap (may just be a variable reference to a value)
+            -> Maybe State
+    unwind' deeds heap@(Heap h ids) k mb_x' (Tagged tg_a cast_a@(mb_co, (rn, v))) in_e_heap = case k of
+     Loco _ -> Nothing
+     Car kf k -> case tagee kf of
+        TyApply ty'                    ->                  tyApply    (deeds `releaseDeeds` 1)          ty'
+        CoApply co'                    ->                  coApply    (deeds `releaseDeeds` 1)          co'
+        Apply x2'                      ->                  apply      deeds                    (tag kf) x2'
+        Scrutinise x' ty' in_alts      -> fmap normalise $ scrutinise (deeds `releaseDeeds` 1)          x' ty' in_alts
+        PrimApply pop tys' in_vs in_es -> fmap normalise $ primop     deeds                    (tag kf) pop tys' in_vs in_es
+        StrictLet x' in_e2             -> fmap normalise $ strictLet  (deeds `releaseDeeds` 1)          x' in_e2
+        -- NB: since there are never two adjacent casts on the stack, our reference expressions will always have at most 1 coercion
+        CastIt co'                     -> unwind' deeds heap k (fmap (second cast_e) mb_x') (castAnswer ids (tag kf) co' (Tagged tg_a cast_a)) (cast_e in_e_heap) -- TODO: can potentially release deeds from cast_a double-cast here
+          where cast_e in_e = (mkInScopeIdentityRenaming ids, annedTerm (tag kf) (renameIn (renameAnnedTerm ids) in_e `Cast` co'))
+        Update x'                      -> do
+            -- If duplicating values, we ensure normalisation by not executing updates
+            guard (not normalising || not dUPLICATE_VALUES_EVALUATOR)
+            return $ normalise (deeds', Heap (M.insert x' (internallyBound in_e_heap) h) ids, k, in_e')
+          where Just deeds' = claimDeeds (deeds `releaseDeeds` castAnswerSize cast_a) (annedTermSize (snd in_e_heap))
+                in_e' = case mb_x' of
+                          Nothing              -> (mkIdentityRenaming (unitVarSet x'), fmap Var $ annedVar (tag kf) x')
+                          Just (_, in_e_focus) -> in_e_focus
+      where
+        -- Should check this is not Nothing before using the "meaning" a
+        checkShouldExposeUnfolding = case mb_x' of
+                     -- FIXME: if we eliminate "dead" update frames, we should return True here or else WAY TOO MUCH will be treated as superinlinable
+                     Nothing -> Just True
+                     -- We have to check this here (not just when preparing unfoldings) because
+                     -- we would like to also exclude local recursive loops, not just top-level ones
+                     Just (x', _) -> case shouldExposeUnfolding x' of
+                       Right super  -> Just super
+                       Left why_not -> pprTrace "Unavailable:" (ppr x' <+> parens (text why_not)) $
+                                       fail why_not
+
+        -- NB: assumes that ei_state has the same size as what we would return from "step" if the dereferencing fails.
+        -- At the time of writing, this invariant holds (since turning terms into equivalent stack frames doesn't change size)
+        checkLambdaish :: UnnormalisedState -> Maybe State
+        checkLambdaish s'_unnormalised
+          -- If not duplicating values, we ensure normalisation by not executing applications to non-explicit-lambdas
+          | normalising, isJust mb_x', not dUPLICATE_VALUES_EVALUATOR = Nothing
+          | otherwise = do
+            super <- checkShouldExposeUnfolding
+            let s' = normalise s'_unnormalised
+            -- NB: you might think it would be OK to inline even when *normalising*, as long as the inlining
+            -- makes the term strictly smaller. This is very tempting, but we would have to check that the size
+            -- measure decreased by the inlining is the same as the size measure needed to prove normalisation of
+            -- the reduction system, and I'm too lazy to do that right now.
+            --
+            -- NB: it is hard to get a size-decrease check like this to work well. Consider:
+            --  bindIO getArgs (\x -> e)
+            -- This has 2 applications and one lambda (I've omitted all casts, since they cost 0). If we inline we get:
+            --  \s -> case getArgs s of (# x, s #) -> e s
+            -- This has 2 applications, one lambda, and ONE CASE EXPRESSION. So things must have got worse! But bindIO with
+            -- one known argument is almost a canonical example of the kind of thing we *would* like to inline, and the example
+            -- above assumes perfect reduction: note in particular that we've beta-reduced the (\x -> e) away entirely (and
+            -- done so within a case branch), so to even get this far we need "deep normalisation" *and* inlining used-once lams
+            -- as part of this check.
+            --
+            -- Perhaps if we had deep normalisation + GC we could get these results by penalising heap allocation heavily?
+            -- If so we must remember to do it for heap bindings *and* letrecs.
+            let k_summary = summariseContext h (Car kf k)
+            guard $ case () of
+              _ -- If the lambda is marked SUPERINLINABLE, always inline it
+                | super
+                -> True
+                -- If inlining gets us to a value, accept it. This is a bit ad-hoc for two reasons:
+                --  1. We might reach a value now, and then later apply two more arguments to effectively unconditionally inline a full application
+                --  2. We might need to do another round of inlining to actually expose a value e.g. g in (f = \x y -> e; g = \x -> f x; h = g x)
+                -- However, this is important so that speculation is able to turn partial applications into explicit values
+                | (_, _, k, qa) <- s'
+                , Answer _ <- annee qa
+                , Just _ <- isCastStack_maybe k -- Might be a trailing cast
+                -> True
+                -- If the result is actually smaller, accept it (this catches manifest values)
+                -- NB: garbage collect before comparison in case we inlined an internallyBound thing
+                -- into its only use site, which is a very important case to catch!
+                -- TODO: should gc the unnormalised state as well.. but risks non-termination?
+                | stateSize (gc s') <= either (stateSize . gc) unnormalisedStateSize ei_state
+                -> True
+                -- It might still be OK to get larger if GHC's inlining heuristics say we should
+                | Just (x', _) <- mb_x'
+                , ghcHeuristics x' (annedTerm tg_a (coercedAnswerToAnnedTerm' ids cast_a)) k_summary -- NB: the tag is irrelevant
+                -> True
+                -- Otherwise, we don't want to beta-reduce
+                | otherwise
+                -> pprTrace "Unwanted:" (pPrint (coercedAnswerToAnnedTerm' ids cast_a)) False
+            (case mb_x' of Just (x', _) -> pprTrace "Inlining" (ppr x'); Nothing -> id) $
+              return s'
+
+        tyApply :: Deeds -> Out Type -> Maybe State
+        tyApply deeds0 ty' = do
+          TyLambda x e_body <- return v
+          checkLambdaish $
+            let deeds1 = deeds0 `releaseDeeds` 1 -- Release deed associated with the lambda (but not its body)
+                (deeds2, k') = case mb_co of Uncast            -> (deeds1, k)
+                                             CastBy co' _tg_co -> appendCast deeds1 ids tg_a (co' `mk_inst` ty') k
+            in (deeds2, heap, k', (insertTypeSubst rn x ty', e_body))
+          where mk_inst = mkInstCo ids
+
+        coApply :: Deeds -> Out Coercion -> Maybe State
+        coApply deeds0 apply_co' = do
+          Lambda x e_body <- return v
+          checkLambdaish $
+            let deeds1 = deeds0 `releaseDeeds` 1 -- Release deed associated with the lambda (but not its body)
+            in case mb_co of
+                 Uncast            -> (deeds1, heap, k,  (insertCoercionSubst rn x apply_co',      e_body))
+                 CastBy co' _tg_co -> (deeds2, heap, k', (insertCoercionSubst rn x cast_apply_co', e_body))
+                   where -- Implements the special case of beta-reduction of cast lambda where the argument is an explicit coercion value.
+                         -- You can derive this rule from the rules in "Practical aspects of evidence-based compilation" by combining:
+                         --  1. TPush, to move the co' from the lambda to the argument and result (arg_co' and res_co')
+                         --  2. The rules in Figure 5, to replace a cast of a coercion value with a simple coercion value
+                         --  3. The fact that nth commutes with sym to clean up the result (can be proven from Figure 4)
+                         (arg_co', res_co') = (mkNthCo 0 co', mkNthCo 1 co')
+                         (arg_from_co', arg_to_co') = (mkNthCo 0 arg_co', mkNthCo 1 arg_co')
+                         cast_apply_co' = arg_from_co' `mk_trans` apply_co' `mk_trans` mk_sym arg_to_co'
+                         mk_trans = mkTransCo ids
+                         mk_sym = mkSymCo ids
+                         -- Maintain the no-adjacent-casts invariant
+                         (deeds2, k') = appendCast deeds1 ids tg_a res_co' k
+
+        apply :: Deeds -> Tag -> Out Var -> Maybe State
+        apply deeds0 tg_kf x' = do
+          Lambda x e_body <- return v
+          checkLambdaish $ case mb_co of
+              Uncast -> (deeds1, Heap h ids, k, (insertIdRenaming rn x x', e_body))
+                where deeds1 = deeds0 `releaseDeeds` 2 -- Release deed associated with the lambda (but not its body), AND that from the stack frame
+              CastBy co' _tg_co -> (deeds1, Heap (M.insert y' (internallyBound (renamedTerm e_arg)) h) ids', k', (rn', e_body))
+                where (ids', rn', y') = renameNonRecBinder ids rn (x `setIdType` arg_co_from_ty')
+                      Pair arg_co_from_ty' _arg_co_to_ty' = coercionKind arg_co'
+                      (arg_co', res_co') = (mkNthCo 0 co', mkNthCo 1 co')
+                      e_arg = annedTerm tg_a (annedTerm tg_kf (Var x') `Cast` mkSymCo ids arg_co')
+                      (deeds1, k') = appendCast deeds0 ids tg_a res_co' k -- Might release a deed if the final body coercion is refl
+
+        -- TODO: use checkShouldExposeUnfolding
+        scrutinise :: Deeds -> Out Var -> Out Type -> In [AnnedAlt] -> Maybe UnnormalisedState
+        scrutinise deeds_init wild' _ty' (rn_alts, alts)
+           -- Literals are easy -- we can make the simplifying assumption that the types of literals are
+           -- always simple TyCons without any universally quantified type variables.
+          | Literal l <- v
+          , case mb_co_kind of Nothing -> True; Just (_, _, Pair from_ty' to_ty') -> from_ty' `eqType` to_ty' -- NB: should never see refl here!
+          , (deeds2, alt_e):_ <- [(deeds1 `releaseDeeds` annedAltsSize rest, (rn_alts, alt_e)) | ((LiteralAlt alt_l, alt_e), rest) <- bagContexts alts, alt_l == l]
+          = Just (deeds2, Heap h1 ids, k, alt_e)
+          
+           -- Data is a big stinking mess! I hate you, KPush rule.
+          | Data dc tys cos xs <- v
+           -- a) Ensure that the coercion on the data (if any) lets us do the reduction, and determine
+           --    the appropriate coercions to use (if any) on each value argument to the DataCon
+          , Just mb_dc_cos <- case mb_co_kind of
+                                    Nothing -> return Nothing
+                                    Just (co', tg_co, Pair from_ty' to_ty') -> do
+                                      (from_tc, _from_tc_arg_tys') <- splitTyConApp_maybe from_ty'
+                                      (to_tc,   _to_tc_arg_tys')   <- splitTyConApp_maybe to_ty'
+                                      guard $ from_tc == to_tc
+                                      return $ Just $
+                                        let -- Substantially copied from CoreUnfold.exprIsConApp_maybe:
+                                            tc_arity       = tyConArity from_tc
+                                            dc_univ_tyvars = dataConUnivTyVars dc
+                                            dc_ex_tyvars   = dataConExTyVars dc
+                                            arg_tys        = dataConRepArgTys dc
+                                        
+                                            -- Make the "theta" from Fig 3 of the paper
+                                            (_univ_tys, ex_tys) = splitAt tc_arity tys
+                                            gammas = decomposeCo tc_arity co'
+                                            theta_subst = ASSERT2(length dc_univ_tyvars == tc_arity,      ppr dc $$ ppr dc_univ_tyvars $$ ppr tc_arity)
+                                                          ASSERT2(length dc_ex_tyvars   == length ex_tys, ppr dc $$ ppr dc_ex_tyvars $$ ppr (length ex_tys))
+                                                          liftCoSubstWith (dc_univ_tyvars ++ dc_ex_tyvars)
+                                                                          (gammas         ++ map mkReflCo ex_tys)
+                                        in map (\arg_ty -> (theta_subst arg_ty, tg_co)) arg_tys -- Use tag from the original coercion everywhere
+           -- b) Identify the first appropriate branch of the case and reduce -- apply the discovered coercions if necessary
+          , (deeds3, h', ids', alt_e):_ <- [ res
+                                           | ((DataAlt alt_dc alt_as alt_qs alt_xs, alt_e), rest) <- bagContexts alts
+                                           , alt_dc == dc
+                                           , let tys' = map (renameType ids rn) tys
+                                                 cos' = map (renameCoercion ids rn) cos
+                                                 xs' = map (renameId rn) xs
+                                                 rn_alts' = insertTypeSubsts rn_alts (alt_as `zip` tys')
+                                                 deeds2 = deeds1 `releaseDeeds` annedAltsSize rest
+                                           , Just res <- [do (deeds3, h', ids', rn_alts') <- case mb_dc_cos of
+                                                               Nothing     -> return (deeds2, h1, ids, insertIdRenamings (insertCoercionSubsts rn_alts' (alt_qs `zip` cos')) (alt_xs `zip` xs'))
+                                                               Just dc_cos -> foldM (\(deeds, h, ids, rn_alts) (uncast_e_arg', alt_y, (dc_co, tg_co)) ->
+                                                                                        let Pair _dc_co_from_ty' dc_co_to_ty' = coercionKind dc_co -- TODO: use to_tc_arg_tys' from above?
+                                                                                            (ids', rn_alts', y') = renameNonRecBinder ids rn_alts (alt_y `setIdType` dc_co_to_ty')
+                                                                                            e_arg = annedTerm tg_co $ annedTerm tg_a uncast_e_arg' `Cast` dc_co
+                                                                                        in fmap (\deeds' -> (deeds', M.insert y' (internallyBound (renamedTerm e_arg)) h, ids', rn_alts')) $ claimDeeds deeds (annedSize e_arg))
+                                                                                    (deeds2, h1, ids, rn_alts') (zip3 (map (Value . Coercion) cos' ++ map Var xs') (alt_qs ++ alt_xs) dc_cos)
+                                                             return (deeds3, h', ids', (rn_alts', alt_e))]
+                                           ]
+          = Just (deeds3, Heap h' ids', k, alt_e)
+          
+           -- Thank god, default alternatives are trivial:
+          | (deeds2, alt_e):_ <- [(deeds1 `releaseDeeds` annedAltsSize rest, (rn_alts, alt_e)) | ((DefaultAlt, alt_e), rest) <- bagContexts alts]
+          = Just (deeds2, Heap h1 ids, k, alt_e)
+          
+           -- This can legitimately occur, e.g. when supercompiling (if x then (case x of False -> 1) else 2)
+          | otherwise
+          = Nothing
+          where mb_co_kind = case mb_co of
+                               Uncast          -> Nothing
+                               CastBy co tg_co -> Just (co, tg_co, coercionKind co)
+                deeds0 = deeds_init `releaseDeeds` castAnswerSize cast_a
+                (deeds1, h1) | isDeadBinder wild' = (deeds0,                         h)
+                             | otherwise          = (deeds0', M.insert wild' wild_hb h)
+                               where wild_hb = internallyBound in_e_heap
+                                     Just deeds0' = claimDeeds deeds0 (annedTermSize (snd in_e_heap))
+                                     -- NB: we add the *non-dereferenced* value to the heap for a case wildcard, because anything else may duplicate allocation
+
+        -- NB: this actually duplicates the answer "a" into the answers field of the PrimApply, even if that "a" is update-bound
+        -- This isn't perhaps in the spirit of the rest of the evaluator, but it probably doesn't matter at all in practice.
+        -- TODO: use checkShouldExposeUnfolding?
+        primop :: Deeds -> Tag -> PrimOp -> [Out Type] -> [Anned (Coerced Answer)] -> [In AnnedTerm] -> Maybe UnnormalisedState
+        primop deeds tg_kf pop tys' anned_as [] = do
+            guard eVALUATE_PRIMOPS -- NB: this is not faithful to paper 1 because we still turn primop expressions into
+                                   -- stack frames.. this is bad because it will impede good specilations (without smart generalisation)
+            let tg_kf' = tg_kf { tagOccurrences = if oCCURRENCE_GENERALISATION then tagOccurrences tg_kf + sum (map tagOccurrences (tg_a : map annedTag anned_as)) else 1 }
+            anned_cast_a' <- evaluatePrim ids tg_kf' pop tys' (map annee anned_as ++ [cast_a])
+            deeds <- claimDeeds (deeds `releaseDeeds` (sum (map annedSize anned_as) + castAnswerSize cast_a + 1)) (annedSize anned_cast_a') -- I don't think this can ever fail
+            return (deeds, heap, k, taggedCastAnswerToInAnnedTerm ids (annedToTagged anned_cast_a'))
+        primop deeds tg_kf pop tys' anned_as in_es = case in_es of
+            (in_e:in_es) -> Just (deeds, heap, Tagged tg_kf (PrimApply pop tys' (anned_as ++ [annedCoercedAnswer tg_a cast_a]) in_es) `Car` k, in_e)
+            []           -> Nothing
+
+        strictLet :: Deeds -> Out Var -> In AnnedTerm -> Maybe UnnormalisedState
+        strictLet deeds x' in_e2 = Just (deeds', Heap (M.insert x' (internallyBound in_e_heap) h) ids, k, in_e2)
+          where Just deeds' = claimDeeds (deeds `releaseDeeds` castAnswerSize cast_a) (annedTermSize (snd in_e_heap))
+
+
+-- We don't want to expose an unfolding if it would not be inlineable in the initial phase.
+-- This gives normal RULES more of a chance to fire.
+--
+-- NB: this only controls whether a particular definition is available for inlining at all.
+-- For the inlining to actualy happen we also make a check at the call-site that considers
+-- whether it is benefical to inline the definition into the particular context we find.
+--
+-- NB: a place where these heuristics really hurt is:
+--   {-# INLINE [0] foldr #-}
+--   foldr k z = go
+--             where
+--               go []     = z
+--               go (y:ys) = y `k` go ys
+--
+-- Although foldr gets inlined by the supercompiler, "go" is a local recursive loop which
+-- doesn't get inlined at all. See also:
+--  {-# SUPERINLINABLE sum #-}
+--  sum     l       = sum' l 0
+--    where
+--      sum' []     a = a
+--      sum' (x:xs) a = sum' xs (a+x)
+--
+-- A possible solution is to mark all those Ids syntactically contained within a SUPERINLINABLE
+-- unfolding as SUPERINLINABLE if they are not explicitly INLINE/NOINLINE. We can do this when
+-- we construct the unfoldings in the first place.
+shouldExposeUnfolding :: Id -> Either String Superinlinable
+shouldExposeUnfolding x = case inl_inline inl_prag of
+    -- FIXME: God help my soul
+    _ | Just mod <- nameModule_maybe (idName x)
+      , moduleName mod `elem` map mkModuleName [
+          "Data.Complex", "GHC.List",
+          "QSort", -- awards
+          "Checker", "Lisplikefns", "Rewritefns", "Rulebasetext", -- boyer2
+          "Auxil", "Interval", "Key", "Prog", -- cichelli
+          "MonadState", "MonadTrans", -- cryptarithm2
+          "StateMonad", -- cse
+          "Knowledge", "Result", "Search", "Table", "Match", -- expert
+          "Fourier", "Complex_Vectors", -- fft2
+          "RA", "RC", "RG", "RU", "Types", -- nucleic2
+          "ChessSetArray", "ChessSetList", "KnightHeuristic", "Queue", "Sort", -- knights
+          "Mandel", -- mandel
+          "Move", "Problem", "Solution", -- mate
+          "Board", "Game", "Prog", "Tree", "Wins", -- minimax
+          "CharSeq", "Pretty", -- pretty
+          "IntLib", "MyRandom", "Prime", -- primetest
+          "Digraph" -- scc
+        ]
+      -> Right True
+    -- These get wrappers generated for them: be very eager to inline the wrappers
+      | isPrimOpId x || isDataConWorkId x
+      -> Right True
+    -- NB: we don't check the activation on INLINE things because so many activations
+    -- are used to ensure that e.g. RULE-based fusion works properly, and NOINLINE will
+    -- generally impede supercompiler-directed fusion.
+    --
+    -- Our philosophy: if it is *ever* inlinable (in any phase), expose it
+    Inline                                 -> Right True -- Don't check for size increase at all if marked INLINE
+    Inlinable super
+      | only_if_superinlinable, not super  -> Left "INLINEABLE but not SUPERINLINABLE"
+      | otherwise                          -> Right super
+    NoInline
+      | isNeverActive (inl_act inl_prag)   -> Left "unconditional NONLINE"
+      | only_if_superinlinable             -> Left "conditional NOINLINE, not SUPERINLINABLE"
+    EmptyInlineSpec 
+      | only_if_superinlinable             -> Left "not SUPERINLINABLE"
+    _                                      -> Right False
+  where inl_prag = idInlinePragma x
+        -- EXPERIMENT: only respect the SUPERINLINABLE distinction on *loop breakers*
+        -- The motivation is that we don't really want to go around annotating (GHC.Base.>>=),
+        -- bindIO, etc etc as SUPERINLINABLE.
+        only_if_superinlinable = case sUPERINLINABILITY of
+          ForRecursion  -> isStrongLoopBreaker (idOccInfo x)
+          ForEverything -> True
+          ForNothing    -> False
+
+
+-- We used to garbage-collect in the evaluator, when we executed the rule for update frames. This had two benefits:
+--  1) We don't have to actually update the heap or even claim a new deed
+--  2) We make the supercompiler less likely to terminate, because GCing so tends to reduce TagBag sizes
+--
+-- However, this caused problems with speculation: to prevent incorrectly garbage collecting bindings from the invisible "enclosing"
+-- heap when we speculated one of the bindings from the heap, we had to pass around an extra "live set" of parts of the heap that might
+-- be referred to later on. Furthermore:
+--  * Finding FVs when executing every update step was a bit expensive (though they were memoized on each of the State components)
+--  * This didn't GC cycles (i.e. don't consider stuff from the Heap that was only referred to by the thing being removed as "GC roots")
+--  * It didn't seem to make any difference to the benchmark numbers anyway
+--
+-- You might think a good alternative approach is to:
+-- 1. Drop dead update frames in transitiveInline (which is anyway responsible for ensuring there is no dead stuff in the stack)
+-- 2. "Squeeze" just before the matcher: this shorts out indirections-to-indirections and does update-frame stack squeezing.
+--    You might also think that it would be cool to just do this in normalisation, but then when normalising during specualation the enclosing
+--    context wouldn't get final_rned :-(
+--
+-- HOWEVER. That doesn't work properly because normalisation itself can introduce dead bindings - i.e. in order to be guaranteed to
+-- catch all the junk we have to GC normalised bindings, not the pre-normalised ones that transitiveInline sees. So instead I did
+-- both points 1 and 2 right just before we go to the matcher.
+--
+-- HOWEVER. Simon suggested something that made me realise that actually we could do squeezing of consecutive update frames and
+-- indirection chains in the evaluator (and thus the normaliser) itself, which is even cooler. Thus all that is left to do in the
+-- GC is to make a "global" analysis that drops stuff that is definitely dead. We *still* want to run this just before the matcher because
+-- although dead heap bindings don't bother it, it would be confused by dead update frames.
+--
+-- TODO: have the garbage collector collapse (let x = True in x) to (True) -- but note that this requires onceness analysis
+gc :: State -> State
+gc _state@(deeds0, Heap h ids, k, in_e)
+  = {-# SCC "gc" #-}
+    ASSERT2(stateUncoveredVars gced_state `subVarSet` stateUncoveredVars _state, ppr (stateUncoveredVars gced_state, PrettyDoc (pPrintFullState quietStatePrettiness _state), PrettyDoc (pPrintFullState quietStatePrettiness gced_state)))
+    gced_state -- We do not insist that *no* variables are uncovered because when used from the speculator this may not be true
+  where
+    gced_state = (deeds2, Heap h' ids, k', in_e)
+    
+    -- We have to use stateAllFreeVars here rather than stateFreeVars because in order to safely prune the live stack we need
+    -- variables bound by k to be part of the live set if they occur within in_e or the rest of the k
+    live0 = stateAllFreeVars (deeds0, Heap M.empty ids, k, in_e)
+    (deeds1, h', live1) = inlineLiveHeap deeds0 h live0
+    -- Collecting dead update frames doesn't make any new heap bindings dead since they don't refer to anything
+    (deeds2, k') | False     = pruneLiveStack deeds1 k live1
+                 | otherwise = (deeds1, k) -- FIXME: turned this off for now because it means that the resulting term might not be normalised (!!!)
+                                           -- NB: if you change this check out checkShouldExposeUnfolding as well
+    
+    inlineLiveHeap :: Deeds -> PureHeap -> FreeVars -> (Deeds, PureHeap, FreeVars)
+    inlineLiveHeap deeds h live = (foldr (flip releaseHeapBindingDeeds . snd) deeds h_dead_kvs, h_live, live')
+      where
+        (h_dead_kvs, h_live, live') = heap_worker (M.toAscList h) M.empty live
+        
+        -- This is just like Split.transitiveInline, but simpler since it never has to worry about running out of deeds:
+        heap_worker :: [(Var, HeapBinding)]   -- Possibly-dead heap as sorted list. NB: not a PureHeap map because creating..
+                    -> PureHeap -> FreeVars   -- ..the map on every iteration showed up as 6% of SC allocations!
+                    -> ([(Var, HeapBinding)], -- Dead heap
+                        PureHeap, FreeVars)
+        heap_worker h_pending h_output live
+          = if live == live'
+            then (h_pending', h_output', live')
+            else heap_worker h_pending' h_output' live'
+          where 
+            (h_pending', h_output', live') = foldr consider_inlining ([], h_output, live) h_pending
+        
+            -- NB: It's important that type variables become live after inlining a binding, or we won't
+            -- necessarily lambda-abstract over all the free type variables of a h-function
+            consider_inlining (x', hb) (h_pending_kvs, h_output, live)
+              | x' `elemVarSet` live = (h_pending_kvs,            M.insert x' hb h_output, live `unionVarSet` heapBindingFreeVars hb `unionVarSet` varBndrFreeVars x')
+              | otherwise            = ((x', hb) : h_pending_kvs, h_output,                live)
+    
+    -- NB: doing this is cool yet also dangerous at the same time. What if we have:
+    --  {-# NOINLINE foo #-}
+    --  foo = \x -> e
+    --
+    --  root = case foo 100 of \Delta
+    --
+    -- After normalisation + GCing (including dropping dead update frames) we will basically get:
+    --  case (\x -> e) 100 of \Delta
+    --
+    -- So this is really bad because we have lost the NOINLINE information!
+    -- Of course, this is also sometimes cool because it turns non-normalising beta-reductions into manifestly normalising ones.
+    --
+    -- My compromise is to allow dumping only those binders with "shortable" IdInfo, where shortability
+    -- is a notion stolen from GHCs simplifier.
+    --
+    -- TODO: perhaps this same check should be applied in the Update frame compressor, though that would destroy some stack invariants
+    pruneLiveStack :: Deeds -> Stack -> FreeVars -> (Deeds, Stack)
+    pruneLiveStack init_deeds k live = trainFoldr (\kf (deeds, k_live) -> if (case tagee kf of Update x' | isShortableIdInfo (idInfo x') -> x' `elemVarSet` live; _ -> True)
+                                                                          then (deeds, kf `Car` k_live)
+                                                                          else (deeds `releaseStackFrameDeeds` kf, k_live))
+                                                  (\gen deeds -> (deeds, Loco gen)) init_deeds k
--- a/compiler/supercompile/Supercompile/Evaluator/FreeVars.hs
+++ b/compiler/supercompile/Supercompile/Evaluator/FreeVars.hs
+module Supercompile.Evaluator.FreeVars (
+    inFreeVars,
+    heapBindingFreeVars,
+    pureHeapBoundVars, stackBoundVars, stackFreeVars, stackOpenFreeVars, stackFrameBoundVars, stackFrameFreeVars,
+    qaFreeVars, pureHeapVars,
+    unnormalisedStateFreeVars, unnormalisedStateUncoveredVars,
+    stateFreeVars, stateAllFreeVars, stateLetBounders, stateLambdaBounders, stateInternalBounders, stateUncoveredVars,
+    module Supercompile.Core.FreeVars
+  ) where
+
+import Supercompile.Evaluator.Syntax
+
+import Supercompile.Core.FreeVars
+import Supercompile.Core.Renaming
+
+import Supercompile.Utilities
+
+import qualified Data.Map as M
+
+
+-- | Finds the set of things "referenced" by a 'HeapBinding': this is only used to construct tag-graphs
+heapBindingFreeVars :: HeapBinding -> FreeVars
+heapBindingFreeVars = maybe emptyVarSet (inFreeVars annedTermFreeVars) . heapBindingTerm
+
+-- | Returns all the variables bound by the heap that we might have to residualise in the splitter
+pureHeapBoundVars :: PureHeap -> BoundVars
+pureHeapBoundVars = mkVarSet . M.keys -- I think its harmless to include variables bound by phantoms in this set
+
+-- | Returns all the variables bound by the stack that we might have to residualise in the splitter
+stackBoundVars :: Stack -> BoundVars
+stackBoundVars = fst . stackOpenFreeVars
+
+-- | Returns all the variables referred to by the stack, even ones also bound by the stack
+stackFreeVars :: Stack -> FreeVars
+stackFreeVars = snd . stackOpenFreeVars
+
+stackOpenFreeVars :: Stack -> (BoundVars, FreeVars)
+stackOpenFreeVars = (unionVarSets *** unionVarSets) . unzip . map (stackFrameOpenFreeVars . tagee) . trainCars
+
+stackFrameBoundVars :: StackFrame -> BoundVars
+stackFrameBoundVars = fst . stackFrameOpenFreeVars
+
+stackFrameFreeVars :: StackFrame -> FreeVars
+stackFrameFreeVars = snd . stackFrameOpenFreeVars
+
+stackFrameOpenFreeVars :: StackFrame -> (BoundVars, FreeVars)
+stackFrameOpenFreeVars kf = case kf of
+    TyApply ty'              -> (emptyVarSet, tyVarsOfType ty')
+    CoApply co'              -> (emptyVarSet, tyCoVarsOfCo co')
+    Apply x'                 -> (emptyVarSet, unitVarSet x')
+    Scrutinise x' ty in_alts -> (emptyVarSet, (nonRecBinderFreeVars x' (inFreeVars annedAltsFreeVars in_alts)) `unionVarSet` tyVarsOfType ty)
+    PrimApply _ tys as in_es -> (emptyVarSet, unionVarSets (map tyVarsOfType tys) `unionVarSet` unionVarSets (map annedFreeVars as) `unionVarSet` unionVarSets (map (inFreeVars annedTermFreeVars) in_es))
+    StrictLet x' in_e2       -> (emptyVarSet, nonRecBinderFreeVars x' (inFreeVars annedTermFreeVars in_e2))
+    Update x'                -> (unitVarSet x', idBndrFreeVars x')
+    CastIt co'               -> (emptyVarSet, tyCoVarsOfCo co')
+
+
+-- | Computes the variables bound and free in a state
+unnormalisedStateVars :: UnnormalisedState -> (HowBound -> BoundVars, FreeVars)
+stateVars :: State -> (HowBound -> BoundVars, FreeVars)
+pureHeapVars :: PureHeap -> (HowBound -> BoundVars, FreeVars)
+(unnormalisedStateVars, stateVars, pureHeapVars)
+  = (\(_, Heap h _, k, in_e) -> finish $ pureHeapOpenFreeVars h (stackOpenFreeVars' k (inFreeVars annedFreeVars in_e)),
+     \(_, Heap h _, k, a)    -> finish $ pureHeapOpenFreeVars h (stackOpenFreeVars' k (annedFreeVars a)),
+     \h -> finish $ pureHeapOpenFreeVars h (emptyVarSet, emptyVarSet))
+  where
+    finish ((bvs_internal, bvs_lambda, bvs_let), fvs) = (\how -> case how of InternallyBound -> bvs_internal; LambdaBound -> bvs_lambda; LetBound -> bvs_let, fvs)
+    
+    pureHeapOpenFreeVars :: PureHeap -> (BoundVars, FreeVars) -> ((BoundVars, BoundVars, BoundVars), FreeVars)
+    pureHeapOpenFreeVars h (bvs_internal, fvs) = (\f -> M.foldrWithKey f ((bvs_internal, emptyVarSet, emptyVarSet), fvs) h) $ \x' hb ((bvs_internal, bvs_lambda, bvs_let), fvs) -> (case howBound hb of
+        InternallyBound -> (bvs_internal `extendVarSet` x', bvs_lambda, bvs_let)
+        LambdaBound     -> (bvs_internal, bvs_lambda `extendVarSet` x', bvs_let)
+        LetBound        -> (bvs_internal, bvs_lambda, bvs_let `extendVarSet` x'),
+        fvs `unionVarSet` varBndrFreeVars x' `unionVarSet` heapBindingFreeVars hb)
+    
+    stackOpenFreeVars' :: Stack -> FreeVars -> (BoundVars, FreeVars)
+    stackOpenFreeVars' k fvs = case stackOpenFreeVars k of (k_bvs, k_fvs) -> (k_bvs, fvs `unionVarSet` k_fvs)
+
+
+qaFreeVars :: QA -> FreeVars
+qaFreeVars (Question x') = unitVarSet x'
+qaFreeVars (Answer a)    = answerFreeVars' a
+
+
+-- | Returns (an overapproximation of) the free variables that the state would have if it were residualised right now (i.e. variables bound by phantom bindings *are* in the free vars set)
+stateFreeVars :: State -> FreeVars
+stateFreeVars s = fvs `minusVarSet` bvs InternallyBound
+  where (bvs, fvs) = stateVars s
+
+unnormalisedStateFreeVars :: UnnormalisedState -> FreeVars
+unnormalisedStateFreeVars s = fvs `minusVarSet` bvs InternallyBound
+  where (bvs, fvs) = unnormalisedStateVars s
+
+unnormalisedStateUncoveredVars :: UnnormalisedState -> FreeVars
+unnormalisedStateUncoveredVars s = fvs `minusVarSet` bvs InternallyBound `minusVarSet` bvs LetBound `minusVarSet` bvs LambdaBound
+  where (bvs, fvs) = unnormalisedStateVars s
+
+
+stateAllFreeVars :: State -> FreeVars
+stateAllFreeVars = snd . stateVars
+
+stateLetBounders :: State -> BoundVars
+stateLetBounders = ($ LetBound) . fst . stateVars
+
+stateLambdaBounders :: State -> BoundVars
+stateLambdaBounders = ($ LambdaBound) . fst . stateVars
+
+stateInternalBounders :: State -> BoundVars
+stateInternalBounders = ($ InternallyBound) . fst . stateVars
+
+stateUncoveredVars :: State -> FreeVars
+stateUncoveredVars s = fvs `minusVarSet` bvs InternallyBound `minusVarSet` bvs LetBound `minusVarSet` bvs LambdaBound
+  where (bvs, fvs) = stateVars s
--- a/compiler/supercompile/Supercompile/Evaluator/Residualise.hs
+++ b/compiler/supercompile/Supercompile/Evaluator/Residualise.hs
+module Supercompile.Evaluator.Residualise (
+    residualiseState, residualiseHeapBinding,
+    
+    pPrintHeap,
+
+    StatePrettiness(..), fullStatePrettiness, quietStatePrettiness,
+    pPrintFullState, pPrintFullUnnormalisedState
+  ) where
+
+import Supercompile.Evaluator.Deeds
+import Supercompile.Evaluator.Syntax
+
+import Supercompile.Core.FreeVars
+import Supercompile.Core.Renaming
+import Supercompile.Core.Syntax
+
+import Supercompile.Utilities
+
+import Var (isLocalId)
+
+import Data.Either
+import qualified Data.Map as M
+import qualified Data.Set as S
+import Data.Ord
+
+
+class Symantics ann => Symantics' ann where
+    inject :: AnnedTerm -> ann (TermF ann)
+    fvs :: ann (TermF ann) -> FreeVars
+
+instance Symantics' Identity where
+    inject = annedTermToTerm
+    fvs = termFreeVars
+
+instance Symantics' FVed where
+    inject = annedTermToFVedTerm
+    fvs = fvedTermFreeVars
+
+
+{-# SPECIALISE residualiseState :: State -> (Deeds, Out [(Var, PrettyFunction)], Out FVedTerm, Generalised) #-}
+residualiseState :: Symantics' ann => State -> (Deeds, Out [(Var, PrettyFunction)], Out (ann (TermF ann)), Generalised)
+residualiseState s = (deeds, floats_static, bindManyMixedLiftedness fvs floats_nonstatic e, gen)
+ where (deeds, floats_static, floats_nonstatic, e, gen) = residualiseUnnormalisedState (denormalise s)
+
+residualiseUnnormalisedState :: Symantics' ann => UnnormalisedState -> (Deeds, Out [(Var, PrettyFunction)], Out [(Var, ann (TermF ann))], Out (ann (TermF ann)), Generalised)
+residualiseUnnormalisedState (deeds, heap, k, in_e) = (deeds, floats_static, floats_nonstatic, e, gen)
+  where (floats_static, floats_nonstatic, e, gen) = residualiseHeap heap (\ids -> residualiseStack ids k (residualiseTerm ids in_e))
+
+residualiseCoercedAnswer :: Symantics' ann => InScopeSet -> Anned (Coerced Answer) -> Out (ann (TermF ann))
+residualiseCoercedAnswer ids = inject . fmap (coercedAnswerToAnnedTerm' ids)
+
+residualiseTerm :: Symantics' ann => InScopeSet -> In AnnedTerm -> Out (ann (TermF ann))
+residualiseTerm ids = inject . renameIn (renameAnnedTerm ids)
+
+residualiseHeap :: Symantics' ann => Heap -> (InScopeSet -> ((Out [(Var, PrettyFunction)], Out [(Var, ann (TermF ann))]), Out (ann (TermF ann)), Generalised)) -> (Out [(Var, PrettyFunction)], Out [(Var, ann (TermF ann))], Out (ann (TermF ann)), Generalised)
+residualiseHeap (Heap h ids) resid_body = (floats_static_h ++ floats_static_k, floats_nonstatic_h ++ floats_nonstatic_k, e, gen)
+  where (floats_static_h, floats_nonstatic_h) = residualisePureHeap ids h
+        ((floats_static_k, floats_nonstatic_k), e, gen) = resid_body ids
+
+residualisePureHeap :: Symantics' ann => InScopeSet -> PureHeap -> (Out [(Var, PrettyFunction)], Out [(Var, ann (TermF ann))])
+residualisePureHeap ids h = partitionEithers [fmapEither ((,) x') ((,) x') (residualiseHeapBinding ids hb) | (x', hb) <- M.toList h]
+
+residualiseHeapBinding :: Symantics' ann => InScopeSet -> HeapBinding -> Either (Out PrettyFunction) (Out (ann (TermF ann)))
+residualiseHeapBinding ids (HB InternallyBound (Right in_e)) = Right (residualiseTerm ids in_e)
+residualiseHeapBinding _   hb                                = Left (asPrettyFunction hb)
+
+residualiseStack :: Symantics' ann => InScopeSet -> Stack -> Out (ann (TermF ann)) -> ((Out [(Var, PrettyFunction)], Out [(Var, ann (TermF ann))]), Out (ann (TermF ann)), Generalised)
+residualiseStack _   (Loco gen) e_body = (([], []), e_body, gen)
+residualiseStack ids (Car kf k) e_body = first3 ((static_floats ++) *** (nonstatic_floats ++)) $ residualiseStack ids k e
+  where ((static_floats, nonstatic_floats), e) = residualiseStackFrame ids (tagee kf) e_body
+
+residualiseStackFrame :: Symantics' ann => InScopeSet -> StackFrame -> Out (ann (TermF ann)) -> ((Out [(Var, PrettyFunction)], Out [(Var, ann (TermF ann))]), Out (ann (TermF ann)))
+residualiseStackFrame _   (TyApply ty')               e  = (([], []), e `tyApp` ty')
+residualiseStackFrame _   (CoApply co')               e  = (([], []), e `coApp` co')
+residualiseStackFrame _   (Apply x2')                 e1 = (([], []), e1 `app` x2')
+residualiseStackFrame ids (Scrutinise x' ty in_alts)  e  = (([], []), case_ e x' ty (map (second inject) $ renameIn (renameAnnedAlts ids) in_alts))
+residualiseStackFrame ids (PrimApply pop tys' as es') e  = (([], []), primOp pop tys' (map (residualiseCoercedAnswer ids) as ++ e : map (residualiseTerm ids) es'))
+residualiseStackFrame ids (StrictLet x' in_e2)        e1 = (([], []), let_ x' e1 (residualiseTerm ids in_e2))
+residualiseStackFrame _   (Update x')                 e  = (([], [(x', e)]), var x')
+residualiseStackFrame _   (CastIt co')                e  = (([], []), e `cast` co')
+
+
+pPrintHeap :: Heap -> SDoc
+pPrintHeap (Heap h ids) = pPrint $ map (first (PrettyDoc . pPrintBndr LetBind)) $ floats_static_h ++ [(x, asPrettyFunction1 (e :: Term)) | (x, e) <- floats_nonstatic_h]
+  where (floats_static_h, floats_nonstatic_h) = residualisePureHeap ids h
+
+data StatePrettiness = SP { includeLams :: Bool, includeStatics :: Bool, excludeBindings :: S.Set Var }
+
+fullStatePrettiness, quietStatePrettiness :: StatePrettiness
+fullStatePrettiness = SP True True S.empty
+quietStatePrettiness = SP False False S.empty
+
+pPrintFullState :: StatePrettiness -> State -> SDoc
+pPrintFullState sp = pPrintFullUnnormalisedState sp . denormalise
+
+pPrintFullUnnormalisedState :: StatePrettiness -> UnnormalisedState -> SDoc
+pPrintFullUnnormalisedState sp state
+  = {-# SCC "pPrintFullUnnormalisedState" #-}
+    text "Deeds:" <+> pPrint deeds $$ (if includeStatics sp then pPrint (map (first (PrettyDoc . pPrintBndr LetBind)) floats_static) else empty) $$ body $$ (if null floats_nonstatic_excluded then empty else ppr (S.fromList (map fst floats_nonstatic_excluded)))
+  where (deeds, floats_static, floats_nonstatic_unfiltered, e, gen) = residualiseUnnormalisedState state
+        (floats_nonstatic_excluded, floats_nonstatic) = partition (flip S.member (excludeBindings sp) . fst) floats_nonstatic_unfiltered
+        floats_nonstatic_pretty
+          | includeLams sp = map (second asPrettyFunction) floats_nonstatic
+          | otherwise      = map snd $ sortBy (comparing (Down . fst)) $
+                               [(non_lam, (x, if non_lam then asPrettyFunction e
+                                                         else PrettyFunction (\_ -> text "..." <+> braces (hsep [ppr x <> char ',' | x <- varSetElems (fvedTermFreeVars e), isLocalId x]))))
+                               | (x, e) <- floats_nonstatic
+                               , let non_lam = case extract e of Value (Lambda _ _) -> False; Value (TyLambda _ _) -> False; _ -> True]
+        body = pPrintPrecWhere noPrec floats_nonstatic_pretty (PrettyDoc ((if includeStatics sp && gen then char '?' else empty) <> angleBrackets (pPrint e)))
--- a/compiler/supercompile/Supercompile/Evaluator/Syntax.hs
+++ b/compiler/supercompile/Supercompile/Evaluator/Syntax.hs
+{-# OPTIONS_GHC -fno-warn-missing-signatures #-}
+module Supercompile.Evaluator.Syntax where
+
+#include "HsVersions.h"
+
+import Supercompile.Evaluator.Deeds
+
+import Supercompile.Core.FreeVars
+import Supercompile.Core.Renaming
+import Supercompile.Core.Size
+import Supercompile.Core.Syntax
+import Supercompile.Core.Tag
+
+import Supercompile.StaticFlags
+import Supercompile.Utilities
+
+import Id       (Id, idType, zapIdOccInfo)
+import PrimOp   (primOpType)
+import Type     (applyTy, applyTys, isUnLiftedType, splitTyConApp_maybe, mkTyVarTy)
+import Pair     (pSnd)
+import Coercion (coercionType, coercionKind, mkCoVarCo)
+
+import qualified Data.Map as M
+import qualified Data.Foldable as Foldable
+import qualified Data.Traversable as Traversable
+
+
+type Anned = O Tagged (O Sized FVed)
+type AnnedTerm = Anned (TermF Anned)
+type AnnedValue = ValueF Anned
+type AnnedAlt = AltF Anned
+
+annee :: Anned a -> a
+annee = extract
+
+annedSize :: Anned a -> Size
+annedSize = size . unComp . tagee . unComp
+
+annedFreeVars :: Anned a -> FreeVars
+annedFreeVars = freeVars . sizee . unComp . tagee . unComp
+
+annedTag :: Anned a -> Tag
+annedTag = tag . unComp
+
+annedToTagged :: Anned a -> Tagged a
+annedToTagged x = Tagged (annedTag x) (annee x)
+
+renameAnned :: In (Anned a) -> Anned (In a)
+renameAnned (rn, Comp (Tagged tg (Comp (Sized sz (FVed fvs x)))))
+  = Comp (Tagged tg (Comp (Sized sz (FVed (renameFreeVars rn fvs) (rn, x)))))
+
+
+renamedValue :: AnnedValue -> In AnnedValue
+renamedValue v = (mkIdentityRenaming (annedValueFreeVars' v), v)
+
+renamedTerm :: AnnedTerm -> In AnnedTerm
+renamedTerm e = (mkIdentityRenaming (annedTermFreeVars e), e)
+
+
+annedVarFreeVars' = taggedSizedFVedVarFreeVars'
+annedTermFreeVars = taggedSizedFVedTermFreeVars
+annedTermFreeVars' = taggedSizedFVedTermFreeVars'
+annedValueFreeVars = taggedSizedFVedValueFreeVars
+annedValueFreeVars' = taggedSizedFVedValueFreeVars'
+annedAltsFreeVars = taggedSizedFVedAltsFreeVars
+
+annedTermSize' = taggedSizedFVedTermSize'
+annedTermSize = taggedSizedFVedTermSize
+annedValueSize' = taggedSizedFVedValueSize'
+annedValueSize = taggedSizedFVedValueSize
+annedAltsSize = taggedSizedFVedAltsSize
+
+renameAnnedTerm = renameTaggedSizedFVedTerm :: InScopeSet -> Renaming -> AnnedTerm -> AnnedTerm
+renameAnnedValue = renameTaggedSizedFVedValue
+renameAnnedValue' = renameTaggedSizedFVedValue'
+renameAnnedAlts = renameTaggedSizedFVedAlts
+
+annedTermToTerm = taggedSizedFVedTermToTerm
+annedTermToFVedTerm = taggedSizedFVedTermToFVedTerm
+annedTermToFVedTerm' = taggedSizedFVedTermToFVedTerm'
+annedValueToFVedValue = taggedSizedFVedValueToFVedValue
+annedValueToFVedValue' = taggedSizedFVedValue'ToFVedValue'
+annedAltsToFVedAlts = taggedSizedFVedAltsToFVedAlts
+
+
+annedTerm :: Tag -> TermF Anned -> AnnedTerm
+annedTerm  tg e = Comp (Tagged tg (Comp (Sized (annedTermSize' e)  (FVed (annedTermFreeVars' e)  e))))
+
+annedValue :: Tag -> ValueF Anned -> Anned AnnedValue
+annedValue tg v = Comp (Tagged tg (Comp (Sized (annedValueSize' v) (FVed (annedValueFreeVars' v) v))))
+
+annedVar :: Tag -> Out Var -> Anned Var
+annedVar tg x = Comp (Tagged tg (Comp (Sized 1 (FVed (annedVarFreeVars' x) x))))
+
+annedAnswer :: Tag -> Answer -> Anned Answer
+annedAnswer tg a = Comp (Tagged tg (Comp (Sized (answerSize' a) (FVed (answerFreeVars' a) a))))
+
+annedCoercedAnswer :: Tag -> Coerced Answer -> Anned (Coerced Answer)
+annedCoercedAnswer tg cast_a = Comp (Tagged tg (Comp (Sized (coercedSize answerSize' cast_a) (FVed (coercedFreeVars answerFreeVars' cast_a) cast_a))))
+
+annedQA :: Tag -> QA -> Anned QA
+annedQA tg (Question x) = fmap Question (annedVar tg x)
+annedQA tg (Answer a)   = fmap Answer (annedAnswer tg a)
+
+
+toAnnedTerm :: UniqSupply -> Term -> AnnedTerm
+toAnnedTerm tag_ids = tagFVedTerm tag_ids . reflect
+
+
+mkVarCastBy :: Tag -> Out Var -> CastBy -> In AnnedTerm
+mkVarCastBy tg_y y' cast_by = (mkIdentityRenaming (castByFreeVars cast_by `extendVarSet` y'), cast_by `castAnnedTerm` annedTerm tg_y (Var y'))
+
+
+-- NB: the way tags are laid out in an Answer is rather "funny". Unlike most other uses of CastBy,
+-- the tag in the CastBy (if present) should be wrapped around the *uncast* value, NOT around the
+-- *cast* value.
+--
+-- This means that if we want correct tag propagation we have to be very careful when we use an
+-- existing CastBy in the construction of an Answer.
+
+answerSize' :: Answer -> Size
+answerSize' = annedValueSize' . snd
+
+answerFreeVars' :: Answer -> FreeVars
+answerFreeVars' = inFreeVars annedValueFreeVars'
+
+type Question = Out Id
+type Answer = In AnnedValue
+
+data QAG answer = Question Question
+                | Answer   answer
+
+instance Functor QAG where
+    fmap = Traversable.fmapDefault
+
+instance Foldable QAG where
+    foldMap = Traversable.foldMapDefault
+
+instance Traversable QAG where
+    traverse _ (Question x) = pure $ Question x
+    traverse f (Answer a)   = fmap Answer (f a)
+
+type QA = QAG Answer
+
+instance Outputable QA where
+    pprPrec prec = pPrintPrec prec . qaToAnnedTerm' emptyInScopeSet
+
+caseAnnedQA :: Anned QA -> Either (Anned Question) (Anned Answer)
+caseAnnedQA anned_qa = case extract anned_qa of
+    Question anned_q -> Left  (fmap (const anned_q) anned_qa)
+    Answer   anned_a -> Right (fmap (const anned_a) anned_qa)
+
+annedQAToInAnnedTerm :: InScopeSet -> Anned QA -> In AnnedTerm
+annedQAToInAnnedTerm iss anned_qa = case caseAnnedQA anned_qa of
+    Left  anned_q -> (mkInScopeIdentityRenaming iss, fmap Var anned_q)
+    Right anned_a -> annedAnswerToInAnnedTerm anned_a
+
+termToCastAnswer :: InScopeSet -> In AnnedTerm -> Maybe (Anned (Coerced Answer))
+termToCastAnswer iss in_anned_e = flip traverse (renameAnned in_anned_e) $ \(rn, e) -> case e of
+    Value v          -> Just (Uncast, (rn, v))
+    Cast anned_e' co -> case extract anned_e' of
+        Value v -> Just (castBy (renameCoercion iss rn co) (annedTag anned_e'), (rn, v))
+        _       -> Nothing
+    _ -> Nothing
+
+castAnswer :: InScopeSet -> Tag -> NormalCo -> Tagged (Coerced Answer) -> Tagged (Coerced Answer)
+castAnswer _   tg_co co (Tagged tg_a    (Uncast, a))          = Tagged tg_co (CastBy co tg_a, a)
+castAnswer ids tg_co co (Tagged _tg_co' (CastBy co' tg_a, a)) = Tagged tg_co (castBy (mkTransCo ids co co') tg_a, a)
+
+castByAnswer :: InScopeSet -> CastBy -> Tagged (Coerced Answer) -> Tagged (Coerced Answer)
+castByAnswer _   Uncast            cast_a = cast_a
+castByAnswer ids (CastBy co tg_co) cast_a = castAnswer ids tg_co co cast_a
+
+castAnswerSize :: Coerced Answer -> Size
+castAnswerSize = coercedSize answerSize'
+
+castAnnedQAToInAnnedTerm :: InScopeSet -> Anned QA -> CastBy -> In AnnedTerm
+castAnnedQAToInAnnedTerm iss anned_qa cast_by = (mkInScopeIdentityRenaming iss, castAnnedTerm cast_by (renameIn (renameAnnedTerm iss) (annedQAToInAnnedTerm iss anned_qa)))
+
+annedAnswerToInAnnedTerm :: Anned Answer -> In AnnedTerm
+annedAnswerToInAnnedTerm = taggedAnswerToInAnnedTerm . annedToTagged
+
+taggedAnswerToInAnnedTerm :: Tagged Answer -> In AnnedTerm
+taggedAnswerToInAnnedTerm (Tagged tg_a (rn, v)) = (rn, fmap Value $ annedValue tg_a v)
+
+taggedCastAnswerToInAnnedTerm :: InScopeSet -> Tagged (Coerced Answer) -> In AnnedTerm
+taggedCastAnswerToInAnnedTerm _   (Tagged tg_a  (Uncast,         a)) = taggedAnswerToInAnnedTerm (Tagged tg_a a)
+taggedCastAnswerToInAnnedTerm iss (Tagged tg_co (CastBy co tg_a, a)) = (mkInScopeIdentityRenaming iss, annedTerm tg_co (renameIn (renameAnnedTerm iss) (taggedAnswerToInAnnedTerm (Tagged tg_a a)) `Cast` co))
+
+answerToAnnedTerm' :: InScopeSet -> Answer -> TermF Anned
+answerToAnnedTerm' iss (rn, v) = Value $ renameAnnedValue' iss rn v
+
+coercedAnswerToAnnedTerm' :: InScopeSet -> Coerced Answer -> TermF Anned
+coercedAnswerToAnnedTerm' iss (Uncast,       e) = answerToAnnedTerm' iss e
+coercedAnswerToAnnedTerm' iss (CastBy co tg, e) = annedTerm tg (answerToAnnedTerm' iss e) `Cast` co
+
+{-
+-- TODO: callers are probably wrong for deeds
+castAnnedAnswer :: InScopeSet -> Anned Answer -> Out CastBy -> Anned Answer
+castAnnedAnswer _   anned_a Uncast           = anned_a
+castAnnedAnswer iss anned_a (CastBy co' tg') = snd $ castTaggedAnswer iss (annedToTagged anned_a) (co', tg')
+
+castTaggedAnswer :: InScopeSet -> Tagged Answer -> (NormalCo, Tag) -> (Maybe Tag, Anned Answer)
+castTaggedAnswer iss (Tagged tg (cast_by, in_v)) (co', tg') = (mb_dumped, annedAnswer tg' (castBy co'' tg, in_v))
+  where (mb_dumped, co'') = case cast_by of Uncast              -> (Nothing,        co')
+                                            CastBy co dumped_tg -> (Just dumped_tg, mkTransCo iss co co')
+-}
+
+castAnnedTerm :: CastBy -> AnnedTerm -> AnnedTerm
+castAnnedTerm Uncast         e = e
+castAnnedTerm (CastBy co tg) e = annedTerm tg (Cast e co)
+
+qaToAnnedTerm' :: InScopeSet -> QA -> TermF Anned
+qaToAnnedTerm' _   (Question x) = Var x
+qaToAnnedTerm' iss (Answer a)   = answerToAnnedTerm' iss a
+
+qaToAnswer :: QA -> Maybe Answer
+qaToAnswer qa = case qa of Answer a -> Just a; Question _ -> Nothing
+
+
+type Generalised = Bool
+
+mayInstantiate :: InstanceMatching -> Generalised -> Bool
+mayInstantiate NoInstances            _   = False
+mayInstantiate InstancesOfGeneralised gen = gen
+mayInstantiate AllInstances           _   = True
+
+
+type UnnormalisedState = (Deeds, Heap, Stack, In AnnedTerm)
+type State = (Deeds, Heap, Stack, Anned QA)
+
+-- NB: denormalise could actually eagerly put any frame arising from CastBy into the stack, but this is more modular:
+denormalise :: State -> UnnormalisedState
+denormalise (deeds, h@(Heap _ ids), k, qa) = (deeds, h, k, annedQAToInAnnedTerm ids qa)
+
+
+-- Invariant: LetBound things cannot refer to LambdaBound things.
+--
+-- This is motivated by:
+--  1. There is no point lambda-abstracting over things referred to by LetBounds because the resulting h-function would be
+--     trapped under the appropriate let-binding anyway, at which point all the lambda-abstracted things would be in scope as FVs.
+--  2. It allows (but does not require) the matcher to look into the RHS of LetBound stuff (rather than just doing nominal
+--     matching).
+data HowBound = InternallyBound | LambdaBound | LetBound
+              deriving (Eq, Show)
+
+instance Outputable HowBound where
+    ppr = text . show
+
+-- Left (Left gen) == no term, previously generalised if gen
+-- Left (Right tg) == no term, bound by environment with tag tg (used by LetBound only I think)
+-- Right in_e      == term in_e
+data HeapBinding = HB { howBound :: HowBound, heapBindingMeaning :: Either (Either Generalised Tag) (In AnnedTerm) }
+
+pPrintPrecAnned :: Outputable (f a)
+                => (f a -> FreeVars)
+                -> (InScopeSet -> Renaming -> f a -> f a)
+                -> Rational -> In (f a) -> SDoc
+pPrintPrecAnned fvs rename prec in_e = pprPrec prec $ renameIn (rename (mkInScopeSet (inFreeVars fvs in_e))) in_e
+
+pPrintPrecAnnedAlts :: In [AnnedAlt] -> [(AltCon, PrettyFunction)]
+pPrintPrecAnnedAlts in_alts = map (second asPrettyFunction) $ renameIn (renameAnnedAlts (mkInScopeSet (inFreeVars annedAltsFreeVars in_alts))) in_alts
+
+pPrintPrecAnnedValue :: Rational -> In (Anned AnnedValue) -> SDoc
+pPrintPrecAnnedValue prec in_e = pPrintPrec prec $ extract $ renameIn (renameAnnedValue (mkInScopeSet (inFreeVars annedValueFreeVars in_e))) in_e
+
+pPrintPrecAnnedTerm :: Rational -> In AnnedTerm -> SDoc
+pPrintPrecAnnedTerm prec in_e = pprPrec prec $ renameIn (renameAnnedTerm (mkInScopeSet (inFreeVars annedTermFreeVars in_e))) in_e
+
+pPrintPrecAnnedCastAnswer :: Rational -> Anned (Coerced Answer) -> SDoc
+pPrintPrecAnnedCastAnswer prec a = pprPrec prec $ fmap (\a -> PrettyFunction $ \prec -> pPrintPrecCoerced prec a) a
+
+instance Outputable HeapBinding where
+    pprPrec prec (HB how mb_in_e) = case how of
+        InternallyBound -> either (const empty) (pPrintPrecAnnedTerm prec) mb_in_e
+        LambdaBound     -> text "λ" <> angles (either (either (\gen -> if gen then text "?" else empty) (const empty)) (pPrintPrecAnnedTerm noPrec) mb_in_e)
+        LetBound        -> text "l" <> angles (either (either (\gen -> if gen then text "?" else empty) (const empty)) (pPrintPrecAnnedTerm noPrec) mb_in_e)
+
+heapBindingCheap :: HeapBinding -> Bool
+heapBindingCheap = either (const True) (termIsCheap . snd) . heapBindingMeaning
+
+lambdaBound :: HeapBinding
+lambdaBound = HB LambdaBound (Left (Left False))
+
+generalisedLambdaBound :: HeapBinding
+generalisedLambdaBound = HB LambdaBound (Left (Left True))
+
+internallyBound :: In AnnedTerm -> HeapBinding
+internallyBound in_e = HB InternallyBound (Right in_e)
+
+environmentallyBound :: Tag -> HeapBinding
+environmentallyBound tg = HB LetBound (Left (Right tg))
+
+letBound :: In AnnedTerm -> HeapBinding
+letBound in_e = HB LetBound (Right in_e)
+
+-- INVARIANT: the Heap might contain bindings for TyVars as well, but will only map them to lambdaBound/generalised
+-- TODO: when we lambda-abstract over lambdaBounds, we implicitly rely on the fact that the lambdaBound IdInfo will work
+-- out properly (unfortunately lambda-bounds can't be brought into scope all at the same time). We should probably fix
+-- this -- perhaps by zapping all lambdaBound IdInfo when we abstract.
+type PureHeap = M.Map (Out Var) HeapBinding
+data Heap = Heap PureHeap InScopeSet
+
+instance Outputable Heap where
+    pprPrec prec (Heap h _) = pprPrec prec h
+
+
+-- INVARIANT: no adjacent frames in the patterns:
+--  Cast, Cast
+--  Update, Update
+--  Update, Cast, Update
+-- NB: Cast, Update, Cast *is* allowed
+type Stack = Train (Tagged StackFrame) Generalised
+type StackFrame = StackFrameG (Anned (Coerced Answer)) (In AnnedTerm) (In [AnnedAlt])
+data StackFrameG answer term alts = TyApply (Out Type)
+                                  | CoApply (Out NormalCo)
+                                  | Apply (Out Id)
+                                  | Scrutinise (Out Id) (Out Type) alts
+                                  | PrimApply PrimOp [Out Type] [answer] [term]
+                                  | StrictLet (Out Id) term
+                                  | Update (Out Id)
+                                  | CastIt (Out NormalCo)
+
+instance Outputable StackFrame where
+    pprPrec prec kf = case kf of
+        TyApply ty'                    -> pPrintPrecApp prec (PrettyDoc $ text "[_]") ty'
+        CoApply co'                    -> pPrintPrecApp prec (PrettyDoc $ text "[_]") co'
+        Apply x'                       -> pPrintPrecApp prec (PrettyDoc $ text "[_]") x'
+        Scrutinise x' _ty in_alts      -> pPrintPrecCase prec (PrettyDoc $ text "[_]") x' (pPrintPrecAnnedAlts in_alts)
+        PrimApply pop tys' in_vs in_es -> pPrintPrecPrimOp prec pop tys' (map (PrettyFunction . flip pPrintPrecAnnedCastAnswer) in_vs ++ map (PrettyFunction . flip pPrintPrecAnnedTerm) in_es)
+        StrictLet x' in_e2             -> pPrintPrecLet prec x' (PrettyDoc $ text "[_]") (PrettyFunction $ flip pPrintPrecAnnedTerm in_e2)
+        Update x'                      -> pPrintPrecApp prec (PrettyDoc $ text "update") x'
+        CastIt co'                     -> pPrintPrecCast prec (PrettyDoc $ text "[_]") co'
+
+
+stateType :: State -> Type
+stateType (_, _, k, qa) = stackType k (qaType qa)
+
+stackType :: Stack -> Type -> Type
+stackType k ty = trainCarFoldl' (flip stackFrameType) ty k
+
+stackFrameType :: Tagged StackFrame -> Type -> Type
+stackFrameType = stackFrameType' . tagee
+
+stackFrameType' :: StackFrame -> Type -> Type
+stackFrameType' kf hole_ty = case kf of
+    TyApply ty                    -> hole_ty `applyTy` ty
+    CoApply co                    -> hole_ty `applyFunTy` coercionType co
+    Apply x                       -> hole_ty `applyFunTy` idType x
+    Scrutinise _ ty _             -> ty
+    PrimApply pop tys in_as in_es -> ((primOpType pop `applyTys` tys) `applyFunTys` map (coercedType answerType . annee) in_as) `applyFunTy` hole_ty `applyFunTys` map (\in_e@(rn, e) -> termType (renameAnnedTerm (mkInScopeSet (inFreeVars annedFreeVars in_e)) rn e)) in_es
+    StrictLet _ in_e@(rn, e)      -> termType (renameAnnedTerm (mkInScopeSet (inFreeVars annedFreeVars in_e)) rn e)
+    Update _                      -> hole_ty
+    CastIt co                     -> pSnd (coercionKind co)
+
+qaType :: Anned QA -> Type
+qaType anned_qa = case caseAnnedQA anned_qa of
+    Left  anned_q -> idType (extract anned_q)
+    Right anned_a -> answerType (extract anned_a)
+
+coercedType :: (a -> Type)
+            -> Coerced a -> Type
+coercedType typ (Uncast,      e) = typ e
+coercedType _   (CastBy co _, _) = pSnd (coercionKind co)
+
+answerType :: Answer -> Type
+answerType (rn, v) = valueType (renameAnnedValue' (mkInScopeSet (inFreeVars annedValueFreeVars' (rn, v))) rn v)
+
+
+heapBindingTerm :: HeapBinding -> Maybe (In AnnedTerm)
+heapBindingTerm = either (const Nothing) Just . heapBindingMeaning
+
+heapBindingLambdaBoundness :: HeapBinding -> Maybe Generalised
+heapBindingLambdaBoundness = either (either Just (const Nothing)) (const Nothing) . heapBindingMeaning
+
+{-# INLINE heapBindingTag #-} -- Showed up as causing 2% of allocation in a run despite resulting Maybe never being stored
+heapBindingTag :: HeapBinding -> Maybe Tag
+heapBindingTag = either (either (const Nothing) Just) (Just . annedTag . snd) . heapBindingMeaning
+
+-- | Size of HeapBinding for Deeds purposes
+heapBindingSize :: HeapBinding -> Size
+heapBindingSize (HB InternallyBound (Right (_, e))) = annedSize e
+heapBindingSize _                                   = 0
+
+-- | Size of StackFrame for Deeds purposes
+stackFrameSize :: StackFrame -> Size
+stackFrameSize kf = 1 + case kf of
+    TyApply _                -> 0
+    CoApply _                -> 0
+    Apply _                  -> 0
+    Scrutinise _ _ (_, alts) -> annedAltsSize alts
+    PrimApply _ _ as in_es   -> sum (map annedSize as ++ map (annedTermSize . snd) in_es)
+    StrictLet _ (_, e)       -> annedTermSize e
+    Update _                 -> 0
+    CastIt _                 -> 0
+
+heapSize :: Heap -> Size
+heapSize (Heap h _) = pureHeapSize h
+
+pureHeapSize :: PureHeap -> Size
+pureHeapSize = sum . map heapBindingSize . M.elems
+
+stackSize :: Stack -> Size
+stackSize = trainCarFoldl' (\size kf -> size + stackFrameSize (tagee kf)) 0
+
+stateSize :: State -> Size
+stateSize (_, h, k, qa) = heapSize h + stackSize k + annedSize qa
+
+unnormalisedStateSize :: UnnormalisedState -> Size
+unnormalisedStateSize (_, h, k, (_, e)) = heapSize h + stackSize k + annedSize e
+
+
+-- Detects three kinds of state:
+--  1. < H | v | >
+--  2. < H, x |-> v | x | >
+--  3. < H | x | K > (x \notin |dom|(H))
+--
+-- Note that in the final case it is OK for x to either be free or to be bound by the stack
+isStateIrreducible :: State -> Bool
+isStateIrreducible (_, Heap h _, k, anned_qa) = case annee anned_qa of
+    Answer _    -> stack_empty
+    Question x' -> maybe True (\hb -> case heapBindingMeaning hb of Left _ -> True; Right (_, e) -> termIsValue e && stack_empty) $ M.lookup x' h
+  where stack_empty = isJust $ isCastStack_maybe k
+
+isStackEmpty :: Stack -> Bool
+isStackEmpty (Loco _) = True
+isStackEmpty _        = False
+
+isPureHeapEmpty :: PureHeap -> Bool
+isPureHeapEmpty = Foldable.all (isJust . heapBindingLambdaBoundness)
+
+
+addStateDeeds :: Deeds -> (Deeds, a, b, c) -> (Deeds, a, b, c)
+addStateDeeds extra_deeds (deeds, h, k, in_e) = (extra_deeds `plusDeeds` deeds, h, k, in_e)
+
+releaseHeapBindingDeeds :: Deeds -> HeapBinding -> Deeds
+releaseHeapBindingDeeds deeds hb = deeds `releaseDeeds` heapBindingSize hb
+
+releasePureHeapDeeds :: Deeds -> PureHeap -> Deeds
+releasePureHeapDeeds = M.fold (flip releaseHeapBindingDeeds)
+
+releaseStackFrameDeeds :: Deeds -> Tagged StackFrame -> Deeds
+releaseStackFrameDeeds deeds kf = deeds `releaseDeeds` stackFrameSize (tagee kf)
+
+releaseStackDeeds :: Deeds -> Stack -> Deeds
+releaseStackDeeds = trainCarFoldl' releaseStackFrameDeeds
+
+releaseUnnormalisedStateDeed :: UnnormalisedState -> Deeds
+releaseUnnormalisedStateDeed (deeds, Heap h _, k, (_, e)) = releaseStackDeeds (releasePureHeapDeeds (deeds `releaseDeeds` annedSize e) h) k
+
+releaseStateDeed :: State -> Deeds
+releaseStateDeed (deeds, Heap h _, k, a) = releaseStackDeeds (releasePureHeapDeeds (deeds `releaseDeeds` annedSize a) h) k
+
+
+isCastStack_maybe :: Stack -> Maybe CastBy
+isCastStack_maybe = fmap fst . isTrivialStack_maybe
+
+isTrivialStack_maybe :: Stack -> Maybe (CastBy, Maybe (Tagged Var, CastBy))
+isTrivialStack_maybe k = case k of
+    (Tagged tg (CastIt co) `Car` k) -> fmap ((,) (CastBy co tg)) $ isTrivialValueStack_maybe k
+    _                               -> fmap ((,) Uncast)         $ isTrivialValueStack_maybe k
+  where
+    isTrivialValueStack_maybe :: Stack -> Maybe (Maybe (Tagged Var, CastBy))
+    isTrivialValueStack_maybe k = case peelValueStack k of
+        (mb_peeled, Loco _) -> Just mb_peeled
+        _                   -> Nothing
+
+    peelValueStack :: Stack -> (Maybe (Tagged Var, CastBy), Stack)
+    peelValueStack (Tagged x_tg (Update x) `Car` Tagged co_tg (CastIt co) `Car` k) = (Just (Tagged x_tg x, CastBy co co_tg), k)
+    peelValueStack (Tagged x_tg (Update x) `Car` k)                                = (Just (Tagged x_tg x, Uncast), k)
+    peelValueStack k                                                               = (Nothing, k)
+
+peelUpdateStack :: Stack -> (Maybe (CastBy, Tagged Var), Stack)
+peelUpdateStack (Tagged co_tg (CastIt co) `Car` Tagged x_tg (Update x) `Car` k) = (Just (CastBy co co_tg, Tagged x_tg x), k)
+peelUpdateStack (Tagged x_tg (Update x) `Car` k)                                = (Just (Uncast, Tagged x_tg x), k)
+peelUpdateStack k                                                               = (Nothing, k)
+
+
+-- NB: I used to source the tag for the positive information from the tag of the case branch RHS, but that
+-- leads to WAY TOO MUCH specialisation for examples like gen_regexps because we get lots of e.g. cons cells
+-- that are all given different tags.
+altConToValue :: Type -> AltCon -> Maybe (Anned AnnedValue)
+altConToValue ty (DataAlt dc as qs xs) = do
+    (_, univ_tys) <- splitTyConApp_maybe ty
+    Just (annedValue (dataConTag dc) (Data dc (univ_tys ++ map mkTyVarTy as) (map mkCoVarCo qs) xs))
+altConToValue _  (LiteralAlt l) = Just (annedValue (literalTag l) (Literal l))
+altConToValue _  DefaultAlt     = Nothing -- NB: could actually put an indirection in the heap in this case, for fun..
+
+zapAltConIdOccInfo :: AltCon -> AltCon
+zapAltConIdOccInfo (DataAlt dc as qs xs) = DataAlt dc as qs (map zapIdOccInfo xs)
+zapAltConIdOccInfo (LiteralAlt l)        = LiteralAlt l
+zapAltConIdOccInfo DefaultAlt            = DefaultAlt
+
+
+-- Unlifted bindings are irritating. They mean that the PureHeap has an implicit order that we need to carefully
+-- preserve when we turn it back into a term: unlifted bindings must be bound by a "let".
+--
+-- An alternative to this would be to record the binding struture in the PureHeap itself, but that would get pretty
+-- fiddly (in particuar, update frames would need to hold a "cursor" saying where in the PureHeap to update upon
+-- completion). It's probably better to take the complexity hit here and now.
+bindManyMixedLiftedness :: Symantics ann => (ann (TermF ann) -> FreeVars) -> [(Var, ann (TermF ann))] -> ann (TermF ann) -> ann (TermF ann)
+bindManyMixedLiftedness get_fvs = go
+  where go []  = id
+        go xes = case takeFirst (\(x, _) -> isUnLiftedType (idType x)) xes of
+            Nothing                 -> letRec xes
+            Just ((x, e), rest_xes) -> go xes_above . let_ x e . go xes_below
+              where (xes_above, xes_below) = partition_one (unitVarSet x) rest_xes
+
+        partition_one bvs_below xes | bvs_below' == bvs_below = (xes_above, xes_below)
+                                    | otherwise               = second (xes_below ++) $ partition_one bvs_below' xes_above
+          where (xes_below, xes_above) = partition (\(_, e) -> get_fvs e `intersectsVarSet` bvs_below) xes
+                bvs_below' = bvs_below `unionVarSet` mkVarSet (map fst xes_below)
--- a/compiler/supercompile/Supercompile/GHC.hs
+++ b/compiler/supercompile/Supercompile/GHC.hs
+module Supercompile.GHC where
+
+-- FIXME: I need to document the basis on which I push down unlifted heap bindings (they are all values, IIRC)
+-- TODO:
+--  * Why does the supercompiler not match as much as it should? (e.g. Interpreter, UInterpreter)
+--  * We should probably claimStep when supercompiling the RHS of an explicit lambda bound by a recursive "let".
+--    Reason: it is tantamount to inlining the body one time. Note that we don't care about non-lambdas (we don't
+--    pay for inlining them) or non-values (we don't put a copy of a non-value in the heap along with the RHS).
+--
+--    If there isn't enough left, what do we do?? Obvious answer: lambda abstract over the function name.
+--    Better(?) answer: add it as a let-bound Nothing to the heap, so the resulting h-function is trapped by the residual letrec...
+
+-- TODO: pre-transforming (case e1 of y { C z -> e2[z] }) to case (case e1 of y { C z -> z }) of z -> e2[z]
+-- might help us replace CPR more because even if we generalise away the e2[z] we potentially keep the unboxing.
+-- Probably can't/shouldn't do this if the wildcard binder y is used in the RHS.
+
+import Supercompile.Utilities
+import qualified Supercompile.Core.Syntax as S
+import qualified Supercompile.Core.FreeVars as S
+import qualified Supercompile.Evaluator.Syntax as S
+
+import CoreSyn
+import MkCore     (mkImpossibleExpr)
+import CoreUtils  (exprType, bindNonRec)
+import CoreUnfold
+import Coercion   (Coercion, isCoVar, isCoVarType, mkCoVarCo, mkAxInstCo)
+import DataCon    (DataCon, dataConWorkId, dataConTyCon, dataConName)
+import Var        (isTyVar)
+import PrimOp     (primOpSig)
+import Id
+import MkId       (mkPrimOpId)
+import FastString (mkFastString)
+import Type       (isUnLiftedType)
+import TyCon      (newTyConCo_maybe)
+
+import Control.Monad
+
+topUnique :: Unique
+anfUniqSupply' :: UniqSupply
+(topUnique, anfUniqSupply') = takeUniqFromSupply anfUniqSupply
+
+
+-- | Descriptions of terms: used for building readable names for ANF-introduced variables
+data Description = Opaque String | ArgumentOf Description
+
+descriptionString :: Description -> String
+descriptionString = go (0 :: Int)
+  where
+    go n (Opaque s)     = s ++ (if n > 0 then show n else "")
+    go n (ArgumentOf d) = go (n + 1) d
+
+desc :: S.Term -> Description
+desc = desc' . unI
+
+desc' :: S.TermF Identity -> Description
+desc' (S.Var x)            = Opaque (S.varString x)
+desc' (S.Value _)          = Opaque "value"
+desc' (S.TyApp e1 _)       = desc e1 -- NB: no argOf for type arguments because they don't get ANFed, so it's a bit redundant
+desc' (S.CoApp e1 _)       = argOf (desc e1)
+desc' (S.App e1 _)         = argOf (desc e1)
+desc' (S.PrimOp pop as es) = foldr (\() d -> argOf d) (Opaque (show pop)) (map (const ()) as ++ map (const ()) es)
+desc' (S.Case _ _ _ _)     = Opaque "case"
+desc' (S.Cast _ _)         = Opaque "cast"
+desc' (S.Let _ _ e)        = desc e
+desc' (S.LetRec _ e)       = desc e
+
+argOf :: Description -> Description
+argOf = ArgumentOf
+
+
+newtype ParseM a = ParseM { unParseM :: UniqSupply -> (UniqSupply, [(Var, S.Term)], a) }
+
+instance Functor ParseM where
+    fmap = liftM
+
+instance Monad ParseM where
+    return x = ParseM $ \s -> (s, [], x)
+    mx >>= fxmy = ParseM $ \s -> case unParseM mx s of (s, floats1, x) -> case unParseM (fxmy x) s of (s, floats2, y) -> (s, floats1 ++ floats2, y)
+
+instance MonadUnique ParseM where
+    getUniqueSupplyM = ParseM $ \us -> case splitUniqSupply us of (us1, us2) -> (us1, [], us2)
+
+runParseM' :: UniqSupply -> ParseM a -> ([(Var, S.Term)], a)
+runParseM' us act = (floats, x)
+  where (_s, floats, x) = unParseM act us
+
+runParseM :: UniqSupply -> ParseM S.Term -> S.Term
+runParseM us = uncurry (S.bindManyMixedLiftedness S.termFreeVars) . runParseM' us
+
+freshFloatId :: String -> (CoreExpr, S.Term) -> ParseM (Maybe (Var, S.Term), Var)
+freshFloatId _ (_, I (S.Var x)) = return (Nothing, x)
+freshFloatId n (old_e, e)       = fmap (\x -> let x' = x `setIdUnfolding` mkUnfolding InlineRhs False (isBottomingId x) old_e in (Just (x', e), x')) $ mkSysLocalM (mkFastString n) (S.termType e)
+ -- NB: we are careful to give fresh binders an unfolding so that the evaluator can use
+ -- GHC's inlining heuristics to decide whether it is profitable to inline the RHS
+ -- FIXME: this doesn't work at all because substituting into binders zaps their (unstable) unfoldings
+
+freshFloatCoVar :: String -> S.Term -> ParseM (Maybe (Var, S.Term), Coercion)
+freshFloatCoVar _ (I (S.Value (S.Coercion co))) = return (Nothing, co)
+freshFloatCoVar n e                             = fmap (\x -> (Just (x, e), mkCoVarCo x)) $ mkSysLocalM (mkFastString n) (S.termType e)
+
+floatIt :: [(Var, S.Term)] -> ParseM ()
+floatIt floats = ParseM $ \s -> (s, floats, ())
+
+nameIt :: Description -> (CoreExpr, S.Term) -> ParseM Var
+nameIt d e = freshFloatId ("a" ++ descriptionString d) e >>= \(mb_float, x) -> floatIt (maybeToList mb_float) >> return x
+
+nameCo :: Description -> S.Term -> ParseM Coercion
+nameCo d e = freshFloatCoVar ("c" ++ descriptionString d) e >>= \(mb_float, co) -> floatIt (maybeToList mb_float) >> return co
+
+bindFloats :: ParseM S.Term -> ParseM S.Term
+bindFloats = bindFloatsWith . fmap ((,) [])
+
+bindFloatsWith :: ParseM ([(Var, S.Term)], S.Term) -> ParseM S.Term
+bindFloatsWith act = ParseM $ \s -> case unParseM act s of (s, floats, (xes, e)) -> (s, [], S.bindManyMixedLiftedness S.termFreeVars (xes ++ floats) e)
+
+bindUnliftedFloats :: ParseM S.Term -> ParseM S.Term
+bindUnliftedFloats act = ParseM $ \s -> case unParseM act s of (s, floats, e) -> if any (isUnLiftedType . idType . fst) floats
+                                                                                 then (s, [], S.bindManyMixedLiftedness S.termFreeVars floats e)
+                                                                                 else (s, floats, e)
+
+appE :: S.Term -> (CoreExpr, S.Term) -> ParseM S.Term
+appE e1 (old_e2, e2)
+  | isCoVarType (S.termType e2) = fmap (e1 `S.coApp`) $ nameCo (argOf (desc e1)) e2
+  | otherwise                   = fmap (e1 `S.app`)   $ nameIt (argOf (desc e1)) (old_e2, e2)
+
+
+
+conAppToTerm :: DataCon -> [CoreExpr] -> ParseM S.Term
+conAppToTerm dc es
+  | Just co_axiom <- newTyConCo_maybe (dataConTyCon dc)
+  , let [co_val_e] = co_val_es -- NB: newtypes may not have existential arguments
+  = fmap (`S.cast` mkAxInstCo co_axiom tys') $ coreExprToTerm co_val_e
+  | otherwise
+  = do -- Put each argument into a form suitable for an explicit value
+       -- NB: if any argument is non-trivial then the resulting binding will not be a simple value
+       -- (some let-bindings will surround it) and inlining will be impeded.
+       (d, cos') <- mapAccumLM (\d co_e -> fmap ((,) (argOf d)) $ coreExprToTerm co_e >>= nameCo (argOf d))
+                               (Opaque (S.nameString (dataConName dc))) co_es
+       (_, xs') <- mapAccumLM (\d val_e -> fmap ((,) (argOf d)) $ coreExprToTerm val_e >>= \val_e' -> nameIt (argOf d) (val_e, val_e'))
+                              d val_es
+       return $ S.value (S.Data dc tys' cos' xs')
+  where
+    -- Divide into type/coercion/value
+    (tys', co_val_es) = takeWhileJust fromType_maybe es
+    (co_es, val_es) = span (isCoVarType . exprType) co_val_es
+
+    fromType_maybe (Type ty) = Just ty
+    fromType_maybe _         = Nothing
+
+-- NB: this function must not float stuff out of bindings, so that later SUPERINLINABLE propagation will work properly
+coreExprToTerm :: CoreExpr -> ParseM S.Term
+coreExprToTerm init_e = {-# SCC "coreExprToTerm" #-} term init_e
+  where
+    -- Partially-applied PrimOp and Data are dealt with later on by generating appropriate unfoldings
+    -- We use exprIsConApp_maybe here to ensure we desugar explicit constructor use into something that looks cheap,
+    -- and we do our own thing to spot saturated primop applications
+    term e | Just (dc, univ_tys, es) <- exprIsConApp_maybe (const NoUnfolding) e
+           = conAppToTerm dc (map Type univ_tys ++ es)
+           | (Var x, es) <- collectArgs e
+           , Just pop <- isPrimOpId_maybe x
+           , (tys, es) <- takeWhileJust (\e -> case e of Type ty -> Just ty; _ -> Nothing) es
+           , all isValArg es
+           , (_,_,_,arity,_) <- primOpSig pop
+           , length es == arity
+           = fmap (S.primOp pop tys) (mapM term es)
+    term (Var x)                   = return $ S.var x
+    term (Lit l)                   = return $ S.value (S.Literal l)
+    term (App e_fun (Type ty_arg)) = fmap (flip S.tyApp ty_arg) (term e_fun)
+    term (App e_fun e_arg)         = join $ liftM2 appE (term e_fun) (fmap ((,) e_arg) $ maybeUnLiftedTerm (exprType e_arg) e_arg)
+    term (Lam x e) | isTyVar x     = fmap (S.value . S.TyLambda x) (bindFloats (term e))
+                   | otherwise     = fmap (S.value . S.Lambda x) (bindFloats (term e))
+    term (Let (NonRec x e1) e2)    = liftM2 (S.let_ x) (bindFloats (term e1)) (bindFloats (term e2))
+    term (Let (Rec xes) e)         = bindFloatsWith (liftM2 (,) (mapM (secondM (bindFloats . term)) xes) (term e))
+    term (Case e x ty alts)        = liftM2 (\e alts -> S.case_ e x ty alts) (bindFloats (term e)) (mapM alt alts)
+    term (Cast e co)               = fmap (flip S.cast co) (term e)
+    term (Tick _ e)                = term e -- FIXME: record ticks
+    term (Type ty)                 = pprPanic "termToCoreExpr" (ppr ty)
+    term (Coercion co)             = return $ S.value (S.Coercion co)
+    
+    -- We can float unlifted bindings out of an unlifted argument/let
+    -- because they were certain to be evaluated anyway. Otherwise we have
+    -- to residualise all the floats if any of them were unlifted.
+    maybeUnLiftedTerm ty e
+      | isUnLiftedType ty = term e
+      | otherwise         = bindUnliftedFloats (term e)
+
+    alt (altcon, xs, e) = fmap ((,) (coreAltConToAltCon altcon xs)) $ bindFloats (term e)
+
+coreAltConToAltCon :: AltCon -> [Var] -> S.AltCon
+coreAltConToAltCon DEFAULT      [] = S.DefaultAlt
+coreAltConToAltCon (LitAlt l)   [] = S.LiteralAlt l
+coreAltConToAltCon (DataAlt dc) xs = S.DataAlt dc as qs zs
+      where (as, ys) = span isTyVar xs
+            (qs, zs) = span isCoVar ys
+coreAltConToAltCon altcon       xs = pprPanic "coreAltConToAltCon" (ppr (altcon, xs))
+
+{-# INLINABLE termToCoreExpr #-}
+termToCoreExpr :: Copointed ann => ann (S.TermF ann) -> CoreExpr
+termToCoreExpr = term
+  where
+    term e = case extract e of
+        S.Var x             -> Var x
+        S.Value v           -> value v
+        S.TyApp e ty        -> term e `App` Type ty
+        S.CoApp e co        -> term e `App` Coercion co
+        S.App e x           -> term e `App` Var x
+        S.PrimOp pop tys es -> Var (mkPrimOpId pop) `mkTyApps` tys `mkApps` map term es
+        S.Case e x ty alts  -> Case (term e) x ty (if null alts then [(DEFAULT, [], mkImpossibleExpr ty)] else map alt alts)
+        S.Let x e1 e2       -> bindNonRec x (term e1) (term e2)
+        S.LetRec xes e      -> Let (Rec (map (second term) xes)) (term e)
+        S.Cast e co         -> Cast (term e) co
+    
+    value (S.Literal l)          = Lit l
+    value (S.Coercion co)        = Coercion co
+    value (S.TyLambda a e)       = Lam a (term e)
+    value (S.Lambda x e)         = Lam x (term e)
+    value (S.Data dc tys cos xs) = ((Var (dataConWorkId dc) `mkTyApps` tys) `mkCoApps` cos) `mkVarApps` xs
+    
+    alt (altcon, e) = (altcon', xs, term e)
+      where (altcon', xs) = altConToCoreAltCon altcon
+
+altConToCoreAltCon :: S.AltCon -> (AltCon, [Var])
+altConToCoreAltCon (S.DataAlt dc as qs ys) = (DataAlt dc, as ++ qs ++ ys)
+altConToCoreAltCon (S.LiteralAlt l)        = (LitAlt l,   [])
+altConToCoreAltCon (S.DefaultAlt)          = (DEFAULT,    [])
--- a/compiler/supercompile/Supercompile/StaticFlags.hs
+++ b/compiler/supercompile/Supercompile/StaticFlags.hs
+module Supercompile.StaticFlags where
+
+import Data.Char (toLower)
+
+import FastString
+import StaticFlags
+
+
+parseEnum :: String -> a -> [(String, a)] -> a
+parseEnum prefix def opts = maybe def parse $ lookup_str prefix
+  where parse = maybe (error "parseEnum: unknown option") id . flip lookup opts . map toLower
+
+
+-- The StaticFlagsParser admits any option beginning with -fsupercompiler
+
+-- | The situations in which will demand a SUPERINLINABLE annotation is present
+data Superinlinability = ForEverything | ForRecursion | ForNothing
+
+sUPERINLINABILITY :: Superinlinability
+sUPERINLINABILITY = parseEnum "-fsupercompiler-superinlinability" ForRecursion [("", ForRecursion), ("recursion", ForRecursion), ("everything", ForEverything), ("nothing", ForNothing)]
+
+data InstanceMatching = NoInstances | InstancesOfGeneralised | AllInstances
+
+-- I've decided that allowing arbitrary tiebacks to any ancestor state overlaps too much with the combination
+-- of MSG-based generalisation+rollback, and has the potential to lose more useful optimisation than that combo does.
+-- Matching back to generalised stuff is still a good idea, but we need to propagate generalised flags more agressively (FIXME)
+iNSTANCE_MATCHING :: InstanceMatching
+iNSTANCE_MATCHING = parseEnum "-fsupercompiler-instance-matching" NoInstances [("full", AllInstances), ("generalised", InstancesOfGeneralised), ("none", NoInstances)]
+
+-- This is not remotely safe:
+fLOAT_TO_MATCH :: Bool
+fLOAT_TO_MATCH = False
+
+-- This is also not safe as implemented. It could be made safe with some fiddling, but it's not worth it:
+rEDUCE_BEFORE_MATCH :: Bool
+rEDUCE_BEFORE_MATCH = False
+
+-- At the moment, this HAS to be on for termination to be assured:
+eAGER_SPLIT_VALUES :: Bool
+eAGER_SPLIT_VALUES = not $ lookUp $ fsLit "-fsupercompiler-no-eager-split-values"
+
+rEFINE_ALTS :: Bool
+rEFINE_ALTS = not $ lookUp $ fsLit "-fsupercompiler-no-refine-alts"
+--rEFINE_ALTS = False
+
+dEEDS :: Bool
+dEEDS = lookUp $ fsLit "-fsupercompiler-deeds"
+--dEEDS = True
+
+bOUND_STEPS :: Bool
+bOUND_STEPS = lookUp $ fsLit "-fsupercompiler-bound-steps"
+--bOUND_STEPS = True
+
+-- For debugging very long-running supercompilation
+dEPTH_LIIMT :: Maybe Int
+dEPTH_LIIMT = Just (lookup_def_int "-fsupercompiler-depth-limit" maxBound)
+--dEPTH_LIIMT = Just 10
+
+rEDUCE_STOP_LIMIT :: Int
+rEDUCE_STOP_LIMIT = lookup_def_int "-fsupercompiler-reduce-stop-limit" 1
+
+pOSITIVE_INFORMATION :: Bool
+pOSITIVE_INFORMATION = not $ lookUp $ fsLit "-fsupercompiler-no-positive-information"
+--pOSITIVE_INFORMATION = True
+
+-- In favour of preinitialization:
+--  1. We can reuse the normal speculation mechanism of the supercompiler to expose
+--     more almost-cheap top-level bindings
+--  2. No need to mark some heap bindings as "let" bindings, so things are simpler
+--  3. None of the complications associated with speculating "let" marked bindings (see below)
+--  4. Probably more reliable at finding reuse opportunities: "let"-marked bindings basically
+--     only work for expressions like (Just map) or (map), though they are very good at
+--     preventing lambda-abstraction over names we don't have unfoldings for.
+--     Preinitialization with eta-expansion is particularly good at finding (rare) lambdas
+--     in the arguments of data/casts within other lambdas. (FIXME: data not implemented)
+--
+-- Against preinitialization:
+--  1. If an unfolding is bound at top level but non-cheap, we need to:
+--    a) Prevent inlining it (so we don't duplicate work)
+--    b) Still refer to it with a simple free variable rather than via a lambda
+--   It is hard to accomplish either of these things with preinit, especially
+--   if we delay cheapness detection to the normal speculation mechanism! (FIXME: I'm not doing either..)
+--  2. Have to "eta-expand" to get better tieback chance. Speculation is needed for "accurate" eta-expansion.
+--  3. The memo table is much larger (in benchmarks matching is not a major hotspot, though)
+--  4. It is convenient to hide let-bound bindings in the pretty-printer (perhaps we could do
+--     something else though -- e.g. the speculator should mark bindings as "do-not-print" so
+--     only new bindings from reduce are printed each time)
+--
+--
+-- NB: it *is* important that we speculate the unfoldings, even though you would think that
+-- GHC would have already done its best to make them into manifest values. Sample things that can be
+-- improved by this are:
+--
+-- 1. $d = let x = (y |> co) in D x
+--    GHC doesn't let-float the x binding since it originated from ANFing. See also:
+--    i = let i# = 1# in I# i#
+--
+-- 2. GHC.Base.$fMonadIO_$c>>= = bindIO
+--    GHC doesn't necessarily eliminate a trivial equality like this if both names are exported.
+--
+-- 3. choice [parser_1, parser_2, parser_3] :: Parser
+--    This expression has arity 1 but GHC can't see that because it is unwilling to push
+--    the case-scrutinisation of the list input to "choice" under a lambda. Our evaluator
+--    eliminates that scrutinisation entirely so we don't have any such problem.
+--
+-- GHC is pretty good at discovering all the obvious eta-expansions by itself before this point.
+--
+--
+-- NB: speculating unfoldings is not as trivial as you might think! Consider the first
+-- example above:
+--   $d = let x = (y |> co) in D x
+--
+-- If we speculate this we presumably want to let-float:
+--   x = (y |> co)
+--   $d = D x
+--
+-- But there is no "real" top-level x binding we can just refer to as a free variable of the
+-- supercompiled term.
+--
+-- Here are some possible responses:
+--
+-- 0. Manually create some appropriate top-level bindings in the current module.
+--    Of course, this is only appropriate if *every* one of the new bindings are values, or we risk
+--    work duplication. If even one is not, as in:
+--     x = f_with_arity_4 1 2 (fib 100)
+--
+--    Then we cannot provide an unfolding for x in the state's heap since doing so cannot help but
+--    lose the shared (fib 100) work. (Perhaps we could make an exception for saturated datacons
+--    where we can sometimes extract the shared work thunk by case analysis, but this is complex.)
+--
+-- 1. We could normalise each unfolding to something of the form
+--      letrec x1 = u1; ...; xn = un in v [|> co]
+--    Where:
+--      u ::= v [|> co]
+--          | x [|> co]
+--   
+--    Such that x \elem {x1,..,xn} OR is the name of another succesfully-normalised unfolding.
+--    With unfoldings of this form, we can just copy in the whole thing whenever we want to inline
+--    the unfolding at a use site.
+--
+-- 2. We could guarantee that any let-marked thing in the heap is available to duplicate. In this case,
+--    I think it is safe to allow the evaluator to create actual update frames for them. This will
+--    allow the actual unfolding to be done as a normal part of evaluation.
+--
+--    When the update frames are popped the new bindings should be added to the heap as simple internal
+--    bindings, probably. NB: this does lose the benefits of being a let-binding! Alternatively we could
+--    add them as let bindings as long as it won't refer to any non-let bindings, which gets the benefit
+--    of let bindings in almost all cases where it is possible.
+--
+--    In this scheme, we can speculate to ensure that we get the right form of binding and then throw
+--    away the work of the speculator.
+--
+-- 3. We could speculate and then throw away any unfoldings that refer to newly-created let bindings.
+--    This precludes use of the speculation mechanism for examples 1 and 3 above, but OK for 2.
+--
+--    Of course, if this only solves 2 then perhaps a simple renaming pass would be just as good and easier!
+--
+--    This solution is good for finding partial applications as well (at least for those partial applications
+--    to arguments that have already been floated out by the previous GHC invocation), but this isn't useful
+--    in practice.
+
+-- TODO: tying back to preinits has become harder since we have to add both (f x) and (\x -> e) states
+-- to the cache since we no longer reduce before matching...
+pREINITALIZE_MEMO_TABLE :: Bool
+pREINITALIZE_MEMO_TABLE = not $ lookUp $ fsLit "-fsupercompiler-no-preinitalize"
+
+mODULE_SUPERINLINABLE :: Bool
+mODULE_SUPERINLINABLE = not $ lookUp $ fsLit "-fsupercompiler-no-module-superinlinable"
+
+-- FIXME: turning this off is actually broken right now
+uSE_LET_BINDINGS :: Bool
+uSE_LET_BINDINGS = not $ lookUp $ fsLit "-fsupercompiler-no-let-bindings"
+
+data DeedsPolicy = FCFS | Proportional
+                 deriving (Read)
+
+dEEDS_POLICY :: DeedsPolicy
+dEEDS_POLICY = parseEnum "-fsupercompiler-deeds-policy" Proportional [("fcfs", FCFS), ("proportional", Proportional)]
+
+bLOAT_FACTOR :: Int
+--bLOAT_FACTOR = fromMaybe 10 $ listToMaybe [read val | arg <- aRGS, Just val <- [stripPrefix "--bloat=" arg]]
+bLOAT_FACTOR = lookup_def_int "-fsupercompiler-bloat-factor" 10
+ -- NB: need a bloat factor of at least 5 to get append/append fusion to work. The critical point is:
+ --
+ --  let (++) = ...
+ --  in case (case xs of []     -> ys
+ --                      (x:xs) -> x : (xs ++ ys)) of
+ --    []     -> zs
+ --    (x:xs) -> x : (xs ++ zs)
+ --
+ -- We need to duplicate the case continuation into each branch, so at one time we will have:
+ --  1) Two copies of (++) in the [] branch of the inner case
+ --    a) One in the heap
+ --    b) One from the stack (from [_] ++ zs)
+ --  2) Similarly two copies in the (:) branch of the inner case
+ --  3) One copy manifested in the residual branch of xs
+ --
+ -- Total = 5 copies (due to tiebacks, the residual program will do better than this)
+ --
+ -- 
+ -- Unfortunately, my implementation doesn't tie back as eagerly as you might like, so we actually peel the loop once and
+ -- hence need a bloat factor of 8 here (5 + 3 other case statements derived from (++))
+ -- TODO: figure out how to reduce this number.
+
+
+cALL_BY_NAME :: Bool
+cALL_BY_NAME = lookUp $ fsLit "-fsupercompiler-call-by-name"
+
+
+dUPLICATE_VALUES_EVALUATOR, dUPLICATE_VALUES_SPLITTER :: Bool
+dUPLICATE_VALUES_EVALUATOR = lookUp $ fsLit "-fsupercompiler-duplicate-values-evaluator"
+dUPLICATE_VALUES_SPLITTER = lookUp $ fsLit "-fsupercompiler-duplicate-values-splitter"
+
+
+data TagBagType = TBT { tagBagPairwiseGrowth :: Bool }
+                deriving (Show)
+
+tAG_COLLECTION :: TagBagType
+tAG_COLLECTION = parseEnum "-fsupercompiler-tag-collection" (TBT False) [("bags", TBT False), ("bags-strong", TBT True)]
+
+
+data GeneralisationType = NoGeneralisation | AllEligible | DependencyOrder Bool | StackFirst
+
+sPLIT_GENERALISATION_TYPE :: GeneralisationType
+sPLIT_GENERALISATION_TYPE = parseEnum "-fsupercompiler-split-generalisation-type" StackFirst [("none", NoGeneralisation), ("all-eligible", AllEligible), ("first-reachable", DependencyOrder True), ("last-reachable", DependencyOrder False), ("stack-first", StackFirst)]
+
+oCCURRENCE_GENERALISATION :: Bool
+oCCURRENCE_GENERALISATION = not $ lookUp $ fsLit "-fsupercompiler-no-occurrence-generalisation"
+
+gENERALISATION :: Bool
+gENERALISATION = not $ lookUp $ fsLit "-fsupercompiler-no-generalisation"
+
+mSG_GENERALISATION :: Bool
+mSG_GENERALISATION = not $ lookUp $ fsLit "-fsupercompiler-no-msg-generalisation"
+
+tYPE_GEN :: Bool
+tYPE_GEN = not $ lookUp $ fsLit "-fsupercompiler-no-type-generalisation"
+
+eVALUATE_PRIMOPS :: Bool
+eVALUATE_PRIMOPS = not $ lookUp $ fsLit "-fsupercompiler-no-primops"
+
+sPECULATION :: Bool
+sPECULATION = not $ lookUp $ fsLit "-fsupercompiler-no-speculation"
+
+lOCAL_TIEBACKS :: Bool
+lOCAL_TIEBACKS = lookUp $ fsLit "-fsupercompiler-local-tiebacks"
+
+rEFINE_FULFILMENT_FVS :: Bool
+rEFINE_FULFILMENT_FVS = not $ lookUp $ fsLit "-fsupercompiler-no-refine-fulfilment-fvs"
+
+
+hISTORY_TREE :: Bool
+hISTORY_TREE = not $ lookUp $ fsLit "-fsupercompiler-no-history-tree"
+
+rEDUCE_ROLLBACK :: Bool
+rEDUCE_ROLLBACK = not $ lookUp $ fsLit "-fsupercompiler-no-reduce-rollback"
+
+sC_ROLLBACK :: Bool
+sC_ROLLBACK = not $ lookUp $ fsLit "-fsupercompiler-no-sc-rollback"
+
+
+tRACE :: Bool
+tRACE = lookUp $ fsLit "-fsupercompiler-trace"
--- a/compiler/supercompile/Supercompile/Termination/Combinators.hs
+++ b/compiler/supercompile/Supercompile/Termination/Combinators.hs
+{-# LANGUAGE GADTs, TypeFamilies, FlexibleContexts #-}
+module Supercompile.Termination.Combinators (
+    -- | Termination tests
+    TTest,
+    alwaysT,
+    Cofunctor(..),
+    Finite, finiteT,
+    WellOrdered, wellOrderedT,
+    eitherT, pairT,
+    Zippable(..), zippableT,
+    HasDomain(..), equalDomainT,
+    
+    -- | Histories
+    History(..),
+    TermRes(..), isContinue,
+    LinearHistory, mkLinearHistory,
+    NodeKey, GraphicalHistory, generatedKey, mkGraphicalHistory
+  ) where
+
+import Supercompile.Utilities (Nat)
+
+import Control.Arrow ((***))
+
+import Data.Monoid (All(..))
+import qualified Data.Foldable as Foldable
+import Data.List (sortBy)
+import Data.Ord (comparing)
+
+import qualified Data.IntMap as IM
+import qualified Data.IntSet as IS
+import qualified Data.Map as M
+import qualified Data.Set as S
+
+
+-- Termination tests
+-- ~~~~~~~~~~~~~~~~~
+
+data TTest a = forall repr. WQO (a -> repr)                    -- Prepare item
+                                (repr -> repr -> (Bool, Bool)) -- Embed two prepared items
+
+-- -- | Tests whether two elements are embedding according to the given embedding operator.
+-- (<|) :: TTest a -> a -> a -> Bool
+-- (<|) (WQO prepare embed) x y = fst (embed (prepare x) (prepare y))
+
+
+-- | Picks out type constructors that are contravariant functors, i.e. we have:
+--
+--   1. Identity: @cofmap id = id@
+--
+--   2. Composition: cofmap f . cofmap g = cofmap (g . f)
+class Cofunctor f where
+    cofmap :: (a -> b) -> f b -> f a
+
+instance Cofunctor TTest where
+    -- Trivially correct
+    cofmap f (WQO prepare embed) = WQO (prepare . f) embed
+
+
+-- | Trivial termination test: terminates immediately
+alwaysT :: TTest a
+alwaysT = WQO (\_ -> ()) (\() () -> (True, True))
+
+
+-- | Picks out types @a@ that satisfy these two properties:
+--
+--  1. The @Eq@ instance provides a total equivalence relation (i.e. symmetric, transitive, reflexive)
+--
+--  2. There are only a finite number of elements of type @a@ that are distinguishable via the @Eq@ instance
+class Eq a => Finite a where
+
+instance Finite ()
+instance Finite Int
+instance Finite IS.IntSet
+instance Finite v => Finite (IM.IntMap v)
+instance (Finite k) => Finite (S.Set k)
+instance (Finite k, Finite v) => Finite (M.Map k v)
+
+-- | Embedding on finite types. Correct by the pigeonhole principle.
+{-# INLINE finiteT #-}
+finiteT :: Finite a => TTest a
+finiteT = WQO id $ \x y -> if x == y then (True, True) else (False, False)
+
+
+-- | Picks out types @a@ that satisfy these two properties:
+--
+--   1. The @Ord@ instance's (>=) defines a total order (i.e. total, antisymmetric, transitive)
+--
+--   2. There is no infinite descending chain @x_1 > x_2 > ...@ where each @x_i :: a@
+class Ord a => WellOrdered a where
+
+instance WellOrdered Int
+
+-- | Embedding on well-orders. Correct because well-orders are strictly stronger than well-quasi-orders.
+{-# INLINE wellOrderedT #-}
+wellOrderedT :: WellOrdered a => TTest a
+wellOrderedT = WQO id $ \x y -> case x `compare` y of LT -> (True, False); EQ -> (True, True); GT -> (False, True)
+
+
+-- | Embedding on sums of things. Correct by appealing to partition of input into |Left|s and |Right|s.
+{-# INLINE eitherT #-}
+eitherT :: TTest a -> TTest b -> TTest (Either a b)
+eitherT (WQO prepare_a embed_a) (WQO prepare_b embed_b) = WQO (either (Left . prepare_a) (Right . prepare_b)) go
+  where go (Left a1)  (Left a2)  = a1 `embed_a` a2
+        go (Right b1) (Right b2) = b1 `embed_b` b2
+        go _          _          = (False, False)
+
+
+-- | Embedding on pairs of things. Correct by a Ramsey argument.
+{-# INLINE pairT #-}
+pairT :: TTest a -> TTest b -> TTest (a, b)
+pairT (WQO prepare_a embed_a) (WQO prepare_b embed_b) = WQO (prepare_a *** prepare_b) go
+  where go (a1, b1) (a2, b2) = zipPair ((&&), (&&)) (a1 `embed_a` a2) (b1 `embed_b` b2)
+
+-- NB: zipPair is lazy in the second pair so that when f/g are (&&) we can doing some evaluation
+zipPair :: (a -> b -> c, d -> e -> f)
+        -> (a, d) -> (b, e) -> (c, f)
+zipPair (f, g) (a, d) ~(b, e) = (f a b, g d e)
+
+-- | Type class of zippable things. Instances should satisfy the laws:
+--
+-- Naturality:
+-- > fmap (f *** g) (zip_ as bs) == zip_ (fmap f as) (fmap g bs)
+--
+-- Information preservation:
+-- > fmap (const ()) as == fmap (const ()) bs
+-- >  ==>
+-- > fmap fst (zip_ ma mb) == ma
+-- > fmap snd (zip_ ma mb) == mb
+class Functor z => Zippable z where
+    zip_ :: z a -> z b -> z (a, b)
+    zip_ = zipWith_ (,)
+
+    zipWith_ :: (a -> b -> c) -> z a -> z b -> z c
+    zipWith_ f as bs = fmap (uncurry f) (zip_ as bs)
+
+instance Zippable [] where
+    zipWith_ = zipWith
+
+instance Zippable IM.IntMap where
+    zipWith_ = IM.intersectionWith
+
+instance Ord k => Zippable (M.Map k) where
+    zipWith_ = M.intersectionWith
+
+-- | Embedding on things with exactly corresponding "shapes", derived from an embedding on the elements.
+-- Correct (for finite "shapes") because it can be implemented by mapping the elements of the container to
+-- a fixed length tuple and then iterating the 'product' lemma.
+{-# INLINE zippableT #-}
+zippableT :: (Finite (t ()), Zippable t, Foldable.Foldable t) => TTest a -> TTest (t a)
+zippableT (WQO prepare embed) = WQO (fmap prepare) $ \xs ys -> (getAll *** getAll) $ Foldable.fold (zipWith_ (\x y -> (All *** All) (embed x y)) xs ys)
+
+
+-- | Picks out types that we can extract a ``domain'' for.
+class Functor f => HasDomain f where
+    type Domain f :: *
+    
+    -- | Extract the domain of the object.
+    --  > domain x == domain y ==> fmap (const ()) x == fmap (const ()) y
+    domain :: f a -> Domain f
+
+instance HasDomain [] where
+    type Domain [] = Nat
+    domain = length
+
+instance HasDomain IM.IntMap where
+    type Domain IM.IntMap = IS.IntSet
+    domain = IM.keysSet
+
+instance Ord k => HasDomain (M.Map k) where
+    type Domain (M.Map k) = S.Set k
+    domain = M.keysSet
+
+-- | Convenience combinator allowing refining a chain of collections with varying domains into several subchains with uniform domains
+{-# INLINE equalDomainT #-}
+equalDomainT :: (HasDomain f, Finite (Domain f))
+             => TTest (f a)
+             -> TTest (f a)
+equalDomainT wqo = cofmap (\x -> (domain x, x)) $ pairT finiteT wqo
+
+
+-- Histories
+-- ~~~~~~~~~
+
+class History h where
+    terminate :: h a -> a -> TermRes h a
+
+data TermRes h a = Stop a | Continue (h a)
+
+isContinue :: TermRes h a -> Bool
+isContinue (Continue _) = True
+isContinue _            = False
+
+
+newtype LinearHistory a = LH { unLH :: a -> TermRes LinearHistory a }
+
+instance History LinearHistory where
+    terminate = unLH
+
+{-# INLINE mkLinearHistory #-}
+mkLinearHistory :: forall a. TTest a -> LinearHistory a
+mkLinearHistory (WQO (prepare :: a -> b) embed) = go_init []
+  where
+    -- Search the history starting with the earliest elements -- i.e. those towards the head of the list
+    go_init abs = LH $ \a -> go [] abs a (prepare a)
+    
+    go :: [(a, b)] -> [(a, b)] -> a -> b -> TermRes LinearHistory a
+    go new_abs []           new_a new_b = Continue $ go_init (reverse ((new_a, new_b):new_abs))
+    go new_abs ((a, b):abs) new_a new_b = case b `embed` new_b of
+        (True, _)  -> Stop a
+        (_, True)  -> go new_abs          abs new_a new_b
+        (_, False) -> go ((a, b):new_abs) abs new_a new_b
+
+
+data GraphicalHistory a = GH { terminateGH :: a -> TermRes GraphicalHistory a, generatedKey :: NodeKey, showGH :: String }
+
+instance Show (GraphicalHistory a) where
+    show = showGH
+
+instance History GraphicalHistory where
+    terminate = terminateGH
+
+{-# INLINE mkGraphicalHistory #-}
+mkGraphicalHistory :: forall a. TTest a -> GraphicalHistory (NodeKey, a)
+mkGraphicalHistory (WQO (prepare :: a -> b) embed) = go_init emptyTopologicalOrder [] 0
+  where
+    go_init topo abs generated_key = GH {
+        terminateGH = \(key, a) -> let new_abkey = generated_key + 1
+                                       Just topo' = insertTopologicalOrder topo (key, new_abkey)
+                                   in go topo' [] abs new_abkey a (prepare a),
+        generatedKey = generated_key,
+        showGH = show ([abkey | (abkey, _a, _b) <- abs], topo)
+      }
+
+    go topo new_abs []                  new_abkey new_a new_b = Continue $ go_init topo (reverse ((new_abkey, new_a, new_b):new_abs)) new_abkey
+    go topo new_abs ((abkey, a, b):abs) new_abkey new_a new_b = case b `embed` new_b of
+       -- Doesn't embed, but embeds the other way: continue, don't include in history (transitivity)
+      (False, True)  -> go topo new_abs                 abs new_abkey new_a new_b
+       -- Doesn't embed either way: continue, but include in history for later checking
+      (False, False) -> go topo ((abkey, a, b):new_abs) abs new_abkey new_a new_b
+       -- Embeds! Potential termination :-(
+      (True, emb) | not emb
+                  , Just topo' <- insertTopologicalOrder topo (new_abkey, abkey)
+                  -- We can continue because we added a "dependency edge" without creating a cycle
+                  -> go topo' ((abkey, a, b):new_abs) abs new_abkey new_a new_b
+                  -- Embeds the other way as well, or creates a cycle: stop
+                  | otherwise
+                  -> Stop (abkey, a)
+
+
+type NodeKey = Int
+
+data TopologicalOrder = TO {
+    ord      :: IM.IntMap Int, -- ^ Maps NodeKey to node position in the topological order.
+                               -- If there exists an edge (x, y) then IM.lookup x ord < IM.lookup y ord
+    minPos   :: Int,
+    maxPos   :: Int,
+    inEdges  :: IM.IntMap IS.IntSet, -- ^ Maps "to"   NodeKey to all "from" nodes
+    outEdges :: IM.IntMap IS.IntSet  -- ^ Maps "from" NodeKey to all "to" nodes
+  } deriving (Show)
+
+emptyTopologicalOrder :: TopologicalOrder
+emptyTopologicalOrder = TO { ord = IM.empty, maxPos = -1, minPos = 0, outEdges = IM.empty, inEdges = IM.empty }
+
+-- | An algorithm for solving the dynamic topological order problem that performs well on
+-- sparse graphs (which I expect to occur a lot) and is empirically (not asymptotically!) fast.
+--
+-- See <http://homepages.mcs.vuw.ac.nz/~djp/files/PK-JEA07.pdf>
+insertTopologicalOrder :: TopologicalOrder -> (NodeKey, NodeKey) -> Maybe TopologicalOrder
+insertTopologicalOrder (TO { ord = ord, maxPos = maxPos, minPos = minPos, outEdges = outEdges, inEdges = inEdges }) (x, y) = case (IM.lookup x ord, IM.lookup y ord) of
+     -- Both nodes do not yet occur in the graph: arbitrarily insert them at the maximum of the ordering
+    (Nothing,  Nothing) -> Just $ TO { ord = IM.insert y (maxPos + 2) $ IM.insert x (maxPos + 1) ord, maxPos = maxPos + 2, minPos = minPos,     outEdges = outEdges', inEdges = inEdges' }
+    -- The "to" node does not yet exist in the graph: insert the "to" node at the maximum of the ordering
+    (Just _,   Nothing) -> Just $ TO { ord = IM.insert y (maxPos + 1)                            ord, maxPos = maxPos + 1, minPos = minPos,     outEdges = outEdges', inEdges = inEdges' }
+    -- The "from" node does not yet exist in the graph: insert the "from" node at the minimum of the ordering
+    (Nothing,  Just _)  -> Just $ TO { ord =                            IM.insert x (minPos - 1) ord, maxPos = maxPos,     minPos = minPos - 1, outEdges = outEdges', inEdges = inEdges' }
+    -- Both nodes already exist in the graph, so this might introduce a cycle (the hard case)
+    (Just ub, Just lb)  -> case ub `compare` lb of
+      -- Self-cycle: immediate failure
+      EQ -> Nothing
+      -- The nodes are already ordered in the existing order
+      LT -> Just $ TO { ord = ord, maxPos = maxPos, minPos = minPos, outEdges = outEdges', inEdges = inEdges' }
+      -- They appear to be unordered in the existing order, try to fix it
+      GT -> dfs_f IM.empty [y] >>= \delta_f -> let delta_b = dfs_b IM.empty [x] in Just $ TO { ord = reorder delta_f delta_b, maxPos = maxPos, minPos = minPos, outEdges = outEdges', inEdges = inEdges' }
+       where
+        -- NB: for convenience the visited "set" actually maps the NodeKey to its position in the input order
+        -- This is because the very next stage would need to look up this information, but we actually have it
+        -- on hand in this previous stage.
+        dfs_f visited []     = Just visited
+        dfs_f visited (n:ns) = case n_ord `compare` ub of
+            GT                      -> dfs_f visited ns
+            _ | IM.member n visited -> dfs_f visited ns
+            EQ -> Nothing -- Cycle detected!
+            LT -> dfs_f (IM.insert n n_ord visited) (IS.fold (:) ns (IM.findWithDefault IS.empty n outEdges))
+          where n_ord = IM.findWithDefault (error "dfs_f: unknown node") n ord
+        
+        dfs_b visited []     = visited
+        dfs_b visited (n:ns) = case lb `compare` n_ord of
+            LT | not (IM.member n visited) -> dfs_b (IM.insert n n_ord visited) (IS.fold (:) ns (IM.findWithDefault IS.empty n inEdges))
+            _                              -> dfs_b visited ns
+          where n_ord = IM.findWithDefault (error "dfs_b: unknown node") n ord
+        
+        sortDelta = sortBy (comparing snd) . IM.toList
+        
+        mergeDelta [] [] = []
+        mergeDelta xs [] = xs
+        mergeDelta [] ys = ys
+        mergeDelta init_xs@((x, x_ord):xs) init_ys@((y, y_ord):ys)
+          | x_ord < y_ord = (x, x_ord):mergeDelta xs init_ys
+          | otherwise     = (y, y_ord):mergeDelta init_xs ys
+
+        -- Reassign available positions in the ordering to affected vertices so that
+        -- all of delta_b are to the left of delta_f
+        reorder unsorted_delta_f unsorted_delta_b = foldr (uncurry IM.insert) ord (l `zip` r)
+          where
+            delta_f = sortDelta unsorted_delta_f
+            delta_b = sortDelta unsorted_delta_b
+            l = map fst (delta_b ++ delta_f)
+            r = map snd (mergeDelta delta_b delta_f)
+  where
+    outEdges' = IM.insertWith IS.union x (IS.singleton y) outEdges
+    inEdges'  = IM.insertWith IS.union y (IS.singleton x) inEdges
--- a/compiler/supercompile/Supercompile/Termination/Generaliser.hs
+++ b/compiler/supercompile/Supercompile/Termination/Generaliser.hs
+module Supercompile.Termination.Generaliser where
+
+import Supercompile.Core.Syntax   (Var)
+import Supercompile.Core.Renaming (Out)
+
+import Supercompile.Evaluator.Syntax
+
+import Supercompile.Utilities (Nat, Tagged, tag, Tag, tagInt, injectTag, FinMap, FinSet)
+
+import Outputable
+
+import qualified Data.IntMap as IM
+import qualified Data.IntSet as IS
+
+
+data Generaliser = Generaliser {
+    generaliseStackFrame  :: Tagged StackFrame -> Bool,
+    generaliseHeapBinding :: Out Var -> HeapBinding -> Bool
+  }
+
+generaliseNothing :: Generaliser
+generaliseNothing = Generaliser (\_ -> False) (\_ _ -> False)
+
+generaliserFromGrowing :: (a -> FinMap Nat) -> a -> a -> Generaliser
+generaliserFromGrowing extract x y | IS.null generalise_what = pprTrace "no growth" (ppr generalise_what) generaliseNothing
+                                   | otherwise               = generaliserFromFinSet generalise_what
+  where generalise_what = IM.keysSet (IM.filter id (IM.intersectionWith (<) (extract x) (extract y)))
+
+generaliserFromFinSet :: FinSet -> Generaliser
+generaliserFromFinSet generalise_what = Generaliser {
+      generaliseStackFrame  = \kf   -> should_generalise (stackFrameTag' kf),
+      generaliseHeapBinding = \_ hb -> maybe False (should_generalise . pureHeapBindingTag') $ heapBindingTag hb
+    }
+  where should_generalise tg = IS.member (tagInt tg) generalise_what
+
+
+{-# INLINE pureHeapBindingTag' #-}
+pureHeapBindingTag' :: Tag -> Tag
+pureHeapBindingTag' = injectTag 5
+
+{-# INLINE stackFrameTag' #-}
+stackFrameTag' :: Tagged StackFrame -> Tag
+stackFrameTag' = injectTag 3 . tag
+
+{-# INLINE qaTag' #-}
+qaTag' :: Anned QA -> Tag
+qaTag' = injectTag 2 . annedTag
--- a/compiler/supercompile/Supercompile/Termination/TagBag.hs
+++ b/compiler/supercompile/Supercompile/Termination/TagBag.hs
+module Supercompile.Termination.TagBag (
+        embedWithTagBags,
+        TagBag, tagBagTagSet, stateTags
+    ) where
+
+import Supercompile.Termination.Combinators
+import Supercompile.Termination.Generaliser
+
+import Supercompile.Evaluator.Syntax
+
+import Supercompile.Utilities
+import Supercompile.StaticFlags (TagBagType(..))
+
+import Unique (mkUniqueGrimily, unpkUnique)
+import Util
+
+import Data.Char
+import qualified Data.Foldable as Foldable
+import qualified Data.IntMap as IM
+import qualified Data.Map as M
+
+
+newtype TagBag = TagBag { unTagBag :: FinMap Nat }
+
+instance Outputable TagBag where
+    ppr tb = hsep [ pPrintTag tag <> (if n == 1 then empty else braces (ppr n))
+                  | (tag, n) <- IM.toList (unTagBag tb) ]
+
+tagBagTagSet :: TagBag -> FinSet
+tagBagTagSet = IM.keysSet . unTagBag
+
+pPrintTag :: Int -> SDoc
+pPrintTag n = (try 'h' 5 `mplus` try 'k' 3 `mplus` try 'q' 2 `mplus` uniq '?' n) `orElse` nat '?' n
+  where try w divisor = do
+          (q, 0) <- return $ n `quotRem` divisor
+          uniq w q
+        
+        uniq w m
+          | 64 < ord_c && ord_c < 128 = Just $ char w <> ppr u
+          | otherwise                 = Nothing
+          where u = mkUniqueGrimily m
+                (c, _) = unpkUnique u
+                ord_c = ord c
+        
+        nat w m | m < 0     = char w <> char '-' <> text (iToBase62 (negate m))
+                | otherwise = char w <>             text (iToBase62 m)
+
+embedWithTagBags :: TagBagType -> (TTest State, State -> State -> Generaliser)
+embedWithTagBags tbt = if tagBagPairwiseGrowth tbt then embedWithTagBags' (zippableT wellOrderedT) else embedWithTagBags' (cofmap Foldable.sum wellOrderedT)
+
+embedWithTagBags' :: (forall f. (Foldable f, Zippable f, Finite (f ())) => TTest (f Nat))
+                  -> (TTest State, State -> State -> Generaliser)
+embedWithTagBags' nats = (cofmap (unTagBag . stateTags) (equalDomainT nats), generaliserFromGrowing (unTagBag . stateTags))
+
+-- NB: I try very hard to avoid creating intermediate tag bags in this function because it
+-- accounts for a staggering fraction of the supercompiler's total allocation
+stateTags :: State -> TagBag
+stateTags (_, Heap h _, k, qa) = -- traceRender ("stateTags (TagBag)", M.map heapBindingTagBag h, map stackFrameTag' k, qaTag' qa) $
+                                      -- traceRender ("stateTags:heap (TagBag)", M.map heapBindingTag h) $
+                                      -- (\res -> traceRender ("stateTags (TagBag)", res) res) $
+                                      TagBag $ pureHeapTagBag h (stackTagBag k (singletonTagBag (qaTag' qa) IM.empty))
+  where
+    heapBindingTagBag :: HeapBinding -> FinMap Nat -> FinMap Nat
+    heapBindingTagBag = maybe id (singletonTagBag . pureHeapBindingTag') . heapBindingTag
+      
+    pureHeapTagBag :: PureHeap -> FinMap Nat -> FinMap Nat
+    pureHeapTagBag = flip $ M.fold heapBindingTagBag -- NB: really a foldr, but the M.foldr synonym was added in a later version of containers
+ 
+    stackTagBag :: Stack -> FinMap Nat -> FinMap Nat
+    stackTagBag = flip $ trainCarFoldr stackFrameTagBag
+
+    stackFrameTagBag :: Tagged StackFrame -> FinMap Nat -> FinMap Nat
+    stackFrameTagBag = singletonTagBag . stackFrameTag'
+ 
+    singletonTagBag :: Tag -> FinMap Nat -> FinMap Nat
+    singletonTagBag (TG i occs) = IM.insert (unFin i) occs
--- a/compiler/supercompile/Supercompile/Utilities.hs
+++ b/compiler/supercompile/Supercompile/Utilities.hs
+{-# LANGUAGE RankNTypes #-} -- For callCC
+module Supercompile.Utilities (
+    module Supercompile.Utilities,
+
+    module UniqSupply,
+    module Unique,
+    module Outputable,
+    
+    module Control.Arrow,
+    module Control.Applicative,
+    module Control.Monad,
+    
+    module Data.Foldable,
+    module Data.Traversable,
+    module Data.Maybe,
+    module Data.List
+  ) where
+
+#include "HsVersions.h"
+
+import UniqSupply
+import UniqSet
+import Unique (Uniquable(..), Unique, getKey)
+import UniqFM (UniqFM, eltsUFM)
+import Maybes (expectJust)
+import Outputable hiding (Depth)
+import State hiding (mapAccumLM)
+
+import Control.Arrow (first, second, (***), (&&&))
+import Control.Applicative (Applicative(..), (<$>), liftA, liftA2, liftA3)
+import Control.Exception (bracket)
+import Control.Monad hiding (join, forM_)
+import Control.Monad.Fix
+
+import Data.Function (on)
+import Data.Maybe
+import Data.Ord
+import Data.List
+import Data.Foldable (Foldable(foldMap), forM_)
+import Data.Traversable (Traversable(traverse))
+import qualified Data.Traversable as Traversable
+import qualified Data.IntMap as IM
+import qualified Data.IntSet as IS
+import qualified Data.Set as S
+import qualified Data.Map as M
+import qualified Data.Graph as G
+import qualified Data.Foldable as Foldable
+import Data.Monoid (Monoid(mappend, mempty))
+
+import System.Directory
+import System.Exit
+import System.IO
+import System.IO.Unsafe (unsafePerformIO)
+import System.Process
+
+
+{-# NOINLINE pprPreview2 #-}
+pprPreview2 :: String -> SDoc -> SDoc -> a -> a
+pprPreview2 fp_base doc1 doc2 x = unsafePerformIO $ do
+    withTempFile (fp_base ++ "-1") $ \fp1 h1 -> do
+        hPutStrLn h1 (showSDoc doc1) >> hFlush h1
+        withTempFile (fp_base ++ "-2") $ \fp2 h2 -> do
+            hPutStrLn h2 (showSDoc doc2) >> hFlush h2
+            ec <- system $ "$EDITOR " ++ fp1 ++ " " ++ fp2
+            case ec of ExitSuccess   -> return x
+                       ExitFailure c -> error $ "pprPreview2(" ++ fp_base ++ "): preview failed with exit code " ++ show c
+
+withTempFile :: String -> (FilePath -> Handle -> IO a) -> IO a
+withTempFile fp_base act = do
+    tmp_dir <- getTemporaryDirectory
+    bracket (openTempFile tmp_dir fp_base) (hClose . snd) (uncurry act)
+
+
+
+-- | Copointed functors. The defining property is:
+--
+--   extract (fmap f a) == f (extract a)
+class Functor f => Copointed f where
+    extract :: f a -> a
+
+instance Copointed ((,) a) where
+    extract = snd
+
+
+newtype Wrapper1 f a = Wrapper1 { unWrapper1 :: f a }
+
+
+class Show1 f where
+    showsPrec1 :: Show a => Int -> f a -> ShowS
+
+instance (Show1 f, Show a) => Show (Wrapper1 f a) where
+    showsPrec prec = showsPrec1 prec . unWrapper1
+
+
+class Eq1 f where
+    eq1 :: Eq a => f a -> f a -> Bool
+
+instance (Eq1 f, Eq a) => Eq (Wrapper1 f a) where
+    (==) = eq1 `on` unWrapper1
+
+
+class Eq1 f => Ord1 f where
+    compare1 :: Ord a => f a -> f a -> Ordering
+
+instance (Ord1 f, Ord a) => Ord (Wrapper1 f a) where
+    compare = compare1 `on` unWrapper1
+
+
+-- Because we have this class, we can define Outputable for
+-- Supercompile.Core.Syntax.TermF without UndecidableInstances
+class Outputable1 f where
+    pprPrec1 :: Outputable a => Rational -> f a -> SDoc
+    
+    ppr1 :: Outputable a => f a -> SDoc
+    ppr1 = pprPrec1 noPrec
+
+instance (Outputable1 f, Outputable a) => Outputable (Wrapper1 f a) where
+    pprPrec prec = pprPrec1 prec . unWrapper1
+
+-- | Parenthesize an value if the boolean is true.
+prettyParen :: Bool -> SDoc -> SDoc
+prettyParen False = id
+prettyParen True = parens
+
+appPrec, opPrec, noPrec :: Num a => a
+appPrec = 2    -- Argument of a function application
+opPrec  = 1    -- Argument of an infix operator
+noPrec  = 0    -- Others
+
+
+angles, bananas :: SDoc -> SDoc
+angles d = Outputable.char '<' <> d <> Outputable.char '>'
+bananas d = text "(|" <> d <> text "|)"
+
+
+newtype PrettyFunction = PrettyFunction (Rational -> SDoc)
+
+instance Outputable PrettyFunction where
+    pprPrec prec (PrettyFunction f) = f prec
+
+asPrettyFunction :: Outputable a => a -> PrettyFunction
+asPrettyFunction x = PrettyFunction (\prec -> pprPrec prec x)
+
+asPrettyFunction1 :: (Outputable1 f, Outputable a) => f a -> PrettyFunction
+asPrettyFunction1 = asPrettyFunction . Wrapper1
+
+
+instance Outputable IS.IntSet where
+    ppr xs = braces $ hsep (punctuate comma (map ppr $ IS.toList xs))
+
+
+newtype PrettyDoc = PrettyDoc SDoc
+
+instance Outputable PrettyDoc where
+    ppr (PrettyDoc doc) = doc
+
+
+instance Foldable ((,) a) where
+    foldMap = Traversable.foldMapDefault
+
+instance Traversable ((,) a) where
+    traverse f (x, y) = fmap ((,) x) (f y)
+
+
+newtype Identity a = I { unI :: a }
+
+instance Copointed Identity where
+    extract = unI
+
+instance Monad Identity where
+    return = I
+    mx >>= fxmy = fxmy (unI mx)
+
+instance Functor Identity where
+    fmap f (I x) = I (f x)
+
+instance Applicative Identity where
+    pure = I
+    I f <*> I x = I (f x)
+
+instance Foldable Identity where
+    foldMap f (I x) = f x
+
+instance Traversable Identity where
+    traverse f (I x) = pure I <*> f x
+
+instance Show1 Identity where
+    showsPrec1 prec (I x) = showParen (prec >= appPrec) (showString "Identity" . showsPrec appPrec x)
+
+instance Eq1 Identity where
+    eq1 (I x1) (I x2) = x1 == x2
+
+instance Ord1 Identity where
+    compare1 (I x1) (I x2) = x1 `compare` x2
+
+instance Outputable1 Identity where
+    pprPrec1 prec (I x) = pprPrec prec x
+
+instance Show a => Show (Identity a) where
+    showsPrec = showsPrec1
+
+instance Eq a => Eq (Identity a) where
+    (==) = eq1
+
+instance Ord a => Ord (Identity a) where
+    compare = compare1
+
+instance Outputable a => Outputable (Identity a) where
+    pprPrec = pprPrec1
+
+
+newtype (O f g) a = Comp { unComp :: f (g a) }
+
+infixr 9 `O`
+
+instance (Copointed f, Copointed g) => Copointed (O f g) where
+    extract = extract . extract . unComp
+
+instance (Functor f, Show1 f, Show1 g) => Show1 (O f g) where
+    showsPrec1 prec (Comp x) = showParen (prec >= appPrec) (showString "Comp" . showsPrec1 appPrec (fmap Wrapper1 x))
+
+instance (Functor f, Eq1 f, Eq1 g) => Eq1 (O f g) where
+    eq1 (Comp x1) (Comp x2) = fmap Wrapper1 x1 `eq1` fmap Wrapper1 x2
+
+instance (Functor f, Ord1 f, Ord1 g) => Ord1 (O f g) where
+    compare1 (Comp x1) (Comp x2) = fmap Wrapper1 x1 `compare1` fmap Wrapper1 x2
+
+instance (Functor f, Outputable1 f, Outputable1 g) => Outputable1 (O f g) where
+    pprPrec1 prec (Comp x) = pprPrec1 prec (fmap Wrapper1 x)
+
+instance (Functor f, Show1 f, Show1 g, Show a) => Show (O f g a) where
+    showsPrec = showsPrec1
+
+instance (Functor f, Eq1 f, Eq1 g, Eq a) => Eq (O f g a) where
+    (==) = eq1
+
+instance (Functor f, Ord1 f, Ord1 g, Ord a) => Ord (O f g a) where
+    compare = compare1
+
+instance (Functor f, Outputable1 f, Outputable1 g, Outputable a) => Outputable (O f g a) where
+    pprPrec = pprPrec1
+
+instance (Functor f, Functor g) => Functor (O f g) where
+    fmap f (Comp x) = Comp (fmap (fmap f) x)
+
+instance (Applicative f, Applicative g) => Applicative (O f g) where
+    pure = Comp . pure . pure
+    mf <*> mx = Comp $ liftA2 (<*>) (unComp mf) (unComp mx)
+
+instance (Foldable f, Foldable g) => Foldable (O f g) where
+    foldMap f = foldMap (foldMap f) . unComp
+
+instance (Traversable f, Traversable g) => Traversable (O f g) where
+    traverse f = fmap Comp . traverse (traverse f) . unComp
+
+
+-- | Natural numbers on the cheap (for efficiency reasons)
+type Nat = Int
+
+
+newtype Fin = Fin { unFin :: Int } deriving (Eq, Ord)
+
+instance Show Fin where
+    show (Fin x) = show x
+
+instance Outputable Fin where
+    pprPrec prec (Fin x) = pprPrec prec x
+
+
+type FinSet = IS.IntSet
+type FinMap = IM.IntMap
+
+
+data Tag = TG { tagFin :: Fin, tagOccurrences :: Nat } deriving (Eq, Ord, Show)
+
+instance Outputable Tag where
+    ppr (TG i occs) = ppr i <> brackets (ppr occs)
+
+mkTag :: Int -> Tag
+mkTag i = TG (Fin i) 1
+
+{-# INLINE injectTag #-} -- Was accounting for 2% of allocations
+injectTag :: Int -> Tag -> Tag
+injectTag cls (TG (Fin i) occs) = TG (Fin (cls * i)) occs
+
+tagInt :: Tag -> Int
+tagInt = unFin . tagFin
+
+
+data Tagged a = Tagged { tag :: !Tag, tagee :: !a }
+
+instance Copointed Tagged where
+    extract = tagee
+
+instance Functor Tagged where
+    fmap f (Tagged tg x) = Tagged tg (f x)
+
+instance Foldable Tagged where
+    foldMap f (Tagged _ x) = f x
+
+instance Traversable Tagged where
+    traverse f (Tagged tg x) = pure (Tagged tg) <*> f x
+
+instance Show1 Tagged where
+    showsPrec1 prec (Tagged tg x) = showParen (prec >= appPrec) (showString "Tagged" . showsPrec appPrec tg . showsPrec appPrec x)
+
+instance Eq1 Tagged where
+    eq1 (Tagged tg1 x1) (Tagged tg2 x2) = tg1 == tg2 && x1 == x2
+
+instance Ord1 Tagged where
+    compare1 (Tagged tg1 x1) (Tagged tg2 x2) = (tg1, x1) `compare` (tg2, x2)
+
+instance Outputable1 Tagged where
+    pprPrec1 prec (Tagged tg x) = braces (ppr tg) <+> pprPrec prec x
+
+instance Show a => Show (Tagged a) where
+    showsPrec = showsPrec1
+
+instance Eq a => Eq (Tagged a) where
+    (==) = eq1
+
+instance Ord a => Ord (Tagged a) where
+    compare = compare1
+
+instance Outputable a => Outputable (Tagged a) where
+    pprPrec = pprPrec1
+
+
+type Size = Int
+
+data Sized a = Sized { size :: !Size, sizee :: !a }
+
+instance Copointed Sized where
+    extract = sizee
+
+instance Functor Sized where
+    fmap f (Sized sz x) = Sized sz (f x)
+
+instance Foldable Sized where
+    foldMap f (Sized _ x) = f x
+
+instance Traversable Sized where
+    traverse f (Sized sz x) = pure (Sized sz) <*> f x
+
+instance Show1 Sized where
+    showsPrec1 prec (Sized sz x) = showParen (prec >= appPrec) (showString "Sized" . showsPrec appPrec sz . showsPrec appPrec x)
+
+instance Eq1 Sized where
+    eq1 (Sized sz1 x1) (Sized sz2 x2) = sz1 == sz2 && x1 == x2
+
+instance Ord1 Sized where
+    compare1 (Sized sz1 x1) (Sized sz2 x2) = (sz1, x1) `compare` (sz2, x2)
+
+instance Outputable1 Sized where
+    pprPrec1 prec (Sized sz x) = bananas (text (show sz)) <> pprPrec prec x
+
+instance Show a => Show (Sized a) where
+    showsPrec = showsPrec1
+
+instance Eq a => Eq (Sized a) where
+    (==) = eq1
+
+instance Ord a => Ord (Sized a) where
+    compare = compare1
+
+instance Outputable a => Outputable (Sized a) where
+    pprPrec = pprPrec1
+
+
+pPrint :: Outputable a => a -> SDoc
+pPrint = ppr
+
+pPrintPrec :: Outputable a => Rational -> a -> SDoc
+pPrintPrec = pprPrec
+
+
+newtype Down a = Down { unDown :: a } deriving (Eq)
+
+instance Ord a => Ord (Down a) where
+    Down a `compare` Down b = b `compare` a
+
+
+fmapEither :: (a -> b) -> (c -> d) -> Either a c -> Either b d
+fmapEither f g = either (Left . f) (Right . g)
+
+orElse :: Maybe a -> a -> a
+orElse = flip fromMaybe
+
+extractJusts :: (a -> Maybe b) -> [a] -> ([b], [a])
+extractJusts p = foldr step ([], [])
+  where step x rest | Just y <- p x = first  (y:) rest
+                    | otherwise     = second (x:) rest
+
+
+checkEqual :: Eq a => a -> a -> Maybe a
+checkEqual = checkEqualBy (==)
+
+checkEqualBy :: (a -> a -> Bool) -> a -> a -> Maybe a
+checkEqualBy eq x y | x `eq` y  = Just y
+                    | otherwise = Nothing
+
+
+secondM :: Functor f => (b -> f c) -> (a, b) -> f (a, c)
+secondM f (x, y) = fmap ((,) x) (f y)
+
+-- Stricter than 'first'
+first2 :: (a -> c) -> (a, b) -> (c, b)
+first2 f (a, b) = (f a, b)
+
+-- Stricter than 'second'
+second2 :: (b -> c) -> (a, b) -> (a, c)
+second2 f (a, b) = (a, f b)
+
+first3 :: (a -> d) -> (a, b, c) -> (d, b, c)
+first3 f (a, b, c) = (f a, b, c)
+
+second3 :: (b -> d) -> (a, b, c) -> (a, d, c)
+second3 f (a, b, c) = (a, f b, c)
+
+third3 :: (c -> d) -> (a, b, c) -> (a, b, d)
+third3 f (a, b, c) = (a, b, f c)
+
+first4 :: (a -> e) -> (a, b, c, d) -> (e, b, c, d)
+first4 f (a, b, c, d) = (f a, b, c, d)
+
+second4 :: (b -> e) -> (a, b, c, d) -> (a, e, c, d)
+second4 f (a, b, c, d) = (a, f b, c, d)
+
+third4 :: (c -> e) -> (a, b, c, d) -> (a, b, e, d)
+third4 f (a, b, c, d) = (a, b, f c, d)
+
+fourth4 :: (d -> e) -> (a, b, c, d) -> (a, b, c, e)
+fourth4 f (a, b, c, d) = (a, b, c, f d)
+
+
+uncons :: [a] -> Maybe (a, [a])
+uncons []     = Nothing
+uncons (x:xs) = Just (x, xs)
+
+safeHead :: [a] -> Maybe a
+safeHead []    = Nothing
+safeHead (x:_) = Just x
+
+safeTail :: [a] -> [a]
+safeTail []     = []
+safeTail (_:xs) = xs
+
+expectHead :: String -> [a] -> a
+expectHead s = expectJust s . safeHead
+
+splitBy :: [b] -> [a] -> ([a], Either (b, [b]) [a])
+splitBy []     xs     = ([], Right xs)
+splitBy (y:ys) []     = ([], Left (y, ys))
+splitBy (_:ys) (x:xs) = first (x:) $ splitBy ys xs
+
+splitByReverse :: [b] -> [a] -> (Either ([b], b) [a], [a])
+splitByReverse ys xs = case splitBy (reverse ys) (reverse xs) of (xs1, ei_ys1_xs2) -> (either (Left . first reverse . swp) (Right . reverse) ei_ys1_xs2, reverse xs1)
+  where swp = uncurry (flip (,))
+
+listContexts :: [a] -> [([a], a, [a])]
+listContexts xs = zipWith (\is (t:ts) -> (is, t, ts)) (inits xs) (init (tails xs))
+
+bagContexts :: [a] -> [(a, [a])]
+bagContexts xs = [(x, is ++ ts) | (is, x, ts) <- listContexts xs]
+
+dropLastWhile :: (a -> Bool) -> [a] -> [a]
+dropLastWhile p = reverse . dropWhile p . reverse
+
+takeFirst :: (a -> Bool) -> [a] -> Maybe (a, [a])
+takeFirst f = go []
+  where go _   []     = Nothing
+        go acc (x:xs) = if f x
+                        then Just (x, reverse acc ++ xs)
+                        else go (x:acc) xs
+
+takeWhileJust :: (a -> Maybe b) -> [a] -> ([b], [a])
+takeWhileJust f = go
+  where
+    go [] = ([], [])
+    go (x:xs) = case f x of
+        Nothing -> ([], x:xs)
+        Just y  -> first (y:) $ go xs
+
+maximumsComparing :: Ord b => (a -> b) -> [a] -> [a]
+maximumsComparing _ []     = error "maximumsComparing: empty input"
+maximumsComparing f (x:xs) = go (f x) [x] xs
+  where
+    go _       maxs []     = reverse maxs
+    go the_max maxs (x:xs) = case this `compare` the_max of
+        LT -> go the_max maxs     xs
+        EQ -> go the_max (x:maxs) xs
+        GT -> go this    [x]      xs
+      where this = f x
+
+accumLN :: (acc -> (acc, a)) -> acc -> Int -> (acc, [a])
+accumLN f = go
+  where
+    go acc n | n <= 0            = (acc, []) 
+             | (acc, x) <- f acc = second (x:) (go acc (n - 1))
+
+sumMap :: (Foldable f, Num b) => (a -> b) -> f a -> b
+sumMap f = Foldable.foldr (\x n -> f x + n) 0
+
+sumMapMonoid :: (Foldable f, Monoid b) => (a -> b) -> f a -> b
+sumMapMonoid f = Foldable.foldr (\x n -> f x `mappend` n) mempty
+
+{-# INLINE groups #-}
+groups :: Ord b => (a -> b) -> (a -> c) -> [a] -> [(b, [c])]
+groups f g xs = runs f g (sortBy (comparing f) xs)
+
+{-# INLINE runs #-}
+runs :: Eq b => (a -> b) -> (a -> c) -> [a] -> [(b, [c])]
+runs _ _ []     = []
+runs f g (x:xs) = go (f x) [g x] xs
+  where go b pending []     = [(b, reverse pending)]
+        go b pending (x:xs)
+          | b == b'   = go b (g x:pending) xs
+          | otherwise = (b, reverse pending) : go b' [g x] xs
+          where b' = f x
+
+distinct :: Ord a => [a] -> Bool
+distinct xs = length xs == S.size (S.fromList xs)
+
+the_maybe :: Eq a => [a] -> Maybe a
+the_maybe [] = error "the_maybe: empty list"
+the_maybe (x:xs) = go x xs
+  where go x []                 = Just x
+        go x (y:ys) | x == y    = go x ys
+                    | otherwise = Nothing
+
+
+instance Functor G.SCC where
+    fmap f (G.AcyclicSCC x) = G.AcyclicSCC (f x)
+    fmap f (G.CyclicSCC xs) = G.CyclicSCC (fmap f xs)
+
+-- | Orders elements of a map into dependency order insofar as that is possible.
+--
+-- This function ignores any elements reported as reachable that are not present in the input.
+--
+-- An element (b1 :: b) strictly precedes (b2 :: b) in the output whenever b1 is reachable from b2 but not vice versa.
+-- Element b1 occurs in the same SCC as b2 whenever both b1 is reachable from b2 and b1 is reachable from b2.
+topologicalSort :: Ord a => (b -> UniqFM a) -> M.Map a b -> [G.SCC (a, b)]
+topologicalSort f got = [fmap (\(b, a, _) -> (a, b)) scc | scc <- G.stronglyConnCompR [(b, a, eltsUFM (f b)) | (a, b) <- M.toList got]]
+
+
+restrict :: Ord k => M.Map k v -> S.Set k -> M.Map k v
+-- restrict m s
+--   | M.size m < S.size s = M.filterWithKey (\k _ -> k `S.member` s) m                                                   -- O(m * log s)
+--   | otherwise           = S.fold (\k out -> case M.lookup k m of Nothing -> out; Just v -> M.insert k v out) M.empty s -- O(s * log m)
+restrict m s = M.fromDistinctAscList $ merge (M.toAscList m) (S.toAscList s)
+  where
+    -- Theoretically O(m + s), so should outperform previous algorithm...
+    merge _              []       = []
+    merge []             _        = []
+    merge ((k_m, v):kvs) (k_s:ks) = case compare k_m k_s of
+        LT ->          merge kvs            (k_s:ks)
+        EQ -> (k_m, v):merge kvs            ks
+        GT ->          merge ((k_m, v):kvs) ks
+
+exclude :: Ord k => M.Map k v -> S.Set k -> M.Map k v
+--exclude m s = M.filterWithKey (\k _ -> k `S.notMember` s) m -- O(m * log s)
+exclude m s = M.fromDistinctAscList $ merge (M.toAscList m) (S.toAscList s)
+  where
+    -- Theoretically O(m + s), so should outperform previous algorithm...
+    merge kvs            []       = kvs
+    merge []             _        = []
+    merge ((k_m, v):kvs) (k_s:ks) = case compare k_m k_s of
+        LT -> (k_m, v):merge kvs            (k_s:ks)
+        EQ ->          merge kvs            ks
+        GT ->          merge ((k_m, v):kvs) ks
+
+
+dataSetToVarSet :: Uniquable a => S.Set a -> UniqSet a
+dataSetToVarSet = mkUniqSet . S.toList
+
+varSetToDataMap :: Ord a => b -> UniqSet a -> M.Map a b
+varSetToDataMap v = M.fromList . map (flip (,) v) . uniqSetToList
+
+restrictDataMapVarSet :: (Ord k, Uniquable k) => M.Map k v -> UniqSet k -> M.Map k v
+restrictDataMapVarSet m s = M.filterWithKey (\k _v -> k `elementOfUniqSet` s) m
+
+
+class (Functor t, Foldable t) => Accumulatable t where
+    mapAccumT  ::            (acc -> x ->   (acc, y)) -> acc -> t x ->   (acc, t y)
+    mapAccumTM :: Monad m => (acc -> x -> m (acc, y)) -> acc -> t x -> m (acc, t y)
+    
+    mapAccumT f acc x = unI (mapAccumTM (\acc' x' -> I (f acc' x')) acc x)
+
+instance Accumulatable [] where
+    mapAccumT  = mapAccumL
+    mapAccumTM = mapAccumLM
+
+{-# INLINE mapAccumLM #-}
+mapAccumLM :: Monad m => (acc -> x -> m (acc, y)) -> acc -> [x] -> m (acc, [y])
+mapAccumLM f = go []
+  where
+    go ys acc []     = return (acc, reverse ys)
+    go ys acc (x:xs) = do
+      (acc, y) <- f acc x
+      go (y:ys) acc xs
+
+{-# INLINE foldToMapAccumL #-}
+foldToMapAccumL :: (forall acc'. (x -> acc' -> acc') -> acc' -> f_x -> acc')
+                -> (acc -> x -> (acc, y))
+                -> acc -> f_x -> (acc, [y])
+foldToMapAccumL fold f init_acc xs = fold (\x (acc, ys) -> case f acc x of (acc', y) -> (acc', y:ys)) (init_acc, []) xs
+
+
+{-# INLINE traverseWithKey #-}
+-- INLINE so it gets inlined before specialisation, eta-expanded so any automatic SCCs
+-- don't interfere with inlining (inlining doesn't look through SCCs for arguments!)
+traverseWithKey :: Applicative t => (k -> a -> t b) -> M.Map k a -> t (M.Map k b)
+#if (MIN_VERSION_containers(0,4,3))
+traverseWithKey f kvs = M.traverseWithKey f kvs
+#else
+traverseWithKey f = traverse (uncurry f) . M.mapWithKey (\k v -> (k, v))
+#endif
+
+newtype Discard t a = Discard { unDiscard :: t () }
+
+instance Functor (Discard t) where
+    fmap _ = Discard . unDiscard
+
+instance Applicative t => Applicative (Discard t) where
+    pure _ = Discard (pure ())
+    mf <*> mx = Discard $ fmap (\() () -> ()) (unDiscard mf) <*> unDiscard mx
+
+newtype StateTL s m a = StateTL { unStateTL :: s -> m s }
+
+instance Functor (StateTL s m) where
+    fmap _ = StateTL . unStateTL
+
+instance Monad m => Applicative (StateTL s m) where
+    pure _ = StateTL return
+    mf <*> mx = StateTL $ \s -> unStateTL mx s >>= unStateTL mf
+
+newtype State2L s1 s2 a = State2L { unState2L :: s1 -> s2 -> (s1, s2) }
+
+instance Functor (State2L s1 s2) where
+    fmap _ = State2L . unState2L
+
+instance Applicative (State2L s1 s2) where
+    pure _ = State2L (,)
+    mf <*> mx = State2L $ \s1 s2 -> case unState2L mf s1 s2 of (s1, s2) -> unState2L mx s1 s2 -- NB: left side first
+
+newtype State3R s1 s2 s3 a = State3R { unState3R :: s1 -> s2 -> s3 -> (s1, s2, s3) }
+
+instance Functor (State3R s1 s2 s3) where
+    fmap _ = State3R . unState3R
+
+instance Applicative (State3R s1 s2 s3) where
+    pure _ = State3R (,,)
+    mf <*> mx = State3R $ \s1 s2 s3 -> case unState3R mx s1 s2 s3 of (s1, s2, s3) -> unState3R mf s1 s2 s3 -- NB: right side first
+
+{-# INLINE traverseWithKey_ #-}
+traverseWithKey_ :: Applicative t => (k -> a -> t b) -> M.Map k a -> t ()
+traverseWithKey_ f kvs = unDiscard (traverseWithKey (\k v -> Discard (void (f k v))) kvs)
+
+{-# INLINE foldlWithKeyM' #-}
+foldlWithKeyM' :: Monad m
+               => (a -> k -> v -> m a)
+               -> a -> M.Map k v -> m a
+foldlWithKeyM' f a kvs = unStateTL (traverseWithKey (\k v -> StateTL $ \a -> f a k v) kvs) a
+
+-- You might wonder why we don't just use foldlWithKey and make the accumulator into a pair. The reason is that
+-- if we do that then (due to a problem in the strictness analyser, which I've emailed Simon about) the resulting
+-- loop will allocate a new pair on every iteration. If we specialise the traverseWithKey code instead then we can
+-- pass the components of the pair in seperate arguments and totally avoid the allocations. This is a Big Win,
+-- since prepareTerm was accounting for 10% of all allocations in some of my tests.
+{-# INLINE foldl2WithKey' #-}
+foldl2WithKey' :: ((a1, a2) -> k -> v -> (a1, a2)) -> (a1, a2) -> M.Map k v -> (a1, a2)
+foldl2WithKey' f (a1, a2) kvs = unState2L (traverseWithKey (\k v -> State2L $ \a1 a2 -> f (a1, a2) k v) kvs) a1 a2
+
+{-# INLINE foldr3WithKey' #-}
+foldr3WithKey' :: (k -> v -> (a1, a2, a3) -> (a1, a2, a3)) -> (a1, a2, a3) -> M.Map k v -> (a1, a2, a3)
+foldr3WithKey' f (a1, a2, a3) kvs = unState3R (traverseWithKey (\k v -> State3R $ \a1 a2 a3 -> f k v (a1, a2, a3)) kvs) a1 a2 a3
+
+
+traverseAll :: Traversable t => ([a] -> (c, [b])) -> t a -> (c, t b)
+traverseAll f t = if null used_as' then (c, t') else error "traverseAll: replacing with too many elements"
+  where
+    (t', (rev_as, used_as')) = runState (traverse go t) ([], as')
+    (c, as') = f (reverse rev_as)
+    go a = State $ \(as, ~(a':as')) -> (# a', (a:as, as') #)
+
+traverseSome :: Traversable t => (a -> Bool) -> ([a] -> (c, [a])) -> t a -> (c, t a)
+traverseSome p f t = if null used_as' then (c, t') else error "traverseSome: replacing with too many elements"
+  where
+    (t', (rev_as, used_as')) = runState (traverse go t) ([], as')
+    (c, as') = f (reverse rev_as)
+    go a | p a       = State $ \(as, ~(a':as')) -> (# a', (a:as, as') #)
+         | otherwise = return a
+
+
+{-# INLINE zipWithEqualM #-}
+zipWithEqualM :: Monad m => (a -> b -> m c) -> [a] -> [b] -> m [c]
+zipWithEqualM f = go
+  where
+    go []     []     = return []
+    go (x:xs) (y:ys) = liftM2 (:) (f x y) (go xs ys)
+    go _ _ = fail "zipWithEqualM"
+
+{-# INLINE foldZipEqualM #-}
+foldZipEqualM :: Monad m => (a -> b -> c -> m a) -> a -> [b] -> [c] -> m a
+foldZipEqualM f = go
+  where
+   go acc []     []     = return acc
+   go acc (x:xs) (y:ys) = f acc x y >>= \acc' -> go acc' xs ys
+   go _ _ _ = fail "foldZipEqualM"
+
+{-# INLINE listExtensionBy #-}
+listExtensionBy :: (a1 -> a2 -> Maybe a)
+                -> [a1] -> [a2]
+                -> ([a1], [a2], [a])
+listExtensionBy f xs ys = go (reverse xs) (reverse ys) []
+  where
+    go (x:xs) (y:ys) zs | Just z <- f x y = go xs ys (z:zs)
+    go xs     ys     zs                   = (xs, ys, zs)
+
+
+zipMaybeWithEqual :: String
+                  -> (a -> b -> c)
+                  -> Maybe a -> Maybe b -> Maybe c
+zipMaybeWithEqual _   _ Nothing   Nothing = Nothing
+zipMaybeWithEqual msg _ Nothing  (Just _) = error ("zipMaybeWithEqual(Nothing,Just):" ++ msg)
+zipMaybeWithEqual msg _ (Just _)  Nothing = error ("zipMaybeWithEqual(Just,Nothing):" ++ msg)
+zipMaybeWithEqual _   f (Just x) (Just y) = Just (f x y)
+
+
+zipPair :: (a -> b -> c)
+        -> (d -> e -> f)
+        -> (a, d)
+        -> (b, e)
+        -> (c, f)
+zipPair abc def (a, d) (b, e) = (abc a b, def d e)
+
+
+-- | Splits up a number evenly across several partitions in proportions to weights given to those partitions.
+--
+-- > sum (apportion n weights) == n
+--
+-- Annoyingly, it is important that this works properly if n is negative as well -- these can occur
+-- when we have turned off deed checking. I don't care about handling negative weights.
+apportion :: Int -> [Int] -> [Int]
+apportion _      []        = error "apportion: empty list"
+apportion orig_n weighting
+  | orig_n < 0 = map negate $ apportion (negate orig_n) weighting
+  | otherwise  = result
+  where
+    fracs :: [Rational]
+    fracs = ASSERT2(denominator /= 0, text "apportion: must have at least one non-zero weight")
+            map (\numerator -> fromIntegral numerator / denominator) weighting
+      where denominator = fromIntegral (sum weighting)
+    
+    -- Here is the idea:
+    --  1) Do one pass through the list of fractians
+    --  2) Start by allocating the floor of the number of "n" that we should allocate to this weight of the fraction
+    --  3) Accumulate the fractional pieces and the indexes that generated them
+    --  4) Use circular programming to feed the list of fractional pieces that we actually allowed the allocation
+    --     of back in to the one pass we are doing over the list
+    ((_, remaining, final_deserving), result) = mapAccumL go (0 :: Int, orig_n, []) fracs
+    go (i, n, deserving) frac = ((i + 1, n - whole, (i, remainder) : deserving),
+                                 whole + if i `elem` final_deserving_allowed then 1 else 0)
+      where (whole, remainder) = properFraction (frac * fromIntegral orig_n)
+    
+    -- We should prefer to allocate pieces to those bits of the fraction where the error (i.e. the fractional part) is greatest.
+    -- We cannot allocate more of these "fixup" pieces than we had "n" left at the end of the first pass.
+    final_deserving_allowed = map fst (take remaining (sortBy (comparing (Down . snd)) final_deserving))
+
+
+{-# NOINLINE prettyUniqSupply #-}
+hFunctionsUniqSupply, supercompileUniqSupply, anfUniqSupply, expandUniqSupply, reduceUniqSupply, tagUniqSupply, prettyUniqSupply, matchUniqSupply, splitterUniqSupply :: UniqSupply
+supercompileUniqSupply = unsafePerformIO $ mkSplitUniqSupply 'p'
+(hFunctionsUniqSupply:anfUniqSupply:expandUniqSupply:reduceUniqSupply:tagUniqSupply:prettyUniqSupply:matchUniqSupply:splitterUniqSupply:_) = listSplitUniqSupply supercompileUniqSupply
+
+
+infixr 5 `Car`
+
+data Train a b = Car a (Train a b) | Loco b
+
+instance (Outputable a, Outputable b) => Outputable (Train a b) where
+    ppr xs = brackets (fsep (punctuate comma (trainFoldr (\a -> (ppr a :)) (\b -> (ppr b :)) [] xs)))
+
+{-# INLINE trainAppend #-}
+trainAppend :: Train a b -> (b -> Train a b') -> Train a b'
+trainAppend init_abs mk_tl = go init_abs
+  where go (Car a abs) = Car a (go abs)
+        go (Loco b)    = mk_tl b
+
+{-# INLINE fmapTrain #-}
+fmapTrain :: (a -> a') -> (b -> b') -> Train a b -> Train a' b'
+fmapTrain f g = go
+  where go (Car a abs) = Car (f a) (go abs)
+        go (Loco b)    = Loco (g b)
+
+fmapCars :: (a -> a') -> Train a b -> Train a' b
+fmapCars f = fmapTrain f id
+
+fmapLoco :: (b -> b') -> Train a b -> Train a b'
+fmapLoco f abs = trainAppend abs (Loco . f)
+
+nullTrain :: Train a b -> Bool
+nullTrain (Car _ _) = False
+nullTrain (Loco _)  = True
+
+unconsTrain :: Train a b -> Maybe (a, Train a b)
+unconsTrain (Car a abs) = Just (a, abs)
+unconsTrain (Loco _)    = Nothing
+
+trainInit :: (a -> b -> b) -> Train a b -> Train a b
+trainInit _ (Loco _)    = error "trainInit"
+trainInit f (Car a abs) = go a abs
+  where go a (Loco b)     = Loco (f a b)
+        go a (Car a' abs) = Car a (go a' abs)
+
+trainToList :: Train a b -> ([a], b)
+trainToList (Car a abs) = first (a:) (trainToList abs)
+trainToList (Loco b)    = ([], b)
+
+trainLoco :: Train a b -> b
+trainLoco (Car _ abs) = trainLoco abs
+trainLoco (Loco b)    = b
+
+trainCars :: Train a b -> [a]
+trainCars (Car a abs) = a : trainCars abs
+trainCars (Loco _)    = []
+
+trainFirst :: Train a a -> a
+trainFirst (Car a _) = a
+trainFirst (Loco a)  = a
+
+trainHead :: Train a b -> Maybe a
+trainHead (Car a _) = Just a
+trainHead (Loco _)  = Nothing
+
+trainCarFoldl' :: (c -> a -> c) -> c -> Train a b -> c
+trainCarFoldl' f_car = trainFoldl' f_car (\s _a -> s)
+
+{-# INLINE trainFoldl' #-}
+trainFoldl' :: (c -> a -> c) -> (c -> b -> d) -> c -> Train a b -> d
+trainFoldl' f_car f_loco = go
+  where go s (Loco b)    = s `seq` f_loco s b
+        go s (Car a abs) = s `seq` go (f_car s a) abs
+
+trainCarFoldr :: (a -> c -> c) -> c -> Train a b -> c
+trainCarFoldr f_car = trainFoldr f_car (\_b s -> s)
+
+{-# INLINE trainFoldr #-}
+trainFoldr :: (a -> d -> d) -> (b -> c -> d) -> c -> Train a b -> d
+trainFoldr f_car f_loco = go
+  where go s (Loco b)    = f_loco b s
+        go s (Car a abs) = f_car a (go s abs)
+
+trainCarMapAccumL :: (acc -> a -> (acc, a')) -> acc -> Train a b -> (acc, Train a' b)
+trainCarMapAccumL f_car = trainMapAccumL f_car (,)
+
+{-# INLINE trainMapAccumL #-}
+trainMapAccumL :: (acc -> a -> (acc, a')) -> (acc -> b -> (acc, b')) -> acc -> Train a b -> (acc, Train a' b')
+trainMapAccumL f_car f_loco = go
+  where go s (Loco b) = (s', Loco b')
+          where (s', b') = f_loco s b
+        go s (Car a abs) = second (Car a') (go s' abs)
+          where (s', a') = f_car s a
+
+{-# INLINE trainLeftExtensionBy #-}
+trainLeftExtensionBy :: (a1 -> a2 -> Maybe a)
+                     -> (b1 -> b2 -> b)
+                     -> Train a1 b1 -- ^ Longer list
+                     -> Train a2 b2 -- ^ Shorter list
+                     -> Maybe ([a1], Train a b) -- Pair of the prefix present in the longer list and the common suffix (== shorter list)
+trainLeftExtensionBy f_car f_loco xs ys = case trainExtensionBy f_car f_loco xs ys of
+    (xs_cars, [], train) -> Just (xs_cars, train)
+    _                    -> Nothing
+
+{-# INLINE trainExtensionBy #-}
+trainExtensionBy :: (a1 -> a2 -> Maybe a)
+                 -> (b1 -> b2 -> b)
+                 -> Train a1 b1
+                 -> Train a2 b2
+                 -> ([a1], [a2], Train a b)
+trainExtensionBy f_car f_loco xs ys = go (reverse xs_cars) (reverse ys_cars) (Loco (f_loco xs_loco ys_loco))
+  where
+    (xs_cars, xs_loco) = trainToList xs
+    (ys_cars, ys_loco) = trainToList ys
+
+    go (x_car:xs_cars) (y_car:ys_cars) train | Just car <- f_car x_car y_car = go xs_cars ys_cars (Car car train)
+    go xs_cars         ys_cars         train                                 = (xs_cars, ys_cars, train)
+
+trainLength :: Train a b -> Int
+trainLength = trainCarFoldl' (\n _ -> n + 1) 0
+
+trainDropTailByList :: [c]
+                    -> Train a b
+                    -> Train a (Train a b)
+trainDropTailByList cs abs = case go abs of Left _cs -> Loco abs; Right abs -> abs
+  where go (Loco _)    = Left cs
+        go (Car a abs) = case go abs of
+          Left cs -> case cs of
+            []    -> Right (Car a (Loco abs))
+            _:cds -> Left cds
+          Right abs' -> Right (Car a abs')
+
+{-
+trainUpTo :: (a -> Bool)
+          -> Train a b
+          -> Maybe ([a], a, Train a b)
+trainUpTo p = go
+  where go (Loco _)    = Nothing
+        go (Car a abs)
+          | p a       = Just ([], a, abs)
+          | otherwise = liftM (first3 (a:)) $ go abs
+-}
+
+data Stream a = a :< Stream a
+
+listToStream :: [a] -> Stream a
+listToStream []     = error "listToStream"
+listToStream (x:xs) = x :< listToStream xs
+
+
+class MonadTrans t where
+    lift :: Monad m => m a -> t m a
+
+
+newtype StateT s m a = StateT { unStateT :: s -> m (a, s) }
+
+instance Functor m => Functor (StateT s m) where
+    fmap f mx = StateT $ \s -> fmap (first f) (unStateT mx s)
+
+instance (Functor m, Monad m) => Applicative (StateT s m) where
+    pure = return
+    (<*>) = ap
+
+instance Monad m => Monad (StateT s m) where
+    return x = StateT $ \s -> return (x, s)
+    mx >>= fxmy = StateT $ \s -> unStateT mx s >>= \(x, s) -> unStateT (fxmy x) s
+
+instance MonadTrans (StateT s) where
+    lift mx = StateT $ \s -> liftM (flip (,) s) mx
+
+instance MonadFix m => MonadFix (StateT s m) where
+    mfix fxmx = StateT $ \s -> mfix $ \(~(x, _)) -> unStateT (fxmx x) s
+
+
+newtype ReaderT r m a = ReaderT { unReaderT :: r -> m a }
+
+instance Functor m => Functor (ReaderT r m) where
+    fmap f mx = ReaderT $ \r -> fmap f (unReaderT mx r)
+
+instance Applicative m => Applicative (ReaderT r m) where
+    pure x = ReaderT $ \_ -> pure x
+    mf <*> mx = ReaderT $ \r -> unReaderT mf r <*> unReaderT mx r
+
+instance Monad m => Monad (ReaderT r m) where
+    return x = ReaderT $ \_ -> return x
+    mx >>= fxmy = ReaderT $ \r -> unReaderT mx r >>= \x -> unReaderT (fxmy x) r
+
+instance MonadTrans (ReaderT r) where
+    lift mx = ReaderT $ \_ -> mx
+
+instance MonadFix m => MonadFix (ReaderT r m) where
+    mfix fxmy = ReaderT $ \r -> mfix $ \x -> unReaderT (fxmy x) r
+
+runReaderT :: r -> ReaderT r m a -> m a
+runReaderT = flip unReaderT
+
+
+newtype ContT r m a = ContT { unContT :: (a -> m r) -> m r }
+
+instance Functor (ContT r m) where
+    fmap f mx = ContT $ \k -> unContT mx (k . f)
+
+instance Applicative (ContT r m) where
+    pure = return
+    (<*>) = ap
+
+instance Monad (ContT r m) where
+    return x = ContT $ \k -> k x
+    mx >>= fxmy = ContT $ \k -> unContT mx $ \x -> unContT (fxmy x) k
+
+instance MonadTrans (ContT r) where
+    lift mx = ContT $ \k -> mx >>= k
+
+
+runContT :: Monad m => ContT r m r -> m r
+runContT mx = unContT mx return
+
+callCC :: ((forall b. a -> ContT r m b) -> ContT r m a) -> ContT r m a
+callCC f = ContT $ \k -> unContT (f (\a -> ContT $ \_k -> k a)) k
--- a/compiler/supercompile/chsc-plugin.cabal
+++ b/compiler/supercompile/chsc-plugin.cabal
+Name:           chsc-plugin
+Version:        1.0
+License:        BSD3
+Synopsis:       A plugin for GHC that performs supercompilation.
+Cabal-Version:  >= 1.2
+Build-Type:     Simple
+Author:         Max Bolingbroke <batterseapower@hotmail.com>
+Maintainer:     Max Bolingbroke <batterseapower@hotmail.com>
+
+Source-Repository head
+  type:     git
+  location: http://darcs.haskell.org/ghc.git
+  branch:   supercompiler
+
+Library
+    Exposed-Modules:
+        CHSC
+    Other-Modules:
+        Supercompile.Core.FreeVars
+        Supercompile.Core.Renaming
+        Supercompile.Core.Size
+        Supercompile.Core.Syntax
+        Supercompile.Core.Tag
+        Supercompile.Drive.Match
+        Supercompile.Drive.Process
+        Supercompile.Drive.Split
+        Supercompile.Evaluator.Deeds
+        Supercompile.Evaluator.Evaluate
+        Supercompile.Evaluator.FreeVars
+        Supercompile.Evaluator.Residualise
+        Supercompile.Evaluator.Syntax
+        Supercompile.StaticFlags
+        Supercompile.Termination.Combinators
+        Supercompile.Termination.Generaliser
+        Supercompile.Termination.TagBag
+        Supercompile.Utilities
+        Supercompile
+    Extensions:
+        CPP,
+        PatternGuards,
+        ExistentialQuantification,
+        ScopedTypeVariables,
+        FlexibleInstances,
+        RankNTypes,
+        DeriveDataTypeable
+    Build-Depends:
+        base >= 4.3 && < 4.4,
+        containers >= 0.4 && < 0.5,
+        ghc >= 7.1 && < 7.2
--- a/compiler/supercompile/install-plugin-inplace
+++ b/compiler/supercompile/install-plugin-inplace
+#!/bin/sh
+
+SCRIPT_DIR=$(cd $(dirname "$0"); pwd)
+INPLACE_DIR="$SCRIPT_DIR/../../inplace"
+
+# NB: this script relies on the installed Cabal (presumably from the bootstrapping compiler)
+# actually understanding the package metadata used by the in-tree Cabal. Risky, but works for now.
+cabal install --disable-library-profiling --user --with-ghc=$INPLACE_DIR/bin/ghc-stage2 --with-ghc-pkg=$INPLACE_DIR/bin/ghc-pkg --package-db=$INPLACE_DIR/lib/package.conf.d
--- a/compiler/typecheck/TcBinds.lhs
+++ b/compiler/typecheck/TcBinds.lhs
@@ -47,6 +47,8 @@ import Outputable
 import FastString

 import Control.Monad
+import Data.Either
+import Data.List

 #include "HsVersions.h"
 \end{code}
@@ -430,7 +432,8 @@ tcPolyNoGen tc_sig_fn prag_fn rec_tc bind_list
      = do { mono_ty' <- zonkTcType (idType mono_id)
             -- Zonk, mainly to expose unboxed types to checkStrictBinds
           ; let mono_id' = setIdType mono_id mono_ty'
-           ; _specs <- tcSpecPrags mono_id' (prag_fn name)
+           ; (prags, _mono_id') <- addInlinePrags mono_id' (prag_fn name)
+           ; _specs <- tcSpecPrags mono_id' prags
           ; return mono_id' }
           -- NB: tcPrags generates error messages for
           --     specialisation pragmas for non-overloaded sigs
@@ -459,8 +462,8 @@ tcPolyCheck sig@(TcSigInfo { sig_id = poly_id, sig_tvs = tvs_w_scoped
               tcExtendTyVarEnv2 [(n,tv) | (Just n, tv) <- tvs_w_scoped] $
               tcMonoBinds (\_ -> Just sig) LetLclBndr rec_tc bind_list

+       ; (prag_sigs, poly_id) <- addInlinePrags poly_id prag_sigs
       ; spec_prags <- tcSpecPrags poly_id prag_sigs
-       ; poly_id    <- addInlinePrags poly_id prag_sigs

       ; let (_, _, mono_id) = mono_info
             export = ABE { abe_wrap = idHsWrapper
@@ -538,7 +541,7 @@ mkExport prag_fn qtvs theta (poly_name, mb_sig, mono_id)
                           Just sig -> sig_id sig
                -- poly_id has a zonked type

-        ; poly_id <- addInlinePrags poly_id prag_sigs
+        ; (prag_sigs, poly_id) <- addInlinePrags poly_id prag_sigs
        ; spec_prags <- tcSpecPrags poly_id prag_sigs
                -- tcPrags requires a zonked poly_id

@@ -585,16 +588,18 @@ mkExport prag_fn qtvs theta (poly_name, mb_sig, mono_id)
 type PragFun = Name -> [LSig Name]

 mkPragFun :: [LSig Name] -> LHsBinds Name -> PragFun
-mkPragFun sigs binds = \n -> lookupNameEnv prag_env n `orElse` []
+mkPragFun sigs binds = \n -> map ($ n) defs ++ (lookupNameEnv prag_env n `orElse` [])
  where
-    prs = mapCatMaybes get_sig sigs
+    (defs, prs) = partitionEithers $ mapCatMaybes get_sig sigs

-    get_sig :: LSig Name -> Maybe (Located Name, LSig Name)
-    get_sig (L l (SpecSig nm ty inl)) = Just (nm, L l $ SpecSig  nm ty (add_arity nm inl))
-    get_sig (L l (InlineSig nm inl))  = Just (nm, L l $ InlineSig nm   (add_arity nm inl))
-    get_sig _                         = Nothing
+    get_sig :: LSig Name -> Maybe (Either (Name -> LSig Name) (Located Name, LSig Name))
+    get_sig (L l (SpecSig nm ty inl))       = Just (Right (nm,    L l $ SpecSig   nm     ty (add_arity (unLoc nm) inl)))
+    get_sig (L l (InlineSig (Just nm) inl)) = Just (Right (nm,    L l $ InlineSig (Just nm) (add_arity (unLoc nm) inl)))
+    get_sig (L l (InlineSig Nothing   inl)) = Just (Left  (\nm -> L l $ InlineSig Nothing   (add_arity        nm  inl)))
+    get_sig (L l (SupercompileSig nm))      = Just (Right (nm,    L l $ SupercompileSig nm))
+    get_sig _                               = Nothing

-    add_arity (L _ n) inl_prag   -- Adjust inl_sat field to match visible arity of function
+    add_arity n inl_prag   -- Adjust inl_sat field to match visible arity of function
      | Just ar <- lookupNameEnv ar_env n,
        Inline <- inl_inline inl_prag     = inl_prag { inl_sat = Just ar }
        -- add arity only for real INLINE pragmas, not INLINABLE
@@ -622,15 +627,13 @@ tcSpecPrags :: Id -> [LSig Name]
 -- Pre-condition: the poly_id is zonked
 -- Reason: required by tcSubExp
 tcSpecPrags poly_id prag_sigs
-  = do { unless (null bad_sigs) warn_discarded_sigs
-       ; mapAndRecoverM (wrapLocM (tcSpec poly_id)) spec_sigs }
+  = do { spec_sigs' <- mapAndRecoverM (wrapLocM (tcSpec poly_id)) spec_sigs
+       ; unless (null prag_sigs') $
+            warnPrags poly_id prag_sigs' $
+                      ptext (sLit "Discarding unexpected pragmas for")
+       ; return spec_sigs' }
  where
-    spec_sigs = filter isSpecLSig prag_sigs
-    bad_sigs  = filter is_bad_sig prag_sigs
-    is_bad_sig s = not (isSpecLSig s || isInlineLSig s)
-
-    warn_discarded_sigs = warnPrags poly_id bad_sigs $
-                          ptext (sLit "Discarding unexpected pragmas for")
+    (spec_sigs, prag_sigs') = partition isSpecLSig prag_sigs


 --------------

--- a/compiler/typecheck/TcClassDcl.lhs
+++ b/compiler/typecheck/TcClassDcl.lhs
@@ -218,8 +218,8 @@ tcDefMeth clas tyvars this_dict binds_in hs_sig_fn prag_fn (sel_id, dm_info)
 	     -- type errors from tcInstanceMethodBody come from here


-           ; dm_id_w_inline <- addInlinePrags dm_id prags
-           ; spec_prags     <- tcSpecPrags dm_id prags
+           ; (prags, dm_id_w_inline) <- addInlinePrags dm_id prags
+           ; spec_prags <- tcSpecPrags dm_id prags

           ; let local_dm_ty = instantiateMethod clas dm_id (mkTyVarTys tyvars)
                 hs_ty       = lookupHsSig hs_sig_fn sel_name 

--- a/compiler/typecheck/TcInstDcls.lhs
+++ b/compiler/typecheck/TcInstDcls.lhs
@@ -511,7 +511,7 @@ cyclicDeclErr :: Outputable d => [Located d] -> TcRn ()
 cyclicDeclErr inst_decls
  = setSrcSpan (getLoc (head sorted_decls)) $
    addErr (sep [ptext (sLit "Cycle in type declarations: data constructor used (in a type) before it is defined"),
-		 nest 2 (vcat (map ppr_decl sorted_decls))])
+                 nest 2 (vcat (map ppr_decl sorted_decls))])
  where
    sorted_decls = sortLocated inst_decls
    ppr_decl (L loc decl) = ppr loc <> colon <+> ppr decl
@@ -585,8 +585,8 @@ tcLocalInstDecl (L loc (ClsInstD { cid_poly_ty = poly_ty, cid_binds = binds
                -- Dfun location is that of instance *header*

        ; overlap_flag <- getOverlapFlag
-        ; let dfun  	= mkDictFunId dfun_name tyvars theta clas inst_tys
-              ispec 	= mkLocalInstance dfun overlap_flag
+        ; let dfun      = mkDictFunId dfun_name tyvars theta clas inst_tys
+              ispec     = mkLocalInstance dfun overlap_flag
              inst_info = InstInfo { iSpec  = ispec, iBinds = VanillaInst binds uprags False }

        ; return ( [inst_info], fam_insts0 ++ concat fam_insts1) }
@@ -1099,7 +1099,7 @@ tcInstanceMethods dfun_id clas tyvars dfun_ev_vars inst_tys
                                          mkMethIds sig_fn clas tyvars dfun_ev_vars
                                                    inst_tys sel_id
           ; let prags = prag_fn (idName sel_id)
-           ; meth_id1 <- addInlinePrags meth_id prags
+           ; (prags, meth_id1) <- addInlinePrags meth_id prags
           ; spec_prags <- tcSpecPrags meth_id1 prags
           ; bind <- tcInstanceMethodBody InstSkol
                          tyvars dfun_ev_vars
No results found