diff --git a/aclocal.m4 b/aclocal.m4
index 8a9fe63cb5781782afd5c46e1243f678c533412f..98f3e3d66295b68c959cf66e814a8376e0d38cea 100644
--- a/aclocal.m4
+++ b/aclocal.m4
@@ -848,7 +848,7 @@ AS_IF([test "$fp_num1" $2 "$fp_num2"], [$4], [$5])[]dnl
 
 dnl
 dnl Check for Happy and version.  If we're building GHC, then we need
-dnl at least Happy version 1.14.  If there's no installed Happy, we look
+dnl at least Happy version 1.19.  If there's no installed Happy, we look
 dnl for a happy source tree and point the build system at that instead.
 dnl
 AC_DEFUN([FPTOOLS_HAPPY],
diff --git a/compiler/cmm/CmmLayoutStack.hs b/compiler/cmm/CmmLayoutStack.hs
index 17d111d46f62e44c0d79ceafcabff28fd6fd92ca..2efb806a1edff02d625c1dcb35e6ff2d92c24030 100644
--- a/compiler/cmm/CmmLayoutStack.hs
+++ b/compiler/cmm/CmmLayoutStack.hs
@@ -147,7 +147,7 @@ layout :: DynFlags
           , [CmmBlock]                  -- [out] new blocks
           )
 
-layout dflags procpoints liveness entry entry_args final_stackmaps final_hwm blocks
+layout dflags procpoints liveness entry entry_args final_stackmaps final_sp_high blocks
   = go blocks init_stackmap entry_args []
   where
     (updfr, cont_info)  = collectContInfo blocks
@@ -204,14 +204,7 @@ layout dflags procpoints liveness entry entry_args final_stackmaps final_hwm blo
        --
        let middle_pre = blockToList $ foldl blockSnoc middle1 middle2
 
-           sp_high = final_hwm - entry_args
-              -- The stack check value is adjusted by the Sp offset on
-              -- entry to the proc, which is entry_args.  We are
-              -- assuming that we only do a stack check at the
-              -- beginning of a proc, and we don't modify Sp before the
-              -- check.
-
-           final_blocks = manifestSp dflags final_stackmaps stack0 sp0 sp_high entry0
+           final_blocks = manifestSp dflags final_stackmaps stack0 sp0 final_sp_high entry0
                               middle_pre sp_off last1 fixup_blocks
 
            acc_stackmaps' = mapUnion acc_stackmaps out
@@ -780,24 +773,24 @@ areaToSp :: DynFlags -> ByteOff -> ByteOff -> (Area -> StackLoc) -> CmmExpr -> C
 areaToSp dflags sp_old _sp_hwm area_off (CmmStackSlot area n) =
   cmmOffset dflags (CmmReg spReg) (sp_old - area_off area - n)
 areaToSp dflags _ sp_hwm _ (CmmLit CmmHighStackMark) = mkIntExpr dflags sp_hwm
-areaToSp dflags _ _ _ (CmmMachOp (MO_U_Lt _)  -- Note [null stack check]
+areaToSp dflags _ _ _ (CmmMachOp (MO_U_Lt _)  -- Note [Always false stack check]
                           [CmmMachOp (MO_Sub _)
-                                  [ CmmReg (CmmGlobal Sp)
-                                  , CmmLit (CmmInt 0 _)],
-                           CmmReg (CmmGlobal SpLim)]) = zeroExpr dflags
+                                  [ CmmRegOff (CmmGlobal Sp) off
+                                  , CmmLit (CmmInt lit _)],
+                           CmmReg (CmmGlobal SpLim)])
+                              | fromIntegral off == lit = zeroExpr dflags
 areaToSp _ _ _ _ other = other
 
--- -----------------------------------------------------------------------------
--- Note [null stack check]
+-- Note [Always false stack check]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 --
--- If the high-water Sp is zero, then we end up with
+-- We can optimise stack checks of the form
 --
---   if (Sp - 0 < SpLim) then .. else ..
+--   if ((Sp + x) - x < SpLim) then .. else ..
 --
--- and possibly some dead code for the failure case.  Optimising this
--- away depends on knowing that SpLim <= Sp, so it is really the job
--- of the stack layout algorithm, hence we do it now.  This is also
--- convenient because control-flow optimisation later will drop the
+-- where x is an integer offset. Optimising this away depends on knowing that
+-- SpLim <= Sp, so it is really the job of the stack layout algorithm, hence we
+-- do it now.  This is also convenient because sinking pass will later drop the
 -- dead code.
 
 optStackCheck :: CmmNode O C -> CmmNode O C
@@ -1021,4 +1014,3 @@ insertReloads stackmap =
 
 stackSlotRegs :: StackMap -> [(LocalReg, StackLoc)]
 stackSlotRegs sm = eltsUFM (sm_regs sm)
-
diff --git a/compiler/cmm/CmmOpt.hs b/compiler/cmm/CmmOpt.hs
index f4cf86448c6f6631b23ff80e858c8584bb7b4cf0..acaed28acf45a151c9247bcfa34d06632f749bd5 100644
--- a/compiler/cmm/CmmOpt.hs
+++ b/compiler/cmm/CmmOpt.hs
@@ -51,7 +51,7 @@ cmmMachOpFoldM _ op [CmmLit (CmmInt x rep)]
       MO_S_Neg _ -> CmmLit (CmmInt (-x) rep)
       MO_Not _   -> CmmLit (CmmInt (complement x) rep)
 
-        -- these are interesting: we must first narrow to the 
+        -- these are interesting: we must first narrow to the
         -- "from" type, in order to truncate to the correct size.
         -- The final narrow/widen to the destination type
         -- is implicit in the CmmLit.
@@ -87,7 +87,7 @@ cmmMachOpFoldM dflags conv_outer [CmmMachOp conv_inner [x]]
         | otherwise ->
             Nothing
   where
-        isIntConversion (MO_UU_Conv rep1 rep2) 
+        isIntConversion (MO_UU_Conv rep1 rep2)
           = Just (rep1,rep2,False)
         isIntConversion (MO_SS_Conv rep1 rep2)
           = Just (rep1,rep2,True)
@@ -318,7 +318,7 @@ cmmMachOpFoldM dflags mop [x, (CmmLit (CmmInt n _))]
            | Just p <- exactLog2 n ->
                  Just (cmmMachOpFold dflags (MO_U_Shr rep) [x, CmmLit (CmmInt p rep)])
         MO_S_Quot rep
-           | Just p <- exactLog2 n, 
+           | Just p <- exactLog2 n,
              CmmReg _ <- x ->   -- We duplicate x below, hence require
                                 -- it is a reg.  FIXME: remove this restriction.
                 -- shift right is not the same as quot, because it rounds
@@ -362,7 +362,7 @@ cmmMachOpFoldM _ _ _ = Nothing
 -- This algorithm for determining the $\log_2$ of exact powers of 2 comes
 -- from GCC.  It requires bit manipulation primitives, and we use GHC
 -- extensions.  Tough.
--- 
+--
 -- Used to be in MachInstrs --SDM.
 -- ToDo: remove use of unboxery --SDM.
 
@@ -387,54 +387,6 @@ exactLog2 x_
     pow2 x | x ==# _ILIT(1) = _ILIT(0)
            | otherwise = _ILIT(1) +# pow2 (x `shiftR_FastInt` _ILIT(1))
 
-
--- -----------------------------------------------------------------------------
--- Loopify for C
-
-{-
- This is a simple pass that replaces tail-recursive functions like this:
-
-   fac() {
-     ...
-     jump fac();
-   }
-
- with this:
-
-  fac() {
-   L:
-     ...
-     goto L;
-  }
-
-  the latter generates better C code, because the C compiler treats it
-  like a loop, and brings full loop optimisation to bear.
-
-  In my measurements this makes little or no difference to anything
-  except factorial, but what the hell.
--}
-
-{-
-cmmLoopifyForC :: DynFlags -> RawCmmDecl -> RawCmmDecl
--- XXX: revisit if we actually want to do this
--- cmmLoopifyForC p@(CmmProc Nothing _ _) = p  -- only if there's an info table, ignore case alts
-cmmLoopifyForC dflags (CmmProc infos entry_lbl live
-                 (ListGraph blocks@(BasicBlock top_id _ : _))) =
---  pprTrace "jump_lbl" (ppr jump_lbl <+> ppr entry_lbl) $
-  CmmProc infos entry_lbl live (ListGraph blocks')
-  where blocks' = [ BasicBlock id (map do_stmt stmts)
-                  | BasicBlock id stmts <- blocks ]
-
-        do_stmt (CmmJump (CmmLit (CmmLabel lbl)) _) | lbl == jump_lbl
-                = CmmBranch top_id
-        do_stmt stmt = stmt
-
-        jump_lbl | tablesNextToCode dflags = toInfoLbl entry_lbl
-                 | otherwise               = entry_lbl
-
-cmmLoopifyForC _ top = top
--}
-
 -- -----------------------------------------------------------------------------
 -- Utils
 
@@ -449,4 +401,3 @@ isComparisonExpr _                  = False
 isPicReg :: CmmExpr -> Bool
 isPicReg (CmmReg (CmmGlobal PicBaseReg)) = True
 isPicReg _ = False
-
diff --git a/compiler/codeGen/StgCmmHeap.hs b/compiler/codeGen/StgCmmHeap.hs
index f4c58e95e100b4b30214c433769582c11bfbc2c9..1d1100c6bea5a8674c51cb3c80e2e68281e38943 100644
--- a/compiler/codeGen/StgCmmHeap.hs
+++ b/compiler/codeGen/StgCmmHeap.hs
@@ -533,6 +533,27 @@ heapStackCheckGen stk_hwm mb_bytes
        call <- mkCall generic_gc (GC, GC) [] [] updfr_sz []
        do_checks stk_hwm False  mb_bytes (call <*> mkBranch lretry)
 
+-- Note [Single stack check]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~
+--
+-- When compiling a function we can determine how much stack space it will
+-- use. We therefore need to perform only a single stack check at the beginning
+-- of a function to see if we have enough stack space. Instead of referring
+-- directly to Sp - as we used to do in the past - the code generator uses
+-- (old + 0) in the stack check. Stack layout phase turns (old + 0) into Sp.
+--
+-- The idea here is that, while we need to perform only one stack check for
+-- each function, we could in theory place more stack checks later in the
+-- function. They would be redundant, but not incorrect (in a sense that they
+-- should not change program behaviour). We need to make sure however that a
+-- stack check inserted after incrementing the stack pointer checks for a
+-- respectively smaller stack space. This would not be the case if the code
+-- generator produced direct references to Sp. By referencing (old + 0) we make
+-- sure that we always check for a correct amount of stack: when converting
+-- (old + 0) to Sp the stack layout phase takes into account changes already
+-- made to stack pointer. The idea for this change came from observations made
+-- while debugging #8275.
+
 do_checks :: Maybe CmmExpr    -- Should we check the stack?
           -> Bool       -- Should we check for preemption?
           -> Maybe CmmExpr    -- Heap headroom (bytes)
@@ -547,11 +568,13 @@ do_checks mb_stk_hwm checkYield mb_alloc_lit do_gc = do
 
     bump_hp   = cmmOffsetExprB dflags (CmmReg hpReg) alloc_lit
 
-    -- Sp overflow if (Sp - CmmHighStack < SpLim)
+    -- Sp overflow if ((old + 0) - CmmHighStack < SpLim)
+    -- At the beginning of a function old + 0 = Sp
+    -- See Note [Single stack check]
     sp_oflo sp_hwm =
          CmmMachOp (mo_wordULt dflags)
                   [CmmMachOp (MO_Sub (typeWidth (cmmRegType dflags spReg)))
-                             [CmmReg spReg, sp_hwm],
+                             [CmmStackSlot Old 0, sp_hwm],
                    CmmReg spLimReg]
 
     -- Hp overflow if (Hp > HpLim)
diff --git a/compiler/iface/TcIface.lhs b/compiler/iface/TcIface.lhs
index f2ed68f631fa1ed45b579c26f60c77063f0f57fa..867af7b755cc5d5960a289380a2e199b7ce46ef7 100644
--- a/compiler/iface/TcIface.lhs
+++ b/compiler/iface/TcIface.lhs
@@ -6,13 +6,13 @@
 Type checking of type signatures in interface files
 
 \begin{code}
-module TcIface ( 
-        tcLookupImported_maybe, 
-        importDecl, checkWiredInTyCon, tcHiBootIface, typecheckIface, 
+module TcIface (
+        tcLookupImported_maybe,
+        importDecl, checkWiredInTyCon, tcHiBootIface, typecheckIface,
         tcIfaceDecl, tcIfaceInst, tcIfaceFamInst, tcIfaceRules,
-        tcIfaceVectInfo, tcIfaceAnnotations, 
+        tcIfaceVectInfo, tcIfaceAnnotations,
         tcIfaceExpr,    -- Desired by HERMIT (Trac #7683)
-        tcIfaceGlobal, 
+        tcIfaceGlobal,
         tcExtCoreBindings
  ) where
 
@@ -60,7 +60,7 @@ import Demand
 import Module
 import UniqFM
 import UniqSupply
-import Outputable       
+import Outputable
 import ErrUtils
 import Maybes
 import SrcLoc
@@ -83,9 +83,9 @@ An IfaceDecl is populated with RdrNames, and these are not renamed to
 Names before typechecking, because there should be no scope errors etc.
 
         -- For (b) consider: f = \$(...h....)
-        -- where h is imported, and calls f via an hi-boot file.  
+        -- where h is imported, and calls f via an hi-boot file.
         -- This is bad!  But it is not seen as a staging error, because h
-        -- is indeed imported.  We don't want the type-checker to black-hole 
+        -- is indeed imported.  We don't want the type-checker to black-hole
         -- when simplifying and compiling the splice!
         --
         -- Simple solution: discard any unfolding that mentions a variable
@@ -101,19 +101,19 @@ Names before typechecking, because there should be no scope errors etc.
 
 The main idea is this.  We are chugging along type-checking source code, and
 find a reference to GHC.Base.map.  We call tcLookupGlobal, which doesn't find
-it in the EPS type envt.  So it 
+it in the EPS type envt.  So it
         1 loads GHC.Base.hi
         2 gets the decl for GHC.Base.map
         3 typechecks it via tcIfaceDecl
         4 and adds it to the type env in the EPS
 
-Note that DURING STEP 4, we may find that map's type mentions a type 
-constructor that also 
+Note that DURING STEP 4, we may find that map's type mentions a type
+constructor that also
 
 Notice that for imported things we read the current version from the EPS
 mutable variable.  This is important in situations like
         ...$(e1)...$(e2)...
-where the code that e1 expands to might import some defns that 
+where the code that e1 expands to might import some defns that
 also turn out to be needed by the code that e2 expands to.
 
 \begin{code}
@@ -122,13 +122,13 @@ tcLookupImported_maybe :: Name -> TcM (MaybeErr MsgDoc TyThing)
 tcLookupImported_maybe name
   = do  { hsc_env <- getTopEnv
         ; mb_thing <- liftIO (lookupTypeHscEnv hsc_env name)
-        ; case mb_thing of  
+        ; case mb_thing of
             Just thing -> return (Succeeded thing)
             Nothing    -> tcImportDecl_maybe name }
 
 tcImportDecl_maybe :: Name -> TcM (MaybeErr MsgDoc TyThing)
 -- Entry point for *source-code* uses of importDecl
-tcImportDecl_maybe name 
+tcImportDecl_maybe name
   | Just thing <- wiredInNameTyThing_maybe name
   = do  { when (needWiredInHomeIface thing)
                (initIfaceTcRn (loadWiredInHomeIface name))
@@ -145,14 +145,14 @@ importDecl name
     do  { traceIf nd_doc
 
         -- Load the interface, which should populate the PTE
-        ; mb_iface <- ASSERT2( isExternalName name, ppr name ) 
+        ; mb_iface <- ASSERT2( isExternalName name, ppr name )
                       loadInterface nd_doc (nameModule name) ImportBySystem
         ; case mb_iface of {
                 Failed err_msg  -> return (Failed err_msg) ;
                 Succeeded _ -> do
 
         -- Now look it up again; this time we should find it
-        { eps <- getEps 
+        { eps <- getEps
         ; case lookupTypeEnv (eps_PTE eps) name of
             Just thing -> return (Succeeded thing)
             Nothing    -> return (Failed not_found_msg)
@@ -174,7 +174,7 @@ importDecl name
 Note [Loading instances for wired-in things]
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 We need to make sure that we have at least *read* the interface files
-for any module with an instance decl or RULE that we might want.  
+for any module with an instance decl or RULE that we might want.
 
 * If the instance decl is an orphan, we have a whole separate mechanism
   (loadOprhanModules)
@@ -204,12 +204,12 @@ checkWiredInTyCon :: TyCon -> TcM ()
 -- are loaded. See Note [Loading instances for wired-in things]
 -- It might not be a wired-in tycon (see the calls in TcUnify),
 -- in which case this is a no-op.
-checkWiredInTyCon tc    
-  | not (isWiredInName tc_name) 
+checkWiredInTyCon tc
+  | not (isWiredInName tc_name)
   = return ()
   | otherwise
   = do  { mod <- getModule
-        ; ASSERT( isExternalName tc_name ) 
+        ; ASSERT( isExternalName tc_name )
           when (mod /= nameModule tc_name)
                (initIfaceTcRn (loadWiredInHomeIface tc_name))
                 -- Don't look for (non-existent) Float.hi when
@@ -232,7 +232,7 @@ ifCheckWiredInThing thing
                 -- the HPT, so without the test we'll demand-load it into the PIT!
                 -- C.f. the same test in checkWiredInTyCon above
         ; let name = getName thing
-        ; ASSERT2( isExternalName name, ppr name ) 
+        ; ASSERT2( isExternalName name, ppr name )
           when (needWiredInHomeIface thing && mod /= nameModule name)
                (loadWiredInHomeIface name) }
 
@@ -261,12 +261,12 @@ typecheckIface :: ModIface      -- Get the decls from here
                -> TcRnIf gbl lcl ModDetails
 typecheckIface iface
   = initIfaceTc iface $ \ tc_env_var -> do
-        -- The tc_env_var is freshly allocated, private to 
+        -- The tc_env_var is freshly allocated, private to
         -- type-checking this particular interface
         {       -- Get the right set of decls and rules.  If we are compiling without -O
                 -- we discard pragmas before typechecking, so that we don't "see"
                 -- information that we shouldn't.  From a versioning point of view
-                -- It's not actually *wrong* to do so, but in fact GHCi is unable 
+                -- It's not actually *wrong* to do so, but in fact GHCi is unable
                 -- to handle unboxed tuples, so it must not see unfoldings.
           ignore_prags <- goptM Opt_IgnoreInterfacePragmas
 
@@ -326,24 +326,24 @@ tcHiBootIface hsc_src mod
         ; if not (isOneShot mode)
                 -- In --make and interactive mode, if this module has an hs-boot file
                 -- we'll have compiled it already, and it'll be in the HPT
-                -- 
+                --
                 -- We check wheher the interface is a *boot* interface.
                 -- It can happen (when using GHC from Visual Studio) that we
-                -- compile a module in TypecheckOnly mode, with a stable, 
+                -- compile a module in TypecheckOnly mode, with a stable,
                 -- fully-populated HPT.  In that case the boot interface isn't there
                 -- (it's been replaced by the mother module) so we can't check it.
-                -- And that's fine, because if M's ModInfo is in the HPT, then 
+                -- And that's fine, because if M's ModInfo is in the HPT, then
                 -- it's been compiled once, and we don't need to check the boot iface
           then do { hpt <- getHpt
                   ; case lookupUFM hpt (moduleName mod) of
-                      Just info | mi_boot (hm_iface info) 
+                      Just info | mi_boot (hm_iface info)
                                 -> return (hm_details info)
                       _ -> return emptyModDetails }
           else do
 
-        -- OK, so we're in one-shot mode.  
-        -- In that case, we're read all the direct imports by now, 
-        -- so eps_is_boot will record if any of our imports mention us by 
+        -- OK, so we're in one-shot mode.
+        -- In that case, we're read all the direct imports by now,
+        -- so eps_is_boot will record if any of our imports mention us by
         -- way of hi-boot file
         { eps <- getEps
         ; case lookupUFM (eps_is_boot eps) (moduleName mod) of {
@@ -352,10 +352,10 @@ tcHiBootIface hsc_src mod
             Just (_, False) -> failWithTc moduleLoop ;
                 -- Someone below us imported us!
                 -- This is a loop with no hi-boot in the way
-                
+
             Just (_mod, True) ->        -- There's a hi-boot interface below us
-                
-    do  { read_result <- findAndReadIface 
+
+    do  { read_result <- findAndReadIface
                                 need mod
                                 True    -- Hi-boot file
 
@@ -367,10 +367,10 @@ tcHiBootIface hsc_src mod
     need = ptext (sLit "Need the hi-boot interface for") <+> ppr mod
                  <+> ptext (sLit "to compare against the Real Thing")
 
-    moduleLoop = ptext (sLit "Circular imports: module") <+> quotes (ppr mod) 
+    moduleLoop = ptext (sLit "Circular imports: module") <+> quotes (ppr mod)
                      <+> ptext (sLit "depends on itself")
 
-    elaborate err = hang (ptext (sLit "Could not find hi-boot interface for") <+> 
+    elaborate err = hang (ptext (sLit "Could not find hi-boot interface for") <+>
                           quotes (ppr mod) <> colon) 4 err
 \end{code}
 
@@ -386,7 +386,7 @@ the constructor argument types.  This is in the hope that we may never
 poke on those argument types, and hence may never need to load the
 interface files for types mentioned in the arg types.
 
-E.g.    
+E.g.
         data Foo.S = MkS Baz.T
 Mabye we can get away without even loading the interface for Baz!
 
@@ -394,9 +394,9 @@ This is not just a performance thing.  Suppose we have
         data Foo.S = MkS Baz.T
         data Baz.T = MkT Foo.S
 (in different interface files, of course).
-Now, first we load and typecheck Foo.S, and add it to the type envt.  
+Now, first we load and typecheck Foo.S, and add it to the type envt.
 If we do explore MkS's argument, we'll load and typecheck Baz.T.
-If we explore MkT's argument we'll find Foo.S already in the envt.  
+If we explore MkT's argument we'll find Foo.S already in the envt.
 
 If we typechecked constructor args eagerly, when loading Foo.S we'd try to
 typecheck the type Baz.T.  So we'd fault in Baz.T... and then need Foo.S...
@@ -411,13 +411,13 @@ events takes place:
         * we build a thunk <t> for the constructor arg tys
         * we build a thunk for the extended type environment (depends on <t>)
         * we write the extended type envt into the global EPS mutvar
-        
+
 Now we look something up in the type envt
         * that pulls on <t>
         * which reads the global type envt out of the global EPS mutvar
         * but that depends in turn on <t>
 
-It's subtle, because, it'd work fine if we typechecked the constructor args 
+It's subtle, because, it'd work fine if we typechecked the constructor args
 eagerly -- they don't need the extended type envt.  They just get the extended
 type envt by accident, because they look at it later.
 
@@ -435,7 +435,7 @@ tc_iface_decl :: TyConParent    -- For nested declarations
               -> Bool   -- True <=> discard IdInfo on IfaceId bindings
               -> IfaceDecl
               -> IfL TyThing
-tc_iface_decl _ ignore_prags (IfaceId {ifName = occ_name, ifType = iface_type, 
+tc_iface_decl _ ignore_prags (IfaceId {ifName = occ_name, ifType = iface_type,
                                        ifIdDetails = details, ifIdInfo = info})
   = do  { name <- lookupIfaceTop occ_name
         ; ty <- tcIfaceType iface_type
@@ -443,13 +443,13 @@ tc_iface_decl _ ignore_prags (IfaceId {ifName = occ_name, ifType = iface_type,
         ; info <- tcIdInfo ignore_prags name ty info
         ; return (AnId (mkGlobalId details name ty info)) }
 
-tc_iface_decl parent _ (IfaceData {ifName = occ_name, 
-                          ifCType = cType, 
+tc_iface_decl parent _ (IfaceData {ifName = occ_name,
+                          ifCType = cType,
                           ifTyVars = tv_bndrs,
                           ifRoles = roles,
                           ifCtxt = ctxt, ifGadtSyntax = gadt_syn,
-                          ifCons = rdr_cons, 
-                          ifRec = is_rec, ifPromotable = is_prom, 
+                          ifCons = rdr_cons,
+                          ifRec = is_rec, ifPromotable = is_prom,
                           ifAxiom = mb_axiom_name })
   = bindIfaceTyVars_AT tv_bndrs $ \ tyvars -> do
     { tc_name <- lookupIfaceTop occ_name
@@ -457,7 +457,7 @@ tc_iface_decl parent _ (IfaceData {ifName = occ_name,
             { stupid_theta <- tcIfaceCtxt ctxt
             ; parent' <- tc_parent tyvars mb_axiom_name
             ; cons <- tcIfaceDataCons tc_name tycon tyvars rdr_cons
-            ; return (buildAlgTyCon tc_name tyvars roles cType stupid_theta 
+            ; return (buildAlgTyCon tc_name tyvars roles cType stupid_theta
                                     cons is_rec is_prom gadt_syn parent') }
     ; traceIf (text "tcIfaceDecl4" <+> ppr tycon)
     ; return (ATyCon tycon) }
@@ -481,20 +481,20 @@ tc_iface_decl parent _ (IfaceData {ifName = occ_name,
                             -- gotten from separate interface-file declarations
                             -- NB: ax_tvs may be shorter because of eta-reduction
                             -- See Note [Eta reduction for data family axioms] in TcInstDcls
-                 lhs_tys = substTys subst ax_lhs `chkAppend` 
+                 lhs_tys = substTys subst ax_lhs `chkAppend`
                            dropList ax_tvs tycon_tys
                             -- The 'lhs_tys' should be 1-1 with the 'tyvars'
                             -- but ax_tvs maybe shorter because of eta-reduction
            ; return (FamInstTyCon ax_unbr fam_tc lhs_tys) }
 
-tc_iface_decl parent _ (IfaceSyn {ifName = occ_name, ifTyVars = tv_bndrs, 
+tc_iface_decl parent _ (IfaceSyn {ifName = occ_name, ifTyVars = tv_bndrs,
                                   ifRoles = roles,
                                   ifSynRhs = mb_rhs_ty,
                                   ifSynKind = kind })
    = bindIfaceTyVars_AT tv_bndrs $ \ tyvars -> do
      { tc_name  <- lookupIfaceTop occ_name
      ; rhs_kind <- tcIfaceKind kind     -- Note [Synonym kind loop]
-     ; rhs      <- forkM (mk_doc tc_name) $ 
+     ; rhs      <- forkM (mk_doc tc_name) $
                    tc_syn_rhs mb_rhs_ty
      ; tycon    <- buildSynTyCon tc_name tyvars roles rhs rhs_kind parent
      ; return (ATyCon tycon) }
@@ -510,8 +510,8 @@ tc_iface_decl parent _ (IfaceSyn {ifName = occ_name, ifTyVars = tv_bndrs,
 
 tc_iface_decl _parent ignore_prags
             (IfaceClass {ifCtxt = rdr_ctxt, ifName = tc_occ,
-                         ifTyVars = tv_bndrs, ifRoles = roles, ifFDs = rdr_fds, 
-                         ifATs = rdr_ats, ifSigs = rdr_sigs, 
+                         ifTyVars = tv_bndrs, ifRoles = roles, ifFDs = rdr_fds,
+                         ifATs = rdr_ats, ifSigs = rdr_sigs,
                          ifMinDef = mindef_occ, ifRec = tc_isrec })
 -- ToDo: in hs-boot files we should really treat abstract classes specially,
 --       as we do abstract tycons
@@ -542,15 +542,14 @@ tc_iface_decl _parent ignore_prags
    tc_sig (IfaceClassOp occ dm rdr_ty)
      = do { op_name <- lookupIfaceTop occ
           ; op_ty   <- forkM (mk_op_doc op_name rdr_ty) (tcIfaceType rdr_ty)
-                -- Must be done lazily for just the same reason as the 
+                -- Must be done lazily for just the same reason as the
                 -- type of a data con; to avoid sucking in types that
                 -- it mentions unless it's necessary to do so
           ; return (op_name, dm, op_ty) }
 
    tc_at cls (IfaceAT tc_decl defs_decls)
      = do ATyCon tc <- tc_iface_decl (AssocFamilyTyCon cls) ignore_prags tc_decl
-          defs <- forkM (mk_at_doc tc) $
-                  foldlM tc_ax_branches [] defs_decls
+          defs <- forkM (mk_at_doc tc) (tc_ax_branches tc defs_decls)
                   -- Must be done lazily in case the RHS of the defaults mention
                   -- the type constructor being defined here
                   -- e.g.   type AT a; type AT b = AT [b]   Trac #8002
@@ -566,14 +565,14 @@ tc_iface_decl _parent ignore_prags
 
 tc_iface_decl _ _ (IfaceForeign {ifName = rdr_name, ifExtName = ext_name})
   = do  { name <- lookupIfaceTop rdr_name
-        ; return (ATyCon (mkForeignTyCon name ext_name 
+        ; return (ATyCon (mkForeignTyCon name ext_name
                                          liftedTypeKind)) }
 
 tc_iface_decl _ _ (IfaceAxiom { ifName = ax_occ, ifTyCon = tc
                               , ifAxBranches = branches, ifRole = role })
   = do { tc_name     <- lookupIfaceTop ax_occ
        ; tc_tycon    <- tcIfaceTyCon tc
-       ; tc_branches <- foldlM tc_ax_branches [] branches
+       ; tc_branches <- tc_ax_branches tc_tycon branches
        ; let axiom = computeAxiomIncomps $
                      CoAxiom { co_ax_unique   = nameUnique tc_name
                              , co_ax_name     = tc_name
@@ -583,12 +582,15 @@ tc_iface_decl _ _ (IfaceAxiom { ifName = ax_occ, ifTyCon = tc
                              , co_ax_implicit = False }
        ; return (ACoAxiom axiom) }
 
-tc_ax_branches :: [CoAxBranch] -> IfaceAxBranch -> IfL [CoAxBranch]
-tc_ax_branches prev_branches
-               (IfaceAxBranch { ifaxbTyVars = tv_bndrs, ifaxbLHS = lhs, ifaxbRHS = rhs
-                              , ifaxbRoles = roles, ifaxbIncomps = incomps })
+tc_ax_branches :: TyCon -> [IfaceAxBranch] -> IfL [CoAxBranch]
+tc_ax_branches tc if_branches = foldlM (tc_ax_branch (tyConKind tc)) [] if_branches
+
+tc_ax_branch :: Kind -> [CoAxBranch] -> IfaceAxBranch -> IfL [CoAxBranch]
+tc_ax_branch tc_kind prev_branches
+             (IfaceAxBranch { ifaxbTyVars = tv_bndrs, ifaxbLHS = lhs, ifaxbRHS = rhs
+                            , ifaxbRoles = roles, ifaxbIncomps = incomps })
   = bindIfaceTyVars tv_bndrs $ \ tvs -> do  -- Variables will all be fresh
-    { tc_lhs <- mapM tcIfaceType lhs
+    { tc_lhs <- tcIfaceTcArgs tc_kind lhs   -- See Note [Checking IfaceTypes vs IfaceKinds]
     ; tc_rhs <- tcIfaceType rhs
     ; let br = CoAxBranch { cab_loc     = noSrcSpan
                           , cab_tvs     = tvs
@@ -608,7 +610,7 @@ tcIfaceDataCons tycon_name tycon _ if_cons
         IfNewTyCon con   -> do  { data_con <- tc_con_decl con
                                 ; mkNewTyConRhs tycon_name tycon data_con }
   where
-    tc_con_decl (IfCon { ifConInfix = is_infix, 
+    tc_con_decl (IfCon { ifConInfix = is_infix,
                          ifConUnivTvs = univ_tvs, ifConExTvs = ex_tvs,
                          ifConOcc = occ, ifConCtxt = ctxt, ifConEqSpec = spec,
                          ifConArgTys = args, ifConFields = field_lbls,
@@ -619,32 +621,32 @@ tcIfaceDataCons tycon_name tycon _ if_cons
         ; name  <- lookupIfaceTop occ
 
         -- Read the context and argument types, but lazily for two reasons
-        -- (a) to avoid looking tugging on a recursive use of 
+        -- (a) to avoid looking tugging on a recursive use of
         --     the type itself, which is knot-tied
-        -- (b) to avoid faulting in the component types unless 
+        -- (b) to avoid faulting in the component types unless
         --     they are really needed
         ; ~(eq_spec, theta, arg_tys, stricts) <- forkM (mk_doc name) $
              do { eq_spec <- tcIfaceEqSpec spec
                 ; theta   <- tcIfaceCtxt ctxt
                 ; arg_tys <- mapM tcIfaceType args
-                ; stricts <- mapM tc_strict if_stricts 
-                        -- The IfBang field can mention 
+                ; stricts <- mapM tc_strict if_stricts
+                        -- The IfBang field can mention
                         -- the type itself; hence inside forkM
                 ; return (eq_spec, theta, arg_tys, stricts) }
         ; lbl_names <- mapM lookupIfaceTop field_lbls
 
         -- Remember, tycon is the representation tycon
-        ; let orig_res_ty = mkFamilyTyConApp tycon 
+        ; let orig_res_ty = mkFamilyTyConApp tycon
                                 (substTyVars (mkTopTvSubst eq_spec) univ_tyvars)
 
         ; con <- buildDataCon (pprPanic "tcIfaceDataCons: FamInstEnvs" (ppr name))
                        name is_infix
                        stricts lbl_names
-                       univ_tyvars ex_tyvars 
-                       eq_spec theta 
+                       univ_tyvars ex_tyvars
+                       eq_spec theta
                        arg_tys orig_res_ty tycon
         ; traceIf (text "Done interface-file tc_con_decl" <+> ppr name)
-        ; return con } 
+        ; return con }
     mk_doc con_name = ptext (sLit "Constructor") <+> ppr con_name
 
     tc_strict IfNoBang = return HsNoBang
@@ -665,7 +667,7 @@ tcIfaceEqSpec spec
 Note [Synonym kind loop]
 ~~~~~~~~~~~~~~~~~~~~~~~~
 Notice that we eagerly grab the *kind* from the interface file, but
-build a forkM thunk for the *rhs* (and family stuff).  To see why, 
+build a forkM thunk for the *rhs* (and family stuff).  To see why,
 consider this (Trac #2412)
 
 M.hs:       module M where { import X; data T = MkT S }
@@ -714,7 +716,7 @@ tcIfaceFamInst (IfaceFamInst { ifFamInstFam = fam, ifFamInstTys = mb_tcs
 %************************************************************************
 
 We move a IfaceRule from eps_rules to eps_rule_base when all its LHS free vars
-are in the type environment.  However, remember that typechecking a Rule may 
+are in the type environment.  However, remember that typechecking a Rule may
 (as a side effect) augment the type envt, and so we may need to iterate the process.
 
 \begin{code}
@@ -729,7 +731,7 @@ tcIfaceRule :: IfaceRule -> IfL CoreRule
 tcIfaceRule (IfaceRule {ifRuleName = name, ifActivation = act, ifRuleBndrs = bndrs,
                         ifRuleHead = fn, ifRuleArgs = args, ifRuleRhs = rhs,
                         ifRuleAuto = auto })
-  = do  { ~(bndrs', args', rhs') <- 
+  = do  { ~(bndrs', args', rhs') <-
                 -- Typecheck the payload lazily, in the hope it'll never be looked at
                 forkM (ptext (sLit "Rule") <+> ftext name) $
                 bindIfaceBndrs bndrs                      $ \ bndrs' ->
@@ -737,9 +739,9 @@ tcIfaceRule (IfaceRule {ifRuleName = name, ifActivation = act, ifRuleBndrs = bnd
                    ; rhs'  <- tcIfaceExpr rhs
                    ; return (bndrs', args', rhs') }
         ; let mb_tcs = map ifTopFreeName args
-        ; return (Rule { ru_name = name, ru_fn = fn, ru_act = act, 
-                          ru_bndrs = bndrs', ru_args = args', 
-                          ru_rhs = occurAnalyseExpr rhs', 
+        ; return (Rule { ru_name = name, ru_fn = fn, ru_act = act,
+                          ru_bndrs = bndrs', ru_args = args',
+                          ru_rhs = occurAnalyseExpr rhs',
                           ru_rough = mb_tcs,
                           ru_auto = auto,
                           ru_local = False }) } -- An imported RULE is never for a local Id
@@ -804,7 +806,7 @@ tcIfaceAnnTarget (ModuleTarget mod) = do
 -- and again and again...
 --
 tcIfaceVectInfo :: Module -> TypeEnv -> IfaceVectInfo -> IfL VectInfo
-tcIfaceVectInfo mod typeEnv (IfaceVectInfo 
+tcIfaceVectInfo mod typeEnv (IfaceVectInfo
                              { ifaceVectInfoVar            = vars
                              , ifaceVectInfoTyCon          = tycons
                              , ifaceVectInfoTyConReuse     = tyconsReuse
@@ -818,7 +820,7 @@ tcIfaceVectInfo mod typeEnv (IfaceVectInfo
        ; tyConRes2     <- mapM (vectTyConReuseMapping varsSet) tyconsReuse
        ; vParallelVars <- mapM vectVar                         parallelVars
        ; let (vTyCons, vDataCons, vScSels) = unzip3 (tyConRes1 ++ tyConRes2)
-       ; return $ VectInfo 
+       ; return $ VectInfo
                   { vectInfoVar            = mkVarEnv  vVars `extendVarEnvList` concat vScSels
                   , vectInfoTyCon          = mkNameEnv vTyCons
                   , vectInfoDataCon        = mkNameEnv (concat vDataCons)
@@ -827,12 +829,12 @@ tcIfaceVectInfo mod typeEnv (IfaceVectInfo
                   }
        }
   where
-    vectVarMapping name 
+    vectVarMapping name
       = do { vName <- lookupOrig mod (mkLocalisedOccName mod mkVectOcc name)
            ; var   <- forkM (ptext (sLit "vect var")  <+> ppr name)  $
                         tcIfaceExtId name
-           ; vVar  <- forkM (ptext (sLit "vect vVar [mod =") <+> 
-                             ppr mod <> ptext (sLit "; nameModule =") <+> 
+           ; vVar  <- forkM (ptext (sLit "vect vVar [mod =") <+>
+                             ppr mod <> ptext (sLit "; nameModule =") <+>
                              ppr (nameModule name) <> ptext (sLit "]") <+> ppr vName) $
                        tcIfaceExtId vName
            ; return (var, (var, vVar))
@@ -848,10 +850,10 @@ tcIfaceVectInfo mod typeEnv (IfaceVectInfo
       --                -- Id is external
       --              Nothing        -> tcIfaceExtId name
       --          }
-      -- 
+      --
       --   notAnIdErr = pprPanic "TcIface.tcIfaceVectInfo: not an id" (ppr name)
 
-    vectVar name 
+    vectVar name
       = forkM (ptext (sLit "vect scalar var")  <+> ppr name)  $
           tcIfaceExtId name
 
@@ -865,7 +867,7 @@ tcIfaceVectInfo mod typeEnv (IfaceVectInfo
 
     vectTyConMapping vars name vName
       = do { tycon  <- lookupLocalOrExternalTyCon name
-           ; vTycon <- forkM (ptext (sLit "vTycon of") <+> ppr vName) $ 
+           ; vTycon <- forkM (ptext (sLit "vTycon of") <+> ppr vName) $
                          lookupLocalOrExternalTyCon vName
 
                -- Map the data constructors of the original type constructor to those of the
@@ -876,7 +878,7 @@ tcIfaceVectInfo mod typeEnv (IfaceVectInfo
                -- NB: This is lazy!  We don't pull at the type constructors before we actually use
                --     the data constructor mapping.
            ; let isAbstract | isClassTyCon tycon = False
-                            | datacon:_ <- tyConDataCons tycon 
+                            | datacon:_ <- tyConDataCons tycon
                                                  = not $ dataConWrapId datacon `elemVarSet` vars
                             | otherwise          = True
                  vDataCons  | isAbstract = []
@@ -888,7 +890,7 @@ tcIfaceVectInfo mod typeEnv (IfaceVectInfo
                    -- Map the (implicit) superclass and methods selectors as they don't occur in
                    -- the var map.
                  vScSels    | Just cls  <- tyConClass_maybe tycon
-                            , Just vCls <- tyConClass_maybe vTycon 
+                            , Just vCls <- tyConClass_maybe vTycon
                             = [ (sel, (sel, vSel))
                               | (sel, vSel) <- zip (classAllSelIds cls) (classAllSelIds vCls)
                               ]
@@ -930,7 +932,7 @@ tcIfaceType (IfaceAppTy t1 t2)     = do { t1' <- tcIfaceType t1; t2' <- tcIfaceT
 tcIfaceType (IfaceLitTy l)         = do { l1 <- tcIfaceTyLit l; return (LitTy l1) }
 tcIfaceType (IfaceFunTy t1 t2)     = do { t1' <- tcIfaceType t1; t2' <- tcIfaceType t2; return (FunTy t1' t2') }
 tcIfaceType (IfaceTyConApp tc tks) = do { tc' <- tcIfaceTyCon tc
-                                        ; tks' <- tcIfaceTcArgs (tyConKind tc') tks 
+                                        ; tks' <- tcIfaceTcArgs (tyConKind tc') tks
                                         ; return (mkTyConApp tc' tks') }
 tcIfaceType (IfaceForAllTy tv t)  = bindIfaceTyVar tv $ \ tv' -> do { t' <- tcIfaceType t; return (ForAllTy tv' t') }
 
@@ -938,7 +940,7 @@ tcIfaceTypes :: [IfaceType] -> IfL [Type]
 tcIfaceTypes tys = mapM tcIfaceType tys
 
 tcIfaceTcArgs :: Kind -> [IfaceType] -> IfL [Type]
-tcIfaceTcArgs _ [] 
+tcIfaceTcArgs _ []
   = return []
 tcIfaceTcArgs kind (tk:tks)
   = case splitForAllTy_maybe kind of
@@ -946,7 +948,7 @@ tcIfaceTcArgs kind (tk:tks)
       Just (_, kind') -> do { k'   <- tcIfaceKind tk
                             ; tks' <- tcIfaceTcArgs kind' tks
                             ; return (k':tks') }
-  
+
 -----------------------------------------
 tcIfaceCtxt :: IfaceContext -> IfL ThetaType
 tcIfaceCtxt sts = mapM tcIfaceType sts
@@ -979,18 +981,18 @@ and consider the two IfaceTypes
       M.Proxy * M.T{tc}
       M.Proxy 'M.T{tc} 'M.T(d}
 The first is conventional, but in the latter we use the promoted
-type constructor (as a kind) and data constructor (as a type).  However, 
+type constructor (as a kind) and data constructor (as a type).  However,
 the Name of the promoted type constructor is just M.T; it's the *same name*
-as the ordinary type constructor.  
+as the ordinary type constructor.
 
 We could add a "promoted" flag to an IfaceTyCon, but that's a bit heavy.
-Instead we use context to distinguish, as in the source language.  
+Instead we use context to distinguish, as in the source language.
   - When checking a kind, we look up M.T{tc} and promote it
   - When checking a type, we look up M.T{tc} and don't promote it
                                  and M.T{d}  and promote it
     See tcIfaceKindCon and tcIfaceKTyCon respectively
 
-This context business is why we need tcIfaceTcArgs.
+This context business is why we need tcIfaceTcArgs, and tcIfaceApps
 
 
 %************************************************************************
@@ -1065,7 +1067,7 @@ tcIfaceExpr (IfaceExt gbl)
 tcIfaceExpr (IfaceLit lit)
   = do lit' <- tcIfaceLit lit
        return (Lit lit')
- 
+
 tcIfaceExpr (IfaceFCall cc ty) = do
     ty' <- tcIfaceType ty
     u <- newUnique
@@ -1080,17 +1082,17 @@ tcIfaceExpr (IfaceTuple boxity args)  = do
   where
     arity = length args
     con_id = dataConWorkId (tupleCon boxity arity)
-    
+
 
 tcIfaceExpr (IfaceLam bndr body)
   = bindIfaceBndr bndr $ \bndr' ->
     Lam bndr' <$> tcIfaceExpr body
 
 tcIfaceExpr (IfaceApp fun arg)
-  = App <$> tcIfaceExpr fun <*> tcIfaceExpr arg
+  = tcIfaceApps fun arg
 
-tcIfaceExpr (IfaceECase scrut ty) 
-  = do { scrut' <- tcIfaceExpr scrut 
+tcIfaceExpr (IfaceECase scrut ty)
+  = do { scrut' <- tcIfaceExpr scrut
        ; ty' <- tcIfaceType ty
        ; return (castBottomExpr scrut' ty') }
 
@@ -1128,8 +1130,8 @@ tcIfaceExpr (IfaceLet (IfaceRec pairs) body)
        ; body' <- tcIfaceExpr body
        ; return (Let (Rec pairs') body') } }
  where
-   tc_rec_bndr (IfLetBndr fs ty _) 
-     = do { name <- newIfaceName (mkVarOccFS fs)  
+   tc_rec_bndr (IfLetBndr fs ty _)
+     = do { name <- newIfaceName (mkVarOccFS fs)
           ; ty'  <- tcIfaceType ty
           ; return (mkLocalId name ty') }
    tc_pair (IfLetBndr _ _ info, rhs) id
@@ -1143,6 +1145,31 @@ tcIfaceExpr (IfaceTick tickish expr) = do
     tickish' <- tcIfaceTickish tickish
     return (Tick tickish' expr')
 
+-------------------------
+tcIfaceApps :: IfaceExpr -> IfaceExpr -> IfL CoreExpr
+-- See Note [Checking IfaceTypes vs IfaceKinds]
+tcIfaceApps fun arg
+  = go_down fun [arg]
+  where
+    go_down (IfaceApp fun arg) args = go_down fun (arg:args)
+    go_down fun args = do { fun' <- tcIfaceExpr fun
+                          ; go_up fun' (exprType fun') args }
+
+    go_up :: CoreExpr -> Type -> [IfaceExpr] -> IfL CoreExpr
+    go_up fun _ [] = return fun
+    go_up fun fun_ty (IfaceType t : args)
+       | Just (tv,body_ty) <- splitForAllTy_maybe fun_ty
+       = do { t' <- if isKindVar tv  -- See Note [Checking IfaceTypes vs IfaceKinds]
+                    then tcIfaceKind t
+                    else tcIfaceType t
+            ; let fun_ty' = substTyWith [tv] [t'] body_ty
+            ; go_up (App fun (Type t')) fun_ty' args }
+    go_up fun fun_ty (arg : args)
+       | Just (_, fun_ty') <- splitFunTy_maybe fun_ty
+       = do { arg' <- tcIfaceExpr arg
+            ; go_up (App fun arg') fun_ty' args }
+    go_up fun fun_ty args = pprPanic "tcIfaceApps" (ppr fun $$ ppr fun_ty $$ ppr args)
+
 -------------------------
 tcIfaceTickish :: IfaceTickish -> IfM lcl (Tickish Id)
 tcIfaceTickish (IfaceHpcTick modl ix)   = return (HpcTick modl ix)
@@ -1150,7 +1177,7 @@ tcIfaceTickish (IfaceSCC  cc tick push) = return (ProfNote cc tick push)
 
 -------------------------
 tcIfaceLit :: Literal -> IfL Literal
--- Integer literals deserialise to (LitInteger i <error thunk>) 
+-- Integer literals deserialise to (LitInteger i <error thunk>)
 -- so tcIfaceLit just fills in the type.
 -- See Note [Integer literals] in Literal
 tcIfaceLit (LitInteger i _)
@@ -1166,7 +1193,7 @@ tcIfaceAlt _ _ (IfaceDefault, names, rhs)
   = ASSERT( null names ) do
     rhs' <- tcIfaceExpr rhs
     return (DEFAULT, [], rhs')
-  
+
 tcIfaceAlt _ _ (IfaceLitAlt lit, names, rhs)
   = ASSERT( null names ) do
     lit' <- tcIfaceLit lit
@@ -1206,7 +1233,7 @@ do_one :: IfaceBinding -> IfL [CoreBind] -> IfL [CoreBind]
 do_one (IfaceNonRec bndr rhs) thing_inside
   = do  { rhs' <- tcIfaceExpr rhs
         ; bndr' <- newExtCoreBndr bndr
-        ; extendIfaceIdEnv [bndr'] $ do 
+        ; extendIfaceIdEnv [bndr'] $ do
         { core_binds <- thing_inside
         ; return (NonRec bndr' rhs' : core_binds) }}
 
@@ -1240,7 +1267,7 @@ tcIdDetails _ (IfRecSelId tc naughty)
        ; return (RecSelId { sel_tycon = tc', sel_naughty = naughty }) }
 
 tcIdInfo :: Bool -> Name -> Type -> IfaceIdInfo -> IfL IdInfo
-tcIdInfo ignore_prags name ty info 
+tcIdInfo ignore_prags name ty info
   | ignore_prags = return vanillaIdInfo
   | otherwise    = case info of
                         NoInfo       -> return vanillaIdInfo
@@ -1257,7 +1284,7 @@ tcIdInfo ignore_prags name ty info
     tcPrag info (HsInline prag)    = return (info `setInlinePragInfo` prag)
 
         -- The next two are lazy, so they don't transitively suck stuff in
-    tcPrag info (HsUnfold lb if_unf) 
+    tcPrag info (HsUnfold lb if_unf)
       = do { unf <- tcUnfolding name ty info if_unf
            ; let info1 | lb        = info `setOccInfo` strongLoopBreaker
                        | otherwise = info
@@ -1291,7 +1318,7 @@ tcUnfolding name _ _ (IfInlineRule arity unsat_ok boring_ok if_expr)
   = do  { mb_expr <- tcPragExpr name if_expr
         ; return (case mb_expr of
                     Nothing   -> NoUnfolding
-                    Just expr -> mkCoreUnfolding InlineStable True expr arity 
+                    Just expr -> mkCoreUnfolding InlineStable True expr arity
                                                  (UnfWhen unsat_ok boring_ok))
     }
 
@@ -1320,8 +1347,8 @@ tcPragExpr name expr
         in_scope <- get_in_scope
         case lintUnfolding noSrcLoc in_scope core_expr' of
           Nothing       -> return ()
-          Just fail_msg -> do { mod <- getIfModule 
-                              ; pprPanic "Iface Lint failure" 
+          Just fail_msg -> do { mod <- getIfModule
+                              ; pprPanic "Iface Lint failure"
                                   (vcat [ ptext (sLit "In interface for") <+> ppr mod
                                         , hang doc 2 fail_msg
                                         , ppr name <+> equals <+> ppr core_expr'
@@ -1331,7 +1358,7 @@ tcPragExpr name expr
     doc = text "Unfolding of" <+> ppr name
 
     get_in_scope :: IfL [Var] -- Totally disgusting; but just for linting
-    get_in_scope        
+    get_in_scope
         = do { (gbl_env, lcl_env) <- getEnvs
              ; rec_ids <- case if_rec_types gbl_env of
                             Nothing -> return []
@@ -1357,19 +1384,19 @@ tcIfaceGlobal name
   | Just thing <- wiredInNameTyThing_maybe name
         -- Wired-in things include TyCons, DataCons, and Ids
         -- Even though we are in an interface file, we want to make
-        -- sure the instances and RULES of this thing (particularly TyCon) are loaded 
+        -- sure the instances and RULES of this thing (particularly TyCon) are loaded
         -- Imagine: f :: Double -> Double
   = do { ifCheckWiredInThing thing; return thing }
   | otherwise
   = do  { env <- getGblEnv
         ; case if_rec_types env of {    -- Note [Tying the knot]
-            Just (mod, get_type_env) 
+            Just (mod, get_type_env)
                 | nameIsLocalOrFrom mod name
                 -> do           -- It's defined in the module being compiled
                 { type_env <- setLclEnv () get_type_env         -- yuk
                 ; case lookupNameEnv type_env name of
                         Just thing -> return thing
-                        Nothing   -> pprPanic "tcIfaceGlobal (local): not found:"  
+                        Nothing   -> pprPanic "tcIfaceGlobal (local): not found:"
                                                 (ppr name $$ ppr type_env) }
 
           ; _ -> do
@@ -1395,7 +1422,7 @@ tcIfaceGlobal name
 --    after we've built M's type envt.
 --
 -- b) In ghc --make, during the upsweep, we encounter M.hs, whose interface M.hi
---    is up to date.  So we call typecheckIface on M.hi.  This splats M.T into 
+--    is up to date.  So we call typecheckIface on M.hi.  This splats M.T into
 --    if_rec_types so that the (lazily typechecked) decls see all the other decls
 --
 -- In case (b) it's important to do the if_rec_types check *before* looking in the HPT
@@ -1403,7 +1430,7 @@ tcIfaceGlobal name
 -- emasculated form (e.g. lacking data constructors).
 
 tcIfaceTyCon :: IfaceTyCon -> IfL TyCon
-tcIfaceTyCon (IfaceTc name) 
+tcIfaceTyCon (IfaceTc name)
   = do { thing <- tcIfaceGlobal name
        ; case thing of    -- A "type constructor" can be a promoted data constructor
                           --           c.f. Trac #5881
@@ -1412,12 +1439,12 @@ tcIfaceTyCon (IfaceTc name)
            _ -> pprPanic "tcIfaceTyCon" (ppr name $$ ppr thing) }
 
 tcIfaceKindCon :: IfaceTyCon -> IfL TyCon
-tcIfaceKindCon (IfaceTc name) 
+tcIfaceKindCon (IfaceTc name)
   = do { thing <- tcIfaceGlobal name
        ; case thing of    -- A "type constructor" here is a promoted type constructor
                           --           c.f. Trac #5881
-           ATyCon tc 
-             | isSuperKind (tyConKind tc) 
+           ATyCon tc
+             | isSuperKind (tyConKind tc)
              -> return tc   -- Mainly just '*' or 'AnyK'
              | Just prom_tc <- promotableTyCon_maybe tc
              -> return prom_tc
@@ -1456,7 +1483,7 @@ bindIfaceBndr (IfaceIdBndr (fs, ty)) thing_inside
         ; extendIfaceIdEnv [id] (thing_inside id) }
 bindIfaceBndr (IfaceTvBndr bndr) thing_inside
   = bindIfaceTyVar bndr thing_inside
-    
+
 bindIfaceBndrs :: [IfaceBndr] -> ([CoreBndr] -> IfL a) -> IfL a
 bindIfaceBndrs []     thing_inside = thing_inside []
 bindIfaceBndrs (b:bs) thing_inside
diff --git a/compiler/main/HscMain.hs b/compiler/main/HscMain.hs
index 39e483ef43aff9d7db8f4e415e6b0353f0e8e0dc..331ec6da51ed194505508956a343f3ca61658943 100644
--- a/compiler/main/HscMain.hs
+++ b/compiler/main/HscMain.hs
@@ -852,7 +852,7 @@ checkSafeImports dflags tcg_env
               (text $ "is imported both as a safe and unsafe import!"))
         | otherwise
         = return v1
-    
+
     -- easier interface to work with
     checkSafe (_, _, False) = return Nothing
     checkSafe (m, l, True ) = fst `fmap` hscCheckSafe' dflags m l
@@ -879,7 +879,7 @@ hscGetSafe hsc_env m l = runHsc hsc_env $ do
     let pkgs' | Just p <- self = p:pkgs
               | otherwise      = pkgs
     return (good, pkgs')
- 
+
 -- | Is a module trusted? If not, throw or log errors depending on the type.
 -- Return (regardless of trusted or not) if the trust type requires the modules
 -- own package be trusted and a list of other packages required to be trusted
@@ -963,7 +963,7 @@ hscCheckSafe' dflags m l = do
             Just _  -> return iface
             Nothing -> snd `fmap` (liftIO $ getModuleInterface hsc_env m)
         return iface'
-#else 
+#else
         return iface
 #endif
 
@@ -1280,12 +1280,8 @@ tryNewCodeGen hsc_env this_mod data_tycons
 
       | otherwise
         = {-# SCC "cmmPipeline" #-}
-          let initTopSRT = initUs_ us emptySRT in
-  
-          let run_pipeline topSRT cmmgroup = do
-                (topSRT, cmmgroup) <- cmmPipeline hsc_env topSRT cmmgroup
-                return (topSRT,cmmgroup)
-  
+          let initTopSRT = initUs_ us emptySRT
+              run_pipeline = cmmPipeline hsc_env
           in do topSRT <- Stream.mapAccumL run_pipeline initTopSRT ppr_stream1
                 Stream.yield (srtToData topSRT)
 
@@ -1616,7 +1612,7 @@ hscCompileCoreExpr' hsc_env srcspan ds_expr
          ; prepd_expr <- corePrepExpr dflags hsc_env tidy_expr
 
            {- Lint if necessary -}
-         ; lintInteractiveExpr "hscCompileExpr" hsc_env prepd_expr 
+         ; lintInteractiveExpr "hscCompileExpr" hsc_env prepd_expr
 
            {- Convert to BCOs -}
          ; bcos <- coreExprToBCOs dflags iNTERACTIVE prepd_expr
@@ -1658,4 +1654,3 @@ showModuleIndex (i,n) = "[" ++ padded ++ " of " ++ n_str ++ "] "
     n_str = show n
     i_str = show i
     padded = replicate (length n_str - length i_str) ' ' ++ i_str
-
diff --git a/compiler/parser/Lexer.x b/compiler/parser/Lexer.x
index 0b18597718e8b12a2778ae9023ff9c6f5aaa2b90..1ace315a6f37f14c0eaf84c51c23ceb2a546038b 100644
--- a/compiler/parser/Lexer.x
+++ b/compiler/parser/Lexer.x
@@ -318,6 +318,7 @@ $tab+         { warn Opt_WarnTabs (text "Tab character") }
   "[|"        / { ifExtension thEnabled } { token ITopenExpQuote }
   "[||"       / { ifExtension thEnabled } { token ITopenTExpQuote }
   "[e|"       / { ifExtension thEnabled } { token ITopenExpQuote }
+  "[e||"      / { ifExtension thEnabled } { token ITopenTExpQuote }
   "[p|"       / { ifExtension thEnabled } { token ITopenPatQuote }
   "[d|"       / { ifExtension thEnabled } { layout_token ITopenDecQuote }
   "[t|"       / { ifExtension thEnabled } { token ITopenTypQuote }
diff --git a/compiler/simplCore/Simplify.lhs b/compiler/simplCore/Simplify.lhs
index a0bd7f8edc3941fe526b107ec805ae1fe41c3058..2c1b01bee87b8935bfb876aafc5614f7c84450e2 100644
--- a/compiler/simplCore/Simplify.lhs
+++ b/compiler/simplCore/Simplify.lhs
@@ -2609,32 +2609,40 @@ type varaibles as well as term variables.
         case (case e of ...) of
             C t xs::[t] -> j t xs
 
-Note [Join point abstaction]
+Note [Join point abstraction]
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-If we try to lift a primitive-typed something out
-for let-binding-purposes, we will *caseify* it (!),
-with potentially-disastrous strictness results.  So
-instead we turn it into a function: \v -> e
-where v::State# RealWorld#.  The value passed to this function
-is realworld#, which generates (almost) no code.
-
-There's a slight infelicity here: we pass the overall
-case_bndr to all the join points if it's used in *any* RHS,
-because we don't know its usage in each RHS separately
-
-We used to say "&& isUnLiftedType rhs_ty'" here, but now
-we make the join point into a function whenever used_bndrs'
-is empty.  This makes the join-point more CPR friendly.
-Consider:       let j = if .. then I# 3 else I# 4
-                in case .. of { A -> j; B -> j; C -> ... }
-
-Now CPR doesn't w/w j because it's a thunk, so
-that means that the enclosing function can't w/w either,
-which is a lose.  Here's the example that happened in practice:
-        kgmod :: Int -> Int -> Int
-        kgmod x y = if x > 0 && y < 0 || x < 0 && y > 0
-                    then 78
-                    else 5
+Join points always have at least one value argument,
+for several reasons
+
+* If we try to lift a primitive-typed something out
+  for let-binding-purposes, we will *caseify* it (!),
+  with potentially-disastrous strictness results.  So
+  instead we turn it into a function: \v -> e
+  where v::State# RealWorld#.  The value passed to this function
+  is realworld#, which generates (almost) no code.
+
+* CPR.  We used to say "&& isUnLiftedType rhs_ty'" here, but now
+  we make the join point into a function whenever used_bndrs'
+  is empty.  This makes the join-point more CPR friendly.
+  Consider:       let j = if .. then I# 3 else I# 4
+                  in case .. of { A -> j; B -> j; C -> ... }
+
+  Now CPR doesn't w/w j because it's a thunk, so
+  that means that the enclosing function can't w/w either,
+  which is a lose.  Here's the example that happened in practice:
+          kgmod :: Int -> Int -> Int
+          kgmod x y = if x > 0 && y < 0 || x < 0 && y > 0
+                      then 78
+                      else 5
+
+* Let-no-escape.  We want a join point to turn into a let-no-escape
+  so that it is implemented as a jump, and one of the conditions
+  for LNE is that it's not updatable.  In CoreToStg, see
+  Note [What is a non-escaping let]
+
+* Floating.  Since a join point will be entered once, no sharing is
+  gained by floating out, but something might be lost by doing
+  so because it might be allocated.
 
 I have seen a case alternative like this:
         True -> \v -> ...
@@ -2643,6 +2651,11 @@ It's a bit silly to add the realWorld dummy arg in this case, making
            True -> $j s
 (the \v alone is enough to make CPR happy) but I think it's rare
 
+There's a slight infelicity here: we pass the overall
+case_bndr to all the join points if it's used in *any* RHS,
+because we don't know its usage in each RHS separately
+
+
 Note [Duplicating StrictArg]
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 The original plan had (where E is a big argument)
diff --git a/compiler/typecheck/TcInstDcls.lhs b/compiler/typecheck/TcInstDcls.lhs
index 381c0823ccb7dc3e54652d1516856d75b7b23abc..6d4e620534811b5a6c937f64575e1a9d8a1de3c7 100644
--- a/compiler/typecheck/TcInstDcls.lhs
+++ b/compiler/typecheck/TcInstDcls.lhs
@@ -1358,6 +1358,9 @@ tcInstanceMethods dfun_id clas tyvars dfun_ev_vars inst_tys
 -- Notice that the dictionary bindings "..d1..d2.." must be generated
 -- by the constraint solver, since the <context> may be
 -- user-specified.
+--
+-- See also Note [Newtype deriving superclasses] in TcDeriv
+-- for why we don't just coerce the superclass
 
   = do { rep_d_stuff <- checkConstraints InstSkol tyvars dfun_ev_vars $
                         emitWanted ScOrigin rep_pred
diff --git a/compiler/typecheck/TcTypeNats.hs b/compiler/typecheck/TcTypeNats.hs
index 7961df4a7d2e01dd676acfb823292b6d16fb31d2..7f8c72258fa3bc9bf4759da31706fa55edb9d27a 100644
--- a/compiler/typecheck/TcTypeNats.hs
+++ b/compiler/typecheck/TcTypeNats.hs
@@ -336,30 +336,62 @@ Interact with axioms
 
 interactTopAdd :: [Xi] -> Xi -> [Pair Type]
 interactTopAdd [s,t] r
-  | Just 0 <- mbZ = [ s === num 0, t === num 0 ]
-  | Just x <- mbX, Just z <- mbZ, Just y <- minus z x = [t === num y]
-  | Just y <- mbY, Just z <- mbZ, Just x <- minus z y = [s === num x]
+  | Just 0 <- mbZ = [ s === num 0, t === num 0 ]                          -- (s + t ~ 0) => (s ~ 0, t ~ 0)
+  | Just x <- mbX, Just z <- mbZ, Just y <- minus z x = [t === num y]     -- (5 + t ~ 8) => (t ~ 3)
+  | Just y <- mbY, Just z <- mbZ, Just x <- minus z y = [s === num x]     -- (s + 5 ~ 8) => (s ~ 3)
   where
   mbX = isNumLitTy s
   mbY = isNumLitTy t
   mbZ = isNumLitTy r
 interactTopAdd _ _ = []
 
+{-
+Note [Weakened interaction rule for subtraction]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+A simpler interaction here might be:
+
+  `s - t ~ r` --> `t + r ~ s`
+
+This would enable us to reuse all the code for addition.
+Unfortunately, this works a little too well at the moment.
+Consider the following example:
+
+    0 - 5 ~ r --> 5 + r ~ 0 --> (5 = 0, r = 0)
+
+This (correctly) spots that the constraint cannot be solved.
+
+However, this may be a problem if the constraint did not
+need to be solved in the first place!  Consider the following example:
+
+f :: Proxy (If (5 <=? 0) (0 - 5) (5 - 0)) -> Proxy 5
+f = id
+
+Currently, GHC is strict while evaluating functions, so this does not
+work, because even though the `If` should evaluate to `5 - 0`, we
+also evaluate the "then" branch which generates the constraint `0 - 5 ~ r`,
+which fails.
+
+So, for the time being, we only add an improvement when the RHS is a constant,
+which happens to work OK for the moment, although clearly we need to do
+something more general.
+-}
 interactTopSub :: [Xi] -> Xi -> [Pair Type]
 interactTopSub [s,t] r
-  | Just 0 <- mbZ = [ s === t ]
-  | otherwise     = [ t .+. r === s ]
+  | Just z <- mbZ = [ s === (num z .+. t) ]         -- (s - t ~ 5) => (5 + t ~ s)
   where
   mbZ = isNumLitTy r
 interactTopSub _ _ = []
 
 
 
+
+
 interactTopMul :: [Xi] -> Xi -> [Pair Type]
 interactTopMul [s,t] r
-  | Just 1 <- mbZ = [ s === num 1, t === num 1 ]
-  | Just x <- mbX, Just z <- mbZ, Just y <- divide z x = [t === num y]
-  | Just y <- mbY, Just z <- mbZ, Just x <- divide z y = [s === num x]
+  | Just 1 <- mbZ = [ s === num 1, t === num 1 ]                        -- (s * t ~ 1)  => (s ~ 1, t ~ 1)
+  | Just x <- mbX, Just z <- mbZ, Just y <- divide z x = [t === num y]  -- (3 * t ~ 15) => (t ~ 5)
+  | Just y <- mbY, Just z <- mbZ, Just x <- divide z y = [s === num x]  -- (s * 3 ~ 15) => (s ~ 5)
   where
   mbX = isNumLitTy s
   mbY = isNumLitTy t
@@ -368,9 +400,9 @@ interactTopMul _ _ = []
 
 interactTopExp :: [Xi] -> Xi -> [Pair Type]
 interactTopExp [s,t] r
-  | Just 0 <- mbZ = [ s === num 0 ]
-  | Just x <- mbX, Just z <- mbZ, Just y <- logExact  z x = [t === num y]
-  | Just y <- mbY, Just z <- mbZ, Just x <- rootExact z y = [s === num x]
+  | Just 0 <- mbZ = [ s === num 0 ]                                       -- (s ^ t ~ 0) => (s ~ 0)
+  | Just x <- mbX, Just z <- mbZ, Just y <- logExact  z x = [t === num y] -- (2 ^ t ~ 8) => (t ~ 3)
+  | Just y <- mbY, Just z <- mbZ, Just x <- rootExact z y = [s === num x] -- (s ^ 2 ~ 9) => (s ~ 3)
   where
   mbX = isNumLitTy s
   mbY = isNumLitTy t
@@ -379,7 +411,7 @@ interactTopExp _ _ = []
 
 interactTopLeq :: [Xi] -> Xi -> [Pair Type]
 interactTopLeq [s,t] r
-  | Just 0 <- mbY, Just True <- mbZ = [ s === num 0 ]
+  | Just 0 <- mbY, Just True <- mbZ = [ s === num 0 ]                     -- (s <= 0) => (s ~ 0)
   where
   mbY = isNumLitTy t
   mbZ = isBoolLitTy r
@@ -398,11 +430,11 @@ interactInertAdd [x1,y1] z1 [x2,y2] z2
   where sameZ = eqType z1 z2
 interactInertAdd _ _ _ _ = []
 
-{- XXX: Should we add some rules here?
-When `interactTopSub` sees `x - y ~ z`, it generates `z + y ~ x`.
-Hopefully, this should interact further and generate all additional
-needed facts that we might need. -}
 interactInertSub :: [Xi] -> Xi -> [Xi] -> Xi -> [Pair Type]
+interactInertSub [x1,y1] z1 [x2,y2] z2
+  | sameZ && eqType x1 x2         = [ y1 === y2 ]
+  | sameZ && eqType y1 y2         = [ x1 === x2 ]
+  where sameZ = eqType z1 z2
 interactInertSub _ _ _ _ = []
 
 interactInertMul :: [Xi] -> Xi -> [Xi] -> Xi -> [Pair Type]
diff --git a/compiler/utils/BooleanFormula.hs b/compiler/utils/BooleanFormula.hs
index 3e82e75b1fd1cbb9be67984f47ad68d9d8997b99..8620ef555d9367c0baed19bddb94ab38a6a0e85d 100644
--- a/compiler/utils/BooleanFormula.hs
+++ b/compiler/utils/BooleanFormula.hs
@@ -1,6 +1,9 @@
 --------------------------------------------------------------------------------
--- | Boolean formulas without negation (qunatifier free)
-
+-- | Boolean formulas without quantifiers and without negation.
+-- Such a formula consists of variables, conjunctions (and), and disjunctions (or).
+--
+-- This module is used to represent minimal complete definitions for classes.
+--
 {-# LANGUAGE DeriveDataTypeable, DeriveFunctor, DeriveFoldable,
              DeriveTraversable #-}
 
@@ -36,13 +39,16 @@ mkFalse, mkTrue :: BooleanFormula a
 mkFalse = Or []
 mkTrue = And []
 
+-- Convert a Bool to a BooleanFormula
 mkBool :: Bool -> BooleanFormula a
 mkBool False = mkFalse
 mkBool True  = mkTrue
 
+-- Make a conjunction, and try to simplify
 mkAnd :: Eq a => [BooleanFormula a] -> BooleanFormula a
 mkAnd = maybe mkFalse (mkAnd' . nub) . concatMapM fromAnd
   where
+  -- See Note [Simplification of BooleanFormulas]
   fromAnd :: BooleanFormula a -> Maybe [BooleanFormula a]
   fromAnd (And xs) = Just xs
      -- assume that xs are already simplified
@@ -55,14 +61,50 @@ mkAnd = maybe mkFalse (mkAnd' . nub) . concatMapM fromAnd
 mkOr :: Eq a => [BooleanFormula a] -> BooleanFormula a
 mkOr = maybe mkTrue (mkOr' . nub) . concatMapM fromOr
   where
+  -- See Note [Simplification of BooleanFormulas]
   fromOr (Or xs) = Just xs
   fromOr (And []) = Nothing
   fromOr x = Just [x]
   mkOr' [x] = x
   mkOr' xs = Or xs
 
+
+{-
+Note [Simplification of BooleanFormulas]
+~~~~~~~~~~~~~~~~~~~~~~
+The smart constructors (`mkAnd` and `mkOr`) do some attempt to simplify expressions. In particular,
+ 1. Collapsing nested ands and ors, so
+     `(mkAnd [x, And [y,z]]`
+    is represented as
+     `And [x,y,z]`
+    Implemented by `fromAnd`/`fromOr`
+ 2. Collapsing trivial ands and ors, so
+     `mkAnd [x]` becomes just `x`.
+    Implemented by mkAnd' / mkOr'
+ 3. Conjunction with false, disjunction with true is simplified, i.e.
+     `mkAnd [mkFalse,x]` becomes `mkFalse`.
+ 4. Common subexpresion elimination:
+     `mkAnd [x,x,y]` is reduced to just `mkAnd [x,y]`.
+
+This simplification is not exhaustive, in the sense that it will not produce
+the smallest possible equivalent expression. For example,
+`Or [And [x,y], And [x]]` could be simplified to `And [x]`, but it currently
+is not. A general simplifier would need to use something like BDDs.
+
+The reason behind the (crude) simplifier is to make for more user friendly
+error messages. E.g. for the code
+  > class Foo a where
+  >     {-# MINIMAL bar, (foo, baq | foo, quux) #-}
+  > instance Foo Int where
+  >     bar = ...
+  >     baz = ...
+  >     quux = ...
+We don't show a ridiculous error message like
+    Implement () and (either (`foo' and ()) or (`foo' and ()))
+-}
+
 ----------------------------------------------------------------------
--- Evaluation and simplificiation
+-- Evaluation and simplification
 ----------------------------------------------------------------------
 
 isFalse :: BooleanFormula a -> Bool
@@ -117,6 +159,8 @@ x `implies` Or ys  = any (x `implies`) ys
 -- Pretty printing
 ----------------------------------------------------------------------
 
+-- Pretty print a BooleanFormula,
+-- using the arguments as pretty printers for Var, And and Or respectively
 pprBooleanFormula' :: (Rational -> a -> SDoc)
                    -> (Rational -> [SDoc] -> SDoc)
                    -> (Rational -> [SDoc] -> SDoc)
diff --git a/docs/users_guide/7.8.1-notes.xml b/docs/users_guide/7.8.1-notes.xml
index 6c85335c258a573d91c7582b83a4abe8201e5079..eee3174e54be33a4109f460a9ef17ea5f879b85d 100644
--- a/docs/users_guide/7.8.1-notes.xml
+++ b/docs/users_guide/7.8.1-notes.xml
@@ -113,10 +113,8 @@
 
         <listitem>
             <para>
-                The LLVM backend now supports 128bit SIMD
-                operations. This is now exploited in both the
-                <literal>vector</literal> and <literal>dph</literal>
-                packages, exposing a high level interface.
+                The LLVM backend now supports 128- and 256-bit SIMD
+                operations.
 
                 TODO FIXME: reference.
            </para>
@@ -488,6 +486,20 @@
                     Template Haskell now supports annotation pragmas.
                </para>
            </listitem>
+           <listitem>
+             <para>
+                    Typed Template Haskell expressions are now supported. See
+                    <xref linkend="template-haskell"/> for more details.
+             </para>
+           </listitem>
+           <listitem>
+             <para>
+                    Template Haskell declarations, types, patterns, and
+                    <emphasis>untyped</emphasis> expressions are no longer
+                    typechecked at all. This is a backwards-compatible change
+                    since it allows strictly more programs to be typed.
+             </para>
+           </listitem>
        </itemizedlist>
     </sect3>
 
diff --git a/docs/users_guide/glasgow_exts.xml b/docs/users_guide/glasgow_exts.xml
index 2a6c8181c53169fd937d4c603a896d24a7193b2b..57eb6451bf66743014f53d93108c90ab08b99534 100644
--- a/docs/users_guide/glasgow_exts.xml
+++ b/docs/users_guide/glasgow_exts.xml
@@ -7872,12 +7872,13 @@ Wiki page</ulink>.
 		  <itemizedlist>
 		    <listitem><para> an expression; the spliced expression must
 		    have type <literal>Q Exp</literal></para></listitem>
-		    <listitem><para> an type; the spliced expression must
-		    have type <literal>Q Typ</literal></para></listitem>
-		    <listitem><para> a list of top-level declarations; the spliced expression
+		    <listitem><para> a pattern; the spliced pattern must
+		    have type <literal>Q Pat</literal></para></listitem>
+		    <listitem><para> a type; the spliced expression must
+		    have type <literal>Q Type</literal></para></listitem>
+		    <listitem><para> a list of declarations; the spliced expression
                     must have type <literal>Q [Dec]</literal></para></listitem>
 		    </itemizedlist>
-            Note that pattern splices are not supported.
             Inside a splice you can only call functions defined in imported modules,
 	    not functions defined elsewhere in the same module.</para></listitem>
 
@@ -7895,6 +7896,36 @@ Wiki page</ulink>.
                              the quotation has type <literal>Q Pat</literal>.</para></listitem>
 		  </itemizedlist></para></listitem>
 
+	      <listitem>
+		<para>
+		  A <emphasis>typed</emphasis> expression splice is written
+		  <literal>$$x</literal>, where <literal>x</literal> is an
+		  identifier, or <literal>$$(...)</literal>, where the "..." is
+		  an arbitrary expression.
+		</para>
+		<para>
+		  A typed expression splice can occur in place of an
+		  expression; the spliced expression must have type <literal>Q
+		  (TExp a)</literal>
+		</para>
+	      </listitem>
+
+	      <listitem>
+		<para>
+		  A <emphasis>typed</emphasis> expression quotation is written
+		  as <literal>[|| ... ||]</literal>, or <literal>[e||
+		  ... ||]</literal>, where the "..." is an expression; if the
+		  "..." expression has type <literal>a</literal>, then the
+		  quotation has type <literal>Q (TExp a)</literal>.
+		</para>
+
+		<para>
+		  Values of type <literal>TExp a</literal> may be converted to
+		  values of type <literal>Exp</literal> using the function
+		  <literal>unType :: TExp a -> Exp</literal>.
+		</para>
+	      </listitem>
+
 	      <listitem><para>
 		  A quasi-quotation can appear in either a pattern context or an
 		  expression context and is also written in Oxford brackets:
@@ -7950,13 +7981,117 @@ h z = z-1
 </programlisting>
             This abbreviation makes top-level declaration slices quieter and less intimidating.
 	    </para></listitem>
+	    
+	    <listitem>
+	      <para>
+		Binders are lexically scoped. For example, consider the
+		following code, where a value <literal>g</literal> of type
+		<literal>Bool -> Q Pat</literal> is in scope, having been
+		imported from another module
+<programlisting>
+y :: Int
+y = 7
+
+f :: Int -> Int -> Int
+f n = \ $(g True) -> y+n
+</programlisting>
+                The <literal>y</literal> in the right-hand side of
+                <literal>f</literal> refers to the top-level <literal>y =
+                7</literal>, even if the pattern splice <literal>$(g
+                n)</literal> also generates a binder <literal>y</literal>.
+	      </para>
+
+	      <para>
+		Note that a pattern quasiquoter <emphasis>may</emphasis>
+		generate binders that scope over the right-hand side of a
+		definition because these binders are in scope lexically. For
+		example, given a quasiquoter <literal>haskell</literal> that
+		parses Haskell, in the following code, the <literal>y</literal>
+		in the right-hand side of <literal>f</literal> refers to the
+		<literal>y</literal> bound by the <literal>haskell</literal>
+		pattern quasiquoter, <emphasis>not</emphasis> the top-level
+		<literal>y = 7</literal>.
+<programlisting>
+y :: Int
+y = 7
+
+f :: Int -> Int -> Int
+f n = \ [haskell|y|] -> y+n
+</programlisting>
+	      </para>
+	    </listitem>
+	    <listitem>
+	      <para>
+		The type environment seen by <literal>reify</literal> includes
+		all the top-level declaration up to the end of the immediately
+		preceding <emphasis>declaration group</emphasis>, but no more.
+	      </para>
+
+	      <para>
+		A <emphasis>declaration group</emphasis> is the group of
+		declarations created by a top-level declaration splice, plus
+		those following it, down to but not including the next top-level
+		declaration splice. The first declaration group in a module
+		includes all top-level definitions down to but not including the
+		first top-level declaration splice.
+	      </para>
+
+
+	      <para>
+		Concretely, consider the following code
+<programlisting>
+module M where
+   import ...
+   f x = x
+   $(th1 4)
+   h y = k y y $(blah1)
+   $(th2 10)
+   w z = $(blah2)
+</programlisting>
+
+              In this example
+              <orderedlist>
+		<listitem>
+		  <para>
+		    A <literal>reify</literal> inside the splice <literal>$(th1
+		    ..)</literal> would see the definition of
+		    <literal>f</literal>.
+		  </para>
+		</listitem>
+		<listitem>
+		  <para>
+		    A <literal>reify</literal> inside the splice
+		    <literal>$(blah1)</literal> would see the definition of
+		    <literal>f</literal>, but would not see the definition of
+		    <literal>h</literal>.
+		  </para>
+		</listitem>
+		<listitem>
+		  <para>
+		    A <literal>reify</literal> inside the splice
+		    <literal>$(th2..)</literal> would see the definition of
+		    <literal>f</literal>, all the bindings created by
+		    <literal>$(th1..)</literal>, and the definition of
+		    <literal>h</literal>.
+		  </para>
+		</listitem>
+		<listitem>
+		  <para>
+		    A <literal>reify</literal> inside the splice
+		    <literal>$(blah2)</literal> would see the same definitions
+		    as the splice <literal>$(th2...)</literal>.
+		  </para>
+		</listitem>
+              </orderedlist>
+	      </para>
+	    </listitem>
 
 
 	</itemizedlist>
 (Compared to the original paper, there are many differences of detail.
 The syntax for a declaration splice uses "<literal>$</literal>" not "<literal>splice</literal>".
 The type of the enclosed expression must be  <literal>Q [Dec]</literal>, not  <literal>[Q Dec]</literal>.
-Pattern splices and quotations are not implemented.)
+Typed expression splices and quotations are supported.)
 
 </sect2>
 
@@ -11131,13 +11266,13 @@ Here are some examples:</para>
   data T3 a b = MkT3 a     -- OK: nominal is higher than necessary, but safe
 
   type role T4 nominal
-  data T4 a = MkT4 (a Int)    -- OK, but N is higher than necessary
+  data T4 a = MkT4 (a Int) -- OK, but N is higher than necessary
 
   type role C representational _
   class C a b where ...    -- OK
 
   type role X nominal
-  type X a@N = ...           -- ERROR: role annotations not allowed for type synonyms
+  type X a = ...           -- ERROR: role annotations not allowed for type synonyms
 </programlisting>
 
 </sect2>
diff --git a/libraries/Cabal b/libraries/Cabal
index 01e68a3299e7336cda0cb4cd8adf71c678b57701..e3e3702a6997f9a431ca562156cf667c93bd0e5e 160000
--- a/libraries/Cabal
+++ b/libraries/Cabal
@@ -1 +1 @@
-Subproject commit 01e68a3299e7336cda0cb4cd8adf71c678b57701
+Subproject commit e3e3702a6997f9a431ca562156cf667c93bd0e5e
diff --git a/utils/hp2ps/hp2ps.1 b/utils/hp2ps/hp2ps.1
index fd0bca0234b448c229dadcce0b4668a22ca8df29..6698b973aa57af742c7cad3b75eeedcfbed9c319 100644
--- a/utils/hp2ps/hp2ps.1
+++ b/utils/hp2ps/hp2ps.1
@@ -97,6 +97,8 @@ on
 Use a small box for the title.
 .IP "\fB\-y\fP"
 Draw the graph in the traditional York style, ignoring marks.
+.IP "\fB\-c\fP"
+Use colours in the rendering of the graphs.
 .IP "\fB\-?\fP"
 Print out usage information. 
 .SH "INPUT FORMAT"