diff --git a/compiler/GHC/ByteCode/Asm.hs b/compiler/GHC/ByteCode/Asm.hs
index c0359cacb9956a93650532bb520b3562fcabccbe..fab5cd405582ec996169c920461cb4b4d3a709b5 100644
--- a/compiler/GHC/ByteCode/Asm.hs
+++ b/compiler/GHC/ByteCode/Asm.hs
@@ -22,7 +22,7 @@ import GHC.ByteCode.InfoTable
 import GHC.ByteCode.Types
 import GHCi.RemoteTypes
 import GHC.Runtime.Interpreter
-import GHC.Runtime.Heap.Layout hiding ( WordOff )
+import GHC.Runtime.Heap.Layout ( fromStgWord, StgWord )
 
 import GHC.Types.Name
 import GHC.Types.Name.Set
@@ -199,8 +199,8 @@ assembleBCO platform (ProtoBCO { protoBCOName       = nm
       -- this BCO to be long.
       (n_insns0, lbl_map0) = inspectAsm platform False initial_offset asm
       ((n_insns, lbl_map), long_jumps)
-        | isLarge (fromIntegral $ Map.size lbl_map0)
-          || isLarge n_insns0
+        | isLargeW (fromIntegral $ Map.size lbl_map0)
+          || isLargeW n_insns0
                     = (inspectAsm platform True initial_offset asm, True)
         | otherwise = ((n_insns0, lbl_map0), False)
 
@@ -229,7 +229,7 @@ assembleBCO platform (ProtoBCO { protoBCOName       = nm
 
   return ul_bco
 
-mkBitmapArray :: Word16 -> [StgWord] -> UArray Int Word64
+mkBitmapArray :: Word -> [StgWord] -> UArray Int Word64
 -- Here the return type must be an array of Words, not StgWords,
 -- because the underlying ByteArray# will end up as a component
 -- of a BCO object.
@@ -244,9 +244,21 @@ type AsmState = (SizedSeq Word16,
 
 data Operand
   = Op Word
+  | IOp Int
   | SmallOp Word16
   | LabelOp LocalLabel
--- (unused)  | LargeOp Word
+
+wOp :: WordOff -> Operand
+wOp = Op . fromIntegral
+
+bOp :: ByteOff -> Operand
+bOp = Op . fromIntegral
+
+truncHalfWord :: Platform -> HalfWord -> Operand
+truncHalfWord platform w = case platformWordSize platform of
+  PW4 | w <= 65535      -> Op (fromIntegral w)
+  PW8 | w <= 4294967295 -> Op (fromIntegral w)
+  _ -> pprPanic "GHC.ByteCode.Asm.truncHalfWord" (ppr w)
 
 data Assembler a
   = AllocPtr (IO BCOPtr) (Word -> Assembler a)
@@ -287,9 +299,9 @@ type LabelEnv = LocalLabel -> Word
 largeOp :: Bool -> Operand -> Bool
 largeOp long_jumps op = case op of
    SmallOp _ -> False
-   Op w      -> isLarge w
+   Op w      -> isLargeW w
+   IOp i     -> isLargeI i
    LabelOp _ -> long_jumps
--- LargeOp _ -> True
 
 runAsm :: Platform -> Bool -> LabelEnv -> Assembler a -> StateT AsmState IO a
 runAsm platform long_jumps e = go
@@ -308,15 +320,15 @@ runAsm platform long_jumps e = go
       go $ k w
     go (AllocLabel _ k) = go k
     go (Emit w ops k) = do
-      let largeOps = any (largeOp long_jumps) ops
+      let largeArgs = any (largeOp long_jumps) ops
           opcode
-            | largeOps = largeArgInstr w
+            | largeArgs = largeArgInstr w
             | otherwise = w
           words = concatMap expand ops
           expand (SmallOp w) = [w]
           expand (LabelOp w) = expand (Op (e w))
-          expand (Op w) = if largeOps then largeArg platform (fromIntegral w) else [fromIntegral w]
---        expand (LargeOp w) = largeArg platform w
+          expand (Op w) = if largeArgs then largeArg platform (fromIntegral w) else [fromIntegral w]
+          expand (IOp i) = if largeArgs then largeArg platform (fromIntegral i) else [fromIntegral i]
       state $ \(st_i0,st_l0,st_p0) ->
         let st_i1 = addListToSS st_i0 (opcode : words)
         in ((), (st_i1,st_l0,st_p0))
@@ -350,7 +362,7 @@ inspectAsm platform long_jumps initial_offset
         count (SmallOp _) = 1
         count (LabelOp _) = count (Op 0)
         count (Op _) = if largeOps then largeArg16s platform else 1
---      count (LargeOp _) = largeArg16s platform
+        count (IOp _) = if largeOps then largeArg16s platform else 1
 
 -- Bring in all the bci_ bytecode constants.
 #include "Bytecodes.h"
@@ -379,15 +391,15 @@ assembleI :: Platform
           -> Assembler ()
 assembleI platform i = case i of
   STKCHECK n               -> emit bci_STKCHECK [Op n]
-  PUSH_L o1                -> emit bci_PUSH_L [SmallOp o1]
-  PUSH_LL o1 o2            -> emit bci_PUSH_LL [SmallOp o1, SmallOp o2]
-  PUSH_LLL o1 o2 o3        -> emit bci_PUSH_LLL [SmallOp o1, SmallOp o2, SmallOp o3]
-  PUSH8 o1                 -> emit bci_PUSH8 [SmallOp o1]
-  PUSH16 o1                -> emit bci_PUSH16 [SmallOp o1]
-  PUSH32 o1                -> emit bci_PUSH32 [SmallOp o1]
-  PUSH8_W o1               -> emit bci_PUSH8_W [SmallOp o1]
-  PUSH16_W o1              -> emit bci_PUSH16_W [SmallOp o1]
-  PUSH32_W o1              -> emit bci_PUSH32_W [SmallOp o1]
+  PUSH_L o1                -> emit bci_PUSH_L [wOp o1]
+  PUSH_LL o1 o2            -> emit bci_PUSH_LL [wOp o1, wOp o2]
+  PUSH_LLL o1 o2 o3        -> emit bci_PUSH_LLL [wOp o1, wOp o2, wOp o3]
+  PUSH8 o1                 -> emit bci_PUSH8 [bOp o1]
+  PUSH16 o1                -> emit bci_PUSH16 [bOp o1]
+  PUSH32 o1                -> emit bci_PUSH32 [bOp o1]
+  PUSH8_W o1               -> emit bci_PUSH8_W [bOp o1]
+  PUSH16_W o1              -> emit bci_PUSH16_W [bOp o1]
+  PUSH32_W o1              -> emit bci_PUSH32_W [bOp o1]
   PUSH_G nm                -> do p <- ptr (BCOPtrName nm)
                                  emit bci_PUSH_G [Op p]
   PUSH_PRIMOP op           -> do p <- ptr (BCOPtrPrimOp op)
@@ -419,7 +431,7 @@ assembleI platform i = case i of
   PUSH_UBX32 lit           -> do np <- literal lit
                                  emit bci_PUSH_UBX32 [Op np]
   PUSH_UBX lit nws         -> do np <- literal lit
-                                 emit bci_PUSH_UBX [Op np, SmallOp nws]
+                                 emit bci_PUSH_UBX [Op np, wOp nws]
 
   -- see Note [Generating code for top-level string literal bindings] in GHC.StgToByteCode
   PUSH_ADDR nm             -> do np <- lit [BCONPtrAddr nm]
@@ -437,15 +449,15 @@ assembleI platform i = case i of
   PUSH_APPLY_PPPPP         -> emit bci_PUSH_APPLY_PPPPP []
   PUSH_APPLY_PPPPPP        -> emit bci_PUSH_APPLY_PPPPPP []
 
-  SLIDE     n by           -> emit bci_SLIDE [SmallOp n, SmallOp by]
-  ALLOC_AP  n              -> emit bci_ALLOC_AP [SmallOp n]
-  ALLOC_AP_NOUPD n         -> emit bci_ALLOC_AP_NOUPD [SmallOp n]
-  ALLOC_PAP arity n        -> emit bci_ALLOC_PAP [SmallOp arity, SmallOp n]
-  MKAP      off sz         -> emit bci_MKAP [SmallOp off, SmallOp sz]
-  MKPAP     off sz         -> emit bci_MKPAP [SmallOp off, SmallOp sz]
-  UNPACK    n              -> emit bci_UNPACK [SmallOp n]
+  SLIDE     n by           -> emit bci_SLIDE [wOp n, wOp by]
+  ALLOC_AP  n              -> emit bci_ALLOC_AP [truncHalfWord platform n]
+  ALLOC_AP_NOUPD n         -> emit bci_ALLOC_AP_NOUPD [truncHalfWord platform n]
+  ALLOC_PAP arity n        -> emit bci_ALLOC_PAP [truncHalfWord platform arity, truncHalfWord platform n]
+  MKAP      off sz         -> emit bci_MKAP [wOp off, truncHalfWord platform sz]
+  MKPAP     off sz         -> emit bci_MKPAP [wOp off, truncHalfWord platform sz]
+  UNPACK    n              -> emit bci_UNPACK [wOp n]
   PACK      dcon sz        -> do itbl_no <- lit [BCONPtrItbl (getName dcon)]
-                                 emit bci_PACK [Op itbl_no, SmallOp sz]
+                                 emit bci_PACK [Op itbl_no, wOp sz]
   LABEL     lbl            -> label lbl
   TESTLT_I  i l            -> do np <- int i
                                  emit bci_TESTLT_I [Op np, LabelOp l]
@@ -498,13 +510,13 @@ assembleI platform i = case i of
   TESTLT_P  i l            -> emit bci_TESTLT_P [SmallOp i, LabelOp l]
   TESTEQ_P  i l            -> emit bci_TESTEQ_P [SmallOp i, LabelOp l]
   CASEFAIL                 -> emit bci_CASEFAIL []
-  SWIZZLE   stkoff n       -> emit bci_SWIZZLE [SmallOp stkoff, SmallOp n]
+  SWIZZLE   stkoff n       -> emit bci_SWIZZLE [wOp stkoff, IOp n]
   JMP       l              -> emit bci_JMP [LabelOp l]
   ENTER                    -> emit bci_ENTER []
   RETURN rep               -> emit (return_non_tuple rep) []
   RETURN_TUPLE             -> emit bci_RETURN_T []
   CCALL off m_addr i       -> do np <- addr m_addr
-                                 emit bci_CCALL [SmallOp off, Op np, SmallOp i]
+                                 emit bci_CCALL [wOp off, Op np, SmallOp i]
   PRIMCALL                 -> emit bci_PRIMCALL []
   BRK_FUN index uniq cc    -> do p1 <- ptr BCOPtrBreakArray
                                  q <- int (getKey uniq)
@@ -556,8 +568,11 @@ assembleI platform i = case i of
     words ws = lit (map BCONPtrWord ws)
     word w = words [w]
 
-isLarge :: Word -> Bool
-isLarge n = n > 65535
+isLargeW :: Word -> Bool
+isLargeW n = n > 65535
+
+isLargeI :: Int -> Bool
+isLargeI n = n > 32767 || n < -32768
 
 push_alts :: ArgRep -> Word16
 push_alts V   = bci_PUSH_ALTS_V
diff --git a/compiler/GHC/ByteCode/Instr.hs b/compiler/GHC/ByteCode/Instr.hs
index e8d377ed695022d55cc3947dbb778f9192a097b9..b59ffbcfe3e89659c50152a8ede8da0297c49c69 100644
--- a/compiler/GHC/ByteCode/Instr.hs
+++ b/compiler/GHC/ByteCode/Instr.hs
@@ -23,7 +23,7 @@ import GHC.Types.Unique
 import GHC.Types.Literal
 import GHC.Core.DataCon
 import GHC.Builtin.PrimOps
-import GHC.Runtime.Heap.Layout
+import GHC.Runtime.Heap.Layout ( StgWord )
 
 import Data.Int
 import Data.Word
@@ -41,7 +41,7 @@ data ProtoBCO a
         protoBCOInstrs     :: [BCInstr],  -- instrs
         -- arity and GC info
         protoBCOBitmap     :: [StgWord],
-        protoBCOBitmapSize :: Word16,
+        protoBCOBitmapSize :: Word,
         protoBCOArity      :: Int,
         -- what the BCO came from, for debugging only
         protoBCOExpr       :: Either [CgStgAlt] CgStgRhs,
@@ -58,18 +58,18 @@ instance Outputable LocalLabel where
 
 data BCInstr
    -- Messing with the stack
-   = STKCHECK  Word
+   = STKCHECK  !Word
 
    -- Push locals (existing bits of the stack)
-   | PUSH_L    !Word16{-offset-}
-   | PUSH_LL   !Word16 !Word16{-2 offsets-}
-   | PUSH_LLL  !Word16 !Word16 !Word16{-3 offsets-}
+   | PUSH_L    !WordOff{-offset-}
+   | PUSH_LL   !WordOff !WordOff{-2 offsets-}
+   | PUSH_LLL  !WordOff !WordOff !WordOff{-3 offsets-}
 
    -- Push the specified local as a 8, 16, 32 bit value onto the stack. (i.e.,
    -- the stack will grow by 8, 16 or 32 bits)
-   | PUSH8  !Word16
-   | PUSH16 !Word16
-   | PUSH32 !Word16
+   | PUSH8  !ByteOff
+   | PUSH16 !ByteOff
+   | PUSH32 !ByteOff
 
    -- Push the specified local as a 8, 16, 32 bit value onto the stack, but the
    -- value will take the whole word on the stack (i.e., the stack will grow by
@@ -78,9 +78,9 @@ data BCInstr
    -- Currently we expect all values on the stack to take full words, except for
    -- the ones used for PACK (i.e., actually constructing new data types, in
    -- which case we use PUSH{8,16,32})
-   | PUSH8_W  !Word16
-   | PUSH16_W !Word16
-   | PUSH32_W !Word16
+   | PUSH8_W  !ByteOff
+   | PUSH16_W !ByteOff
+   | PUSH32_W !ByteOff
 
    -- Push a ptr  (these all map to PUSH_G really)
    | PUSH_G       Name
@@ -102,8 +102,8 @@ data BCInstr
    | PUSH_UBX8  Literal
    | PUSH_UBX16 Literal
    | PUSH_UBX32 Literal
-   | PUSH_UBX   Literal Word16
-        -- push this int/float/double/addr, on the stack. Word16
+   | PUSH_UBX   Literal !WordOff
+        -- push this int/float/double/addr, on the stack. Word
         -- is # of words to copy from literal pool.  Eitherness reflects
         -- the difficulty of dealing with MachAddr here, mostly due to
         -- the excessive (and unnecessary) restrictions imposed by the
@@ -129,58 +129,61 @@ data BCInstr
    | PUSH_APPLY_PPPPP
    | PUSH_APPLY_PPPPPP
 
-   | SLIDE     Word16{-this many-} Word16{-down by this much-}
+   | SLIDE     !WordOff{-this many-} !WordOff{-down by this much-}
 
    -- To do with the heap
-   | ALLOC_AP  !Word16 -- make an AP with this many payload words
-   | ALLOC_AP_NOUPD !Word16 -- make an AP_NOUPD with this many payload words
-   | ALLOC_PAP !Word16 !Word16 -- make a PAP with this arity / payload words
-   | MKAP      !Word16{-ptr to AP is this far down stack-} !Word16{-number of words-}
-   | MKPAP     !Word16{-ptr to PAP is this far down stack-} !Word16{-number of words-}
-   | UNPACK    !Word16 -- unpack N words from t.o.s Constr
-   | PACK      DataCon !Word16
+   | ALLOC_AP  !HalfWord {- make an AP with this many payload words.
+                            HalfWord matches the size of the n_args field in StgAP,
+                            make sure that we handle truncation when generating
+                            bytecode using this HalfWord type here -}
+   | ALLOC_AP_NOUPD !HalfWord -- make an AP_NOUPD with this many payload words
+   | ALLOC_PAP !HalfWord !HalfWord -- make a PAP with this arity / payload words
+   | MKAP      !WordOff{-ptr to AP is this far down stack-} !HalfWord{-number of words-}
+   | MKPAP     !WordOff{-ptr to PAP is this far down stack-} !HalfWord{-number of words-}
+   | UNPACK    !WordOff -- unpack N words from t.o.s Constr
+   | PACK      DataCon !WordOff
                         -- after assembly, the DataCon is an index into the
                         -- itbl array
    -- For doing case trees
    | LABEL     LocalLabel
-   | TESTLT_I  Int    LocalLabel
-   | TESTEQ_I  Int    LocalLabel
-   | TESTLT_W  Word   LocalLabel
-   | TESTEQ_W  Word   LocalLabel
-   | TESTLT_I64 Int64 LocalLabel
-   | TESTEQ_I64 Int64 LocalLabel
-   | TESTLT_I32 Int32 LocalLabel
-   | TESTEQ_I32 Int32 LocalLabel
-   | TESTLT_I16 Int16 LocalLabel
-   | TESTEQ_I16 Int16 LocalLabel
-   | TESTLT_I8 Int8   LocalLabel
-   | TESTEQ_I8 Int16  LocalLabel
-   | TESTLT_W64 Word64 LocalLabel
-   | TESTEQ_W64 Word64 LocalLabel
-   | TESTLT_W32 Word32 LocalLabel
-   | TESTEQ_W32 Word32 LocalLabel
-   | TESTLT_W16 Word16 LocalLabel
-   | TESTEQ_W16 Word16 LocalLabel
-   | TESTLT_W8 Word8 LocalLabel
-   | TESTEQ_W8 Word8 LocalLabel
-   | TESTLT_F  Float  LocalLabel
-   | TESTEQ_F  Float  LocalLabel
-   | TESTLT_D  Double LocalLabel
-   | TESTEQ_D  Double LocalLabel
+   | TESTLT_I   !Int    LocalLabel
+   | TESTEQ_I   !Int    LocalLabel
+   | TESTLT_W   !Word   LocalLabel
+   | TESTEQ_W   !Word   LocalLabel
+   | TESTLT_I64 !Int64  LocalLabel
+   | TESTEQ_I64 !Int64  LocalLabel
+   | TESTLT_I32 !Int32  LocalLabel
+   | TESTEQ_I32 !Int32  LocalLabel
+   | TESTLT_I16 !Int16  LocalLabel
+   | TESTEQ_I16 !Int16  LocalLabel
+   | TESTLT_I8  !Int8   LocalLabel
+   | TESTEQ_I8  !Int16  LocalLabel
+   | TESTLT_W64 !Word64 LocalLabel
+   | TESTEQ_W64 !Word64 LocalLabel
+   | TESTLT_W32 !Word32 LocalLabel
+   | TESTEQ_W32 !Word32 LocalLabel
+   | TESTLT_W16 !Word16 LocalLabel
+   | TESTEQ_W16 !Word16 LocalLabel
+   | TESTLT_W8  !Word8  LocalLabel
+   | TESTEQ_W8  !Word8  LocalLabel
+   | TESTLT_F   !Float  LocalLabel
+   | TESTEQ_F   !Float  LocalLabel
+   | TESTLT_D   !Double LocalLabel
+   | TESTEQ_D   !Double LocalLabel
 
    -- The Word16 value is a constructor number and therefore
    -- stored in the insn stream rather than as an offset into
    -- the literal pool.
-   | TESTLT_P  Word16 LocalLabel
-   | TESTEQ_P  Word16 LocalLabel
+   | TESTLT_P  !Word16 LocalLabel
+   | TESTEQ_P  !Word16 LocalLabel
 
    | CASEFAIL
    | JMP              LocalLabel
 
    -- For doing calls to C (via glue code generated by libffi)
-   | CCALL            Word16    -- stack frame size
+   | CCALL            !WordOff  -- stack frame size
                       (RemotePtr C_ffi_cif) -- addr of the glue code
-                      Word16    -- flags.
+                      !Word16   -- flags.
                                 --
                                 -- 0x1: call is interruptible
                                 -- 0x2: call is unsafe
@@ -191,8 +194,8 @@ data BCInstr
    | PRIMCALL
 
    -- For doing magic ByteArray passing to foreign calls
-   | SWIZZLE          Word16 -- to the ptr N words down the stack,
-                      Word16 -- add M (interpreted as a signed 16-bit entity)
+   | SWIZZLE          !WordOff -- to the ptr N words down the stack,
+                      !Int     -- add M
 
    -- To Infinity And Beyond
    | ENTER
@@ -202,7 +205,7 @@ data BCInstr
                    -- Note [unboxed tuple bytecodes and tuple_BCO] in GHC.StgToByteCode
 
    -- Breakpoints
-   | BRK_FUN          Word16 Unique (RemotePtr CostCentre)
+   | BRK_FUN         !Word16 Unique (RemotePtr CostCentre)
 
 -- -----------------------------------------------------------------------------
 -- Printing bytecode instructions
diff --git a/compiler/GHC/ByteCode/Types.hs b/compiler/GHC/ByteCode/Types.hs
index e16fcf7f5a1dafcc4f63634aa43a03026b407773..c555fb329e9b6f827e5576027fd30ecce5847e3b 100644
--- a/compiler/GHC/ByteCode/Types.hs
+++ b/compiler/GHC/ByteCode/Types.hs
@@ -11,7 +11,7 @@ module GHC.ByteCode.Types
   , FFIInfo(..)
   , RegBitmap(..)
   , NativeCallType(..), NativeCallInfo(..), voidTupleReturnInfo, voidPrimCallInfo
-  , ByteOff(..), WordOff(..)
+  , ByteOff(..), WordOff(..), HalfWord(..)
   , UnlinkedBCO(..), BCOPtr(..), BCONPtr(..)
   , ItblEnv, ItblPtr(..)
   , AddrEnv, AddrPtr(..)
@@ -79,6 +79,12 @@ newtype ByteOff = ByteOff Int
 newtype WordOff = WordOff Int
     deriving (Enum, Eq, Show, Integral, Num, Ord, Real, Outputable)
 
+-- A type for values that are half the size of a word on the target
+-- platform where the interpreter runs (which may be a different
+-- wordsize than the compiler).
+newtype HalfWord = HalfWord Word
+    deriving (Enum, Eq, Show, Integral, Num, Ord, Real, Outputable)
+
 newtype RegBitmap = RegBitmap { unRegBitmap :: Word32 }
     deriving (Enum, Eq, Show, Integral, Num, Ord, Real, Bits, FiniteBits, Outputable)
 
@@ -188,7 +194,7 @@ instance NFData BCONPtr where
 data CgBreakInfo
    = CgBreakInfo
    { cgb_tyvars :: ![IfaceTvBndr] -- ^ Type variables in scope at the breakpoint
-   , cgb_vars   :: ![Maybe (IfaceIdBndr, Word16)]
+   , cgb_vars   :: ![Maybe (IfaceIdBndr, Word)]
    , cgb_resty  :: !IfaceType
    }
 -- See Note [Syncing breakpoint info] in GHC.Runtime.Eval
diff --git a/compiler/GHC/CoreToIface.hs b/compiler/GHC/CoreToIface.hs
index 566900cdb450074ace7ee05995f74dfb21206aa6..89c180c5bbd440a7962272817b38123bc55f9088 100644
--- a/compiler/GHC/CoreToIface.hs
+++ b/compiler/GHC/CoreToIface.hs
@@ -49,8 +49,6 @@ module GHC.CoreToIface
 
 import GHC.Prelude
 
-import Data.Word
-
 import GHC.StgToCmm.Types
 
 import GHC.ByteCode.Types
@@ -698,7 +696,7 @@ toIfaceLFInfo nm lfi = case lfi of
 
 -- Dehydrating CgBreakInfo
 
-dehydrateCgBreakInfo :: [TyVar] -> [Maybe (Id, Word16)] -> Type -> CgBreakInfo
+dehydrateCgBreakInfo :: [TyVar] -> [Maybe (Id, Word)] -> Type -> CgBreakInfo
 dehydrateCgBreakInfo ty_vars idOffSets tick_ty =
           CgBreakInfo
             { cgb_tyvars = map toIfaceTvBndr ty_vars
diff --git a/compiler/GHC/IfaceToCore.hs b/compiler/GHC/IfaceToCore.hs
index 0dad3a4714f02345dd324b58c8b1021d9b4443fb..7f93c79ca209d7bcadeec0fd8468cb4df0b26127 100644
--- a/compiler/GHC/IfaceToCore.hs
+++ b/compiler/GHC/IfaceToCore.hs
@@ -35,8 +35,6 @@ import GHC.Prelude
 
 import GHC.ByteCode.Types
 
-import Data.Word
-
 import GHC.Driver.Env
 import GHC.Driver.Session
 import GHC.Driver.Config.Core.Lint ( initLintConfig )
@@ -2164,7 +2162,7 @@ bindIfaceTyConBinderX bind_tv (Bndr tv vis) thing_inside
 
 -- CgBreakInfo
 
-hydrateCgBreakInfo :: CgBreakInfo -> IfL ([Maybe (Id, Word16)], Type)
+hydrateCgBreakInfo :: CgBreakInfo -> IfL ([Maybe (Id, Word)], Type)
 hydrateCgBreakInfo CgBreakInfo{..} = do
   bindIfaceTyVars cgb_tyvars $ \_ -> do
     result_ty <- tcIfaceType cgb_resty
diff --git a/compiler/GHC/StgToByteCode.hs b/compiler/GHC/StgToByteCode.hs
index 4ca574dea0145b79775f3fad1d09f3fc45b4200c..94a0f457708026919eefa3d1eaa8155633bed72c 100644
--- a/compiler/GHC/StgToByteCode.hs
+++ b/compiler/GHC/StgToByteCode.hs
@@ -242,7 +242,7 @@ mkProtoBCO
    -> Either  [CgStgAlt] (CgStgRhs)
                 -- ^ original expression; for debugging only
    -> Int       -- ^ arity
-   -> Word16    -- ^ bitmap size
+   -> WordOff   -- ^ bitmap size
    -> [StgWord] -- ^ bitmap
    -> Bool      -- ^ True <=> is a return point, rather than a function
    -> [FFIInfo]
@@ -252,7 +252,7 @@ mkProtoBCO platform nm instrs_ordlist origin arity bitmap_size bitmap is_ret ffi
         protoBCOName = nm,
         protoBCOInstrs = maybe_with_stack_check,
         protoBCOBitmap = bitmap,
-        protoBCOBitmapSize = bitmap_size,
+        protoBCOBitmapSize = fromIntegral bitmap_size,
         protoBCOArity = arity,
         protoBCOExpr = origin,
         protoBCOFFIs = ffis
@@ -396,7 +396,9 @@ schemeER_wrk d p (StgTick (Breakpoint tick_ty tick_no fvs) rhs)
         platform <- profilePlatform <$> getProfile
         let idOffSets = getVarOffSets platform d p fvs
             ty_vars   = tyCoVarsOfTypesWellScoped (tick_ty:map idType fvs)
-        let breakInfo = dehydrateCgBreakInfo ty_vars idOffSets tick_ty
+        let toWord :: Maybe (Id, WordOff) -> Maybe (Id, Word)
+            toWord = fmap (\(i, wo) -> (i, fromIntegral wo))
+            breakInfo  = dehydrateCgBreakInfo ty_vars (map toWord idOffSets) tick_ty
         newBreakInfo tick_no breakInfo
         hsc_env <- getHscEnv
         let cc | Just interp <- hsc_interp hsc_env
@@ -407,7 +409,7 @@ schemeER_wrk d p (StgTick (Breakpoint tick_ty tick_no fvs) rhs)
         return $ breakInstr `consOL` code
 schemeER_wrk d p rhs = schemeE d 0 p rhs
 
-getVarOffSets :: Platform -> StackDepth -> BCEnv -> [Id] -> [Maybe (Id, Word16)]
+getVarOffSets :: Platform -> StackDepth -> BCEnv -> [Id] -> [Maybe (Id, WordOff)]
 getVarOffSets platform depth env = map getOffSet
   where
     getOffSet id = case lookupBCEnv_maybe id env of
@@ -420,23 +422,9 @@ getVarOffSets platform depth env = map getOffSet
             -- this "adjustment" is needed due to stack manipulation for
             -- BRK_FUN in Interpreter.c In any case, this is used only when
             -- we trigger a breakpoint.
-            let !var_depth_ws =
-                    trunc16W $ bytesToWords platform (depth - offset) + 2
+            let !var_depth_ws = bytesToWords platform (depth - offset) + 2
             in Just (id, var_depth_ws)
 
-truncIntegral16 :: Integral a => a -> Word16
-truncIntegral16 w
-    | w > fromIntegral (maxBound :: Word16)
-    = panic "stack depth overflow"
-    | otherwise
-    = fromIntegral w
-
-trunc16B :: ByteOff -> Word16
-trunc16B = truncIntegral16
-
-trunc16W :: WordOff -> Word16
-trunc16W = truncIntegral16
-
 fvsToEnv :: BCEnv -> CgStgRhs -> [Id]
 -- Takes the free variables of a right-hand side, and
 -- delivers an ordered list of the local variables that will
@@ -493,7 +481,7 @@ returnUnliftedReps d s szb reps = do
                         PUSH_BCO tuple_bco `consOL`
                         unitOL RETURN_TUPLE
     return ( mkSlideB platform szb (d - s) -- clear to sequel
-             `appOL`  ret)                 -- go
+             `consOL` ret)                 -- go
 
 -- construct and return an unboxed tuple
 returnUnboxedTuple
@@ -557,7 +545,7 @@ schemeE d s p (StgLet _ext binds body) = do
          fvss  = map (fvsToEnv p') rhss
 
          -- Sizes of free vars
-         size_w = trunc16W . idSizeW platform
+         size_w = idSizeW platform
          sizes = map (\rhs_fvs -> sum (map size_w rhs_fvs)) fvss
 
          -- the arity of each rhs
@@ -576,13 +564,13 @@ schemeE d s p (StgLet _ext binds body) = do
          build_thunk
              :: StackDepth
              -> [Id]
-             -> Word16
+             -> WordOff
              -> ProtoBCO Name
-             -> Word16
-             -> Word16
+             -> WordOff
+             -> HalfWord
              -> BcM BCInstrList
          build_thunk _ [] size bco off arity
-            = return (PUSH_BCO bco `consOL` unitOL (mkap (off+size) size))
+            = return (PUSH_BCO bco `consOL` unitOL (mkap (off+size) (fromIntegral size)))
            where
                 mkap | arity == 0 = MKAP
                      | otherwise  = MKPAP
@@ -594,9 +582,9 @@ schemeE d s p (StgLet _ext binds body) = do
 
          alloc_code = toOL (zipWith mkAlloc sizes arities)
            where mkAlloc sz 0
-                    | is_tick     = ALLOC_AP_NOUPD sz
-                    | otherwise   = ALLOC_AP sz
-                 mkAlloc sz arity = ALLOC_PAP arity sz
+                    | is_tick     = ALLOC_AP_NOUPD (fromIntegral sz)
+                    | otherwise   = ALLOC_AP (fromIntegral sz)
+                 mkAlloc sz arity = ALLOC_PAP arity (fromIntegral sz)
 
          is_tick = case binds of
                      StgNonRec id _ -> occNameFS (getOccName id) == tickFS
@@ -607,7 +595,7 @@ schemeE d s p (StgLet _ext binds body) = do
                 build_thunk d' fvs size bco off arity
 
          compile_binds =
-            [ compile_bind d' fvs x rhs size arity (trunc16W n)
+            [ compile_bind d' fvs x rhs size arity n
             | (fvs, x, rhs, size, arity, n) <-
                 zip6 fvss xs rhss sizes arities [n_binds, n_binds-1 .. 1]
             ]
@@ -735,7 +723,7 @@ mkConAppCode orig_d _ p con args = app_code
                 more_push_code <- do_pushery (d + arg_bytes) args
                 return (push `appOL` more_push_code)
             do_pushery !d [] = do
-                let !n_arg_words = trunc16W $ bytesToWords platform (d - orig_d)
+                let !n_arg_words = bytesToWords platform (d - orig_d)
                 return (unitOL (PACK con n_arg_words))
 
         -- Push on the stack in the reverse order.
@@ -761,7 +749,7 @@ doTailCall init_d s p fn args = do
         platform <- profilePlatform <$> getProfile
         assert (sz == wordSize platform) return ()
         let slide = mkSlideB platform (d - init_d + wordSize platform) (init_d - s)
-        return (push_fn `appOL` (slide `appOL` unitOL ENTER))
+        return (push_fn `appOL` (slide `consOL` unitOL ENTER))
   do_pushes !d args reps = do
       let (push_apply, n, rest_of_reps) = findPushSeq reps
           (these_args, rest_of_args) = splitAt n args
@@ -948,7 +936,7 @@ doCase d s p scrut bndr alts
              massert isAlgCase
              rhs_code <- schemeE stack_bot s p' rhs
              return (my_discr alt,
-                     unitOL (UNPACK (trunc16W size)) `appOL` rhs_code)
+                     unitOL (UNPACK size) `appOL` rhs_code)
            where
              real_bndrs = filterOut isTyVar bndrs
 
@@ -1009,8 +997,9 @@ doCase d s p scrut bndr alts
            | ubx_tuple_frame              = ([1], 2) -- call_info, tuple_BCO
            | otherwise                    = ([], 0)
 
-        bitmap_size = trunc16W $ fromIntegral extra_slots +
-                                 bytesToWords platform (d - s)
+        bitmap_size :: WordOff
+        bitmap_size = fromIntegral extra_slots +
+                      bytesToWords platform (d - s)
 
         bitmap_size' :: Int
         bitmap_size' = fromIntegral bitmap_size
@@ -1028,15 +1017,15 @@ doCase d s p scrut bndr alts
                              isUnboxedSumType (idType id) = Nothing
                            | isFollowableArg (bcIdArgRep platform id) = Just (fromIntegral rel_offset)
                            | otherwise                      = Nothing
-                where rel_offset = trunc16W $ bytesToWords platform (d - offset)
+                where rel_offset = bytesToWords platform (d - offset)
 
-        bitmap = intsToReverseBitmap platform bitmap_size'{-size-} pointers
+        bitmap = intsToReverseBitmap platform bitmap_size' pointers
 
      alt_stuff <- mapM codeAlt alts
      alt_final0 <- mkMultiBranch maybe_ncons alt_stuff
 
      let alt_final
-           | ubx_tuple_frame    = mkSlideW 0 2 `mappend` alt_final0
+           | ubx_tuple_frame    = SLIDE 0 2 `consOL` alt_final0
            | otherwise          = alt_final0
 
      let
@@ -1306,11 +1295,11 @@ mkStackBitmap
   -- ^ The stack layout of the arguments, where each offset is relative to the
   -- /bottom/ of the stack space they occupy. Their offsets must be word-aligned,
   -- and the list must be sorted in order of ascending offset (i.e. bottom to top).
-  -> (Word16, [StgWord])
+  -> (WordOff, [StgWord])
 mkStackBitmap platform nptrs_prefix args_info args
   = (bitmap_size, bitmap)
   where
-    bitmap_size = trunc16W $ nptrs_prefix + arg_bottom
+    bitmap_size = nptrs_prefix + arg_bottom
     bitmap = intsToReverseBitmap platform (fromIntegral bitmap_size) ptr_offsets
 
     arg_bottom = nativeCallSize args_info
@@ -1384,7 +1373,7 @@ generatePrimCall d s p target _mb_unit _result_ty args
               (push_target `consOL`
                push_info `consOL`
                PUSH_BCO args_bco `consOL`
-               (mkSlideB platform szb (d - s) `appOL` unitOL PRIMCALL))
+               (mkSlideB platform szb (d - s) `consOL` unitOL PRIMCALL))
 
 -- -----------------------------------------------------------------------------
 -- Deal with a CCall.
@@ -1552,7 +1541,7 @@ generateCCall d0 s p (CCallSpec target cconv safety) result_ty args
          push_r =
              if returns_void
                 then nilOL
-                else unitOL (PUSH_UBX (mkDummyLiteral platform r_rep) (trunc16W r_sizeW))
+                else unitOL (PUSH_UBX (mkDummyLiteral platform r_rep) (r_sizeW))
 
          -- generate the marshalling code we're going to call
 
@@ -1560,7 +1549,7 @@ generateCCall d0 s p (CCallSpec target cconv safety) result_ty args
          -- instruction needs to describe the chunk of stack containing
          -- the ccall args to the GC, so it needs to know how large it
          -- is.  See comment in Interpreter.c with the CCALL instruction.
-         stk_offset   = trunc16W $ bytesToWords platform (d_after_r - s)
+         stk_offset   = bytesToWords platform (d_after_r - s)
 
          conv = case cconv of
            CCallConv -> FFICCall
@@ -1589,7 +1578,7 @@ generateCCall d0 s p (CCallSpec target cconv safety) result_ty args
 
          -- slide and return
          d_after_r_min_s = bytesToWords platform (d_after_r - s)
-         wrapup       = mkSlideW (trunc16W r_sizeW) (d_after_r_min_s - r_sizeW)
+         wrapup       = mkSlideW r_sizeW (d_after_r_min_s - r_sizeW)
                         `snocOL` RETURN (toArgRep platform r_rep)
          --trace (show (arg1_offW, args_offW  ,  (map argRepSizeW a_reps) )) $
      return (
@@ -1793,8 +1782,9 @@ pushAtom d p (StgVarArg var)
    = do platform <- targetPlatform <$> getDynFlags
 
         let !szb = idSizeCon platform var
+            with_instr :: (ByteOff -> BCInstr) -> BcM (OrdList BCInstr, ByteOff)
             with_instr instr = do
-                let !off_b = trunc16B $ d - d_v
+                let !off_b = d - d_v
                 return (unitOL (instr off_b), wordSize platform)
 
         case szb of
@@ -1803,7 +1793,7 @@ pushAtom d p (StgVarArg var)
             4 -> with_instr PUSH32_W
             _ -> do
                 let !szw = bytesToWords platform szb
-                    !off_w = trunc16W $ bytesToWords platform (d - d_v) + szw - 1
+                    !off_w = bytesToWords platform (d - d_v) + szw - 1
                 return (toOL (genericReplicate szw (PUSH_L off_w)),
                               wordsToBytes platform szw)
         -- d - d_v           offset from TOS to the first slot of the object
@@ -1864,7 +1854,7 @@ pushLiteral padded lit =
                 1  -> PUSH_UBX8 lit
                 2  -> PUSH_UBX16 lit
                 4  -> PUSH_UBX32 lit
-                _  -> PUSH_UBX lit (trunc16W $ bytesToWords platform size_bytes)
+                _  -> PUSH_UBX lit (bytesToWords platform size_bytes)
 
      case lit of
         LitLabel {}     -> code AddrRep
@@ -1903,7 +1893,7 @@ pushConstrAtom d p va@(StgVarArg v)
         platform <- targetPlatform <$> getDynFlags
         let !szb = idSizeCon platform v
             done instr = do
-                let !off = trunc16B $ d - d_v
+                let !off = d - d_v
                 return (unitOL (instr off), szb)
         case szb of
             1 -> done PUSH8
@@ -2153,25 +2143,20 @@ unsupportedCConvException = throwGhcException (ProgramError
   ("Error: bytecode compiler can't handle some foreign calling conventions\n"++
    "  Workaround: use -fobject-code, or compile this module to .o separately."))
 
-mkSlideB :: Platform -> ByteOff -> ByteOff -> OrdList BCInstr
-mkSlideB platform !nb !db = mkSlideW n d
+mkSlideB :: Platform -> ByteOff -> ByteOff -> BCInstr
+mkSlideB platform nb db = SLIDE n d
   where
-    !n = trunc16W $ bytesToWords platform nb
+    !n = bytesToWords platform nb
     !d = bytesToWords platform db
 
-mkSlideW :: Word16 -> WordOff -> OrdList BCInstr
+mkSlideW :: WordOff -> WordOff -> OrdList BCInstr
 mkSlideW !n !ws
-    | ws > fromIntegral limit
-    -- If the amount to slide doesn't fit in a Word16, generate multiple slide
-    -- instructions
-    = SLIDE n limit `consOL` mkSlideW n (ws - fromIntegral limit)
     | ws == 0
     = nilOL
     | otherwise
     = unitOL (SLIDE n $ fromIntegral ws)
-  where
-    limit :: Word16
-    limit = maxBound
+
+
 
 atomPrimRep :: StgArg -> PrimRep
 atomPrimRep (StgVarArg v) = bcIdPrimRep v
diff --git a/rts/Disassembler.c b/rts/Disassembler.c
index 56be0fb775d90d0b5295f910faaa61e91554d5d5..b93c082a8f3c8712b65474a54a497f5f63e273b4 100644
--- a/rts/Disassembler.c
+++ b/rts/Disassembler.c
@@ -28,7 +28,6 @@
 int
 disInstr ( StgBCO *bco, int pc )
 {
-   int i;
    StgWord16 instr;
 
    StgWord16*     instrs      = (StgWord16*)(bco->instrs->payload);
@@ -75,14 +74,15 @@ disInstr ( StgBCO *bco, int pc )
          debugBelch("\n");
          pc += 4;
          break;
-      case bci_SWIZZLE:
-         debugBelch("SWIZZLE stkoff %d by %d\n",
-                         instrs[pc], (signed int)instrs[pc+1]);
-         pc += 2; break;
-      case bci_CCALL:
+      case bci_SWIZZLE: {
+         W_     stkoff = BCO_GET_LARGE_ARG;
+         StgInt by     = BCO_GET_LARGE_ARG;
+         debugBelch("SWIZZLE stkoff %" FMT_Word " by %" FMT_Int "\n", stkoff, by);
+         break; }
+      case bci_CCALL: {
          debugBelch("CCALL    marshaller at 0x%" FMT_HexWord "\n",
                          literals[instrs[pc]] );
-         pc += 1; break;
+         pc += 1; break; }
       case bci_PRIMCALL:
          debugBelch("PRIMCALL\n");
          break;
@@ -91,34 +91,45 @@ disInstr ( StgBCO *bco, int pc )
          debugBelch("STKCHECK %" FMT_Word "\n", (W_)stk_words_reqd );
          break;
      }
-      case bci_PUSH_L:
-         debugBelch("PUSH_L   %d\n", instrs[pc] );
-         pc += 1; break;
-      case bci_PUSH_LL:
-         debugBelch("PUSH_LL  %d %d\n", instrs[pc], instrs[pc+1] );
-         pc += 2; break;
-      case bci_PUSH_LLL:
-         debugBelch("PUSH_LLL %d %d %d\n", instrs[pc], instrs[pc+1],
-                                                            instrs[pc+2] );
-         pc += 3; break;
-      case bci_PUSH8:
-         debugBelch("PUSH8    %d\n", instrs[pc] );
-         pc += 1; break;
-      case bci_PUSH16:
-         debugBelch("PUSH16   %d\n", instrs[pc] );
-         pc += 1; break;
-      case bci_PUSH32:
-         debugBelch("PUSH32   %d\n", instrs[pc] );
-         pc += 1; break;
-      case bci_PUSH8_W:
-         debugBelch("PUSH8_W  %d\n", instrs[pc] );
-         pc += 1; break;
-      case bci_PUSH16_W:
-         debugBelch("PUSH16_W %d\n", instrs[pc] );
-         pc += 1; break;
-      case bci_PUSH32_W:
-         debugBelch("PUSH32_W %d\n", instrs[pc] );
-         pc += 1; break;
+      case bci_PUSH_L: {
+         W_ x1 = BCO_GET_LARGE_ARG;
+         debugBelch("PUSH_L   %" FMT_Word "\n", x1 );
+         break; }
+      case bci_PUSH_LL: {
+         W_ x1 = BCO_GET_LARGE_ARG;
+         W_ x2 = BCO_GET_LARGE_ARG;
+         debugBelch("PUSH_LL  %" FMT_Word " %" FMT_Word "\n", x1, x2 );
+         break; }
+      case bci_PUSH_LLL: {
+         W_ x1 = BCO_GET_LARGE_ARG;
+         W_ x2 = BCO_GET_LARGE_ARG;
+         W_ x3 = BCO_GET_LARGE_ARG;
+         debugBelch("PUSH_LLL %" FMT_Word " %" FMT_Word " %" FMT_Word "\n", x1, x2, x3);
+         break; }
+      case bci_PUSH8: {
+         W_ x1 = BCO_GET_LARGE_ARG;
+         debugBelch("PUSH8    %" FMT_Word "\n", x1 );
+         break; }
+      case bci_PUSH16: {
+         W_ x1 = BCO_GET_LARGE_ARG;
+         debugBelch("PUSH16   %" FMT_Word "\n", x1 );
+         break; }
+      case bci_PUSH32: {
+         W_ x1 = BCO_GET_LARGE_ARG;
+         debugBelch("PUSH32   %" FMT_Word "\n", x1 );
+         break; }
+      case bci_PUSH8_W: {
+         W_ x1 = BCO_GET_LARGE_ARG;
+         debugBelch("PUSH8_W  %" FMT_Word "\n", x1 );
+         break; }
+      case bci_PUSH16_W: {
+         W_ x1 = BCO_GET_LARGE_ARG;
+         debugBelch("PUSH16_W %" FMT_Word "\n", x1 );
+         break; }
+      case bci_PUSH32_W: {
+         W_ x1 = BCO_GET_LARGE_ARG;
+         debugBelch("PUSH32_W %" FMT_Word "\n", x1 );
+         break; }
       case bci_PUSH_G:
          debugBelch("PUSH_G   " ); printPtr( ptrs[instrs[pc]] );
          debugBelch("\n" );
@@ -178,12 +189,14 @@ disInstr ( StgBCO *bco, int pc )
              "PUSH_UBX32 0x%" FMT_HexWord32 "\n",
              (StgWord32) literals[instrs[pc]] );
          pc += 1; break;
-      case bci_PUSH_UBX:
+      case bci_PUSH_UBX: {
          debugBelch("PUSH_UBX ");
-         for (i = 0; i < instrs[pc+1]; i++)
-            debugBelch("0x%" FMT_HexWord " ", literals[i + instrs[pc]] );
+         W_ offset = BCO_GET_LARGE_ARG;
+         W_ nwords = BCO_GET_LARGE_ARG;
+         for (W_ i = 0; i < nwords; i++)
+            debugBelch("0x%" FMT_HexWord " ", literals[i + offset] );
          debugBelch("\n");
-         pc += 2; break;
+         break; }
       case bci_PUSH_APPLY_N:
           debugBelch("PUSH_APPLY_N\n");
           break;
@@ -217,35 +230,48 @@ disInstr ( StgBCO *bco, int pc )
       case bci_PUSH_APPLY_PPPPPP:
           debugBelch("PUSH_APPLY_PPPPPP\n");
           break;
-      case bci_SLIDE:
-         debugBelch("SLIDE     %d down by %d\n", instrs[pc], instrs[pc+1] );
-         pc += 2; break;
-      case bci_ALLOC_AP:
-         debugBelch("ALLOC_AP  %d words\n", instrs[pc] );
-         pc += 1; break;
-      case bci_ALLOC_AP_NOUPD:
-         debugBelch("ALLOC_AP_NOUPD %d words\n", instrs[pc] );
-         pc += 1; break;
-      case bci_ALLOC_PAP:
-         debugBelch("ALLOC_PAP %d arity, %d words\n",
-                 instrs[pc], instrs[pc+1] );
-         pc += 2; break;
-      case bci_MKAP:
-         debugBelch("MKAP      %d words, %d stkoff\n", instrs[pc+1],
-                                                           instrs[pc] );
-         pc += 2; break;
-      case bci_MKPAP:
-         debugBelch("MKPAP     %d words, %d stkoff\n", instrs[pc+1],
-                                                           instrs[pc] );
-         pc += 2; break;
-      case bci_UNPACK:
-         debugBelch("UNPACK    %d\n", instrs[pc] );
-         pc += 1; break;
-      case bci_PACK:
-         debugBelch("PACK      %d words with itbl ", instrs[pc+1] );
-         printPtr( (StgPtr)literals[instrs[pc]] );
+      case bci_SLIDE: {
+         W_ nwords = BCO_GET_LARGE_ARG;
+         W_ by     = BCO_GET_LARGE_ARG;
+         debugBelch("SLIDE     %" FMT_Word " down by %" FMT_Word "\n", nwords, by );
+         break; }
+      case bci_ALLOC_AP: {
+         W_ nwords = BCO_GET_LARGE_ARG;
+         debugBelch("ALLOC_AP  %" FMT_Word " words\n", nwords );
+         break; }
+      case bci_ALLOC_AP_NOUPD: {
+         W_ nwords = BCO_GET_LARGE_ARG;
+         debugBelch("ALLOC_AP_NOUPD %" FMT_Word " words\n", nwords );
+         break; }
+      case bci_ALLOC_PAP: {
+         W_ arity = BCO_GET_LARGE_ARG;
+         W_ nwords = BCO_GET_LARGE_ARG;
+         debugBelch("ALLOC_PAP %" FMT_Word " arity, %" FMT_Word " words\n",
+                 arity, nwords );
+         break; }
+      case bci_MKAP: {
+         W_ stkoff = BCO_GET_LARGE_ARG;
+         W_ nwords = BCO_GET_LARGE_ARG;
+         debugBelch("MKAP      %" FMT_Word " words, %" FMT_Word " stkoff\n", nwords,
+                                                           stkoff );
+         break; }
+      case bci_MKPAP: {
+         W_ stkoff = BCO_GET_LARGE_ARG;
+         W_ nwords = BCO_GET_LARGE_ARG;
+         debugBelch("MKPAP     %" FMT_Word " words, %" FMT_Word " stkoff\n", nwords,
+                                                      stkoff );
+         break; }
+      case bci_UNPACK: {
+         W_ nwords = BCO_GET_LARGE_ARG;
+         debugBelch("UNPACK    %" FMT_Word "\n", nwords );
+         break; }
+      case bci_PACK: {
+         int itbl = BCO_NEXT;
+         W_ nwords = BCO_GET_LARGE_ARG;
+         debugBelch("PACK      %" FMT_Word " words with itbl ", nwords );
+         printPtr( (StgPtr)literals[itbl] );
          debugBelch("\n");
-         pc += 2; break;
+         break; }
 
       case bci_TESTLT_I: {
           unsigned int discr  = BCO_NEXT;
diff --git a/rts/Interpreter.c b/rts/Interpreter.c
index c624d3bcd51dc0c626e85a38777b39aa251db239..171a9316c753e85b42c47dd0628a3979aa7ceb2c 100644
--- a/rts/Interpreter.c
+++ b/rts/Interpreter.c
@@ -1226,15 +1226,15 @@ run_BCO:
         }
 
         case bci_PUSH_L: {
-            int o1 = BCO_NEXT;
+            W_ o1 = BCO_GET_LARGE_ARG;
             SpW(-1) = SpW(o1);
             Sp_subW(1);
             goto nextInsn;
         }
 
         case bci_PUSH_LL: {
-            int o1 = BCO_NEXT;
-            int o2 = BCO_NEXT;
+            W_ o1 = BCO_GET_LARGE_ARG;
+            W_ o2 = BCO_GET_LARGE_ARG;
             SpW(-1) = SpW(o1);
             SpW(-2) = SpW(o2);
             Sp_subW(2);
@@ -1242,9 +1242,9 @@ run_BCO:
         }
 
         case bci_PUSH_LLL: {
-            int o1 = BCO_NEXT;
-            int o2 = BCO_NEXT;
-            int o3 = BCO_NEXT;
+            W_ o1 = BCO_GET_LARGE_ARG;
+            W_ o2 = BCO_GET_LARGE_ARG;
+            W_ o3 = BCO_GET_LARGE_ARG;
             SpW(-1) = SpW(o1);
             SpW(-2) = SpW(o2);
             SpW(-3) = SpW(o3);
@@ -1253,56 +1253,56 @@ run_BCO:
         }
 
         case bci_PUSH8: {
-            int off = BCO_NEXT;
+            W_ off = BCO_GET_LARGE_ARG;
             Sp_subB(1);
             *(StgWord8*)Sp = *(StgWord8*)(Sp_plusB(off+1));
             goto nextInsn;
         }
 
         case bci_PUSH16: {
-            int off = BCO_NEXT;
+            W_ off = BCO_GET_LARGE_ARG;
             Sp_subB(2);
             *(StgWord16*)Sp = *(StgWord16*)(Sp_plusB(off+2));
             goto nextInsn;
         }
 
         case bci_PUSH32: {
-            int off = BCO_NEXT;
+            W_ off = BCO_GET_LARGE_ARG;
             Sp_subB(4);
             *(StgWord32*)Sp = *(StgWord32*)(Sp_plusB(off+4));
             goto nextInsn;
         }
 
         case bci_PUSH8_W: {
-            int off = BCO_NEXT;
+            W_ off = BCO_GET_LARGE_ARG;
             *(StgWord*)(Sp_minusW(1)) = *(StgWord8*)(Sp_plusB(off));
             Sp_subW(1);
             goto nextInsn;
         }
 
         case bci_PUSH16_W: {
-            int off = BCO_NEXT;
+            W_ off = BCO_GET_LARGE_ARG;
             *(StgWord*)(Sp_minusW(1)) = *(StgWord16*)(Sp_plusB(off));
             Sp_subW(1);
             goto nextInsn;
         }
 
         case bci_PUSH32_W: {
-            int off = BCO_NEXT;
+            W_ off = BCO_GET_LARGE_ARG;
             *(StgWord*)(Sp_minusW(1)) = *(StgWord32*)(Sp_plusB(off));
             Sp_subW(1);
             goto nextInsn;
         }
 
         case bci_PUSH_G: {
-            int o1 = BCO_GET_LARGE_ARG;
+            W_ o1 = BCO_GET_LARGE_ARG;
             SpW(-1) = BCO_PTR(o1);
             Sp_subW(1);
             goto nextInsn;
         }
 
         case bci_PUSH_ALTS_P: {
-            int o_bco  = BCO_GET_LARGE_ARG;
+            W_ o_bco  = BCO_GET_LARGE_ARG;
             Sp_subW(2);
             SpW(1) = BCO_PTR(o_bco);
             SpW(0) = (W_)&stg_ctoi_R1p_info;
@@ -1315,7 +1315,7 @@ run_BCO:
         }
 
         case bci_PUSH_ALTS_N: {
-            int o_bco  = BCO_GET_LARGE_ARG;
+            W_ o_bco  = BCO_GET_LARGE_ARG;
             SpW(-2) = (W_)&stg_ctoi_R1n_info;
             SpW(-1) = BCO_PTR(o_bco);
             Sp_subW(2);
@@ -1328,7 +1328,7 @@ run_BCO:
         }
 
         case bci_PUSH_ALTS_F: {
-            int o_bco  = BCO_GET_LARGE_ARG;
+            W_ o_bco  = BCO_GET_LARGE_ARG;
             SpW(-2) = (W_)&stg_ctoi_F1_info;
             SpW(-1) = BCO_PTR(o_bco);
             Sp_subW(2);
@@ -1341,7 +1341,7 @@ run_BCO:
         }
 
         case bci_PUSH_ALTS_D: {
-            int o_bco  = BCO_GET_LARGE_ARG;
+            W_ o_bco  = BCO_GET_LARGE_ARG;
             SpW(-2) = (W_)&stg_ctoi_D1_info;
             SpW(-1) = BCO_PTR(o_bco);
             Sp_subW(2);
@@ -1354,7 +1354,7 @@ run_BCO:
         }
 
         case bci_PUSH_ALTS_L: {
-            int o_bco  = BCO_GET_LARGE_ARG;
+            W_ o_bco  = BCO_GET_LARGE_ARG;
             SpW(-2) = (W_)&stg_ctoi_L1_info;
             SpW(-1) = BCO_PTR(o_bco);
             Sp_subW(2);
@@ -1367,7 +1367,7 @@ run_BCO:
         }
 
         case bci_PUSH_ALTS_V: {
-            int o_bco  = BCO_GET_LARGE_ARG;
+            W_ o_bco  = BCO_GET_LARGE_ARG;
             SpW(-2) = (W_)&stg_ctoi_V_info;
             SpW(-1) = BCO_PTR(o_bco);
             Sp_subW(2);
@@ -1380,9 +1380,9 @@ run_BCO:
         }
 
         case bci_PUSH_ALTS_T: {
-            int o_bco = BCO_GET_LARGE_ARG;
+            W_ o_bco = BCO_GET_LARGE_ARG;
             W_ tuple_info = (W_)BCO_LIT(BCO_GET_LARGE_ARG);
-            int o_tuple_bco = BCO_GET_LARGE_ARG;
+            W_ o_tuple_bco = BCO_GET_LARGE_ARG;
 
 #if defined(PROFILING)
             SpW(-1) = (W_)cap->r.rCCCS;
@@ -1526,30 +1526,30 @@ run_BCO:
         }
 
         case bci_PUSH_UBX8: {
-            int o_lit = BCO_GET_LARGE_ARG;
+            W_ o_lit = BCO_GET_LARGE_ARG;
             Sp_subB(1);
             *(StgWord8*)Sp = *(StgWord8*)(literals+o_lit);
             goto nextInsn;
         }
 
         case bci_PUSH_UBX16: {
-            int o_lit = BCO_GET_LARGE_ARG;
+            W_ o_lit = BCO_GET_LARGE_ARG;
             Sp_subB(2);
             *(StgWord16*)Sp = *(StgWord16*)(literals+o_lit);
             goto nextInsn;
         }
 
         case bci_PUSH_UBX32: {
-            int o_lit = BCO_GET_LARGE_ARG;
+            W_ o_lit = BCO_GET_LARGE_ARG;
             Sp_subB(4);
             *(StgWord32*)Sp = *(StgWord32*)(literals+o_lit);
             goto nextInsn;
         }
 
         case bci_PUSH_UBX: {
-            int i;
-            int o_lits = BCO_GET_LARGE_ARG;
-            int n_words = BCO_NEXT;
+            W_ i;
+            W_ o_lits = BCO_GET_LARGE_ARG;
+            W_ n_words = BCO_GET_LARGE_ARG;
             Sp_subW(n_words);
             for (i = 0; i < n_words; i++) {
                 SpW(i) = (W_)BCO_LIT(o_lits+i);
@@ -1558,10 +1558,10 @@ run_BCO:
         }
 
         case bci_SLIDE: {
-            int n  = BCO_NEXT;
-            int by = BCO_NEXT;
+            W_ n  = BCO_GET_LARGE_ARG;
+            W_ by = BCO_GET_LARGE_ARG;
             /* a_1, .. a_n, b_1, .. b_by, s => a_1, .. a_n, s */
-            while(--n >= 0) {
+            while(n-- > 0) {
                 SpW(n+by) = SpW(n);
             }
             Sp_addW(by);
@@ -1570,7 +1570,7 @@ run_BCO:
         }
 
         case bci_ALLOC_AP: {
-            int n_payload = BCO_NEXT;
+            StgHalfWord n_payload = BCO_GET_LARGE_ARG;
             StgAP *ap = (StgAP*)allocate(cap, AP_sizeW(n_payload));
             SpW(-1) = (W_)ap;
             ap->n_args = n_payload;
@@ -1583,7 +1583,7 @@ run_BCO:
         }
 
         case bci_ALLOC_AP_NOUPD: {
-            int n_payload = BCO_NEXT;
+            StgHalfWord n_payload = BCO_GET_LARGE_ARG;
             StgAP *ap = (StgAP*)allocate(cap, AP_sizeW(n_payload));
             SpW(-1) = (W_)ap;
             ap->n_args = n_payload;
@@ -1597,8 +1597,8 @@ run_BCO:
 
         case bci_ALLOC_PAP: {
             StgPAP* pap;
-            int arity = BCO_NEXT;
-            int n_payload = BCO_NEXT;
+            StgHalfWord arity = BCO_GET_LARGE_ARG;
+            StgHalfWord n_payload = BCO_GET_LARGE_ARG;
             pap = (StgPAP*)allocate(cap, PAP_sizeW(n_payload));
             SpW(-1) = (W_)pap;
             pap->n_args = n_payload;
@@ -1611,11 +1611,11 @@ run_BCO:
         }
 
         case bci_MKAP: {
-            int i;
-            int stkoff = BCO_NEXT;
-            int n_payload = BCO_NEXT;
+            StgHalfWord i;
+            W_ stkoff = BCO_GET_LARGE_ARG;
+            StgHalfWord n_payload = BCO_GET_LARGE_ARG;
             StgAP* ap = (StgAP*)SpW(stkoff);
-            ASSERT((int)ap->n_args == n_payload);
+            ASSERT(ap->n_args == n_payload);
             ap->fun = (StgClosure*)SpW(0);
 
             // The function should be a BCO, and its bitmap should
@@ -1635,11 +1635,11 @@ run_BCO:
         }
 
         case bci_MKPAP: {
-            int i;
-            int stkoff = BCO_NEXT;
-            int n_payload = BCO_NEXT;
+            StgHalfWord i;
+            W_ stkoff = BCO_GET_LARGE_ARG;
+            StgHalfWord n_payload = BCO_GET_LARGE_ARG;
             StgPAP* pap = (StgPAP*)SpW(stkoff);
-            ASSERT((int)pap->n_args == n_payload);
+            ASSERT(pap->n_args == n_payload);
             pap->fun = (StgClosure*)SpW(0);
 
             // The function should be a BCO
@@ -1663,8 +1663,8 @@ run_BCO:
 
         case bci_UNPACK: {
             /* Unpack N ptr words from t.o.s constructor */
-            int i;
-            int n_words = BCO_NEXT;
+            W_ i;
+            W_ n_words = BCO_GET_LARGE_ARG;
             StgClosure* con = UNTAG_CLOSURE((StgClosure*)SpW(0));
             Sp_subW(n_words);
             for (i = 0; i < n_words; i++) {
@@ -1674,9 +1674,9 @@ run_BCO:
         }
 
         case bci_PACK: {
-            int i;
-            int o_itbl         = BCO_GET_LARGE_ARG;
-            int n_words        = BCO_NEXT;
+            W_ i;
+            W_ o_itbl         = BCO_GET_LARGE_ARG;
+            W_ n_words        = BCO_GET_LARGE_ARG;
             StgInfoTable* itbl = INFO_PTR_TO_STRUCT((StgInfoTable *)BCO_LIT(o_itbl));
             int request        = CONSTR_sizeW( itbl->layout.payload.ptrs,
                                                itbl->layout.payload.nptrs );
@@ -2006,9 +2006,9 @@ run_BCO:
         }
 
         case bci_SWIZZLE: {
-            int stkoff = BCO_NEXT;
-            signed short n = (signed short)(BCO_NEXT);
-            SpW(stkoff) += (W_)n;
+            W_ stkoff = BCO_GET_LARGE_ARG;
+            StgInt n = BCO_GET_LARGE_ARG;
+            (*(StgInt*)(Sp_plusW(stkoff))) += n;
             goto nextInsn;
         }
 
@@ -2020,7 +2020,7 @@ run_BCO:
 
         case bci_CCALL: {
             void *tok;
-            int stk_offset            = BCO_NEXT;
+            W_ stk_offset             = BCO_GET_LARGE_ARG;
             int o_itbl                = BCO_GET_LARGE_ARG;
             int flags                 = BCO_NEXT;
             bool interruptible        = flags & 0x1;
@@ -2056,7 +2056,7 @@ run_BCO:
             uint32_t nargs = cif->nargs;
             uint32_t ret_size;
             uint32_t i;
-            int j;
+            W_ j;
             StgPtr p;
             W_ ret[2];                  // max needed
             W_ *arguments[stk_offset];  // max needed
diff --git a/testsuite/tests/ghci/should_run/LargeBCO.hs b/testsuite/tests/ghci/should_run/LargeBCO.hs
new file mode 100644
index 0000000000000000000000000000000000000000..237ce2e83691ee223494c4c42571f43ad52ee0b6
--- /dev/null
+++ b/testsuite/tests/ghci/should_run/LargeBCO.hs
@@ -0,0 +1,32 @@
+
+{-
+   Test for BCOs that use larger than 16 bit stack offsets.
+
+   Using Template Haskell because loading the code directly into
+   GHCi produces different bytecode that does not have large stack
+   offsets.
+
+   testcase from #22888
+ -}
+
+{-# LANGUAGE TemplateHaskell #-}
+module Main where
+
+import LargeBCO_A
+
+import Data.Binary.Get (runGet, Get, getWord32be)
+import qualified Data.ByteString.Lazy as B
+import Data.Bits (Bits(..))
+import Data.Word (Word32)
+
+import Language.Haskell.TH.Lib
+
+result :: String
+result = $(let initState = SHA256S 1 2 3 4 5 6 7 8
+               input     = B.replicate 64 0
+               output    = runGet (processSHA256Block initState) input
+           in litE (stringL (show output))
+          )
+
+main :: IO ()
+main = putStrLn result
diff --git a/testsuite/tests/ghci/should_run/LargeBCO.stdout b/testsuite/tests/ghci/should_run/LargeBCO.stdout
new file mode 100644
index 0000000000000000000000000000000000000000..06d5a6b1adb0ce8e0a2b165e604cd87da603f07b
--- /dev/null
+++ b/testsuite/tests/ghci/should_run/LargeBCO.stdout
@@ -0,0 +1 @@
+SHA256S 1251949539 2800197164 2023110800 2630081144 3831421046 3141654527 2982319529 2535435789
diff --git a/testsuite/tests/ghci/should_run/LargeBCO_A.hs b/testsuite/tests/ghci/should_run/LargeBCO_A.hs
new file mode 100644
index 0000000000000000000000000000000000000000..999e64c4af4c4c2e921be5449e73ec0f9921fa2a
--- /dev/null
+++ b/testsuite/tests/ghci/should_run/LargeBCO_A.hs
@@ -0,0 +1,215 @@
+{-# LANGUAGE TemplateHaskell #-}
+module LargeBCO_A (processSHA256Block, SHA256State(..)) where
+
+import Data.Binary.Get (runGet, Get, getWord32be)
+import qualified Data.ByteString.Lazy as B
+import Data.Binary.Get (Get, getWord32be)
+import Data.Bits (Bits(..))
+import Data.Word (Word32)
+import System.Environment
+
+data SHA256Sched = SHA256Sched !Word32 !Word32 !Word32 !Word32 !Word32 -- 00-04
+                               !Word32 !Word32 !Word32 !Word32 !Word32 -- 05-09
+                               !Word32 !Word32 !Word32 !Word32 !Word32 -- 10-04
+                               !Word32 !Word32 !Word32 !Word32 !Word32 -- 15-09
+                               !Word32 !Word32 !Word32 !Word32 !Word32 -- 20-04
+                               !Word32 !Word32 !Word32 !Word32 !Word32 -- 25-09
+                               !Word32 !Word32 !Word32 !Word32 !Word32 -- 30-04
+                               !Word32 !Word32 !Word32 !Word32 !Word32 -- 35-09
+                               !Word32 !Word32 !Word32 !Word32 !Word32 -- 40-04
+                               !Word32 !Word32 !Word32 !Word32 !Word32 -- 45-09
+                               !Word32 !Word32 !Word32 !Word32 !Word32 -- 50-04
+                               !Word32 !Word32 !Word32 !Word32 !Word32 -- 55-09
+                               !Word32 !Word32 !Word32 !Word32         -- 60-63
+
+data SHA256State = SHA256S !Word32 !Word32 !Word32 !Word32
+                           !Word32 !Word32 !Word32 !Word32
+    deriving (Show)
+
+{-# SPECIALIZE ch :: Word32 -> Word32 -> Word32 -> Word32 #-}
+ch :: Bits a => a -> a -> a -> a
+ch x y z = (x .&. y) `xor` (complement x .&. z)
+
+{-# SPECIALIZE maj :: Word32 -> Word32 -> Word32 -> Word32 #-}
+maj :: Bits a => a -> a -> a -> a
+maj x y z = (x .&. (y .|. z)) .|. (y .&. z)
+
+bsig256_0 :: Word32 -> Word32
+bsig256_0 x = rotateR x 2 `xor` rotateR x 13 `xor` rotateR x 22
+
+bsig256_1 :: Word32 -> Word32
+bsig256_1 x = rotateR x 6 `xor` rotateR x 11 `xor` rotateR x 25
+
+lsig256_0 :: Word32 -> Word32
+lsig256_0 x = rotateR x 7 `xor` rotateR x 18 `xor` shiftR x 3
+
+lsig256_1 :: Word32 -> Word32
+lsig256_1 x = rotateR x 17 `xor` rotateR x 19 `xor` shiftR x 10
+
+getSHA256Sched :: Get SHA256Sched
+getSHA256Sched = do
+  w00 <- getWord32be
+  w01 <- getWord32be
+  w02 <- getWord32be
+  w03 <- getWord32be
+  w04 <- getWord32be
+  w05 <- getWord32be
+  w06 <- getWord32be
+  w07 <- getWord32be
+  w08 <- getWord32be
+  w09 <- getWord32be
+  w10 <- getWord32be
+  w11 <- getWord32be
+  w12 <- getWord32be
+  w13 <- getWord32be
+  w14 <- getWord32be
+  w15 <- getWord32be
+  let w16 = lsig256_1 w14 + w09 + lsig256_0 w01 + w00
+      w17 = lsig256_1 w15 + w10 + lsig256_0 w02 + w01
+      w18 = lsig256_1 w16 + w11 + lsig256_0 w03 + w02
+      w19 = lsig256_1 w17 + w12 + lsig256_0 w04 + w03
+      w20 = lsig256_1 w18 + w13 + lsig256_0 w05 + w04
+      w21 = lsig256_1 w19 + w14 + lsig256_0 w06 + w05
+      w22 = lsig256_1 w20 + w15 + lsig256_0 w07 + w06
+      w23 = lsig256_1 w21 + w16 + lsig256_0 w08 + w07
+      w24 = lsig256_1 w22 + w17 + lsig256_0 w09 + w08
+      w25 = lsig256_1 w23 + w18 + lsig256_0 w10 + w09
+      w26 = lsig256_1 w24 + w19 + lsig256_0 w11 + w10
+      w27 = lsig256_1 w25 + w20 + lsig256_0 w12 + w11
+      w28 = lsig256_1 w26 + w21 + lsig256_0 w13 + w12
+      w29 = lsig256_1 w27 + w22 + lsig256_0 w14 + w13
+      w30 = lsig256_1 w28 + w23 + lsig256_0 w15 + w14
+      w31 = lsig256_1 w29 + w24 + lsig256_0 w16 + w15
+      w32 = lsig256_1 w30 + w25 + lsig256_0 w17 + w16
+      w33 = lsig256_1 w31 + w26 + lsig256_0 w18 + w17
+      w34 = lsig256_1 w32 + w27 + lsig256_0 w19 + w18
+      w35 = lsig256_1 w33 + w28 + lsig256_0 w20 + w19
+      w36 = lsig256_1 w34 + w29 + lsig256_0 w21 + w20
+      w37 = lsig256_1 w35 + w30 + lsig256_0 w22 + w21
+      w38 = lsig256_1 w36 + w31 + lsig256_0 w23 + w22
+      w39 = lsig256_1 w37 + w32 + lsig256_0 w24 + w23
+      w40 = lsig256_1 w38 + w33 + lsig256_0 w25 + w24
+      w41 = lsig256_1 w39 + w34 + lsig256_0 w26 + w25
+      w42 = lsig256_1 w40 + w35 + lsig256_0 w27 + w26
+      w43 = lsig256_1 w41 + w36 + lsig256_0 w28 + w27
+      w44 = lsig256_1 w42 + w37 + lsig256_0 w29 + w28
+      w45 = lsig256_1 w43 + w38 + lsig256_0 w30 + w29
+      w46 = lsig256_1 w44 + w39 + lsig256_0 w31 + w30
+      w47 = lsig256_1 w45 + w40 + lsig256_0 w32 + w31
+      w48 = lsig256_1 w46 + w41 + lsig256_0 w33 + w32
+      w49 = lsig256_1 w47 + w42 + lsig256_0 w34 + w33
+      w50 = lsig256_1 w48 + w43 + lsig256_0 w35 + w34
+      w51 = lsig256_1 w49 + w44 + lsig256_0 w36 + w35
+      w52 = lsig256_1 w50 + w45 + lsig256_0 w37 + w36
+      w53 = lsig256_1 w51 + w46 + lsig256_0 w38 + w37
+      w54 = lsig256_1 w52 + w47 + lsig256_0 w39 + w38
+      w55 = lsig256_1 w53 + w48 + lsig256_0 w40 + w39
+      w56 = lsig256_1 w54 + w49 + lsig256_0 w41 + w40
+      w57 = lsig256_1 w55 + w50 + lsig256_0 w42 + w41
+      w58 = lsig256_1 w56 + w51 + lsig256_0 w43 + w42
+      w59 = lsig256_1 w57 + w52 + lsig256_0 w44 + w43
+      w60 = lsig256_1 w58 + w53 + lsig256_0 w45 + w44
+      w61 = lsig256_1 w59 + w54 + lsig256_0 w46 + w45
+      w62 = lsig256_1 w60 + w55 + lsig256_0 w47 + w46
+      w63 = lsig256_1 w61 + w56 + lsig256_0 w48 + w47
+  return $! SHA256Sched w00 w01 w02 w03 w04 w05 w06 w07 w08 w09
+                        w10 w11 w12 w13 w14 w15 w16 w17 w18 w19
+                        w20 w21 w22 w23 w24 w25 w26 w27 w28 w29
+                        w30 w31 w32 w33 w34 w35 w36 w37 w38 w39
+                        w40 w41 w42 w43 w44 w45 w46 w47 w48 w49
+                        w50 w51 w52 w53 w54 w55 w56 w57 w58 w59
+                        w60 w61 w62 w63
+
+{-# NOINLINE processSHA256Block #-}
+processSHA256Block :: SHA256State -> Get SHA256State
+processSHA256Block !s00@(SHA256S a00 b00 c00 d00 e00 f00 g00 h00) = do
+  (SHA256Sched w00 w01 w02 w03 w04 w05 w06 w07 w08 w09
+               w10 w11 w12 w13 w14 w15 w16 w17 w18 w19
+               w20 w21 w22 w23 w24 w25 w26 w27 w28 w29
+               w30 w31 w32 w33 w34 w35 w36 w37 w38 w39
+               w40 w41 w42 w43 w44 w45 w46 w47 w48 w49
+               w50 w51 w52 w53 w54 w55 w56 w57 w58 w59
+               w60 w61 w62 w63) <- getSHA256Sched
+  let s01 = step256 s00 0x428a2f98 w00
+      s02 = step256 s01 0x71374491 w01
+      s03 = step256 s02 0xb5c0fbcf w02
+      s04 = step256 s03 0xe9b5dba5 w03
+      s05 = step256 s04 0x3956c25b w04
+      s06 = step256 s05 0x59f111f1 w05
+      s07 = step256 s06 0x923f82a4 w06
+      s08 = step256 s07 0xab1c5ed5 w07
+      s09 = step256 s08 0xd807aa98 w08
+      s10 = step256 s09 0x12835b01 w09
+      s11 = step256 s10 0x243185be w10
+      s12 = step256 s11 0x550c7dc3 w11
+      s13 = step256 s12 0x72be5d74 w12
+      s14 = step256 s13 0x80deb1fe w13
+      s15 = step256 s14 0x9bdc06a7 w14
+      s16 = step256 s15 0xc19bf174 w15
+      s17 = step256 s16 0xe49b69c1 w16
+      s18 = step256 s17 0xefbe4786 w17
+      s19 = step256 s18 0x0fc19dc6 w18
+      s20 = step256 s19 0x240ca1cc w19
+      s21 = step256 s20 0x2de92c6f w20
+      s22 = step256 s21 0x4a7484aa w21
+      s23 = step256 s22 0x5cb0a9dc w22
+      s24 = step256 s23 0x76f988da w23
+      s25 = step256 s24 0x983e5152 w24
+      s26 = step256 s25 0xa831c66d w25
+      s27 = step256 s26 0xb00327c8 w26
+      s28 = step256 s27 0xbf597fc7 w27
+      s29 = step256 s28 0xc6e00bf3 w28
+      s30 = step256 s29 0xd5a79147 w29
+      s31 = step256 s30 0x06ca6351 w30
+      s32 = step256 s31 0x14292967 w31
+      s33 = step256 s32 0x27b70a85 w32
+      s34 = step256 s33 0x2e1b2138 w33
+      s35 = step256 s34 0x4d2c6dfc w34
+      s36 = step256 s35 0x53380d13 w35
+      s37 = step256 s36 0x650a7354 w36
+      s38 = step256 s37 0x766a0abb w37
+      s39 = step256 s38 0x81c2c92e w38
+      s40 = step256 s39 0x92722c85 w39
+      s41 = step256 s40 0xa2bfe8a1 w40
+      s42 = step256 s41 0xa81a664b w41
+      s43 = step256 s42 0xc24b8b70 w42
+      s44 = step256 s43 0xc76c51a3 w43
+      s45 = step256 s44 0xd192e819 w44
+      s46 = step256 s45 0xd6990624 w45
+      s47 = step256 s46 0xf40e3585 w46
+      s48 = step256 s47 0x106aa070 w47
+      s49 = step256 s48 0x19a4c116 w48
+      s50 = step256 s49 0x1e376c08 w49
+      s51 = step256 s50 0x2748774c w50
+      s52 = step256 s51 0x34b0bcb5 w51
+      s53 = step256 s52 0x391c0cb3 w52
+      s54 = step256 s53 0x4ed8aa4a w53
+      s55 = step256 s54 0x5b9cca4f w54
+      s56 = step256 s55 0x682e6ff3 w55
+      s57 = step256 s56 0x748f82ee w56
+      s58 = step256 s57 0x78a5636f w57
+      s59 = step256 s58 0x84c87814 w58
+      s60 = step256 s59 0x8cc70208 w59
+      s61 = step256 s60 0x90befffa w60
+      s62 = step256 s61 0xa4506ceb w61
+      s63 = step256 s62 0xbef9a3f7 w62
+      s64 = step256 s63 0xc67178f2 w63
+      SHA256S a64 b64 c64 d64 e64 f64 g64 h64 = s64
+  return $! SHA256S (a00 + a64) (b00 + b64) (c00 + c64) (d00 + d64)
+                    (e00 + e64) (f00 + f64) (g00 + g64) (h00 + h64)
+
+{-# INLINE step256 #-}
+step256 :: SHA256State -> Word32 -> Word32 -> SHA256State
+step256 !(SHA256S a b c d e f g h) k w = SHA256S a' b' c' d' e' f' g' h'
+ where
+  t1 = h + bsig256_1 e + ch e f g + k + w
+  t2 = bsig256_0 a + maj a b c
+  h' = g
+  g' = f
+  f' = e
+  e' = d + t1
+  d' = c
+  c' = b
+  b' = a
+  a' = t1 + t2
+
diff --git a/testsuite/tests/ghci/should_run/T22888.hs b/testsuite/tests/ghci/should_run/T22888.hs
new file mode 100644
index 0000000000000000000000000000000000000000..8157028b66a2e97ddb405f1a3b3153e98a1b76d3
--- /dev/null
+++ b/testsuite/tests/ghci/should_run/T22888.hs
@@ -0,0 +1,221 @@
+{-
+
+  This module produced a panic when compiled with -fbyte-code-and-object-code
+  and optimization because it required stack offsets greater than 65535
+
+  See #22888
+
+ -}
+
+module Main (main, processSHA256Block) where
+
+import Data.Binary.Get (Get, getWord32be)
+import Data.Bits (Bits(..))
+import Data.Word (Word32)
+
+main :: IO ()
+main = pure ()
+
+data SHA256Sched = SHA256Sched !Word32 !Word32 !Word32 !Word32 !Word32 -- 00-04
+                               !Word32 !Word32 !Word32 !Word32 !Word32 -- 05-09
+                               !Word32 !Word32 !Word32 !Word32 !Word32 -- 10-04
+                               !Word32 !Word32 !Word32 !Word32 !Word32 -- 15-09
+                               !Word32 !Word32 !Word32 !Word32 !Word32 -- 20-04
+                               !Word32 !Word32 !Word32 !Word32 !Word32 -- 25-09
+                               !Word32 !Word32 !Word32 !Word32 !Word32 -- 30-04
+                               !Word32 !Word32 !Word32 !Word32 !Word32 -- 35-09
+                               !Word32 !Word32 !Word32 !Word32 !Word32 -- 40-04
+                               !Word32 !Word32 !Word32 !Word32 !Word32 -- 45-09
+                               !Word32 !Word32 !Word32 !Word32 !Word32 -- 50-04
+                               !Word32 !Word32 !Word32 !Word32 !Word32 -- 55-09
+                               !Word32 !Word32 !Word32 !Word32         -- 60-63
+
+data SHA256State = SHA256S !Word32 !Word32 !Word32 !Word32
+                           !Word32 !Word32 !Word32 !Word32
+
+{-# SPECIALIZE ch :: Word32 -> Word32 -> Word32 -> Word32 #-}
+ch :: Bits a => a -> a -> a -> a
+ch x y z = (x .&. y) `xor` (complement x .&. z)
+
+{-# SPECIALIZE maj :: Word32 -> Word32 -> Word32 -> Word32 #-}
+maj :: Bits a => a -> a -> a -> a
+maj x y z = (x .&. (y .|. z)) .|. (y .&. z)
+
+bsig256_0 :: Word32 -> Word32
+bsig256_0 x = rotateR x 2 `xor` rotateR x 13 `xor` rotateR x 22
+
+bsig256_1 :: Word32 -> Word32
+bsig256_1 x = rotateR x 6 `xor` rotateR x 11 `xor` rotateR x 25
+
+lsig256_0 :: Word32 -> Word32
+lsig256_0 x = rotateR x 7 `xor` rotateR x 18 `xor` shiftR x 3
+
+lsig256_1 :: Word32 -> Word32
+lsig256_1 x = rotateR x 17 `xor` rotateR x 19 `xor` shiftR x 10
+
+getSHA256Sched :: Get SHA256Sched
+getSHA256Sched = do
+  w00 <- getWord32be
+  w01 <- getWord32be
+  w02 <- getWord32be
+  w03 <- getWord32be
+  w04 <- getWord32be
+  w05 <- getWord32be
+  w06 <- getWord32be
+  w07 <- getWord32be
+  w08 <- getWord32be
+  w09 <- getWord32be
+  w10 <- getWord32be
+  w11 <- getWord32be
+  w12 <- getWord32be
+  w13 <- getWord32be
+  w14 <- getWord32be
+  w15 <- getWord32be
+  let w16 = lsig256_1 w14 + w09 + lsig256_0 w01 + w00
+      w17 = lsig256_1 w15 + w10 + lsig256_0 w02 + w01
+      w18 = lsig256_1 w16 + w11 + lsig256_0 w03 + w02
+      w19 = lsig256_1 w17 + w12 + lsig256_0 w04 + w03
+      w20 = lsig256_1 w18 + w13 + lsig256_0 w05 + w04
+      w21 = lsig256_1 w19 + w14 + lsig256_0 w06 + w05
+      w22 = lsig256_1 w20 + w15 + lsig256_0 w07 + w06
+      w23 = lsig256_1 w21 + w16 + lsig256_0 w08 + w07
+      w24 = lsig256_1 w22 + w17 + lsig256_0 w09 + w08
+      w25 = lsig256_1 w23 + w18 + lsig256_0 w10 + w09
+      w26 = lsig256_1 w24 + w19 + lsig256_0 w11 + w10
+      w27 = lsig256_1 w25 + w20 + lsig256_0 w12 + w11
+      w28 = lsig256_1 w26 + w21 + lsig256_0 w13 + w12
+      w29 = lsig256_1 w27 + w22 + lsig256_0 w14 + w13
+      w30 = lsig256_1 w28 + w23 + lsig256_0 w15 + w14
+      w31 = lsig256_1 w29 + w24 + lsig256_0 w16 + w15
+      w32 = lsig256_1 w30 + w25 + lsig256_0 w17 + w16
+      w33 = lsig256_1 w31 + w26 + lsig256_0 w18 + w17
+      w34 = lsig256_1 w32 + w27 + lsig256_0 w19 + w18
+      w35 = lsig256_1 w33 + w28 + lsig256_0 w20 + w19
+      w36 = lsig256_1 w34 + w29 + lsig256_0 w21 + w20
+      w37 = lsig256_1 w35 + w30 + lsig256_0 w22 + w21
+      w38 = lsig256_1 w36 + w31 + lsig256_0 w23 + w22
+      w39 = lsig256_1 w37 + w32 + lsig256_0 w24 + w23
+      w40 = lsig256_1 w38 + w33 + lsig256_0 w25 + w24
+      w41 = lsig256_1 w39 + w34 + lsig256_0 w26 + w25
+      w42 = lsig256_1 w40 + w35 + lsig256_0 w27 + w26
+      w43 = lsig256_1 w41 + w36 + lsig256_0 w28 + w27
+      w44 = lsig256_1 w42 + w37 + lsig256_0 w29 + w28
+      w45 = lsig256_1 w43 + w38 + lsig256_0 w30 + w29
+      w46 = lsig256_1 w44 + w39 + lsig256_0 w31 + w30
+      w47 = lsig256_1 w45 + w40 + lsig256_0 w32 + w31
+      w48 = lsig256_1 w46 + w41 + lsig256_0 w33 + w32
+      w49 = lsig256_1 w47 + w42 + lsig256_0 w34 + w33
+      w50 = lsig256_1 w48 + w43 + lsig256_0 w35 + w34
+      w51 = lsig256_1 w49 + w44 + lsig256_0 w36 + w35
+      w52 = lsig256_1 w50 + w45 + lsig256_0 w37 + w36
+      w53 = lsig256_1 w51 + w46 + lsig256_0 w38 + w37
+      w54 = lsig256_1 w52 + w47 + lsig256_0 w39 + w38
+      w55 = lsig256_1 w53 + w48 + lsig256_0 w40 + w39
+      w56 = lsig256_1 w54 + w49 + lsig256_0 w41 + w40
+      w57 = lsig256_1 w55 + w50 + lsig256_0 w42 + w41
+      w58 = lsig256_1 w56 + w51 + lsig256_0 w43 + w42
+      w59 = lsig256_1 w57 + w52 + lsig256_0 w44 + w43
+      w60 = lsig256_1 w58 + w53 + lsig256_0 w45 + w44
+      w61 = lsig256_1 w59 + w54 + lsig256_0 w46 + w45
+      w62 = lsig256_1 w60 + w55 + lsig256_0 w47 + w46
+      w63 = lsig256_1 w61 + w56 + lsig256_0 w48 + w47
+  return $! SHA256Sched w00 w01 w02 w03 w04 w05 w06 w07 w08 w09
+                        w10 w11 w12 w13 w14 w15 w16 w17 w18 w19
+                        w20 w21 w22 w23 w24 w25 w26 w27 w28 w29
+                        w30 w31 w32 w33 w34 w35 w36 w37 w38 w39
+                        w40 w41 w42 w43 w44 w45 w46 w47 w48 w49
+                        w50 w51 w52 w53 w54 w55 w56 w57 w58 w59
+                        w60 w61 w62 w63
+
+processSHA256Block :: SHA256State -> Get SHA256State
+processSHA256Block !s00@(SHA256S a00 b00 c00 d00 e00 f00 g00 h00) = do
+  (SHA256Sched w00 w01 w02 w03 w04 w05 w06 w07 w08 w09
+               w10 w11 w12 w13 w14 w15 w16 w17 w18 w19
+               w20 w21 w22 w23 w24 w25 w26 w27 w28 w29
+               w30 w31 w32 w33 w34 w35 w36 w37 w38 w39
+               w40 w41 w42 w43 w44 w45 w46 w47 w48 w49
+               w50 w51 w52 w53 w54 w55 w56 w57 w58 w59
+               w60 w61 w62 w63) <- getSHA256Sched
+  let s01 = step256 s00 0x428a2f98 w00
+      s02 = step256 s01 0x71374491 w01
+      s03 = step256 s02 0xb5c0fbcf w02
+      s04 = step256 s03 0xe9b5dba5 w03
+      s05 = step256 s04 0x3956c25b w04
+      s06 = step256 s05 0x59f111f1 w05
+      s07 = step256 s06 0x923f82a4 w06
+      s08 = step256 s07 0xab1c5ed5 w07
+      s09 = step256 s08 0xd807aa98 w08
+      s10 = step256 s09 0x12835b01 w09
+      s11 = step256 s10 0x243185be w10
+      s12 = step256 s11 0x550c7dc3 w11
+      s13 = step256 s12 0x72be5d74 w12
+      s14 = step256 s13 0x80deb1fe w13
+      s15 = step256 s14 0x9bdc06a7 w14
+      s16 = step256 s15 0xc19bf174 w15
+      s17 = step256 s16 0xe49b69c1 w16
+      s18 = step256 s17 0xefbe4786 w17
+      s19 = step256 s18 0x0fc19dc6 w18
+      s20 = step256 s19 0x240ca1cc w19
+      s21 = step256 s20 0x2de92c6f w20
+      s22 = step256 s21 0x4a7484aa w21
+      s23 = step256 s22 0x5cb0a9dc w22
+      s24 = step256 s23 0x76f988da w23
+      s25 = step256 s24 0x983e5152 w24
+      s26 = step256 s25 0xa831c66d w25
+      s27 = step256 s26 0xb00327c8 w26
+      s28 = step256 s27 0xbf597fc7 w27
+      s29 = step256 s28 0xc6e00bf3 w28
+      s30 = step256 s29 0xd5a79147 w29
+      s31 = step256 s30 0x06ca6351 w30
+      s32 = step256 s31 0x14292967 w31
+      s33 = step256 s32 0x27b70a85 w32
+      s34 = step256 s33 0x2e1b2138 w33
+      s35 = step256 s34 0x4d2c6dfc w34
+      s36 = step256 s35 0x53380d13 w35
+      s37 = step256 s36 0x650a7354 w36
+      s38 = step256 s37 0x766a0abb w37
+      s39 = step256 s38 0x81c2c92e w38
+      s40 = step256 s39 0x92722c85 w39
+      s41 = step256 s40 0xa2bfe8a1 w40
+      s42 = step256 s41 0xa81a664b w41
+      s43 = step256 s42 0xc24b8b70 w42
+      s44 = step256 s43 0xc76c51a3 w43
+      s45 = step256 s44 0xd192e819 w44
+      s46 = step256 s45 0xd6990624 w45
+      s47 = step256 s46 0xf40e3585 w46
+      s48 = step256 s47 0x106aa070 w47
+      s49 = step256 s48 0x19a4c116 w48
+      s50 = step256 s49 0x1e376c08 w49
+      s51 = step256 s50 0x2748774c w50
+      s52 = step256 s51 0x34b0bcb5 w51
+      s53 = step256 s52 0x391c0cb3 w52
+      s54 = step256 s53 0x4ed8aa4a w53
+      s55 = step256 s54 0x5b9cca4f w54
+      s56 = step256 s55 0x682e6ff3 w55
+      s57 = step256 s56 0x748f82ee w56
+      s58 = step256 s57 0x78a5636f w57
+      s59 = step256 s58 0x84c87814 w58
+      s60 = step256 s59 0x8cc70208 w59
+      s61 = step256 s60 0x90befffa w60
+      s62 = step256 s61 0xa4506ceb w61
+      s63 = step256 s62 0xbef9a3f7 w62
+      s64 = step256 s63 0xc67178f2 w63
+      SHA256S a64 b64 c64 d64 e64 f64 g64 h64 = s64
+  return $! SHA256S (a00 + a64) (b00 + b64) (c00 + c64) (d00 + d64)
+                    (e00 + e64) (f00 + f64) (g00 + g64) (h00 + h64)
+
+{-# INLINE step256 #-}
+step256 :: SHA256State -> Word32 -> Word32 -> SHA256State
+step256 !(SHA256S a b c d e f g h) k w = SHA256S a' b' c' d' e' f' g' h'
+ where
+  t1 = h + bsig256_1 e + ch e f g + k + w
+  t2 = bsig256_0 a + maj a b c
+  h' = g
+  g' = f
+  f' = e
+  e' = d + t1
+  d' = c
+  c' = b
+  b' = a
+  a' = t1 + t2
+
diff --git a/testsuite/tests/ghci/should_run/all.T b/testsuite/tests/ghci/should_run/all.T
index 0a393c0fc28d6180691209978428b5abec7ae4ef..dfe0adaf62761a379d1a8a1b85340d1ec8f93ed9 100644
--- a/testsuite/tests/ghci/should_run/all.T
+++ b/testsuite/tests/ghci/should_run/all.T
@@ -85,10 +85,10 @@ test('T19628', [extra_files(['T19628a.hs']), only_ways(['ghci']) ], compile_and_
 test('T21052', just_ghci, ghci_script, ['T21052.script'])
 test('T21300', just_ghci, ghci_script, ['T21300.script'])
 test('UnliftedDataType2', just_ghci, compile_and_run, [''])
-
 test('T22829', just_ghci + [extra_hc_opts("-Wmissing-import-lists -Werror")], compile_and_run, [''])
 test('T23229', just_ghci + [extra_hc_opts("-this-unit-id my-package -Wno-missing-methods T23229")], ghci_script, ['T23229.script'])
 test('T22958a', just_ghci, compile_and_run, [''])
 test('T22958b', just_ghci, compile_and_run, [''])
 test('T22958c', just_ghci, compile_and_run, [''])
 test('GhciMainIs', just_ghci, compile_and_run, ['-main-is otherMain'])
+test('LargeBCO', [extra_files(['LargeBCO_A.hs']), req_interp, extra_hc_opts("-O -fbyte-code-and-object-code -fprefer-byte-code")], compile_and_run, [''])