diff --git a/compiler/cmm/CmmInfo.hs b/compiler/cmm/CmmInfo.hs
index 734896adc847fdc20e45d144fa1f56bdcb696ace..c608372c6491f0c59fed067427676e0214fca362 100644
--- a/compiler/cmm/CmmInfo.hs
+++ b/compiler/cmm/CmmInfo.hs
@@ -58,7 +58,7 @@ cmmToRawCmm cmm = do
 --	<normal forward rest of StgInfoTable>
 --	<forward variable part>
 --
---	See includes/InfoTables.h
+--	See includes/rts/storage/InfoTables.h
 --
 -- For return-points these are as follows
 --
diff --git a/compiler/cmm/PprCmm.hs b/compiler/cmm/PprCmm.hs
index 504098891a44f8630d95c66538f64ade90131545..9f622c0a643fb7d595aaaf53a8c9610c1f80a80e 100644
--- a/compiler/cmm/PprCmm.hs
+++ b/compiler/cmm/PprCmm.hs
@@ -56,7 +56,7 @@ import Data.Maybe
 -- Temp Jan08
 import SMRep
 import ClosureInfo
-#include "../includes/StgFun.h"
+#include "../includes/rts/storage/FunTypes.h"
 
 
 pprCmms :: (Outputable info, Outputable g) => [GenCmm CmmStatic info g] -> SDoc
diff --git a/compiler/codeGen/CgCallConv.hs b/compiler/codeGen/CgCallConv.hs
index 5fa0a85dcfd81d459db634b0fb717c4f2ee6e7b2..351375d1e4e5e64010f3c6de363b4e4af9155e79 100644
--- a/compiler/codeGen/CgCallConv.hs
+++ b/compiler/codeGen/CgCallConv.hs
@@ -64,7 +64,7 @@ import Data.Bits
 -------------------------------------------------------------------------
 
 -- bring in ARG_P, ARG_N, etc.
-#include "../includes/StgFun.h"
+#include "../includes/rts/storage/FunTypes.h"
 
 -------------------------
 argDescrType :: ArgDescr -> StgHalfWord
diff --git a/compiler/codeGen/CgProf.hs b/compiler/codeGen/CgProf.hs
index 7750c0f08ef8861bcd0f42f02be6909794ecf24b..a3aa59b572b07e6b731ca373b682950f1558643b 100644
--- a/compiler/codeGen/CgProf.hs
+++ b/compiler/codeGen/CgProf.hs
@@ -24,9 +24,9 @@ module CgProf (
   ) where
 
 #include "HsVersions.h"
-#include "MachDeps.h"
+#include "../includes/MachDeps.h"
  -- For WORD_SIZE_IN_BITS only.
-#include "../includes/Constants.h"
+#include "../includes/rts/Constants.h"
 	-- For LDV_CREATE_MASK, LDV_STATE_USE
 	-- which are StgWords
 #include "../includes/DerivedConstants.h"
diff --git a/compiler/codeGen/CgUtils.hs b/compiler/codeGen/CgUtils.hs
index fad85f7e16b0137f4f7234dcdc10bcb09a31d7a7..d1d81e5de4caaf62cbc5c6527ebfe254bd1f8d1b 100644
--- a/compiler/codeGen/CgUtils.hs
+++ b/compiler/codeGen/CgUtils.hs
@@ -50,7 +50,7 @@ module CgUtils (
   ) where
 
 #include "HsVersions.h"
-#include "../includes/MachRegs.h"
+#include "../includes/stg/MachRegs.h"
 
 import BlockId
 import CgMonad
diff --git a/compiler/codeGen/SMRep.lhs b/compiler/codeGen/SMRep.lhs
index 32d958380006945830b2731ecff74813c96d8d0e..1667af8637abe76706e8745108147f2e292386b9 100644
--- a/compiler/codeGen/SMRep.lhs
+++ b/compiler/codeGen/SMRep.lhs
@@ -246,7 +246,7 @@ data SMRep
   | BlackHoleRep
 
 data ClosureType	-- Corresponds 1-1 with the varieties of closures
-			-- implemented by the RTS.  Compare with ghc/includes/ClosureTypes.h
+			-- implemented by the RTS.  Compare with includes/rts/storage/ClosureTypes.h
     = Constr
     | ConstrNoCaf
     | Fun
@@ -284,7 +284,7 @@ isStaticRep BlackHoleRep	         = False
 \end{code}
 
 \begin{code}
-#include "../includes/ClosureTypes.h"
+#include "../includes/rts/storage/ClosureTypes.h"
 -- Defines CONSTR, CONSTR_1_0 etc
 
 -- krc: only called by tickyDynAlloc in CgTicky; return
diff --git a/compiler/codeGen/StgCmmLayout.hs b/compiler/codeGen/StgCmmLayout.hs
index 84d4ef0362c1fa05ba3d9d1eab27fca1cfc07922..11a32577327d2bbc836a24838e77c3a8f618fa71 100644
--- a/compiler/codeGen/StgCmmLayout.hs
+++ b/compiler/codeGen/StgCmmLayout.hs
@@ -313,7 +313,7 @@ mkVirtHeapOffsets is_thunk things
 -------------------------------------------------------------------------
 
 -- bring in ARG_P, ARG_N, etc.
-#include "../includes/StgFun.h"
+#include "../includes/rts/storage/FunTypes.h"
 
 -------------------------
 -- argDescrType :: ArgDescr -> StgHalfWord
diff --git a/compiler/codeGen/StgCmmProf.hs b/compiler/codeGen/StgCmmProf.hs
index 6fb20f8f46b90e1a689a037a5285f3410e4496e6..850356149c371ae078dc75e6cb70f1829dbca091 100644
--- a/compiler/codeGen/StgCmmProf.hs
+++ b/compiler/codeGen/StgCmmProf.hs
@@ -25,9 +25,9 @@ module StgCmmProf (
   ) where
 
 #include "HsVersions.h"
-#include "MachDeps.h"
+#include "../includes/MachDeps.h"
  -- For WORD_SIZE_IN_BITS only.
-#include "../includes/Constants.h"
+#include "../includes/rts/Constants.h"
 	-- For LDV_CREATE_MASK, LDV_STATE_USE
 	-- which are StgWords
 #include "../includes/DerivedConstants.h"
diff --git a/compiler/codeGen/StgCmmUtils.hs b/compiler/codeGen/StgCmmUtils.hs
index 357ca2c5b6419610334563946af9b3c4e84d48dc..d2d7bb1e41d55528a941e32e815786a02db2827a 100644
--- a/compiler/codeGen/StgCmmUtils.hs
+++ b/compiler/codeGen/StgCmmUtils.hs
@@ -44,7 +44,7 @@ module StgCmmUtils (
   ) where
 
 #include "HsVersions.h"
-#include "MachRegs.h"
+#include "../includes/stg/MachRegs.h"
 
 import StgCmmMonad
 import StgCmmClosure
diff --git a/compiler/ghci/ByteCodeAsm.lhs b/compiler/ghci/ByteCodeAsm.lhs
index e842bf75cffb5c8f618a9f54836e77c185bd2422..030ef896e607ae2b552418ae3c4e8442ee60b187 100644
--- a/compiler/ghci/ByteCodeAsm.lhs
+++ b/compiler/ghci/ByteCodeAsm.lhs
@@ -208,7 +208,7 @@ sizeSS16 :: SizedSeq a -> Word16
 sizeSS16 (SizedSeq n _) = fromIntegral n
 
 -- Bring in all the bci_ bytecode constants.
-#include "Bytecodes.h"
+#include "rts/Bytecodes.h"
 
 largeArgInstr :: Word16 -> Word16
 largeArgInstr bci = bci_FLAG_LARGE_ARGS .|. bci
diff --git a/compiler/ghci/ByteCodeItbls.lhs b/compiler/ghci/ByteCodeItbls.lhs
index 61644b2a8123d0e8a78f8b3c3693c9b5c67224b3..696ed0f5644b8d5760ad3fcdcbaa9df6a8f14752 100644
--- a/compiler/ghci/ByteCodeItbls.lhs
+++ b/compiler/ghci/ByteCodeItbls.lhs
@@ -84,7 +84,7 @@ mkITbl tc
         dcs = tyConDataCons tc
         n   = tyConFamilySize tc
 
-#include "../includes/ClosureTypes.h"
+#include "../includes/rts/storage/ClosureTypes.h"
 cONSTR :: Int	-- Defined in ClosureTypes.h
 cONSTR = CONSTR 
 
@@ -151,7 +151,7 @@ ptrToInt (Ptr a#) = I# (addr2Int# a#)
 #if sparc_TARGET_ARCH
 -- After some consideration, we'll try this, where
 -- 0x55555555 stands in for the address to jump to.
--- According to ghc/includes/MachRegs.h, %g3 is very
+-- According to includes/rts/MachRegs.h, %g3 is very
 -- likely indeed to be baggable.
 --
 --   0000 07155555              sethi   %hi(0x55555555), %g3
diff --git a/compiler/ghci/RtClosureInspect.hs b/compiler/ghci/RtClosureInspect.hs
index cc1604761634ebdbe03360dbeab6567d49fa1de4..c0b91405254d45cecc66447e32eddf81061766ea 100644
--- a/compiler/ghci/RtClosureInspect.hs
+++ b/compiler/ghci/RtClosureInspect.hs
@@ -165,7 +165,7 @@ data Closure = Closure { tipe         :: ClosureType
 instance Outputable ClosureType where
   ppr = text . show 
 
-#include "../includes/ClosureTypes.h"
+#include "../includes/rts/storage/ClosureTypes.h"
 
 aP_CODE, pAP_CODE :: Int
 aP_CODE = AP
diff --git a/compiler/iface/BinIface.hs b/compiler/iface/BinIface.hs
index 679494b0a51704f6978755cd8b232948ec8a6800..15cefe8cdfceb724f2271db93739a2a735f18c39 100644
--- a/compiler/iface/BinIface.hs
+++ b/compiler/iface/BinIface.hs
@@ -206,7 +206,7 @@ initBinMemSize :: Int
 initBinMemSize = 1024 * 1024
 
 -- The *host* architecture version:
-#include "MachDeps.h"
+#include "../includes/MachDeps.h"
 
 binaryInterfaceMagic :: Word32
 #if   WORD_SIZE_IN_BITS == 32
diff --git a/compiler/main/Constants.lhs b/compiler/main/Constants.lhs
index b809f5280bd6fb677ab5e48f38ec64af64b55efc..106c9a78ca6cff41a4d55043fd578e09dce358ed 100644
--- a/compiler/main/Constants.lhs
+++ b/compiler/main/Constants.lhs
@@ -14,8 +14,8 @@ import Data.Bits (shiftL)
 -- be in trouble.
 
 #include "HsVersions.h"
-#include "../includes/MachRegs.h"
-#include "../includes/Constants.h"
+#include "../includes/stg/MachRegs.h"
+#include "../includes/rts/Constants.h"
 #include "../includes/MachDeps.h"
 #include "../includes/DerivedConstants.h"
 
diff --git a/compiler/main/StaticFlags.hs b/compiler/main/StaticFlags.hs
index 126b3a064ae0b9d814ccd6d668dced502dc2d2ba..b13661ed99d8d32868432e07ac5ba9f8a6fe6921 100644
--- a/compiler/main/StaticFlags.hs
+++ b/compiler/main/StaticFlags.hs
@@ -275,7 +275,7 @@ opt_Unregisterised		= lookUp  (fsLit "-funregisterised")
 -- Derived, not a real option.  Determines whether we will be compiling
 -- info tables that reside just before the entry code, or with an
 -- indirection to the entry code.  See TABLES_NEXT_TO_CODE in 
--- includes/InfoTables.h.
+-- includes/rts/storage/InfoTables.h.
 tablesNextToCode :: Bool
 tablesNextToCode 		= not opt_Unregisterised
 		 		  && cGhcEnableTablesNextToCode == "YES"
diff --git a/compiler/nativeGen/Alpha/Regs.hs b/compiler/nativeGen/Alpha/Regs.hs
index 0a5c24e65a37cc9a075151438b00753535425a9a..2d85c5f141ba3a06f53ac935b7664af3121fe073 100644
--- a/compiler/nativeGen/Alpha/Regs.hs
+++ b/compiler/nativeGen/Alpha/Regs.hs
@@ -21,7 +21,7 @@ where
 {-
 #include "nativeGen/NCG.h"
 #include "HsVersions.h"
-#include "../includes/MachRegs.h"
+#include "../includes/stg/MachRegs.h"
 
 import RegsBase
 
diff --git a/compiler/nativeGen/PPC/CodeGen.hs b/compiler/nativeGen/PPC/CodeGen.hs
index 8eb515e6bf3fe1dfe68b0419d8a47e4a8f3f1f8a..495296a6ec406286f039aa1dcb1952b2ab465f1c 100644
--- a/compiler/nativeGen/PPC/CodeGen.hs
+++ b/compiler/nativeGen/PPC/CodeGen.hs
@@ -22,7 +22,7 @@ where
 
 #include "HsVersions.h"
 #include "nativeGen/NCG.h"
-#include "MachDeps.h"
+#include "../includes/MachDeps.h"
 
 -- NCG stuff:
 import PPC.Instr
diff --git a/compiler/nativeGen/PPC/Regs.hs b/compiler/nativeGen/PPC/Regs.hs
index 467ea497865b902320e91fb29752b186593ef5d6..18f06ed6ef0ed42dd7b657171b37a0e64d24ebc2 100644
--- a/compiler/nativeGen/PPC/Regs.hs
+++ b/compiler/nativeGen/PPC/Regs.hs
@@ -49,7 +49,7 @@ where
 
 #include "nativeGen/NCG.h"
 #include "HsVersions.h"
-#include "../includes/MachRegs.h"
+#include "../includes/stg/MachRegs.h"
 
 import Reg
 import RegClass
diff --git a/compiler/nativeGen/RegAlloc/Graph/TrivColorable.hs b/compiler/nativeGen/RegAlloc/Graph/TrivColorable.hs
index 5f3f0ac495cb470d6fea032525fb33ca1645c86b..fd0faaee90c2b3b7ae3c32aa8bfe31fab1012881 100644
--- a/compiler/nativeGen/RegAlloc/Graph/TrivColorable.hs
+++ b/compiler/nativeGen/RegAlloc/Graph/TrivColorable.hs
@@ -43,7 +43,7 @@ import FastTypes
 -- 	There is an allocatableRegsInClass :: RegClass -> Int, but doing the unboxing
 -- 	is too slow for us here.
 --
--- 	Look at includes/MachRegs.h to get these numbers.
+-- 	Look at includes/stg/MachRegs.h to get these numbers.
 --
 
 #if i386_TARGET_ARCH
diff --git a/compiler/nativeGen/RegAlloc/Linear/Main.hs b/compiler/nativeGen/RegAlloc/Linear/Main.hs
index 00e01d7ebca5bd204dd1b6b86c47c7196966e49c..7201207c790e7268a3d9121c7a409e22df589117 100644
--- a/compiler/nativeGen/RegAlloc/Linear/Main.hs
+++ b/compiler/nativeGen/RegAlloc/Linear/Main.hs
@@ -115,7 +115,7 @@ import Data.Maybe
 import Data.List
 import Control.Monad
 
-#include "../includes/MachRegs.h"
+#include "../includes/stg/MachRegs.h"
 
 
 -- -----------------------------------------------------------------------------
diff --git a/compiler/nativeGen/SPARC/CodeGen.hs b/compiler/nativeGen/SPARC/CodeGen.hs
index 39ab5ebdefc839d11037ea8819e8238afe4fbb3c..c430e18579f6718891e18a7ce652f8bf43961bd6 100644
--- a/compiler/nativeGen/SPARC/CodeGen.hs
+++ b/compiler/nativeGen/SPARC/CodeGen.hs
@@ -15,7 +15,7 @@ where
 
 #include "HsVersions.h"
 #include "nativeGen/NCG.h"
-#include "MachDeps.h"
+#include "../includes/MachDeps.h"
 
 -- NCG stuff:
 import SPARC.CodeGen.Sanity
diff --git a/compiler/nativeGen/SPARC/RegPlate.hs b/compiler/nativeGen/SPARC/RegPlate.hs
index 0301ab75c05f809f1f8f2a740dc828484cab48a9..a0d0e4c88d23b04f137dbc847db8f37fe9f96974 100644
--- a/compiler/nativeGen/SPARC/RegPlate.hs
+++ b/compiler/nativeGen/SPARC/RegPlate.hs
@@ -93,7 +93,7 @@ import FastBool
 #define f31	63
 
 
-#include "../includes/MachRegs.h"
+#include "../includes/stg/MachRegs.h"
 
 -- | Check whether a machine register is free for allocation.
 freeReg :: RegNo -> FastBool
diff --git a/compiler/nativeGen/SPARC/Regs.hs b/compiler/nativeGen/SPARC/Regs.hs
index 1c41e888ae63c45b1ad395f00c09126a20a6e925..8ad400f813e079abfea53dab8eeb5c5c58d2ba82 100644
--- a/compiler/nativeGen/SPARC/Regs.hs
+++ b/compiler/nativeGen/SPARC/Regs.hs
@@ -54,7 +54,7 @@ import FastBool
 	prepared for any eventuality.
 
 	The whole fp-register pairing thing on sparcs is a huge nuisance.  See
-	fptools/ghc/includes/MachRegs.h for a description of what's going on
+	includes/stg/MachRegs.h for a description of what's going on
 	here.
 -}
 
@@ -290,11 +290,13 @@ regDotColor reg
 
 -- Hard coded freeReg / globalRegMaybe -----------------------------------------
 -- This isn't being used at the moment because we're generating
---	these functions from the information in includes/MachRegs.hs via RegPlate.hs
+--	these functions from the information in
+--	includes/stg/MachRegs.hs via RegPlate.hs
 	
 -- | Check whether a machine register is free for allocation.
---	This needs to match the info in includes/MachRegs.h otherwise modules
---	compiled with the NCG won't be compatible with via-C ones.
+--	This needs to match the info in includes/stg/MachRegs.h
+--	otherwise modules compiled with the NCG won't be compatible
+--	with via-C ones.
 --
 {-
 freeReg :: RegNo -> FastBool
diff --git a/compiler/nativeGen/X86/CodeGen.hs b/compiler/nativeGen/X86/CodeGen.hs
index 8a1c77e1df299c8780735a3404870f0c657962d0..2e72d16854aab907fce70e39ea60d46c668286d2 100644
--- a/compiler/nativeGen/X86/CodeGen.hs
+++ b/compiler/nativeGen/X86/CodeGen.hs
@@ -27,7 +27,7 @@ where
 
 #include "HsVersions.h"
 #include "nativeGen/NCG.h"
-#include "MachDeps.h"
+#include "../includes/MachDeps.h"
 
 -- NCG stuff:
 import X86.Instr
diff --git a/compiler/nativeGen/X86/Ppr.hs b/compiler/nativeGen/X86/Ppr.hs
index 398c480aafbdab0a065a9d837b41713c976e77a9..c0f465791c41e81f47e50cd03929574d01975e71 100644
--- a/compiler/nativeGen/X86/Ppr.hs
+++ b/compiler/nativeGen/X86/Ppr.hs
@@ -466,7 +466,7 @@ pprDataItem lit
 	-- all such offsets will fit into 32 bits, so we have to stick
 	-- to 32-bit offset fields and modify the RTS appropriately
         --
-        -- See Note [x86-64-relative] in includes/InfoTables.h
+        -- See Note [x86-64-relative] in includes/rts/storage/InfoTables.h
 	-- 
 	ppr_item II64  x 
 	   | isRelativeReloc x =
diff --git a/compiler/nativeGen/X86/Regs.hs b/compiler/nativeGen/X86/Regs.hs
index 3a1d93883d942c2811e6460c557a943c686a382a..64d835b2ebe1bff7d5b3a75b94fcb224d1088f5f 100644
--- a/compiler/nativeGen/X86/Regs.hs
+++ b/compiler/nativeGen/X86/Regs.hs
@@ -54,7 +54,7 @@ where
 -- HACK: go for the max
 #endif
 
-#include "../includes/MachRegs.h"
+#include "../includes/stg/MachRegs.h"
 
 import Reg
 import RegClass
diff --git a/compiler/parser/cutils.c b/compiler/parser/cutils.c
index 4a7b7b3c68dbf99ed0436406ad176341680f7a91..157c90290d8ac249e47924e5e0bacce46ae52819 100644
--- a/compiler/parser/cutils.c
+++ b/compiler/parser/cutils.c
@@ -4,7 +4,9 @@ places in the GHC library.
 */
 
 #include "Rts.h"
+#if __GLASGOW_HASKELL__ <= 610
 #include "RtsFlags.h"
+#endif
 
 #include "HsFFI.h"
 
@@ -20,19 +22,19 @@ and causes gcc to require too many registers on x84
 */
 
 HsInt
-ghc_strlen( HsAddr a )
+ghc_strlen( HsPtr a )
 {
     return (strlen((char *)a));
 }
 
 HsInt
-ghc_memcmp( HsAddr a1, HsAddr a2, HsInt len )
+ghc_memcmp( HsPtr a1, HsPtr a2, HsInt len )
 {
     return (memcmp((char *)a1, a2, len));
 }
 
 HsInt
-ghc_memcmp_off( HsAddr a1, HsInt i, HsAddr a2, HsInt len )
+ghc_memcmp_off( HsPtr a1, HsInt i, HsPtr a2, HsInt len )
 {
     return (memcmp((char *)a1 + i, a2, len));
 }
diff --git a/compiler/utils/Binary.hs b/compiler/utils/Binary.hs
index 6cd045a98b4e9282e6d555d62d21ab040ba8f1c5..e633f35b3ae052c0ce4973c0511511811d43aa56 100644
--- a/compiler/utils/Binary.hs
+++ b/compiler/utils/Binary.hs
@@ -59,7 +59,7 @@ module Binary
 #include "HsVersions.h"
 
 -- The *host* architecture version:
-#include "MachDeps.h"
+#include "../includes/MachDeps.h"
 
 import {-# SOURCE #-} Name (Name)
 import FastString
diff --git a/compiler/utils/FastMutInt.lhs b/compiler/utils/FastMutInt.lhs
index 1b2b05eb9f7e5a2d451d97be3bd6596afffb184f..6aa1c798cf9da585999ec8dce90db71f3680f8b7 100644
--- a/compiler/utils/FastMutInt.lhs
+++ b/compiler/utils/FastMutInt.lhs
@@ -19,7 +19,7 @@ module FastMutInt(
 
 #ifdef __GLASGOW_HASKELL__
 
-#include "MachDeps.h"
+#include "../includes/MachDeps.h"
 #ifndef SIZEOF_HSINT
 #define SIZEOF_HSINT  INT_SIZE_IN_BYTES
 #endif
diff --git a/includes/ClosureMacros.h b/includes/ClosureMacros.h
deleted file mode 100644
index 1c371b25e803b1a152816ce1fadd0a6fc90f84f8..0000000000000000000000000000000000000000
--- a/includes/ClosureMacros.h
+++ /dev/null
@@ -1,185 +0,0 @@
-/* ----------------------------------------------------------------------------
- *
- * (c) The GHC Team, 1998-2004
- *
- * Macros for building and manipulating closures
- *
- * -------------------------------------------------------------------------- */
-
-#ifndef CLOSUREMACROS_H
-#define CLOSUREMACROS_H
-
-/* Say whether the code comes before the heap; on mingwin this may not be the
-   case, not because of another random MS pathology, but because the static
-   program may reside in a DLL
-*/
-
-/* -----------------------------------------------------------------------------
-   Info tables are slammed up against the entry code, and the label
-   for the info table is at the *end* of the table itself.  This
-   inline function adjusts an info pointer to point to the beginning
-   of the table, so we can use standard C structure indexing on it.
-
-   Note: this works for SRT info tables as long as you don't want to
-   access the SRT, since they are laid out the same with the SRT
-   pointer as the first word in the table.
-
-   NOTES ABOUT MANGLED C VS. MINI-INTERPRETER:
-
-   A couple of definitions:
-
-       "info pointer"    The first word of the closure.  Might point
-                         to either the end or the beginning of the
-			 info table, depending on whether we're using
-			 the mini interpretter or not.  GET_INFO(c)
-			 retrieves the info pointer of a closure.
-
-       "info table"      The info table structure associated with a
-                         closure.  This is always a pointer to the
-			 beginning of the structure, so we can
-			 use standard C structure indexing to pull out
-			 the fields.  get_itbl(c) returns a pointer to
-			 the info table for closure c.
-
-   An address of the form xxxx_info points to the end of the info
-   table or the beginning of the info table depending on whether we're
-   mangling or not respectively.  So, 
-
-         c->header.info = xxx_info 
-
-   makes absolute sense, whether mangling or not.
- 
-   -------------------------------------------------------------------------- */
-
-#define SET_INFO(c,i) ((c)->header.info = (i))
-#define GET_INFO(c)   ((c)->header.info)
-#define GET_ENTRY(c)  (ENTRY_CODE(GET_INFO(c)))
-
-#define get_itbl(c)   (INFO_PTR_TO_STRUCT((c)->header.info))
-#define get_ret_itbl(c) (RET_INFO_PTR_TO_STRUCT((c)->header.info))
-#define get_fun_itbl(c) (FUN_INFO_PTR_TO_STRUCT((c)->header.info))
-#define get_thunk_itbl(c) (THUNK_INFO_PTR_TO_STRUCT((c)->header.info))
-#define get_con_itbl(c) (CON_INFO_PTR_TO_STRUCT((c)->header.info))
-
-#define GET_TAG(con) (get_itbl(con)->srt_bitmap)
-
-#ifdef TABLES_NEXT_TO_CODE
-#define INFO_PTR_TO_STRUCT(info) ((StgInfoTable *)(info) - 1)
-#define RET_INFO_PTR_TO_STRUCT(info) ((StgRetInfoTable *)(info) - 1)
-#define FUN_INFO_PTR_TO_STRUCT(info) ((StgFunInfoTable *)(info) - 1)
-#define THUNK_INFO_PTR_TO_STRUCT(info) ((StgThunkInfoTable *)(info) - 1)
-#define CON_INFO_PTR_TO_STRUCT(info) ((StgConInfoTable *)(info) - 1)
-#define itbl_to_fun_itbl(i) ((StgFunInfoTable *)(((StgInfoTable *)(i) + 1)) - 1)
-#define itbl_to_ret_itbl(i) ((StgRetInfoTable *)(((StgInfoTable *)(i) + 1)) - 1)
-#define itbl_to_thunk_itbl(i) ((StgThunkInfoTable *)(((StgInfoTable *)(i) + 1)) - 1)
-#define itbl_to_con_itbl(i) ((StgConInfoTable *)(((StgInfoTable *)(i) + 1)) - 1)
-#else
-#define INFO_PTR_TO_STRUCT(info) ((StgInfoTable *)info)
-#define RET_INFO_PTR_TO_STRUCT(info) ((StgRetInfoTable *)info)
-#define FUN_INFO_PTR_TO_STRUCT(info) ((StgFunInfoTable *)info)
-#define THUNK_INFO_PTR_TO_STRUCT(info) ((StgThunkInfoTable *)info)
-#define CON_INFO_PTR_TO_STRUCT(info) ((StgConInfoTable *)info)
-#define itbl_to_fun_itbl(i) ((StgFunInfoTable *)(i))
-#define itbl_to_ret_itbl(i) ((StgRetInfoTable *)(i))
-#define itbl_to_thunk_itbl(i) ((StgThunkInfoTable *)(i))
-#define itbl_to_con_itbl(i) ((StgConInfoTable *)(i))
-#endif
-
-/* -----------------------------------------------------------------------------
-   Macros for building closures
-   -------------------------------------------------------------------------- */
-
-#ifdef PROFILING
-#ifdef DEBUG_RETAINER
-/* 
-  For the sake of debugging, we take the safest way for the moment. Actually, this 
-  is useful to check the sanity of heap before beginning retainer profiling.
-  flip is defined in RetainerProfile.c, and declared as extern in RetainerProfile.h.
-  Note: change those functions building Haskell objects from C datatypes, i.e.,
-  all rts_mk???() functions in RtsAPI.c, as well.
- */
-#define SET_PROF_HDR(c,ccs_)            \
-        ((c)->header.prof.ccs = ccs_, (c)->header.prof.hp.rs = (retainerSet *)((StgWord)NULL | flip))
-#else
-/*
-  For retainer profiling only: we do not have to set (c)->header.prof.hp.rs to
-  NULL | flip (flip is defined in RetainerProfile.c) because even when flip
-  is 1, rs is invalid and will be initialized to NULL | flip later when 
-  the closure *c is visited.
- */
-/*
-#define SET_PROF_HDR(c,ccs_)            \
-        ((c)->header.prof.ccs = ccs_, (c)->header.prof.hp.rs = NULL)
- */
-/*
-  The following macro works for both retainer profiling and LDV profiling:
-  for retainer profiling, ldvTime remains 0, so rs fields are initialized to 0.
-  See the invariants on ldvTime.
- */
-#define SET_PROF_HDR(c,ccs_)            \
-        ((c)->header.prof.ccs = ccs_,   \
-        LDV_RECORD_CREATE((c)))
-#endif /* DEBUG_RETAINER */
-#define SET_STATIC_PROF_HDR(ccs_)       \
-        prof : { ccs : (CostCentreStack *)ccs_, hp : { rs : NULL } },
-#else
-#define SET_PROF_HDR(c,ccs)
-#define SET_STATIC_PROF_HDR(ccs)
-#endif
-
-#ifdef TICKY_TICKY
-#define SET_TICKY_HDR(c,stuff)	     /* old: (c)->header.ticky.updated = stuff */
-#define SET_STATIC_TICKY_HDR(stuff)  /* old: ticky : { updated : stuff } */
-#else
-#define SET_TICKY_HDR(c,stuff)
-#define SET_STATIC_TICKY_HDR(stuff)
-#endif
-
-#define SET_HDR(c,_info,ccs)				\
-   {							\
-	(c)->header.info = _info;			\
-	SET_PROF_HDR((StgClosure *)(c),ccs);		\
-	SET_TICKY_HDR((StgClosure *)(c),0);		\
-   }
-
-#define SET_ARR_HDR(c,info,costCentreStack,n_words)	\
-   SET_HDR(c,info,costCentreStack);			\
-   (c)->words = n_words;
-
-/* -----------------------------------------------------------------------------
-   How to get hold of the static link field for a static closure.
-   -------------------------------------------------------------------------- */
-
-/* These are hard-coded. */
-#define FUN_STATIC_LINK(p)   (&(p)->payload[0])
-#define THUNK_STATIC_LINK(p) (&(p)->payload[1])
-#define IND_STATIC_LINK(p)   (&(p)->payload[1])
-
-INLINE_HEADER StgClosure **
-STATIC_LINK(const StgInfoTable *info, StgClosure *p)
-{ 
-    switch (info->type) {
-    case THUNK_STATIC:
-	return THUNK_STATIC_LINK(p);
-    case FUN_STATIC:
-	return FUN_STATIC_LINK(p);
-    case IND_STATIC:
-	return IND_STATIC_LINK(p);
-    default:
-	return &(p)->payload[info->layout.payload.ptrs +
-			     info->layout.payload.nptrs];
-    }
-}
-
-#define STATIC_LINK2(info,p)							\
-   (*(StgClosure**)(&((p)->payload[info->layout.payload.ptrs +			\
-					info->layout.payload.nptrs + 1])))
-
-/* -----------------------------------------------------------------------------
-   INTLIKE and CHARLIKE closures.
-   -------------------------------------------------------------------------- */
-
-#define CHARLIKE_CLOSURE(n) ((P_)&stg_CHARLIKE_closure[(n)-MIN_CHARLIKE])
-#define INTLIKE_CLOSURE(n)  ((P_)&stg_INTLIKE_closure[(n)-MIN_INTLIKE])
-
-#endif /* CLOSUREMACROS_H */
diff --git a/includes/Cmm.h b/includes/Cmm.h
index 4577672f44cc35aa165d9a6a0cf26b305c0cdf05..aba5c2e36ba5e49e6d4fcbdd55f92d243e21a4ed 100644
--- a/includes/Cmm.h
+++ b/includes/Cmm.h
@@ -66,7 +66,6 @@
 #define CMINUSMINUS 1
 
 #include "ghcconfig.h"
-#include "RtsConfig.h"
 
 /* -----------------------------------------------------------------------------
    Types 
@@ -144,6 +143,18 @@
 	name : bits8[] str;			\
   }						\
 
+#ifdef TABLES_NEXT_TO_CODE
+#define RET_LBL(f) f##_info
+#else
+#define RET_LBL(f) f##_ret
+#endif
+
+#ifdef TABLES_NEXT_TO_CODE
+#define ENTRY_LBL(f) f##_info
+#else
+#define ENTRY_LBL(f) f##_entry
+#endif
+
 /* -----------------------------------------------------------------------------
    Byte/word macros
 
@@ -320,26 +331,26 @@
    Constants.
    -------------------------------------------------------------------------- */
 
-#include "Constants.h"
+#include "rts/Constants.h"
 #include "DerivedConstants.h"
-#include "ClosureTypes.h"
-#include "StgFun.h"
-#include "OSThreads.h"
-#include "SMPClosureOps.h"
+#include "rts/storage/ClosureTypes.h"
+#include "rts/storage/FunTypes.h"
+#include "rts/storage/SMPClosureOps.h"
+#include "rts/OSThreads.h"
 
 /*
  * Need MachRegs, because some of the RTS code is conditionally
  * compiled based on REG_R1, REG_R2, etc.
  */
 #define STOLEN_X86_REGS 4
-#include "MachRegs.h"
+#include "stg/MachRegs.h"
 
-#include "Liveness.h"
-#include "StgLdvProf.h"
+#include "rts/storage/Liveness.h"
+#include "rts/prof/LDV.h"
 
 #undef BLOCK_SIZE
 #undef MBLOCK_SIZE
-#include "Block.h"  /* For Bdescr() */
+#include "rts/storage/Block.h"  /* For Bdescr() */
 
 
 #define MyCapability()  (BaseReg - OFFSET_Capability_r)
diff --git a/includes/GranSim.h b/includes/GranSim.h
deleted file mode 100644
index be5aa83a52a0b53a5900ec60fb4778ab10fdd90b..0000000000000000000000000000000000000000
--- a/includes/GranSim.h
+++ /dev/null
@@ -1,331 +0,0 @@
-/*
-  Headers for GranSim specific objects.
-  
-  Note that in GranSim we have one run-queue and blocking-queue for each
-  processor. Therefore, this header file redefines variables like
-  run_queue_hd to be relative to CurrentProc. The main arrays of runnable
-  and blocking queues are defined in Schedule.c.  The important STG-called
-  GranSim macros (e.g. for fetching nodes) are at the end of this
-  file. Usually they are just wrappers to proper C functions in GranSim.c.  
-*/
-
-#ifndef GRANSIM_H
-#define GRANSIM_H
-
-#if !defined(GRAN)
-
-/* Dummy definitions for basic GranSim macros called from STG land */
-#define DO_GRAN_ALLOCATE(n)     			  /* nothing */
-#define DO_GRAN_UNALLOCATE(n)   			  /* nothing */
-#define DO_GRAN_FETCH(node)     			  /* nothing */
-#define DO_GRAN_EXEC(arith,branch,load,store,floats)      /* nothing */
-#define GRAN_FETCH_AND_RESCHEDULE(liveness_mask,reenter)  /* nothing */
-#define GRAN_RESCHEDULE(liveness_mask,reenter)	          /* nothing */
-
-#endif
-
-#if defined(GRAN)  /* whole file */
-
-extern StgTSO *CurrentTSO;
-
-/*
- * @node Headers for GranSim specific objects, , ,
- * @section Headers for GranSim specific objects
- *
- * @menu
- * * Externs and prototypes::	
- * * Run and blocking queues::	
- * * Spark queues::		
- * * Processor related stuff::	
- * * GranSim costs::		
- * * STG called GranSim functions::  
- * * STG-called routines::	
- * @end menu
- *
- * @node Externs and prototypes, Run and blocking queues, Includes, Headers for GranSim specific objects
- * @subsection Externs and prototypes
- */
-
-/* Global constants */
-extern char *gran_event_names[];
-extern char *proc_status_names[];
-extern char *event_names[];
-
-/* Vars checked from within STG land */
-extern rtsBool  NeedToReSchedule, IgnoreEvents, IgnoreYields; 
-; 
-extern rtsTime  TimeOfNextEvent, TimeOfLastEvent, EndOfTimeSlice;
-
-/* costs for basic operations (copied from RTS flags) */
-extern nat gran_arith_cost, gran_branch_cost, gran_load_cost, gran_store_cost, gran_float_cost;
-
-extern nat SparksAvail;     /* How many sparks are available */
-extern nat SurplusThreads;  /* How many excess threads are there */
-extern nat sparksIgnored, sparksCreated;
-
-/*
- * @node Run and blocking queues, Spark queues, Externs and prototypes, Headers for GranSim specific objects
- * @subsection Run and blocking queues
- */
-
-/* declared in Schedule.c */
-extern StgTSO *run_queue_hds[], *run_queue_tls[];
-extern StgTSO *blocked_queue_hds[], *blocked_queue_tls[];
-extern StgTSO *ccalling_threadss[];
-
-#define run_queue_hd         run_queue_hds[CurrentProc]
-#define run_queue_tl         run_queue_tls[CurrentProc]
-#define blocked_queue_hd     blocked_queue_hds[CurrentProc]
-#define blocked_queue_tl     blocked_queue_tls[CurrentProc]
-#define pending_sparks_hd    pending_sparks_hds[CurrentProc]
-#define pending_sparks_tl    pending_sparks_tls[CurrentProc]
-#define ccalling_threads     ccalling_threadss[CurrentProc]
-
-/*
- * @node Spark queues, Processor related stuff, Run and blocking queues, Headers for GranSim specific objects
- * @subsection Spark queues
- */
-
-/*
-  In GranSim we use a double linked list to represent spark queues.
-  
-  This is more flexible, but slower, than the array of pointers
-  representation used in GUM. We use the flexibility to define new fields in
-  the rtsSpark structure, representing e.g. granularity info (see HWL's PhD
-  thesis), or info about the parent of a spark.
-*/
-
-/* Sparks and spark queues */
-typedef struct rtsSpark_
-{
-  StgClosure    *node;
-  nat            name, global;
-  nat            gran_info;      /* for granularity improvement mechanisms */
-  PEs            creator;        /* PE that created this spark (unused) */
-  struct rtsSpark_  *prev, *next;
-} rtsSpark;
-typedef rtsSpark *rtsSparkQ;
-
-/* The spark queues, proper */
-/* In GranSim this is a globally visible array of spark queues */
-extern rtsSparkQ pending_sparks_hds[];
-extern rtsSparkQ pending_sparks_tls[];
-
-/* Prototypes of those spark routines visible to compiler generated .hc */
-/* Routines only used inside the RTS are defined in rts/parallel GranSimRts.h */
-rtsSpark    *newSpark(StgClosure *node, 
-		      nat name, nat gran_info, nat size_info, 
-		      nat par_info, nat local);
-/* void         add_to_spark_queue(rtsSpark *spark); */
-
-/*
- * @node Processor related stuff, GranSim costs, Spark queues, Headers for GranSim specific objects
- * @subsection Processor related stuff
- */
-
-extern PEs CurrentProc;
-extern rtsTime CurrentTime[];  
-
-/* Maximum number of PEs that can be simulated */
-#define MAX_PROC             32 /* (BITS_IN(StgWord))  */ /* ToDo: fix this!! */
-/*
-#if MAX_PROC==16 
-#else 
-#error MAX_PROC should be 32 on this architecture 
-#endif
-*/
-
-/* #define CurrentTSO           CurrentTSOs[CurrentProc] */
-
-/* Processor numbers to bitmasks and vice-versa */
-#define MainProc	     0           /* Id of main processor */
-#define NO_PRI               0           /* dummy priority */
-#define MAX_PRI              10000       /* max possible priority */
-#define MAIN_PRI             MAX_PRI     /* priority of main thread */ 
-
-/* GrAnSim uses IdleProcs as bitmask to indicate which procs are idle */
-#define PE_NUMBER(n)          (1l << (long)n)
-#define ThisPE		      PE_NUMBER(CurrentProc)
-#define MainPE		      PE_NUMBER(MainProc)
-#define Everywhere	      (~0l)
-#define Nowhere	              (0l)
-#define Now                   CurrentTime[CurrentProc]
-
-#define IS_LOCAL_TO(ga,proc)  ((1l << (PEs) proc) & ga)
-
-#define GRAN_TIME_SLICE       1000        /* max time between 2 ReSchedules */
-
-/*
- * @node GranSim costs, STG called GranSim functions, Processor related stuff, Headers for GranSim specific objects
- * @subsection GranSim costs
- */
-
-/* Default constants for communication (see RtsFlags on how to change them) */
-
-#define LATENCY		           1000	/* Latency for single packet */
-#define ADDITIONAL_LATENCY	    100	/* Latency for additional packets */
-#define BASICBLOCKTIME	    	     10
-#define FETCHTIME	  	(LATENCY*2+MSGUNPACKTIME)
-#define LOCALUNBLOCKTIME  	     10
-#define GLOBALUNBLOCKTIME 	(LATENCY+MSGUNPACKTIME)
-
-#define	MSGPACKTIME		     0  /* Cost of creating a packet */
-#define	MSGUNPACKTIME		     0  /* Cost of receiving a packet */
-#define MSGTIDYTIME                  0  /* Cost of cleaning up after send */
-
-/* How much to increase GrAnSims internal packet size if an overflow 
-   occurs.
-   NB: This is a GrAnSim internal variable and is independent of the
-   simulated packet buffer size.
-*/
-
-#define GRANSIM_DEFAULT_PACK_BUFFER_SIZE     400
-#define REALLOC_SZ                           200
-
-/* extern W_ gran_mpacktime, gran_mtidytime, gran_munpacktime; */
-
-/* Thread cost model */
-#define THREADCREATETIME	   (25+THREADSCHEDULETIME)
-#define THREADQUEUETIME		    12  /* Cost of adding a thread to the running/runnable queue */
-#define THREADDESCHEDULETIME	    75  /* Cost of descheduling a thread */
-#define THREADSCHEDULETIME	    75  /* Cost of scheduling a thread */
-#define THREADCONTEXTSWITCHTIME	    (THREADDESCHEDULETIME+THREADSCHEDULETIME)
-
-/* Instruction Cost model (SPARC, including cache misses) */
-#define ARITH_COST	     	   1
-#define BRANCH_COST	     	   2
-#define LOAD_COST	  	   4
-#define STORE_COST	  	   4
-#define FLOAT_COST		   1 /* ? */
-
-#define HEAPALLOC_COST             11
-
-#define PRI_SPARK_OVERHEAD    5
-#define PRI_SCHED_OVERHEAD    5
-
-/*
- * @node STG called GranSim functions, STG-called routines, GranSim costs, Headers for GranSim specific objects
- * @subsection STG called GranSim functions
- */
-
-/* STG called GranSim functions */
-void GranSimAllocate(StgInt n);
-void GranSimUnallocate(StgInt n);
-void GranSimExec(StgWord ariths, StgWord branches, StgWord loads, StgWord stores, StgWord floats);
-StgInt GranSimFetch(StgClosure *node);
-void GranSimSpark(StgInt local, StgClosure *node);
-void GranSimSparkAt(rtsSpark *spark, StgClosure *where,StgInt identifier);
-void GranSimSparkAtAbs(rtsSpark *spark, PEs proc, StgInt identifier);
-void GranSimBlock(StgTSO *tso, PEs proc, StgClosure *node);
-
-
-/*
- * @node STG-called routines,  , STG called GranSim functions, Headers for GranSim specific objects
- * @subsection STG-called routines
- */
-
-/* Wrapped version of calls to GranSim-specific STG routines */
-
-/*
-#define DO_PERFORM_RESCHEDULE(liveness, always_reenter_node) PerformReschedule_wrapper(liveness, always_reenter_node)
-*/
-#define DO_GRAN_ALLOCATE(n)     STGCALL1(GranSimAllocate, n)
-#define DO_GRAN_UNALLOCATE(n)   STGCALL1(GranSimUnallocate, n)
-#define DO_GRAN_FETCH(node)     STGCALL1(GranSimFetch, node)
-#define DO_GRAN_EXEC(arith,branch,load,store,floats) GranSimExec(arith,branch,load,store,floats)
-
-/* 
-   ToDo: Clean up this mess of GRAN macros!!! -- HWL
-*/
-/* DO_GRAN_FETCH((StgClosure*)R1.p); */
-#define GRAN_FETCH()		/* nothing */
-
-#define GRAN_FETCH_AND_RESCHEDULE(liveness,reenter)	\
-          DO_GRAN_FETCH((StgClosure*)R1.p); 			        \
-          DO_GRAN_YIELD(liveness,ENTRY_CODE((D_)(*R1.p))); 
-/* RESTORE_EVERYTHING is done implicitly before entering threaded world again */
-
-/*
-  This is the only macro currently enabled;
-  It should check whether it is time for the current thread to yield
-  (e.g. if there is a more recent event in the queue) and it should check
-  whether node is local, via a call to GranSimFetch.
-  ToDo: split this in 2 routines:
-         - GRAN_YIELD (as it is below)
-	 - GRAN_FETCH (the rest of this macro)
-        emit only these 2 macros based on node's liveness
-	  node alive: emit both macros
-	  node not alive: do only a GRAN_YIELD
-	  
-        replace gran_yield_? with gran_block_? (they really block the current
-	thread)
-*/
-#define GRAN_RESCHEDULE(liveness,ptrs)  \
-          if (RET_STGCALL1(StgInt, GranSimFetch, (StgClosure*)R1.p)) {\
-            EXTFUN_RTS(gran_block_##ptrs); \
-            JMP_(gran_block_##ptrs);       \
-          } else {                         \
-	    if (TimeOfLastEvent < CurrentTime[CurrentProc] && \
-                HEAP_ALLOCED((StgClosure *)R1.p) && \
-                LOOKS_LIKE_GHC_INFO(get_itbl((StgClosure *)R1.p))) { \
-                                  EXTFUN_RTS(gran_yield_##ptrs); \
-                                  JMP_(gran_yield_##ptrs); \
-                } \
-            /* GRAN_YIELD(ptrs)  */             \
-	  }
-
-
-/*                                                   YIELD(liveness,reenter) */
-
-/* GRAN_YIELD(liveness_mask); */
-
-/* GRAN_FETCH_AND_RESCHEDULE(liveness_mask,reenter) */
-
-#define THREAD_CONTEXT_SWITCH(liveness_mask,reenter)	\
-        do { \
-	if (context_switch /* OR_INTERVAL_EXPIRED */) {	\
-          GRAN_RESCHEDULE(liveness_mask,reenter); \
-        } }while(0)
-
-#define GRAN_EXEC(arith,branch,load,store,floats)       \
-        { \
-          W_ cost = gran_arith_cost*arith +   \
-                    gran_branch_cost*branch + \
-                    gran_load_cost*load +   \
-                    gran_store_cost*store +   \
-                    gran_float_cost*floats;   \
-          CurrentTSO->gran.exectime += cost;                      \
-          CurrentTime[CurrentProc] += cost;                      \
-        }
-
-/* In GranSim we first check whether there is an event to handle; only if
-   this is the case (or the time slice is over in case of fair scheduling)
-   we do a yield, which is very similar to that in the concurrent world 
-   ToDo: check whether gran_yield_? can be merged with other yielding codes
-*/
-
-#define DO_GRAN_YIELD(ptrs)	if (!IgnoreYields && \
-                                    TimeOfLastEvent < CurrentTime[CurrentProc] && \
-				    HEAP_ALLOCED((StgClosure *)R1.p) && \
-                                    LOOKS_LIKE_GHC_INFO(get_itbl((StgClosure *)R1.p))) { \
-                                  EXTFUN_RTS(gran_yield_##ptrs); \
-                                  JMP_(gran_yield_##ptrs); \
-                                }
-
-#define GRAN_YIELD(ptrs)                                   \
-        {                                                   \
-          extern int context_switch;                          \
-          if ( (CurrentTime[CurrentProc]>=EndOfTimeSlice) ||   \
-               ((CurrentTime[CurrentProc]>=TimeOfNextEvent) && \
-	        (TimeOfNextEvent!=0) && !IgnoreEvents )) {     \
-	    /* context_switch = 1; */                          \
-            DO_GRAN_YIELD(ptrs);   \
-	  }                                                    \
-	}
-
-#define ADD_TO_SPARK_QUEUE(spark)	      \
-   STGCALL1(add_to_spark_queue,spark) \
-
-#endif /* GRAN */
-
-#endif /* GRANSIM_H */
diff --git a/includes/HsFFI.h b/includes/HsFFI.h
index cd9f7ede8015db4cd131e42e24acd3c919e77136..f489be56235985ba5aeaa0439ad08657db21db27 100644
--- a/includes/HsFFI.h
+++ b/includes/HsFFI.h
@@ -18,8 +18,7 @@ extern "C" {
 
 /* get types from GHC's runtime system */
 #include "ghcconfig.h"
-#include "RtsConfig.h"
-#include "StgTypes.h"
+#include "stg/Types.h"
 
 /* get limits for integral types */
 #ifdef HAVE_STDINT_H
@@ -84,10 +83,7 @@ typedef StgDouble		HsDouble;
 typedef StgInt			HsBool;
 typedef void*			HsPtr;          /* this should better match StgAddr */
 typedef void			(*HsFunPtr)(void); /* this should better match StgAddr */
-typedef void*			HsForeignPtr;   /* ... and this StgForeignPtr       */
 typedef void*			HsStablePtr;
-typedef void*			HsAddr;         /* DEPRECATED */
-typedef void*			HsForeignObj;   /* DEPRECATED */
 
 /* this should correspond to the type of StgChar in StgTypes.h */
 #define HS_CHAR_MIN		0
diff --git a/includes/MachDeps.h b/includes/MachDeps.h
index 7b71f7c37821b16ff5cb983b3ff95308b136d2cf..8e6db3ec0e1ffa31503fa7eea091a417382c919a 100644
--- a/includes/MachDeps.h
+++ b/includes/MachDeps.h
@@ -58,9 +58,6 @@
 #define SIZEOF_HSFUNPTR         SIZEOF_VOID_P
 #define ALIGNMENT_HSFUNPTR      ALIGNMENT_VOID_P
 
-#define SIZEOF_HSFOREIGNPTR     SIZEOF_VOID_P
-#define ALIGNMENT_HSFOREIGNPTR  ALIGNMENT_VOID_P
-
 #define SIZEOF_HSSTABLEPTR      SIZEOF_VOID_P
 #define ALIGNMENT_HSSTABLEPTR   ALIGNMENT_VOID_P
 
diff --git a/includes/Parallel.h b/includes/Parallel.h
deleted file mode 100644
index e18fbe9b2c592f894969c95d46477d6bbc4fc2e9..0000000000000000000000000000000000000000
--- a/includes/Parallel.h
+++ /dev/null
@@ -1,360 +0,0 @@
-/*
-  Definitions for GUM i.e. running on a parallel machine.
-
-  This section contains definitions applicable only to programs compiled
-  to run on a parallel machine, i.e. on GUM. Some of these definitions
-  are also used when simulating parallel execution, i.e. on GranSim.
-*/
-
-#ifndef PARALLEL_H
-#define PARALLEL_H
-
-#if defined(PAR) || defined(GRAN)        /* whole file */
-
-/*
- * @node Parallel definitions, End of File
- * @section Parallel definitions
- *
- * @menu
- * * Basic definitions::		
- * * GUM::			
- * * GranSim::			
- * @end menu
- *
- * @node Basic definitions, GUM, Parallel definitions, Parallel definitions
- * @subsection Basic definitions
- */
-
-/* This clashes with TICKY, but currently TICKY and PAR hate each other anyway */
-#define _HS  sizeofW(StgHeader)
-
-/* SET_PAR_HDR and SET_STATIC_PAR_HDR now live in ClosureMacros.h */
-
-/* Needed for dumping routines */
-#if defined(PAR)
-# define NODE_STR_LEN              20
-# define TIME_STR_LEN              120
-# define TIME                      rtsTime
-# define CURRENT_TIME              (msTime() - startTime)
-# define TIME_ON_PROC(p)           (msTime() - startTime)
-# define CURRENT_PROC              thisPE
-# define BINARY_STATS              RtsFlags.ParFlags.ParStats.Binary
-#elif defined(GRAN)
-# define NODE_STR_LEN              20
-# define TIME_STR_LEN              120
-# define TIME                      rtsTime
-# define CURRENT_TIME              CurrentTime[CurrentProc]
-# define TIME_ON_PROC(p)           CurrentTime[p]
-# define CURRENT_PROC              CurrentProc
-# define BINARY_STATS              RtsFlags.GranFlags.GranSimStats.Binary
-#endif
-
-#if defined(PAR)
-#  define MAX_PES	256		/* Maximum number of processors */
-	/* MAX_PES is enforced by SysMan, which does not
-	   allow more than this many "processors".
-	   This is important because PackGA [GlobAddr.lc]
-	   **assumes** that a PE# can fit in 8+ bits.
-	*/
-
-# define SPARK_POOLS 	2   /* no. of spark pools */
-# define REQUIRED_POOL 	0   /* idx of pool of mandatory sparks (concurrency) */
-# define ADVISORY_POOL 	1   /* idx of pool of advisory sparks (parallelism) */
-#endif
-
-/*
- * @menu
- * * GUM::			
- * * GranSim::			
- * @end menu
- *
- * @node GUM, GranSim, Basic definitions, Parallel definitions
- * @subsection GUM
- */
-
-#if defined(PAR) 
-/*
-  Symbolic constants for the packing code.
-  
-  This constant defines how many words of data we can pack into a single
-  packet in the parallel (GUM) system.
-*/
-
-/*
- * @menu
- * * Types::			
- * * Externs::			
- * * Prototypes::		
- * * Macros::			
- * @end menu
- *
- * @node Types, Externs, GUM, GUM
- * @subsubsection Types
- */
-
-/* Sparks and spark queues */
-typedef StgClosure  *rtsSpark;
-typedef rtsSpark    *rtsSparkQ;
-
-typedef struct rtsPackBuffer_ {
-  StgInt /* nat */           id; 
-  StgInt /* nat */           size;
-  StgInt /* nat */           unpacked_size;
-  StgTSO       *tso;
-  StgWord     *buffer[0];  
-} rtsPackBuffer;
-
-#define PACK_BUFFER_HDR_SIZE 4
-
-/*
- * @node Externs, Prototypes, Types, GUM
- * @subsubsection Externs
- */
-
-/* extern rtsBool do_sp_profile; */
-
-extern globalAddr theGlobalFromGA, theGlobalToGA;
-extern StgBlockedFetch *PendingFetches;
-extern GlobalTaskId    *allPEs;
-
-extern rtsBool      IAmMainThread, GlobalStopPending;
-/*extern rtsBool      fishing; */
-extern rtsTime      last_fish_arrived_at;
-extern nat          outstandingFishes;
-extern GlobalTaskId SysManTask;
-extern int          seed;     /* pseudo-random-number generator seed: */
-                              /* Initialised in ParInit */
-extern StgInt       threadId; /* Number of Threads that have existed on a PE */
-extern GlobalTaskId mytid;
-
-extern GlobalTaskId *allPEs;
-extern nat nPEs;
-extern nat sparksIgnored, sparksCreated, threadsIgnored, threadsCreated;
-extern nat advisory_thread_count;
-
-extern rtsBool InGlobalGC;  /* Are we in the midst of performing global GC */
-
-extern ullong startTime;    /* start of comp; in RtsStartup.c */
-
-/* the spark pools proper */
-extern rtsSpark *pending_sparks_hd[];  /* ptr to start of a spark pool */ 
-extern rtsSpark *pending_sparks_tl[];  /* ptr to end of a spark pool */ 
-extern rtsSpark *pending_sparks_lim[]; 
-extern rtsSpark *pending_sparks_base[]; 
-extern nat spark_limit[];
-
-extern rtsPackBuffer *PackBuffer;      /* size: can be set via option */
-extern rtsPackBuffer *buffer;
-extern rtsPackBuffer *freeBuffer;
-extern rtsPackBuffer *packBuffer;
-extern rtsPackBuffer *gumPackBuffer;
-
-extern nat thisPE;
-
-/* From Global.c 
-extern GALA *freeGALAList;
-extern GALA *freeIndirections;
-extern GALA *liveIndirections;
-extern GALA *liveRemoteGAs;
-*/
-
-/*
- * @node Prototypes, Macros, Externs, GUM
- * @subsubsection Prototypes
- */
-
-/* From ParInit.c */
-void          initParallelSystem(void);
-void          SynchroniseSystem(void);
-void          par_exit(StgInt n);
-
-PEs           taskIDtoPE (GlobalTaskId gtid);
-void          registerTask (GlobalTaskId gtid);
-globalAddr   *LAGAlookup (StgClosure *addr);
-StgClosure   *GALAlookup (globalAddr *ga);
-/*static GALA  *allocIndirection (StgPtr addr); */
-globalAddr   *makeGlobal (StgClosure *addr, rtsBool preferred);
-globalAddr   *setRemoteGA (StgClosure *addr, globalAddr *ga, rtsBool preferred);
-void          splitWeight (globalAddr *to, globalAddr *from);
-globalAddr   *addWeight (globalAddr *ga);
-void          initGAtables (void);
-void          RebuildLAGAtable (void);
-StgWord       PackGA (StgWord pe, int slot);
-
-# if defined(DEBUG)
-/* from Global.c */
-/* highest_slot breaks the abstraction of the slot counter for GAs; it is
-   only used for sanity checking and should used nowhere else */
-StgInt highest_slot (void); 
-# endif
-
-/*
- * @node Macros,  , Prototypes, GUM
- * @subsubsection Macros
- */
-
-/* delay (in us) between dying fish returning and sending out a new fish */
-#define FISH_DELAY                   1000
-/* max no. of outstanding spark steals */
-#define MAX_FISHES                   1  
-
-/* ToDo: check which of these is actually needed! */
-
-#    define PACK_HEAP_REQUIRED  ((RtsFlags.ParFlags.packBufferSize - PACK_HDR_SIZE) / (PACK_GA_SIZE + _HS) * (MIN_UPD_SIZE + 2))
-
-#  define MAX_GAS 	(RtsFlags.ParFlags.packBufferSize / PACK_GA_SIZE)
-
-
-#  define PACK_GA_SIZE	3	/* Size of a packed GA in words */
-			        /* Size of a packed fetch-me in words */
-#  define PACK_FETCHME_SIZE (PACK_GA_SIZE + _HS)
-
-#  define PACK_HDR_SIZE	1	/* Words of header in a packet */
-
-#  define PACK_PLC_SIZE	2	/* Size of a packed PLC in words */
-
-/*
-  Definitions relating to the entire parallel-only fixed-header field.
-
-  On GUM, the global addresses for each local closure are stored in a
-  separate hash table, rather then with the closure in the heap.  We call
-  @getGA@ to look up the global address associated with a local closure (0
-  is returned for local closures that have no global address), and @setGA@
-  to store a new global address for a local closure which did not
-  previously have one.  */
-
-#  define GA_HDR_SIZE			0
-  
-#  define GA(closure)		        getGA(closure)
-  
-#  define SET_GA(closure, ga)             setGA(closure,ga)
-#  define SET_STATIC_GA(closure)
-#  define SET_GRAN_HDR(closure,pe)
-#  define SET_STATIC_PROCS(closure)
-  
-#  define MAX_GA_WEIGHT			0	/* Treat as 2^n */
-  
-/* At the moment, there is no activity profiling for GUM.  This may change. */
-#  define SET_TASK_ACTIVITY(act)        /* nothing */
-
-/* 
-   The following macros are only needed for sanity checking (see Sanity.c).
-*/
-
-/* NB: this is PVM specific and should be updated for MPI etc
-       in PVM a task id (tid) is split into 2 parts: the id for the 
-       physical processor it is running on and an index of tasks running
-       on a processor; PVM_PE_MASK indicates which part of a tid holds the 
-       id of the physical processor (the other part of the word holds the 
-       index on that processor)
-       MAX_PVM_PES and MAX_PVM_TIDS are maximal values for these 2 components
-       in GUM we have an upper bound for the total number of PVM PEs allowed:
-       it's MAX_PE defined in Parallel.h
-       to check the slot field of a GA we call a fct highest_slot which just
-       returns the internal counter 
-*/
-#define PVM_PE_MASK    0xfffc0000
-#define MAX_PVM_PES    MAX_PES
-#define MAX_PVM_TIDS   MAX_PES
-
-#if 0
-#define LOOKS_LIKE_TID(tid)  (((tid & PVM_PE_MASK) != 0) && \
-                              (((tid & PVM_PE_MASK) + (tid & ~PVM_PE_MASK)) < MAX_PVM_TIDS))
-#define LOOKS_LIKE_SLOT(slot) (slot<=highest_slot())
-
-#define LOOKS_LIKE_GA(ga)    (LOOKS_LIKE_TID((ga)->payload.gc.gtid) && \
-                             LOOKS_LIKE_SLOT((ga)->payload.gc.slot))
-#else
-rtsBool looks_like_tid(StgInt tid);
-rtsBool looks_like_slot(StgInt slot);
-rtsBool looks_like_ga(globalAddr *ga);
-#define LOOKS_LIKE_TID(tid)  looks_like_tid(tid)
-#define LOOKS_LIKE_GA(ga)    looks_like_ga(ga)
-#endif /* 0 */
-
-#endif /* PAR */
-
-/*
- * @node GranSim,  , GUM, Parallel definitions
- * @subsection GranSim
- */
-
-#if defined(GRAN)
-/* ToDo: Check which of the PAR routines are needed in GranSim -- HWL */
-
-/*
- * @menu
- * * Types::			
- * * Prototypes::		
- * * Macros::			
- * @end menu
- *
- * @node Types, Prototypes, GranSim, GranSim
- * @subsubsection Types
- */
-
-typedef StgWord *StgBuffer;
-typedef struct rtsPackBuffer_ {
-  StgInt /* nat */           id;
-  StgInt /* nat */           size;
-  StgInt /* nat */           unpacked_size;
-  StgTSO       *tso;
-  StgWord      *buffer;  
-} rtsPackBuffer;
-
-/*
- * @node Macros,  , Prototypes, GranSim
- * @subsubsection Macros
- */
-
-/* max no. of outstanding spark steals */
-#define MAX_FISHES                   1  
-
-/* These are needed in the packing code to get the size of the packet
-   right. The closures itself are never built in GrAnSim. */
-#  define FETCHME_VHS				IND_VHS
-#  define FETCHME_HS				IND_HS
-  
-#  define FETCHME_GA_LOCN                       FETCHME_HS
-  
-#  define FETCHME_CLOSURE_SIZE(closure)		IND_CLOSURE_SIZE(closure)
-#  define FETCHME_CLOSURE_NoPTRS(closure)		0L
-#  define FETCHME_CLOSURE_NoNONPTRS(closure)	(IND_CLOSURE_SIZE(closure)-IND_VHS)
-  
-#  define MAX_GAS 	(RtsFlags.GranFlags.packBufferSize / PACK_GA_SIZE)
-#  define PACK_GA_SIZE	3	/* Size of a packed GA in words */
-			        /* Size of a packed fetch-me in words */
-#  define PACK_FETCHME_SIZE (PACK_GA_SIZE + _HS)
-#  define PACK_HDR_SIZE	4	/* Words of header in a packet */
-
-#    define PACK_HEAP_REQUIRED  \
-      (RtsFlags.GranFlags.packBufferSize * sizeofW(StgClosure*) + \
-      2 * sizeofW(StgInt) + sizeofW(StgTSO*))
-
-#    define PACK_FLAG_LOCN           0  
-#    define PACK_TSO_LOCN            1
-#    define PACK_UNPACKED_SIZE_LOCN  2
-#    define PACK_SIZE_LOCN           3
-#    define MAGIC_PACK_FLAG          0xfabc
-
-#  define GA_HDR_SIZE			1
-
-#  define PROCS_HDR_POSN		PAR_HDR_POSN
-#  define PROCS_HDR_SIZE		1
-
-/* Accessing components of the field */
-#  define PROCS(closure)	        ((closure)->header.gran.procs)
-/* SET_PROCS is now SET_GRAN_HEADER in ClosureMacros.h. */
-
-#endif   /* GRAN */
-
-/*
- * @node End of File,  , Parallel definitions
- * @section End of File
- */
-
-#endif /* defined(PAR) || defined(GRAN)         whole file */
-
-#endif /* Parallel_H */
-
-
diff --git a/includes/README b/includes/README
index 90695a6e7c8d158562dd625b1a82154af723237b..dbde6579f425dd592b283f4169cb244b0a8bde6b 100644
--- a/includes/README
+++ b/includes/README
@@ -83,17 +83,14 @@ Rts.h
   SpinLock.h
   TSO.h
   Updates.h		/* macros for performing updates */
-  GranSim.h
   Parallel.h
   Block.h
   Stable.h
   Hooks.h
   Signals.h
-  DNInvoke.h
-    Dotnet.h
-  RtsExternal.h		/* decls for RTS things required by .hc code */
-    (RtsAPI.h)
-    (HsFFI.h)
+  Adjustor.h            /* foreign import "wrapper", aka adjustors */
+  StgPrimFloat.h        /* primitive floating-point operations */
+  Hpc.h
 
 Cmm.h			/* included into .cmm source only */
   DerivedConstants.h	/* generated by mkDerivedConstants.c from other */
diff --git a/includes/Rts.h b/includes/Rts.h
index a32bf34309bb420e1e3be948127645764774c188..7358c368c1df72c0ed3ea3c42cb7703b72875794 100644
--- a/includes/Rts.h
+++ b/includes/Rts.h
@@ -1,8 +1,9 @@
 /* -----------------------------------------------------------------------------
  *
- * (c) The GHC Team, 1998-2004
+ * (c) The GHC Team, 1998-2009
  *
- * Top-level include file for the RTS itself
+ * RTS external APIs.  This file declares everything that the GHC RTS
+ * exposes externally.
  *
  * ---------------------------------------------------------------------------*/
 
@@ -18,8 +19,8 @@ extern "C" {
 #endif
 #include "Stg.h"
 
-// ToDo: move RtsExternal stuff elsewhere
-#include "RtsExternal.h"
+#include "HsFFI.h"
+#include "RtsAPI.h"
 
 // Turn off inlining when debugging - it obfuscates things
 #ifdef DEBUG
@@ -27,7 +28,7 @@ extern "C" {
 # define STATIC_INLINE static
 #endif
 
-#include "RtsTypes.h"
+#include "rts/Types.h"
 
 #if __GNUC__ >= 3
 /* Assume that a flexible array member at the end of a struct
@@ -63,11 +64,6 @@ extern "C" {
 
 #define sizeofW(t) ROUNDUP_BYTES_TO_WDS(sizeof(t))
 
-/* 
- * It's nice to be able to grep for casts
- */
-#define stgCast(ty,e) ((ty)(e))
-
 /* -----------------------------------------------------------------------------
    Assertions and Debuggery
 
@@ -75,7 +71,8 @@ extern "C" {
    ASSERT(p)  like CHECK(p) if DEBUG is on, otherwise a no-op
    -------------------------------------------------------------------------- */
 
-extern void _assertFail (const char *, unsigned int);
+void _assertFail(const char *filename, unsigned int linenum)
+   GNUC3_ATTRIBUTE(__noreturn__);
 
 #define CHECK(predicate)			\
 	if (predicate)				\
@@ -124,29 +121,6 @@ extern void _assertFail (const char *, unsigned int);
 #define FMT_Int64  "lld"
 #endif
 
-/*
- * Macros for untagging and retagging closure pointers
- * For more information look at the comments in Cmm.h
- */
-
-static inline StgWord
-GET_CLOSURE_TAG(StgClosure * p)
-{
-    return (StgWord)p & TAG_MASK;
-}
-
-static inline StgClosure *
-UNTAG_CLOSURE(StgClosure * p)
-{
-    return (StgClosure*)((StgWord)p & ~TAG_MASK);
-}
-
-static inline StgClosure *
-TAG_CLOSURE(StgWord tag,StgClosure * p)
-{
-    return (StgClosure*)((StgWord)p | tag);
-}
-
 /* -----------------------------------------------------------------------------
    Include everything STG-ish
    -------------------------------------------------------------------------- */
@@ -158,59 +132,62 @@ TAG_CLOSURE(StgWord tag,StgClosure * p)
  */
 #include <stdlib.h>
 
+#include "rts/Config.h"
+
 /* Global constaints */
-#include "Constants.h"
+#include "rts/Constants.h"
 
 /* Profiling information */
-#include "StgProf.h"
-#include "StgLdvProf.h"
-
-/* Storage format definitions */
-#include "StgFun.h"
-#include "Closures.h"
-#include "Liveness.h"
-#include "ClosureTypes.h"
-#include "InfoTables.h"
-#include "TSO.h"
-
-/* Info tables, closures & code fragments defined in the RTS */
-#include "StgMiscClosures.h"
+#include "rts/prof/CCS.h"
+#include "rts/prof/LDV.h"
 
 /* Parallel information */
-#include "OSThreads.h"
-#include "SMPClosureOps.h"
-#include "SpinLock.h"
-
-/* Macros for STG/C code */
-#include "Block.h"
-#include "ClosureMacros.h"
-
-/* Runtime-system hooks */
-#include "Hooks.h"
-#include "RtsMessages.h"
+#include "rts/OSThreads.h"
+#include "rts/SpinLock.h"
 
-/* for StablePtr/getStablePtr/deRefStablePtr */
-#include "Storage.h"
-#include "Stable.h"
+#include "rts/Messages.h"
 
-#include "ieee-flpt.h"
-
-#include "Signals.h"
+/* Storage format definitions */
+#include "rts/storage/FunTypes.h"
+#include "rts/storage/InfoTables.h"
+#include "rts/storage/Closures.h"
+#include "rts/storage/Liveness.h"
+#include "rts/storage/ClosureTypes.h"
+#include "rts/storage/TSO.h"
+#include "stg/MiscClosures.h" /* InfoTables, closures etc. defined in the RTS */
+#include "rts/storage/SMPClosureOps.h"
+#include "rts/storage/Block.h"
+#include "rts/storage/ClosureMacros.h"
+#include "rts/storage/MBlock.h"
+#include "rts/storage/GC.h"
+
+/* Other RTS external APIs */
+#include "rts/Parallel.h"
+#include "rts/Hooks.h"
+#include "rts/Signals.h"
+#include "rts/Hpc.h"
+#include "rts/Flags.h"
+#include "rts/Adjustor.h"
+#include "rts/FileLock.h"
+#include "rts/Globals.h"
+#include "rts/IOManager.h"
+#include "rts/Linker.h"
+#include "rts/Threads.h"
+#include "rts/Timer.h"
+#include "rts/Stable.h"
 
 /* Misc stuff without a home */
 DLL_IMPORT_RTS extern char **prog_argv;	/* so we can get at these from Haskell */
 DLL_IMPORT_RTS extern int    prog_argc;
 DLL_IMPORT_RTS extern char  *prog_name;
 
-extern void stackOverflow(void);
-
-extern void      __decodeDouble_2Int (I_ *man_sign, W_ *man_high, W_ *man_low, I_ *exp, StgDouble dbl);
-extern void      __decodeFloat_Int (I_ *man, I_ *exp, StgFloat flt);
+void stackOverflow(void);
 
-/* Initialising the whole adjustor thunk machinery. */
-extern void initAdjustor(void);
+void stg_exit(int n) GNU_ATTRIBUTE(__noreturn__);
 
-extern void stg_exit(int n) GNU_ATTRIBUTE(__noreturn__);
+#ifndef mingw32_HOST_OS
+int stg_sig_install (int, int, void *);
+#endif
 
 /* -----------------------------------------------------------------------------
    RTS Exit codes
@@ -236,11 +213,6 @@ extern void stg_exit(int n) GNU_ATTRIBUTE(__noreturn__);
   extern StgInt RIGHT_ARITY_##arity; \
   extern StgInt TAGGED_PTR_##arity;
 
-#define TICK_VAR_INI(arity) \
-  StgInt SLOW_CALLS_##arity = 1; \
-  StgInt RIGHT_ARITY_##arity = 1; \
-  StgInt TAGGED_PTR_##arity = 0;
-
 extern StgInt TOTAL_CALLS;
 
 TICK_VAR(1)
@@ -253,10 +225,6 @@ TICK_VAR(2)
 
 #define IF_RTSFLAGS(c,s)  if (RtsFlags.c) { s; }
 
-/* -----------------------------------------------------------------------------
-   Assertions and Debuggery
-   -------------------------------------------------------------------------- */
-
 #ifdef DEBUG
 #if IN_STG_CODE
 #define IF_DEBUG(c,s)  if (RtsFlags[0].DebugFlags.c) { s; }
diff --git a/includes/RtsAPI.h b/includes/RtsAPI.h
index 5160046da804ab79449d98c72c6a8ea4b6a33e07..2d2c35ca979691c340de4e52be358d43f15f8408 100644
--- a/includes/RtsAPI.h
+++ b/includes/RtsAPI.h
@@ -26,7 +26,7 @@ typedef enum {
     HeapExhausted /* out of memory */
 } SchedulerStatus;
 
-typedef StgClosure *HaskellObj;
+typedef struct StgClosure_ *HaskellObj;
 
 /*
  * An abstract type representing the token returned by rts_lock() and
diff --git a/includes/RtsConfig.h b/includes/RtsConfig.h
deleted file mode 100644
index 3b088b7417f52774a310adef977ecb3e7c23cefd..0000000000000000000000000000000000000000
--- a/includes/RtsConfig.h
+++ /dev/null
@@ -1,64 +0,0 @@
-/* -----------------------------------------------------------------------------
- *
- * (c) The GHC Team, 1998-2004
- *
- * Rts settings.
- *
- * NOTE: assumes #include "ghcconfig.h"
- * 
- * NB: THIS FILE IS INCLUDED IN NON-C CODE AND DATA!  #defines only please.
- * ---------------------------------------------------------------------------*/
-
-#ifndef RTSCONFIG_H
-#define RTSCONFIG_H
-
-/*
- * SUPPORT_LONG_LONGS controls whether we need to support long longs on a
- * particular platform.   On 64-bit platforms, we don't need to support
- * long longs since regular machine words will do just fine.
- */
-#if HAVE_LONG_LONG && SIZEOF_VOID_P < 8
-#define SUPPORT_LONG_LONGS 1
-#endif
-
-/*
- * Whether the runtime system will use libbfd for debugging purposes.
- */
-#if defined(DEBUG) && defined(HAVE_BFD_H) && defined(HAVE_LIBBFD) && !defined(_WIN32)
-#define USING_LIBBFD 1
-#endif
-
-/* -----------------------------------------------------------------------------
-   Labels - entry labels & info labels point to the same place in
-   TABLES_NEXT_TO_CODE, so we only generate the _info label.  Jumps
-   must therefore be directed to foo_info rather than foo_entry when
-   TABLES_NEXT_TO_CODE is on.
-
-   This isn't a good place for these macros, but they need to be
-   available to .cmm sources as well as C and we don't have a better
-   place.
-   -------------------------------------------------------------------------- */
-
-#ifdef TABLES_NEXT_TO_CODE
-#define ENTRY_LBL(f) f##_info
-#else
-#define ENTRY_LBL(f) f##_entry
-#endif
-
-#ifdef TABLES_NEXT_TO_CODE
-#define RET_LBL(f) f##_info
-#else
-#define RET_LBL(f) f##_ret
-#endif
-
-/* -----------------------------------------------------------------------------
-   Signals - supported on non-PAR versions of the runtime.  See RtsSignals.h.
-   -------------------------------------------------------------------------- */
-
-#define RTS_USER_SIGNALS 1
-
-/* Profile spin locks */
-
-#define PROF_SPIN
-
-#endif /* RTSCONFIG_H */
diff --git a/includes/RtsExternal.h b/includes/RtsExternal.h
deleted file mode 100644
index 2272b2a4291282033c61d82f70e3f0c54395ee42..0000000000000000000000000000000000000000
--- a/includes/RtsExternal.h
+++ /dev/null
@@ -1,129 +0,0 @@
-/* -----------------------------------------------------------------------------
- *
- * (c) The GHC Team, 1998-2004
- *
- * Things visible externally to the RTS
- *
- * -------------------------------------------------------------------------- */
-
-#ifndef RTSEXTERNAL_H
-#define RTSEXTERNAL_H
-
-/* The RTS public interface. */
-#include "RtsAPI.h"
-
-/* The standard FFI interface */
-#include "HsFFI.h"
-
-#ifdef HAVE_SYS_TYPES_H
-#include <sys/types.h>
-#endif
-
-/* -----------------------------------------------------------------------------
-   Functions exported by the RTS for use in Stg code
-   -------------------------------------------------------------------------- */
-
-#if IN_STG_CODE
-extern void newCAF(void*);
-#else
-extern void newCAF(StgClosure*);
-#endif
-
-/* ToDo: remove? */
-extern HsInt genSymZh(void);
-extern HsInt resetGenSymZh(void);
-
-/* Alternate to raise(3) for threaded rts, for OpenBSD */
-extern int genericRaise(int sig);
-
-/* Concurrency/Exception PrimOps. */
-extern int cmp_thread(StgPtr tso1, StgPtr tso2);
-extern int rts_getThreadId(StgPtr tso);
-extern int forkOS_createThread ( HsStablePtr entry );
-extern pid_t forkProcess(HsStablePtr *entry);
-extern HsBool rtsSupportsBoundThreads(void);
-extern StgInt newSpark (StgRegTable *reg, StgClosure *p);
-extern void stopTimer(void);
-extern unsigned int n_capabilities;
-
-/* grimy low-level support functions defined in StgPrimFloat.c */
-extern StgDouble __encodeDouble (I_ size, StgByteArray arr, I_ e);
-extern StgDouble __2Int_encodeDouble (I_ j_high, I_ j_low, I_ e);
-extern StgDouble __int_encodeDouble (I_ j, I_ e);
-extern StgDouble __word_encodeDouble (W_ j, I_ e);
-extern StgFloat  __encodeFloat (I_ size, StgByteArray arr, I_ e);
-extern StgFloat  __int_encodeFloat (I_ j, I_ e);
-extern StgFloat  __word_encodeFloat (W_ j, I_ e);
-extern StgInt    isDoubleNaN(StgDouble d);
-extern StgInt    isDoubleInfinite(StgDouble d);
-extern StgInt    isDoubleDenormalized(StgDouble d);
-extern StgInt    isDoubleNegativeZero(StgDouble d);
-extern StgInt    isFloatNaN(StgFloat f);
-extern StgInt    isFloatInfinite(StgFloat f);
-extern StgInt    isFloatDenormalized(StgFloat f);
-extern StgInt    isFloatNegativeZero(StgFloat f);
-
-/* Suspending/resuming threads around foreign calls */
-extern void *        suspendThread ( StgRegTable * );
-extern StgRegTable * resumeThread  ( void * );
-
-/* scheduler stuff */
-extern void stg_scheduleThread (StgRegTable *reg, struct StgTSO_ *tso);
-
-/* Creating and destroying an adjustor thunk */
-extern void*  createAdjustor(int cconv, StgStablePtr hptr, StgFunPtr wptr,
-                             char *typeString);
-extern void   freeHaskellFunctionPtr(void* ptr);
-
-/* Hpc stuff */
-extern int hs_hpc_module(char *modName, StgWord32 modCount, StgWord32 modHashNo,StgWord64 *tixArr);
-// Simple linked list of modules
-typedef struct _HpcModuleInfo {
-  char *modName;		// name of module
-  StgWord32 tickCount;		// number of ticks
-  StgWord32 tickOffset;		// offset into a single large .tix Array
-  StgWord32 hashNo;		// Hash number for this module's mix info
-  StgWord64 *tixArr;		// tix Array; local for this module
-  struct _HpcModuleInfo *next;
-} HpcModuleInfo;
-
-extern HpcModuleInfo *hs_hpc_rootModule(void);
-
-
-#if defined(mingw32_HOST_OS)
-extern int  rts_InstallConsoleEvent ( int action, StgStablePtr *handler );
-extern void rts_ConsoleHandlerDone  ( int ev );
-#else
-extern int stg_sig_install (int, int, void *);
-#endif
-
-#if defined(mingw32_HOST_OS)
-extern StgInt console_handler;
-#else
-extern StgInt *signal_handlers;
-#endif
-
-#if defined(mingw32_HOST_OS)
-void *getIOManagerEvent (void);
-HsWord32 readIOManagerEvent (void);
-void sendIOManagerEvent (HsWord32 event);
-#else
-extern void setIOManagerPipe (int fd);
-#endif
-
-extern void* allocateExec(unsigned int len, void **exec_addr);
-
-// Breakpoint stuff
-
-/* -----------------------------------------------------------------------------
-   Storage manager stuff exported
-   -------------------------------------------------------------------------- */
-
-extern void performGC(void);
-extern void performMajorGC(void);
-extern HsInt64 getAllocations( void );
-extern void revertCAFs( void );
-extern void dirty_MUT_VAR(StgRegTable *reg, StgClosure *p);
-extern void dirty_MVAR(StgRegTable *reg, StgClosure *p);
-
-#endif /*  RTSEXTERNAL_H */
diff --git a/includes/RtsFlags.h b/includes/RtsFlags.h
index ab6f2982cdaad73e5b114b5e3e3889114097293f..778fccf40aa7eeace041e4ce5997e1c6072e3307 100644
--- a/includes/RtsFlags.h
+++ b/includes/RtsFlags.h
@@ -1,239 +1,2 @@
-/* -----------------------------------------------------------------------------
- *
- * (c) The GHC Team, 1998-1999
- *
- * Datatypes that holds the command-line flag settings.
- *
- * ---------------------------------------------------------------------------*/
-
-#ifndef RTSFLAGS_H
-#define RTSFLAGS_H
-
-#include <stdio.h>
-
-/* For defaults, see the @initRtsFlagsDefaults@ routine. */
-
-struct GC_FLAGS {
-    FILE   *statsFile;
-    nat	    giveStats;
-#define NO_GC_STATS	 0
-#define COLLECT_GC_STATS 1
-#define ONELINE_GC_STATS 2
-#define SUMMARY_GC_STATS 3
-#define VERBOSE_GC_STATS 4
-
-    nat     maxStkSize;         /* in *words* */
-    nat     initialStkSize;     /* in *words* */
-
-    nat	    maxHeapSize;        /* in *blocks* */
-    nat     minAllocAreaSize;   /* in *blocks* */
-    nat     minOldGenSize;      /* in *blocks* */
-    nat     heapSizeSuggestion; /* in *blocks* */
-    double  oldGenFactor;
-    double  pcFreeHeap;
-
-    nat     generations;
-    nat     steps;
-    rtsBool squeezeUpdFrames;
-
-    rtsBool compact;		/* True <=> "compact all the time" */
-    double  compactThreshold;
-
-    rtsBool sweep;		/* use "mostly mark-sweep" instead of copying
-                                 * for the oldest generation */
-    rtsBool ringBell;
-    rtsBool frontpanel;
-
-    int idleGCDelayTime;	/* in milliseconds */
-
-    StgWord heapBase;           /* address to ask the OS for memory */
-};
-
-struct DEBUG_FLAGS {  
-    /* flags to control debugging output & extra checking in various subsystems */
-    rtsBool scheduler;      /* 's' */
-    rtsBool interpreter;    /* 'i' */
-    rtsBool weak;           /* 'w' */
-    rtsBool gccafs;         /* 'G' */
-    rtsBool gc;             /* 'g' */
-    rtsBool block_alloc;    /* 'b' */
-    rtsBool sanity;         /* 'S'   warning: might be expensive! */
-    rtsBool stable;         /* 't' */
-    rtsBool prof;           /* 'p' */
-    rtsBool eventlog;       /* 'e' */
-    rtsBool linker;         /* 'l'   the object linker */
-    rtsBool apply;          /* 'a' */
-    rtsBool stm;            /* 'm' */
-    rtsBool squeeze;        /* 'z'  stack squeezing & lazy blackholing */
-    rtsBool hpc; 	    /* 'c' coverage */
-    rtsBool timestamp;          /* add timestamps to traces */
-};
-
-struct COST_CENTRE_FLAGS {
-    unsigned int	    doCostCentres;
-# define COST_CENTRES_SUMMARY	1
-# define COST_CENTRES_VERBOSE	2 /* incl. serial time profile */
-# define COST_CENTRES_ALL	3
-# define COST_CENTRES_XML       4
-
-    int	    profilerTicks;   /* derived */
-    int	    msecsPerTick;    /* derived */
-};
-
-struct PROFILING_FLAGS {
-    unsigned int	doHeapProfile;
-# define NO_HEAP_PROFILING	0	/* N.B. Used as indexes into arrays */
-# define HEAP_BY_CCS		1
-# define HEAP_BY_MOD		2
-# define HEAP_BY_DESCR		4
-# define HEAP_BY_TYPE		5
-# define HEAP_BY_RETAINER       6
-# define HEAP_BY_LDV            7
-
-# define HEAP_BY_CLOSURE_TYPE   8
-
-    nat                 profileInterval;      /* delta between samples (in ms) */
-    nat                 profileIntervalTicks; /* delta between samples (in 'ticks') */
-    rtsBool             includeTSOs;
-
-
-    rtsBool		showCCSOnException;
-
-    nat                 maxRetainerSetSize;
-
-    nat                 ccsLength;
-
-    char*               modSelector;
-    char*               descrSelector;
-    char*               typeSelector;
-    char*               ccSelector;
-    char*               ccsSelector;
-    char*               retainerSelector;
-    char*               bioSelector;
-
-};
-
-#ifdef EVENTLOG
-struct EVENTLOG_FLAGS {
-  rtsBool doEventLogging;
-};
-#endif
-
-struct CONCURRENT_FLAGS {
-    int ctxtSwitchTime;		/* in milliseconds */
-    int ctxtSwitchTicks;	/* derived */
-};
-
-struct MISC_FLAGS {
-    int tickInterval;     /* in milliseconds */
-    rtsBool install_signal_handlers;
-    rtsBool machineReadable;
-    StgWord linkerMemBase;       /* address to ask the OS for memory
-                                  * for the linker, NULL ==> off */
-};
-
-#ifdef THREADED_RTS
-struct PAR_FLAGS {
-  nat            nNodes;         /* number of threads to run simultaneously */
-  rtsBool        migrate;        /* migrate threads between capabilities */
-  rtsBool        wakeupMigrate;  /* migrate a thread on wakeup */
-  unsigned int	 maxLocalSparks;
-  rtsBool        parGcEnabled;   /* enable parallel GC */
-  rtsBool        parGcGen;       /* do parallel GC in this generation
-                                  * and higher only */
-  rtsBool        parGcLoadBalancing; /* do load-balancing in parallel GC */
-  rtsBool        setAffinity;    /* force thread affinity with CPUs */
-};
-#endif /* THREADED_RTS */
-
-struct TICKY_FLAGS {
-    rtsBool showTickyStats;
-    FILE   *tickyFile;
-};
-
-#ifdef USE_PAPI
-#define MAX_PAPI_USER_EVENTS 8
-
-struct PAPI_FLAGS {
-    nat     eventType;          /* The type of events to count */
-    nat     numUserEvents;
-    char *  userEvents[MAX_PAPI_USER_EVENTS];
-};
-
-#define PAPI_FLAG_CACHE_L1 1
-#define PAPI_FLAG_CACHE_L2 2
-#define PAPI_FLAG_BRANCH 3
-#define PAPI_FLAG_STALLS 4
-#define PAPI_FLAG_CB_EVENTS 5
-#define PAPI_USER_EVENTS 6
-
-#endif
-
-/* Put them together: */
-
-typedef struct _RTS_FLAGS {
-    /* The first portion of RTS_FLAGS is invariant. */
-    struct GC_FLAGS	     GcFlags;
-    struct CONCURRENT_FLAGS  ConcFlags;
-    struct MISC_FLAGS        MiscFlags;
-    struct DEBUG_FLAGS	     DebugFlags;
-    struct COST_CENTRE_FLAGS CcFlags;
-    struct PROFILING_FLAGS   ProfFlags;
-#ifdef EVENTLOG
-    struct EVENTLOG_FLAGS    EventLogFlags;
-#endif
-    struct TICKY_FLAGS	     TickyFlags;
-
-#if defined(THREADED_RTS)
-    struct PAR_FLAGS	ParFlags;
-#endif
-#ifdef USE_PAPI
-    struct PAPI_FLAGS   PapiFlags;
-#endif
-} RTS_FLAGS;
-
-#ifdef COMPILING_RTS_MAIN
-extern DLLIMPORT RTS_FLAGS RtsFlags;
-#elif IN_STG_CODE
-/* Hack because the C code generator can't generate '&label'. */
-extern RTS_FLAGS RtsFlags[];
-#else
-extern RTS_FLAGS RtsFlags;
-#endif
-
-/* Routines that operate-on/to-do-with RTS flags: */
-
-extern void initRtsFlagsDefaults(void);
-extern void setupRtsFlags(int *argc, char *argv[], int *rts_argc, char *rts_argv[]);
-extern void setProgName(char *argv[]);
-
-
-/*
- * The printf formats are here, so we are less likely to make
- * overly-long filenames (with disastrous results).  No more than 128
- * chars, please!  
- */
-
-#define STATS_FILENAME_MAXLEN	128
-
-#define GR_FILENAME_FMT		"%0.124s.gr"
-#define GR_FILENAME_FMT_GUM	"%0.120s.%03d.%s"
-#define HP_FILENAME_FMT		"%0.124s.hp"
-#define LIFE_FILENAME_FMT	"%0.122s.life"
-#define PROF_FILENAME_FMT	"%0.122s.prof"
-#define PROF_FILENAME_FMT_GUM	"%0.118s.%03d.prof"
-#define QP_FILENAME_FMT		"%0.124s.qp"
-#define STAT_FILENAME_FMT	"%0.122s.stat"
-#define TICKY_FILENAME_FMT	"%0.121s.ticky"
-#define TIME_FILENAME_FMT	"%0.122s.time"
-#define TIME_FILENAME_FMT_GUM	"%0.118s.%03d.time"
-
-/* an "int" so as to match normal "argc" */
-/* Now defined in Stg.h (lib/std/cbits need these too.)
-extern int     prog_argc;
-extern char  **prog_argv;
-*/
-extern int     rts_argc;  /* ditto */
-extern char   *rts_argv[];
-
-#endif	/* RTSFLAGS_H */
+#warning RtsFlags.h is DEPRECATED; please just #include "Rts.h"
+#include "Rts.h"
diff --git a/includes/Stable.h b/includes/Stable.h
deleted file mode 100644
index 9752a534bb8d17764e960a20ded1530913bd3616..0000000000000000000000000000000000000000
--- a/includes/Stable.h
+++ /dev/null
@@ -1,63 +0,0 @@
-/* -----------------------------------------------------------------------------
- *
- * (c) The GHC Team, 1998-2004
- *
- * Stable Pointers: A stable pointer is represented as an index into
- * the stable pointer table in the low BITS_PER_WORD-8 bits with a
- * weight in the upper 8 bits.
- *
- * SUP: StgStablePtr used to be a synonym for StgWord, but stable pointers
- * are guaranteed to be void* on the C-side, so we have to do some occasional
- * casting. Size is not a matter, because StgWord is always the same size as
- * a void*.
- *
- * ---------------------------------------------------------------------------*/
-
-#ifndef STABLE_H
-#define STABLE_H
-
-/* -----------------------------------------------------------------------------
-   External C Interface
-   -------------------------------------------------------------------------- */
-
-EXTERN_INLINE StgPtr  deRefStablePtr(StgStablePtr stable_ptr);
-extern void           freeStablePtr(StgStablePtr sp);
-extern StgStablePtr   splitStablePtr(StgStablePtr sp);
-extern StgStablePtr   getStablePtr(StgPtr p);
-
-/* -----------------------------------------------------------------------------
-   PRIVATE from here.
-   -------------------------------------------------------------------------- */
-
-typedef struct { 
-  StgPtr  addr;			/* Haskell object, free list, or NULL */
-  StgPtr  old;			/* old Haskell object, used during GC */
-  StgWord ref;			/* used for reference counting */
-  StgClosure *sn_obj;		/* the StableName object (or NULL) */
-} snEntry;
-
-extern DLL_IMPORT_RTS snEntry *stable_ptr_table;
-
-extern void freeStablePtr(StgStablePtr sp);
-
-EXTERN_INLINE
-StgPtr deRefStablePtr(StgStablePtr sp)
-{
-    ASSERT(stable_ptr_table[(StgWord)sp].ref > 0);
-    return stable_ptr_table[(StgWord)sp].addr;
-}
-
-extern void    initStablePtrTable    ( void );
-extern void    exitStablePtrTable    ( void );
-extern void    enlargeStablePtrTable ( void );
-extern StgWord lookupStableName      ( StgPtr p );
-
-extern void    markStablePtrTable    ( evac_fn evac, void *user );
-extern void    threadStablePtrTable  ( evac_fn evac, void *user );
-extern void    gcStablePtrTable      ( void );
-extern void    updateStablePtrTable  ( rtsBool full );
-
-extern void    stablePtrPreGC        ( void );
-extern void    stablePtrPostGC       ( void );
-
-#endif
diff --git a/includes/Stg.h b/includes/Stg.h
index 06a866256e0d94408d2f72a6f5443a6964889af2..0344b70fd29996fa5897eb40a0ddfdaa3df4e6ca 100644
--- a/includes/Stg.h
+++ b/includes/Stg.h
@@ -23,11 +23,16 @@
 #ifndef STG_H
 #define STG_H
 
-
-/* If we include "Stg.h" directly, we're in STG code, and we therefore
- * get all the global register variables, macros etc. that go along
- * with that.  If "Stg.h" is included via "Rts.h", we're assumed to
- * be in vanilla C.
+/*
+ * If we are compiling a .hc file, then we want all the register
+ * variables.  This is the what happens if you #include "Stg.h" first:
+ * we assume this is a .hc file, and set IN_STG_CODE==1, which later
+ * causes the register variables to be enabled in stg/Regs.h.
+ *
+ * If instead "Rts.h" is included first, then we are compiling a
+ * vanilla C file.  Everything from Stg.h is provided, except that
+ * IN_STG_CODE is not defined, and the register variables will not be
+ * active.
  */
 #ifndef IN_STG_CODE
 # define IN_STG_CODE 1
@@ -47,7 +52,6 @@
 
 /* Configuration */
 #include "ghcconfig.h"
-#include "RtsConfig.h"
 
 /* The code generator calls the math functions directly in .hc code.
    NB. after configuration stuff above, because this sets #defines
@@ -59,7 +63,7 @@
    -------------------------------------------------------------------------- */
 
 /*
- * The C backend like to refer to labels by just mentioning their
+ * The C backend likes to refer to labels by just mentioning their
  * names.  Howevver, when a symbol is declared as a variable in C, the
  * C compiler will implicitly dereference it when it occurs in source.
  * So we must subvert this behaviour for .hc files by declaring
@@ -165,7 +169,7 @@
    -------------------------------------------------------------------------- */
 
 #include "MachDeps.h"
-#include "StgTypes.h"
+#include "stg/Types.h"
 
 /* -----------------------------------------------------------------------------
    Shorthand forms
@@ -174,24 +178,14 @@
 typedef StgChar		C_;
 typedef StgWord		W_;
 typedef StgWord*	P_;
-typedef P_*		PP_;
 typedef StgInt		I_;
-typedef StgAddr	        A_;
-typedef const StgWord*  D_;
-typedef StgFunPtr       F_;
-typedef StgByteArray    B_;
-typedef StgClosurePtr   L_;
-
-typedef StgInt64        LI_;
-typedef StgWord64       LW_;
-
-#define IF_(f)		static F_ GNUC3_ATTRIBUTE(used) f(void) 
-#define FN_(f)		F_ f(void)
-#define EF_(f)		extern F_ f(void)
-
 typedef StgWord StgWordArray[];
+
 #define EI_(X)          extern StgWordArray (X) GNU_ATTRIBUTE(aligned (8))
 #define II_(X)          static StgWordArray (X) GNU_ATTRIBUTE(aligned (8))
+#define IF_(f)		static StgFunPtr GNUC3_ATTRIBUTE(used) f(void) 
+#define FN_(f)		StgFunPtr f(void)
+#define EF_(f)		extern StgFunPtr f(void)
 
 /* -----------------------------------------------------------------------------
    Tail calls
@@ -200,27 +194,26 @@ typedef StgWord StgWordArray[];
    to be before all procedures (inline & out-of-line).
    -------------------------------------------------------------------------- */
 
-#include "TailCalls.h"
+#include "stg/TailCalls.h"
 
 /* -----------------------------------------------------------------------------
    Other Stg stuff...
    -------------------------------------------------------------------------- */
 
-#include "StgDLL.h"
-#include "MachRegs.h"
-#include "Regs.h"
-
-#include "TickyCounters.h"
+#include "stg/DLL.h"
+#include "stg/MachRegs.h"
+#include "stg/Regs.h"
+#include "stg/Ticky.h"
 
 #if IN_STG_CODE
 /*
  * This is included later for RTS sources, after definitions of
  * StgInfoTable, StgClosure and so on. 
  */
-#include "StgMiscClosures.h"
+#include "stg/MiscClosures.h"
 #endif
 
-#include "SMP.h" // write_barrier() inline is required 
+#include "stg/SMP.h" // write_barrier() inline is required 
 
 /* -----------------------------------------------------------------------------
    Moving Floats and Doubles
@@ -342,7 +335,7 @@ INLINE_HEADER StgDouble PK_DBL(W_ p_src[])
    In both cases the memory location might not be 64-bit aligned.
    -------------------------------------------------------------------------- */
 
-#ifdef SUPPORT_LONG_LONGS
+#if SIZEOF_HSWORD == 4
 
 typedef struct
   { StgWord dhi;
@@ -413,7 +406,7 @@ INLINE_HEADER StgInt64 PK_Int64(W_ p_src[])
     return p_src[0];
 }
 
-#endif
+#endif /* SIZEOF_HSWORD == 4 */
 
 /* -----------------------------------------------------------------------------
    Split markers
diff --git a/includes/Storage.h b/includes/Storage.h
deleted file mode 100644
index 5d3e7733cf033d7cc2cd28e1fd77eb0a702bea30..0000000000000000000000000000000000000000
--- a/includes/Storage.h
+++ /dev/null
@@ -1,583 +0,0 @@
-/* -----------------------------------------------------------------------------
- *
- * (c) The GHC Team, 1998-2004
- *
- * External Storage Manger Interface
- *
- * ---------------------------------------------------------------------------*/
-
-#ifndef STORAGE_H
-#define STORAGE_H
-
-#include <stddef.h>
-#include "OSThreads.h"
-#include "SMP.h"
-
-/* -----------------------------------------------------------------------------
- * Generational GC
- *
- * We support an arbitrary number of generations, with an arbitrary number
- * of steps per generation.  Notes (in no particular order):
- *
- *       - all generations except the oldest should have two steps.  This gives
- *         objects a decent chance to age before being promoted, and in
- *         particular will ensure that we don't end up with too many
- *         thunks being updated in older generations.
- *
- *       - the oldest generation has one step.  There's no point in aging
- *         objects in the oldest generation.
- *
- *       - generation 0, step 0 (G0S0) is the allocation area.  It is given
- *         a fixed set of blocks during initialisation, and these blocks
- *         are never freed.
- *
- *       - during garbage collection, each step which is an evacuation
- *         destination (i.e. all steps except G0S0) is allocated a to-space.
- *         evacuated objects are allocated into the step's to-space until
- *         GC is finished, when the original step's contents may be freed
- *         and replaced by the to-space.
- *
- *       - the mutable-list is per-generation (not per-step).  G0 doesn't 
- *         have one (since every garbage collection collects at least G0).
- * 
- *       - block descriptors contain pointers to both the step and the
- *         generation that the block belongs to, for convenience.
- *
- *       - static objects are stored in per-generation lists.  See GC.c for
- *         details of how we collect CAFs in the generational scheme.
- *
- *       - large objects are per-step, and are promoted in the same way
- *         as small objects, except that we may allocate large objects into
- *         generation 1 initially.
- *
- * ------------------------------------------------------------------------- */
-
-typedef struct step_ {
-    unsigned int         no;		// step number in this generation
-    unsigned int         abs_no;	// absolute step number
-
-    struct generation_ * gen;		// generation this step belongs to
-    unsigned int         gen_no;        // generation number (cached)
-
-    bdescr *             blocks;	// blocks in this step
-    unsigned int         n_blocks;	// number of blocks
-    unsigned int         n_words;       // number of words
-
-    struct step_ *       to;		// destination step for live objects
-
-    bdescr *             large_objects;	 // large objects (doubly linked)
-    unsigned int         n_large_blocks; // no. of blocks used by large objs
-
-    StgTSO *             threads;       // threads in this step
-                                        // linked via global_link
-
-    // ------------------------------------
-    // Fields below are used during GC only
-
-    // During GC, if we are collecting this step, blocks and n_blocks
-    // are copied into the following two fields.  After GC, these blocks
-    // are freed.
-
-#if defined(THREADED_RTS)
-    char pad[128];                      // make sure the following is
-                                        // on a separate cache line.
-    SpinLock     sync_large_objects;    // lock for large_objects
-                                        //    and scavenged_large_objects
-#endif
-
-    int          mark;			// mark (not copy)? (old gen only)
-    int          compact;		// compact (not sweep)? (old gen only)
-
-    bdescr *     old_blocks;	        // bdescr of first from-space block
-    unsigned int n_old_blocks;		// number of blocks in from-space
-    unsigned int live_estimate;         // for sweeping: estimate of live data
-    
-    bdescr *     part_blocks;           // partially-full scanned blocks
-    unsigned int n_part_blocks;         // count of above
-
-    bdescr *     scavenged_large_objects;  // live large objs after GC (d-link)
-    unsigned int n_scavenged_large_blocks; // size (not count) of above
-
-    bdescr *     bitmap;  		// bitmap for compacting collection
-
-    StgTSO *     old_threads;
-
-} step;
-
-
-typedef struct generation_ {
-    unsigned int   no;			// generation number
-    step *         steps;		// steps
-    unsigned int   n_steps;		// number of steps
-    unsigned int   max_blocks;		// max blocks in step 0
-    bdescr        *mut_list;      	// mut objects in this gen (not G0)
-    
-    // stats information
-    unsigned int collections;
-    unsigned int par_collections;
-    unsigned int failed_promotions;
-
-    // temporary use during GC:
-    bdescr        *saved_mut_list;
-} generation;
-
-extern generation * RTS_VAR(generations);
-
-extern generation * RTS_VAR(g0);
-extern step * RTS_VAR(g0s0);
-extern generation * RTS_VAR(oldest_gen);
-extern step * RTS_VAR(all_steps);
-extern nat RTS_VAR(total_steps);
-
-/* -----------------------------------------------------------------------------
-   Initialisation / De-initialisation
-   -------------------------------------------------------------------------- */
-
-extern void initStorage(void);
-extern void exitStorage(void);
-extern void freeStorage(void);
-
-/* -----------------------------------------------------------------------------
-   Generic allocation
-
-   StgPtr allocateInGen(generation *g, nat n)
-                                Allocates a chunk of contiguous store
-   				n words long in generation g,
-   				returning a pointer to the first word.
-   				Always succeeds.
-				
-   StgPtr allocate(nat n)       Equaivalent to allocateInGen(g0)
-				
-   StgPtr allocateLocal(Capability *cap, nat n)
-                                Allocates memory from the nursery in
-				the current Capability.  This can be
-				done without taking a global lock,
-                                unlike allocate().
-
-   StgPtr allocatePinned(nat n) Allocates a chunk of contiguous store
-   				n words long, which is at a fixed
-				address (won't be moved by GC).  
-				Returns a pointer to the first word.
-				Always succeeds.
-				
-				NOTE: the GC can't in general handle
-				pinned objects, so allocatePinned()
-				can only be used for ByteArrays at the
-				moment.
-
-				Don't forget to TICK_ALLOC_XXX(...)
-				after calling allocate or
-				allocatePinned, for the
-				benefit of the ticky-ticky profiler.
-
-   rtsBool doYouWantToGC(void)  Returns True if the storage manager is
-   				ready to perform a GC, False otherwise.
-
-   lnat  allocatedBytes(void)  Returns the number of bytes allocated
-                                via allocate() since the last GC.
-				Used in the reporting of statistics.
-
-   -------------------------------------------------------------------------- */
-
-extern StgPtr  allocate        ( lnat n );
-extern StgPtr  allocateInGen   ( generation *g, lnat n );
-extern StgPtr  allocateLocal   ( Capability *cap, lnat n );
-extern StgPtr  allocatePinned  ( lnat n );
-extern lnat    allocatedBytes  ( void );
-
-extern bdescr * RTS_VAR(small_alloc_list);
-extern bdescr * RTS_VAR(large_alloc_list);
-extern bdescr * RTS_VAR(pinned_object_block);
-
-extern nat RTS_VAR(alloc_blocks);
-extern nat RTS_VAR(alloc_blocks_lim);
-
-INLINE_HEADER rtsBool
-doYouWantToGC( void )
-{
-  return (alloc_blocks >= alloc_blocks_lim);
-}
-
-/* memory allocator for executable memory */
-extern void* allocateExec(unsigned int len, void **exec_addr);
-extern void freeExec (void *p);
-
-/* for splitting blocks groups in two */
-extern bdescr * splitLargeBlock (bdescr *bd, nat blocks);
-
-/* -----------------------------------------------------------------------------
-   Performing Garbage Collection
-
-   GarbageCollect(get_roots)    Performs a garbage collection.  
-				'get_roots' is called to find all the 
-				roots that the system knows about.
-
-
-   -------------------------------------------------------------------------- */
-
-extern void GarbageCollect(rtsBool force_major_gc, nat gc_type, Capability *cap);
-
-/* -----------------------------------------------------------------------------
-   Generational garbage collection support
-
-   recordMutable(StgPtr p)       Informs the garbage collector that a
-				 previously immutable object has
-				 become (permanently) mutable.  Used
-				 by thawArray and similar.
-
-   updateWithIndirection(p1,p2)  Updates the object at p1 with an
-				 indirection pointing to p2.  This is
-				 normally called for objects in an old
-				 generation (>0) when they are updated.
-
-   updateWithPermIndirection(p1,p2)  As above but uses a permanent indir.
-
-   -------------------------------------------------------------------------- */
-
-/*
- * Storage manager mutex
- */
-#if defined(THREADED_RTS)
-extern Mutex sm_mutex;
-#endif
-
-#if defined(THREADED_RTS)
-#define ACQUIRE_SM_LOCK   ACQUIRE_LOCK(&sm_mutex);
-#define RELEASE_SM_LOCK   RELEASE_LOCK(&sm_mutex);
-#define ASSERT_SM_LOCK()  ASSERT_LOCK_HELD(&sm_mutex);
-#else
-#define ACQUIRE_SM_LOCK
-#define RELEASE_SM_LOCK
-#define ASSERT_SM_LOCK()
-#endif
-
-#if !IN_STG_CODE
-
-INLINE_HEADER void
-recordMutableGen(StgClosure *p, nat gen_no)
-{
-    bdescr *bd;
-
-    bd = generations[gen_no].mut_list;
-    if (bd->free >= bd->start + BLOCK_SIZE_W) {
-	bdescr *new_bd;
-	new_bd = allocBlock();
-	new_bd->link = bd;
-	bd = new_bd;
-	generations[gen_no].mut_list = bd;
-    }
-    *bd->free++ = (StgWord)p;
-
-}
-
-INLINE_HEADER void
-recordMutableGenLock(StgClosure *p, nat gen_no)
-{
-    ACQUIRE_SM_LOCK;
-    recordMutableGen(p,gen_no);
-    RELEASE_SM_LOCK;
-}
-
-INLINE_HEADER void
-recordMutable(StgClosure *p)
-{
-    bdescr *bd;
-    ASSERT(closure_MUTABLE(p));
-    bd = Bdescr((P_)p);
-    if (bd->gen_no > 0) recordMutableGen(p, bd->gen_no);
-}
-
-INLINE_HEADER void
-recordMutableLock(StgClosure *p)
-{
-    ACQUIRE_SM_LOCK;
-    recordMutable(p);
-    RELEASE_SM_LOCK;
-}
-
-#endif // !IN_STG_CODE
-
-/* -----------------------------------------------------------------------------
-   The CAF table - used to let us revert CAFs in GHCi
-   -------------------------------------------------------------------------- */
-
-/* set to disable CAF garbage collection in GHCi. */
-/* (needed when dynamic libraries are used). */
-extern rtsBool keepCAFs;
-
-/* -----------------------------------------------------------------------------
-   This is the write barrier for MUT_VARs, a.k.a. IORefs.  A
-   MUT_VAR_CLEAN object is not on the mutable list; a MUT_VAR_DIRTY
-   is.  When written to, a MUT_VAR_CLEAN turns into a MUT_VAR_DIRTY
-   and is put on the mutable list.
-   -------------------------------------------------------------------------- */
-
-void dirty_MUT_VAR(StgRegTable *reg, StgClosure *p);
-
-/* -----------------------------------------------------------------------------
-   DEBUGGING predicates for pointers
-
-   LOOKS_LIKE_INFO_PTR(p)    returns False if p is definitely not an info ptr
-   LOOKS_LIKE_CLOSURE_PTR(p) returns False if p is definitely not a closure ptr
-
-   These macros are complete but not sound.  That is, they might
-   return false positives.  Do not rely on them to distinguish info
-   pointers from closure pointers, for example.
-
-   We don't use address-space predicates these days, for portability
-   reasons, and the fact that code/data can be scattered about the
-   address space in a dynamically-linked environment.  Our best option
-   is to look at the alleged info table and see whether it seems to
-   make sense...
-   -------------------------------------------------------------------------- */
-
-INLINE_HEADER rtsBool LOOKS_LIKE_INFO_PTR (StgWord p);
-INLINE_HEADER rtsBool LOOKS_LIKE_CLOSURE_PTR (void *p); // XXX StgClosure*
-
-/* -----------------------------------------------------------------------------
-   Macros for calculating how big a closure will be (used during allocation)
-   -------------------------------------------------------------------------- */
-
-INLINE_HEADER StgOffset PAP_sizeW   ( nat n_args )
-{ return sizeofW(StgPAP) + n_args; }
-
-INLINE_HEADER StgOffset AP_sizeW   ( nat n_args )
-{ return sizeofW(StgAP) + n_args; }
-
-INLINE_HEADER StgOffset AP_STACK_sizeW ( nat size )
-{ return sizeofW(StgAP_STACK) + size; }
-
-INLINE_HEADER StgOffset CONSTR_sizeW( nat p, nat np )
-{ return sizeofW(StgHeader) + p + np; }
-
-INLINE_HEADER StgOffset THUNK_SELECTOR_sizeW ( void )
-{ return sizeofW(StgSelector); }
-
-INLINE_HEADER StgOffset BLACKHOLE_sizeW ( void )
-{ return sizeofW(StgHeader)+MIN_PAYLOAD_SIZE; }
-
-/* --------------------------------------------------------------------------
-   Sizes of closures
-   ------------------------------------------------------------------------*/
-
-INLINE_HEADER StgOffset sizeW_fromITBL( const StgInfoTable* itbl ) 
-{ return sizeofW(StgClosure) 
-       + sizeofW(StgPtr)  * itbl->layout.payload.ptrs 
-       + sizeofW(StgWord) * itbl->layout.payload.nptrs; }
-
-INLINE_HEADER StgOffset thunk_sizeW_fromITBL( const StgInfoTable* itbl ) 
-{ return sizeofW(StgThunk) 
-       + sizeofW(StgPtr)  * itbl->layout.payload.ptrs 
-       + sizeofW(StgWord) * itbl->layout.payload.nptrs; }
-
-INLINE_HEADER StgOffset ap_stack_sizeW( StgAP_STACK* x )
-{ return AP_STACK_sizeW(x->size); }
-
-INLINE_HEADER StgOffset ap_sizeW( StgAP* x )
-{ return AP_sizeW(x->n_args); }
-
-INLINE_HEADER StgOffset pap_sizeW( StgPAP* x )
-{ return PAP_sizeW(x->n_args); }
-
-INLINE_HEADER StgOffset arr_words_sizeW( StgArrWords* x )
-{ return sizeofW(StgArrWords) + x->words; }
-
-INLINE_HEADER StgOffset mut_arr_ptrs_sizeW( StgMutArrPtrs* x )
-{ return sizeofW(StgMutArrPtrs) + x->ptrs; }
-
-INLINE_HEADER StgWord tso_sizeW ( StgTSO *tso )
-{ return TSO_STRUCT_SIZEW + tso->stack_size; }
-
-INLINE_HEADER StgWord bco_sizeW ( StgBCO *bco )
-{ return bco->size; }
-
-INLINE_HEADER nat
-closure_sizeW_ (StgClosure *p, StgInfoTable *info)
-{
-    switch (info->type) {
-    case THUNK_0_1:
-    case THUNK_1_0:
-	return sizeofW(StgThunk) + 1;
-    case FUN_0_1:
-    case CONSTR_0_1:
-    case FUN_1_0:
-    case CONSTR_1_0:
-	return sizeofW(StgHeader) + 1;
-    case THUNK_0_2:
-    case THUNK_1_1:
-    case THUNK_2_0:
-	return sizeofW(StgThunk) + 2;
-    case FUN_0_2:
-    case CONSTR_0_2:
-    case FUN_1_1:
-    case CONSTR_1_1:
-    case FUN_2_0:
-    case CONSTR_2_0:
-	return sizeofW(StgHeader) + 2;
-    case THUNK:
-	return thunk_sizeW_fromITBL(info);
-    case THUNK_SELECTOR:
-	return THUNK_SELECTOR_sizeW();
-    case AP_STACK:
-	return ap_stack_sizeW((StgAP_STACK *)p);
-    case AP:
-	return ap_sizeW((StgAP *)p);
-    case PAP:
-	return pap_sizeW((StgPAP *)p);
-    case IND:
-    case IND_PERM:
-    case IND_OLDGEN:
-    case IND_OLDGEN_PERM:
-	return sizeofW(StgInd);
-    case ARR_WORDS:
-	return arr_words_sizeW((StgArrWords *)p);
-    case MUT_ARR_PTRS_CLEAN:
-    case MUT_ARR_PTRS_DIRTY:
-    case MUT_ARR_PTRS_FROZEN:
-    case MUT_ARR_PTRS_FROZEN0:
-	return mut_arr_ptrs_sizeW((StgMutArrPtrs*)p);
-    case TSO:
-	return tso_sizeW((StgTSO *)p);
-    case BCO:
-	return bco_sizeW((StgBCO *)p);
-    case TVAR_WATCH_QUEUE:
-        return sizeofW(StgTVarWatchQueue);
-    case TVAR:
-        return sizeofW(StgTVar);
-    case TREC_CHUNK:
-        return sizeofW(StgTRecChunk);
-    case TREC_HEADER:
-        return sizeofW(StgTRecHeader);
-    case ATOMIC_INVARIANT:
-        return sizeofW(StgAtomicInvariant);
-    case INVARIANT_CHECK_QUEUE:
-        return sizeofW(StgInvariantCheckQueue);
-    default:
-	return sizeW_fromITBL(info);
-    }
-}
-
-// The definitive way to find the size, in words, of a heap-allocated closure
-INLINE_HEADER nat
-closure_sizeW (StgClosure *p)
-{
-    return closure_sizeW_(p, get_itbl(p));
-}
-
-/* -----------------------------------------------------------------------------
-   Sizes of stack frames
-   -------------------------------------------------------------------------- */
-
-INLINE_HEADER StgWord stack_frame_sizeW( StgClosure *frame )
-{
-    StgRetInfoTable *info;
-
-    info = get_ret_itbl(frame);
-    switch (info->i.type) {
-
-    case RET_DYN:
-    {
-	StgRetDyn *dyn = (StgRetDyn *)frame;
-	return  sizeofW(StgRetDyn) + RET_DYN_BITMAP_SIZE + 
-	    RET_DYN_NONPTR_REGS_SIZE +
-	    RET_DYN_PTRS(dyn->liveness) + RET_DYN_NONPTRS(dyn->liveness);
-    }
-	    
-    case RET_FUN:
-	return sizeofW(StgRetFun) + ((StgRetFun *)frame)->size;
-
-    case RET_BIG:
-	return 1 + GET_LARGE_BITMAP(&info->i)->size;
-
-    case RET_BCO:
-	return 2 + BCO_BITMAP_SIZE((StgBCO *)((P_)frame)[1]);
-
-    default:
-	return 1 + BITMAP_SIZE(info->i.layout.bitmap);
-    }
-}
-
-/* -----------------------------------------------------------------------------
-   Nursery manipulation
-   -------------------------------------------------------------------------- */
-
-extern void     allocNurseries       ( void );
-extern void     resetNurseries       ( void );
-extern void     resizeNurseries      ( nat blocks );
-extern void     resizeNurseriesFixed ( nat blocks );
-extern lnat     countNurseryBlocks   ( void );
-
-
-/* -----------------------------------------------------------------------------
-   Functions from GC.c 
-   -------------------------------------------------------------------------- */
-
-typedef void (*evac_fn)(void *user, StgClosure **root);
-
-extern void         threadPaused ( Capability *cap, StgTSO * );
-extern StgClosure * isAlive      ( StgClosure *p );
-extern void         markCAFs     ( evac_fn evac, void *user );
-extern void         GetRoots     ( evac_fn evac, void *user );
-
-/* -----------------------------------------------------------------------------
-   Stats 'n' DEBUG stuff
-   -------------------------------------------------------------------------- */
-
-extern ullong RTS_VAR(total_allocated);
-
-extern lnat calcAllocated  ( void );
-extern lnat calcLiveBlocks ( void );
-extern lnat calcLiveWords  ( void );
-extern lnat countOccupied  ( bdescr *bd );
-extern lnat calcNeeded     ( void );
-
-#if defined(DEBUG)
-extern void memInventory(rtsBool show);
-extern void checkSanity(void);
-extern nat  countBlocks(bdescr *);
-extern void checkNurserySanity( step *stp );
-#endif
-
-#if defined(DEBUG)
-void printMutOnceList(generation *gen);
-void printMutableList(generation *gen);
-#endif
-
-/* ----------------------------------------------------------------------------
-   Storage manager internal APIs and globals
-   ------------------------------------------------------------------------- */
-
-#define END_OF_STATIC_LIST stgCast(StgClosure*,1)
-
-extern void newDynCAF(StgClosure *);
-
-extern void move_TSO(StgTSO *src, StgTSO *dest);
-extern StgTSO *relocate_stack(StgTSO *dest, ptrdiff_t diff);
-
-extern StgWeak    * RTS_VAR(old_weak_ptr_list);
-extern StgWeak    * RTS_VAR(weak_ptr_list);
-extern StgClosure * RTS_VAR(caf_list);
-extern StgClosure * RTS_VAR(revertible_caf_list);
-extern StgTSO     * RTS_VAR(resurrected_threads);
-
-#define IS_FORWARDING_PTR(p) ((((StgWord)p) & 1) != 0)
-#define MK_FORWARDING_PTR(p) (((StgWord)p) | 1)
-#define UN_FORWARDING_PTR(p) (((StgWord)p) - 1)
-
-INLINE_HEADER rtsBool LOOKS_LIKE_INFO_PTR_NOT_NULL (StgWord p)
-{
-    StgInfoTable *info = INFO_PTR_TO_STRUCT(p);
-    return info->type != INVALID_OBJECT && info->type < N_CLOSURE_TYPES;
-}
-
-INLINE_HEADER rtsBool LOOKS_LIKE_INFO_PTR (StgWord p)
-{
-    return p && (IS_FORWARDING_PTR(p) || LOOKS_LIKE_INFO_PTR_NOT_NULL(p));
-}
-
-INLINE_HEADER rtsBool LOOKS_LIKE_CLOSURE_PTR (void *p)
-{
-    return LOOKS_LIKE_INFO_PTR((StgWord)(UNTAG_CLOSURE((StgClosure *)(p)))->header.info);
-}
-
-#endif /* STORAGE_H */
diff --git a/includes/config.h b/includes/config.h
deleted file mode 100644
index 66e2ade63773cbc7422543c9d2730f0ffea13a41..0000000000000000000000000000000000000000
--- a/includes/config.h
+++ /dev/null
@@ -1,7 +0,0 @@
-#ifndef __CONFIG_H__
-#define __CONFIG_H__
-
-#warning config.h is deprecated; please use ghcconfig.h instead
-#include "ghcconfig.h"
-
-#endif
diff --git a/includes/ghc.mk b/includes/ghc.mk
index a266bf4c6abbeb0b7c3ef1ed33491ad4d8981ad4..50107eda37f8142ea9de4c6e785fcbeb06803f13 100644
--- a/includes/ghc.mk
+++ b/includes/ghc.mk
@@ -33,7 +33,7 @@ ifeq "$(GhcEnableTablesNextToCode) $(GhcUnregisterised)" "YES NO"
 includes_CC_OPTS += -DTABLES_NEXT_TO_CODE
 endif
 
-includes_CC_OPTS += -Iincludes -Irts -Irts/parallel
+includes_CC_OPTS += -Iincludes -Irts
 
 ifneq "$(GhcWithSMP)" "YES"
 includes_CC_OPTS += -DNOSMP
@@ -129,7 +129,7 @@ includes_dist-derivedconstants_PROG   = mkDerivedConstants$(exeext)
 
 $(eval $(call build-prog,includes,dist-derivedconstants,0))
 
-$(includes_dist-derivedconstants_depfile) : $(includes_H_CONFIG) $(includes_H_PLATFORM)
+$(includes_dist-derivedconstants_depfile) : $(includes_H_CONFIG) $(includes_H_PLATFORM) $(wildcard includes/*.h) $(wildcard rts/*.h)
 includes/dist-derivedconstants/build/mkDerivedConstants.o : $(includes_H_CONFIG) $(includes_H_PLATFORM)
 
 ifneq "$(BINDIST)" "YES"
@@ -159,7 +159,7 @@ includes_dist-ghcconstants_CC_OPTS = -DGEN_HASKELL
 $(eval $(call build-prog,includes,dist-ghcconstants,0))
 
 ifneq "$(BINDIST)" "YES"
-$(includes_dist-ghcconstants_depfile) : $(includes_H_CONFIG) $(includes_H_PLATFORM)
+$(includes_dist-ghcconstants_depfile) : $(includes_H_CONFIG) $(includes_H_PLATFORM) $(wildcard includes/*.h) $(wildcard rts/*.h)
 
 includes/dist-ghcconstants/build/mkDerivedConstants.o : $(includes_H_CONFIG) $(includes_H_PLATFORM)
 
diff --git a/includes/ieee-flpt.h b/includes/ieee-flpt.h
deleted file mode 100644
index a1fce3a8da23cb48eb04f14446753b8da2be159a..0000000000000000000000000000000000000000
--- a/includes/ieee-flpt.h
+++ /dev/null
@@ -1,35 +0,0 @@
-/* this file is #included into both C (.c and .hc) and Haskell files */
-
-    /* IEEE format floating-point */
-#define IEEE_FLOATING_POINT 1
-
-   /* Radix of exponent representation */
-#ifndef FLT_RADIX
-# define FLT_RADIX 2
-#endif
-
-   /* Number of base-FLT_RADIX digits in the significand of a float */
-#ifndef FLT_MANT_DIG
-# define FLT_MANT_DIG 24
-#endif
-   /* Minimum int x such that FLT_RADIX**(x-1) is a normalised float */
-#ifndef FLT_MIN_EXP
-#  define FLT_MIN_EXP (-125)
-#endif
-   /* Maximum int x such that FLT_RADIX**(x-1) is a representable float */
-#ifndef FLT_MAX_EXP
-# define FLT_MAX_EXP 128
-#endif
-
-   /* Number of base-FLT_RADIX digits in the significand of a double */
-#ifndef DBL_MANT_DIG
-# define DBL_MANT_DIG 53
-#endif
-   /* Minimum int x such that FLT_RADIX**(x-1) is a normalised double */
-#ifndef DBL_MIN_EXP
-#  define DBL_MIN_EXP (-1021)
-#endif
-   /* Maximum int x such that FLT_RADIX**(x-1) is a representable double */
-#ifndef DBL_MAX_EXP
-# define DBL_MAX_EXP 1024
-#endif
diff --git a/includes/mkDerivedConstants.c b/includes/mkDerivedConstants.c
index f1289f61f3eb21ae0f3c1a5ebd48c3ded8b5ad73..b38f3abe36d7136869de6ef15e5daf3a67a458ca 100644
--- a/includes/mkDerivedConstants.c
+++ b/includes/mkDerivedConstants.c
@@ -21,10 +21,8 @@
 #define THREADED_RTS
 
 #include "Rts.h"
-#include "RtsFlags.h"
-#include "Storage.h"
+
 #include "Stable.h"
-#include "OSThreads.h"
 #include "Capability.h"
 
 #include <stdio.h>
diff --git a/includes/rts/Adjustor.h b/includes/rts/Adjustor.h
new file mode 100644
index 0000000000000000000000000000000000000000..71e15246c1bea7b29fda938003bcdd7ae3e2adc7
--- /dev/null
+++ b/includes/rts/Adjustor.h
@@ -0,0 +1,20 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1998-2009
+ *
+ * Adjustor API
+ *
+ * -------------------------------------------------------------------------- */
+
+#ifndef RTS_ADJUSTOR_H
+#define RTS_ADJUSTOR_H
+
+/* Creating and destroying an adjustor thunk */
+void* createAdjustor (int cconv, 
+                      StgStablePtr hptr,
+                      StgFunPtr wptr,
+                      char *typeString);
+
+void freeHaskellFunctionPtr (void* ptr);
+
+#endif /* RTS_ADJUSTOR_H */
diff --git a/includes/Bytecodes.h b/includes/rts/Bytecodes.h
similarity index 100%
rename from includes/Bytecodes.h
rename to includes/rts/Bytecodes.h
diff --git a/includes/rts/Config.h b/includes/rts/Config.h
new file mode 100644
index 0000000000000000000000000000000000000000..ce332fa2a2826ec66a95e617dc388e2eaa6a7dbb
--- /dev/null
+++ b/includes/rts/Config.h
@@ -0,0 +1,36 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1998-2004
+ *
+ * Rts settings.
+ *
+ * NOTE: assumes #include "ghcconfig.h"
+ * 
+ * NB: THIS FILE IS INCLUDED IN NON-C CODE AND DATA!  #defines only please.
+ * ---------------------------------------------------------------------------*/
+
+#ifndef RTS_CONFIG_H
+#define RTS_CONFIG_H
+
+#if defined(TICKY_TICKY) && defined(THREADED_RTS)
+#error TICKY_TICKY is incompatible with THREADED_RTS
+#endif
+
+/*
+ * Whether the runtime system will use libbfd for debugging purposes.
+ */
+#if defined(DEBUG) && defined(HAVE_BFD_H) && defined(HAVE_LIBBFD) && !defined(_WIN32)
+#define USING_LIBBFD 1
+#endif
+
+/* -----------------------------------------------------------------------------
+   Signals - supported on non-PAR versions of the runtime.  See RtsSignals.h.
+   -------------------------------------------------------------------------- */
+
+#define RTS_USER_SIGNALS 1
+
+/* Profile spin locks */
+
+#define PROF_SPIN
+
+#endif /* RTS_CONFIG_H */
diff --git a/includes/Constants.h b/includes/rts/Constants.h
similarity index 99%
rename from includes/Constants.h
rename to includes/rts/Constants.h
index 967a8524964410382f68262cd4643d065dc7d5cd..bab45a362c1259f04c5b53294d141cfbc1b16e7f 100644
--- a/includes/Constants.h
+++ b/includes/rts/Constants.h
@@ -14,8 +14,8 @@
  *
  * -------------------------------------------------------------------------- */
 
-#ifndef CONSTANTS_H
-#define CONSTANTS_H
+#ifndef RTS_CONSTANTS_H
+#define RTS_CONSTANTS_H
 
 /* -----------------------------------------------------------------------------
    Minimum closure sizes
@@ -287,4 +287,4 @@
 #error RESERVED_STACK_WORDS may be wrong!
 #endif
 
-#endif /* CONSTANTS_H */
+#endif /* RTS_CONSTANTS_H */
diff --git a/includes/EventLogFormat.h b/includes/rts/EventLogFormat.h
similarity index 98%
rename from includes/EventLogFormat.h
rename to includes/rts/EventLogFormat.h
index b56b0b77e4b46f1da8753d74bd17e1b66d6e3a7d..363c1ca1cfe13cc2f0410ba824f6f629b93c8be1 100644
--- a/includes/EventLogFormat.h
+++ b/includes/rts/EventLogFormat.h
@@ -73,8 +73,8 @@
  *
  * -------------------------------------------------------------------------- */
 
-#ifndef EVENTLOGFORMAT_H
-#define EVENTLOGFORMAT_H
+#ifndef RTS_EVENTLOGFORMAT_H
+#define RTS_EVENTLOGFORMAT_H
 
 /*
  * Markers for begin/end of the Header.
@@ -140,4 +140,4 @@ typedef StgWord16 EventCapNo;
 
 #endif
 
-#endif /* EVENTLOGFORMAT_H */
+#endif /* RTS_EVENTLOGFORMAT_H */
diff --git a/includes/FileLock.h b/includes/rts/FileLock.h
similarity index 81%
rename from includes/FileLock.h
rename to includes/rts/FileLock.h
index 3fc1a81aec02218748a38f7a3dec3f87b021c3fd..9a35ecc581e0b2e9207425a75399a0c42b0096b9 100644
--- a/includes/FileLock.h
+++ b/includes/rts/FileLock.h
@@ -6,7 +6,10 @@
  *
  * ---------------------------------------------------------------------------*/
 
-void initFileLocking(void);
-void freeFileLocking(void);
+#ifndef RTS_FILELOCK_H
+#define RTS_FILELOCK_H
+
 int  lockFile(int fd, dev_t dev, ino_t ino, int for_writing);
 int  unlockFile(int fd);
+
+#endif /* RTS_FILELOCK_H */
diff --git a/includes/rts/Flags.h b/includes/rts/Flags.h
new file mode 100644
index 0000000000000000000000000000000000000000..3f3a0a952f0a4b0bea64a5a8056245d7bdc42ae1
--- /dev/null
+++ b/includes/rts/Flags.h
@@ -0,0 +1,239 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1998-1999
+ *
+ * Datatypes that holds the command-line flag settings.
+ *
+ * ---------------------------------------------------------------------------*/
+
+#ifndef RTS_FLAGS_H
+#define RTS_FLAGS_H
+
+#include <stdio.h>
+
+/* For defaults, see the @initRtsFlagsDefaults@ routine. */
+
+struct GC_FLAGS {
+    FILE   *statsFile;
+    nat	    giveStats;
+#define NO_GC_STATS	 0
+#define COLLECT_GC_STATS 1
+#define ONELINE_GC_STATS 2
+#define SUMMARY_GC_STATS 3
+#define VERBOSE_GC_STATS 4
+
+    nat     maxStkSize;         /* in *words* */
+    nat     initialStkSize;     /* in *words* */
+
+    nat	    maxHeapSize;        /* in *blocks* */
+    nat     minAllocAreaSize;   /* in *blocks* */
+    nat     minOldGenSize;      /* in *blocks* */
+    nat     heapSizeSuggestion; /* in *blocks* */
+    double  oldGenFactor;
+    double  pcFreeHeap;
+
+    nat     generations;
+    nat     steps;
+    rtsBool squeezeUpdFrames;
+
+    rtsBool compact;		/* True <=> "compact all the time" */
+    double  compactThreshold;
+
+    rtsBool sweep;		/* use "mostly mark-sweep" instead of copying
+                                 * for the oldest generation */
+    rtsBool ringBell;
+    rtsBool frontpanel;
+
+    int idleGCDelayTime;	/* in milliseconds */
+
+    StgWord heapBase;           /* address to ask the OS for memory */
+};
+
+struct DEBUG_FLAGS {  
+    /* flags to control debugging output & extra checking in various subsystems */
+    rtsBool scheduler;      /* 's' */
+    rtsBool interpreter;    /* 'i' */
+    rtsBool weak;           /* 'w' */
+    rtsBool gccafs;         /* 'G' */
+    rtsBool gc;             /* 'g' */
+    rtsBool block_alloc;    /* 'b' */
+    rtsBool sanity;         /* 'S'   warning: might be expensive! */
+    rtsBool stable;         /* 't' */
+    rtsBool prof;           /* 'p' */
+    rtsBool eventlog;       /* 'e' */
+    rtsBool linker;         /* 'l'   the object linker */
+    rtsBool apply;          /* 'a' */
+    rtsBool stm;            /* 'm' */
+    rtsBool squeeze;        /* 'z'  stack squeezing & lazy blackholing */
+    rtsBool hpc; 	    /* 'c' coverage */
+    rtsBool timestamp;          /* add timestamps to traces */
+};
+
+struct COST_CENTRE_FLAGS {
+    unsigned int	    doCostCentres;
+# define COST_CENTRES_SUMMARY	1
+# define COST_CENTRES_VERBOSE	2 /* incl. serial time profile */
+# define COST_CENTRES_ALL	3
+# define COST_CENTRES_XML       4
+
+    int	    profilerTicks;   /* derived */
+    int	    msecsPerTick;    /* derived */
+};
+
+struct PROFILING_FLAGS {
+    unsigned int	doHeapProfile;
+# define NO_HEAP_PROFILING	0	/* N.B. Used as indexes into arrays */
+# define HEAP_BY_CCS		1
+# define HEAP_BY_MOD		2
+# define HEAP_BY_DESCR		4
+# define HEAP_BY_TYPE		5
+# define HEAP_BY_RETAINER       6
+# define HEAP_BY_LDV            7
+
+# define HEAP_BY_CLOSURE_TYPE   8
+
+    nat                 profileInterval;      /* delta between samples (in ms) */
+    nat                 profileIntervalTicks; /* delta between samples (in 'ticks') */
+    rtsBool             includeTSOs;
+
+
+    rtsBool		showCCSOnException;
+
+    nat                 maxRetainerSetSize;
+
+    nat                 ccsLength;
+
+    char*               modSelector;
+    char*               descrSelector;
+    char*               typeSelector;
+    char*               ccSelector;
+    char*               ccsSelector;
+    char*               retainerSelector;
+    char*               bioSelector;
+
+};
+
+#ifdef EVENTLOG
+struct EVENTLOG_FLAGS {
+  rtsBool doEventLogging;
+};
+#endif
+
+struct CONCURRENT_FLAGS {
+    int ctxtSwitchTime;		/* in milliseconds */
+    int ctxtSwitchTicks;	/* derived */
+};
+
+struct MISC_FLAGS {
+    int tickInterval;     /* in milliseconds */
+    rtsBool install_signal_handlers;
+    rtsBool machineReadable;
+    StgWord linkerMemBase;       /* address to ask the OS for memory
+                                  * for the linker, NULL ==> off */
+};
+
+#ifdef THREADED_RTS
+struct PAR_FLAGS {
+  nat            nNodes;         /* number of threads to run simultaneously */
+  rtsBool        migrate;        /* migrate threads between capabilities */
+  rtsBool        wakeupMigrate;  /* migrate a thread on wakeup */
+  unsigned int	 maxLocalSparks;
+  rtsBool        parGcEnabled;   /* enable parallel GC */
+  rtsBool        parGcGen;       /* do parallel GC in this generation
+                                  * and higher only */
+  rtsBool        parGcLoadBalancing; /* do load-balancing in parallel GC */
+  rtsBool        setAffinity;    /* force thread affinity with CPUs */
+};
+#endif /* THREADED_RTS */
+
+struct TICKY_FLAGS {
+    rtsBool showTickyStats;
+    FILE   *tickyFile;
+};
+
+#ifdef USE_PAPI
+#define MAX_PAPI_USER_EVENTS 8
+
+struct PAPI_FLAGS {
+    nat     eventType;          /* The type of events to count */
+    nat     numUserEvents;
+    char *  userEvents[MAX_PAPI_USER_EVENTS];
+};
+
+#define PAPI_FLAG_CACHE_L1 1
+#define PAPI_FLAG_CACHE_L2 2
+#define PAPI_FLAG_BRANCH 3
+#define PAPI_FLAG_STALLS 4
+#define PAPI_FLAG_CB_EVENTS 5
+#define PAPI_USER_EVENTS 6
+
+#endif
+
+/* Put them together: */
+
+typedef struct _RTS_FLAGS {
+    /* The first portion of RTS_FLAGS is invariant. */
+    struct GC_FLAGS	     GcFlags;
+    struct CONCURRENT_FLAGS  ConcFlags;
+    struct MISC_FLAGS        MiscFlags;
+    struct DEBUG_FLAGS	     DebugFlags;
+    struct COST_CENTRE_FLAGS CcFlags;
+    struct PROFILING_FLAGS   ProfFlags;
+#ifdef EVENTLOG
+    struct EVENTLOG_FLAGS    EventLogFlags;
+#endif
+    struct TICKY_FLAGS	     TickyFlags;
+
+#if defined(THREADED_RTS)
+    struct PAR_FLAGS	ParFlags;
+#endif
+#ifdef USE_PAPI
+    struct PAPI_FLAGS   PapiFlags;
+#endif
+} RTS_FLAGS;
+
+#ifdef COMPILING_RTS_MAIN
+extern DLLIMPORT RTS_FLAGS RtsFlags;
+#elif IN_STG_CODE
+/* Hack because the C code generator can't generate '&label'. */
+extern RTS_FLAGS RtsFlags[];
+#else
+extern RTS_FLAGS RtsFlags;
+#endif
+
+/* Routines that operate-on/to-do-with RTS flags: */
+
+extern void initRtsFlagsDefaults(void);
+extern void setupRtsFlags(int *argc, char *argv[], int *rts_argc, char *rts_argv[]);
+extern void setProgName(char *argv[]);
+
+
+/*
+ * The printf formats are here, so we are less likely to make
+ * overly-long filenames (with disastrous results).  No more than 128
+ * chars, please!  
+ */
+
+#define STATS_FILENAME_MAXLEN	128
+
+#define GR_FILENAME_FMT		"%0.124s.gr"
+#define GR_FILENAME_FMT_GUM	"%0.120s.%03d.%s"
+#define HP_FILENAME_FMT		"%0.124s.hp"
+#define LIFE_FILENAME_FMT	"%0.122s.life"
+#define PROF_FILENAME_FMT	"%0.122s.prof"
+#define PROF_FILENAME_FMT_GUM	"%0.118s.%03d.prof"
+#define QP_FILENAME_FMT		"%0.124s.qp"
+#define STAT_FILENAME_FMT	"%0.122s.stat"
+#define TICKY_FILENAME_FMT	"%0.121s.ticky"
+#define TIME_FILENAME_FMT	"%0.122s.time"
+#define TIME_FILENAME_FMT_GUM	"%0.118s.%03d.time"
+
+/* an "int" so as to match normal "argc" */
+/* Now defined in Stg.h (lib/std/cbits need these too.)
+extern int     prog_argc;
+extern char  **prog_argv;
+*/
+extern int     rts_argc;  /* ditto */
+extern char   *rts_argv[];
+
+#endif	/* RTS_FLAGS_H */
diff --git a/includes/RtsGlobals.h b/includes/rts/Globals.h
similarity index 84%
rename from includes/RtsGlobals.h
rename to includes/rts/Globals.h
index 476f112e176289491e7979b9a4f16118472a681d..71846e75a109a42b6f0c60a3018f2ed4031d9b79 100644
--- a/includes/RtsGlobals.h
+++ b/includes/rts/Globals.h
@@ -9,13 +9,10 @@
  *
  * ---------------------------------------------------------------------------*/
 
-#ifndef RTSGLOBALS_H
-#define RTSGLOBALS_H
-
-void initGlobalStore(void);
-void exitGlobalStore(void);
+#ifndef RTS_GLOBALS_H
+#define RTS_GLOBALS_H
 
 StgStablePtr getOrSetTypeableStore(StgStablePtr value);
 StgStablePtr getOrSetSignalHandlerStore(StgStablePtr value);
 
-#endif
+#endif /* RTS_GLOBALS_H */
diff --git a/includes/Hooks.h b/includes/rts/Hooks.h
similarity index 89%
rename from includes/Hooks.h
rename to includes/rts/Hooks.h
index e281c89ab9810040fca7814cbfb9e1137758354c..4fe50b4b9f0136594d5faefdf406594542e7b7d5 100644
--- a/includes/Hooks.h
+++ b/includes/rts/Hooks.h
@@ -6,6 +6,9 @@
  *
  * ---------------------------------------------------------------------------*/
 
+#ifndef RTS_HOOKS_H
+#define RTS_HOOKS_H
+
 extern char *ghc_rts_opts;
 
 extern void OnExitHook (void);
@@ -14,3 +17,5 @@ extern void StackOverflowHook (unsigned long stack_size);
 extern void OutOfHeapHook (unsigned long request_size, unsigned long heap_size);
 extern void MallocFailHook (unsigned long request_size /* in bytes */, char *msg);
 extern void defaultsHook (void);
+
+#endif /* RTS_HOOKS_H */
diff --git a/includes/rts/Hpc.h b/includes/rts/Hpc.h
new file mode 100644
index 0000000000000000000000000000000000000000..c966e32cd92fc683c4dcaa9d75d1e73070643a54
--- /dev/null
+++ b/includes/rts/Hpc.h
@@ -0,0 +1,32 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 2008-2009
+ *
+ * Haskell Program Coverage
+ *
+ * -------------------------------------------------------------------------- */
+
+#ifndef RTS_HPC_H
+#define RTS_HPC_H
+
+// Simple linked list of modules
+typedef struct _HpcModuleInfo {
+  char *modName;		// name of module
+  StgWord32 tickCount;		// number of ticks
+  StgWord32 tickOffset;		// offset into a single large .tix Array
+  StgWord32 hashNo;		// Hash number for this module's mix info
+  StgWord64 *tixArr;		// tix Array; local for this module
+  struct _HpcModuleInfo *next;
+} HpcModuleInfo;
+
+int hs_hpc_module (char *modName, 
+                   StgWord32 modCount, 
+                   StgWord32 modHashNo,
+                   StgWord64 *tixArr);
+
+HpcModuleInfo * hs_hpc_rootModule (void);
+
+void startupHpc(void);
+void exitHpc(void);
+
+#endif /* RTS_HPC_H */
diff --git a/includes/rts/IOManager.h b/includes/rts/IOManager.h
new file mode 100644
index 0000000000000000000000000000000000000000..1c269ada6ddfc159020309fa0d678bbae871cf51
--- /dev/null
+++ b/includes/rts/IOManager.h
@@ -0,0 +1,39 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1998-2009
+ *
+ * IO Manager functionality in the RTS
+ *
+ * -------------------------------------------------------------------------- */
+
+#ifndef RTS_IOMANAGER_H
+#define RTS_IOMANAGER_H
+
+#if defined(mingw32_HOST_OS)
+
+int  rts_InstallConsoleEvent ( int action, StgStablePtr *handler );
+void rts_ConsoleHandlerDone  ( int ev );
+extern StgInt console_handler;
+
+void *   getIOManagerEvent  (void);
+HsWord32 readIOManagerEvent (void);
+void     sendIOManagerEvent (HsWord32 event);
+
+#else
+
+void     setIOManagerPipe   (int fd);
+
+#endif
+
+//
+// Communicating with the IO manager thread (see GHC.Conc).
+// Posix implementation in posix/Signals.c
+// Win32 implementation in win32/ThrIOManager.c
+//
+#if defined(THREADED_RTS)
+void ioManagerWakeup (void);
+void ioManagerDie (void);
+void ioManagerStart (void);
+#endif
+
+#endif /* RTS_IOMANAGER_H */
diff --git a/includes/Linker.h b/includes/rts/Linker.h
similarity index 88%
rename from includes/Linker.h
rename to includes/rts/Linker.h
index 053d41115383450ae006b69702d56772257bf4eb..df74e7eeb872650b2d04827f3eb16c6f69838080 100644
--- a/includes/Linker.h
+++ b/includes/rts/Linker.h
@@ -6,8 +6,8 @@
  *
  * ---------------------------------------------------------------------------*/
 
-#ifndef LINKER_H
-#define LINKER_H
+#ifndef RTS_LINKER_H
+#define RTS_LINKER_H
 
 /* initialize the object linker */
 void initLinker( void );
@@ -33,6 +33,4 @@ HsInt resolveObjs( void );
 /* load a dynamic library */
 const char *addDLL( char* dll_name );
 
-extern void markRootPtrTable(void (*)(StgClosure **));
-
-#endif /* LINKER_H */
+#endif /* RTS_LINKER_H */
diff --git a/includes/RtsMessages.h b/includes/rts/Messages.h
similarity index 80%
rename from includes/RtsMessages.h
rename to includes/rts/Messages.h
index 79c48d3b98ad71395d5da7d865a50ca14485c924..e01eff47cf0c2a96d40daba605464f4ebbabccf1 100644
--- a/includes/RtsMessages.h
+++ b/includes/rts/Messages.h
@@ -9,8 +9,8 @@
  *
  * ---------------------------------------------------------------------------*/
 
-#ifndef RTSMESSAGES_H
-#define RTSMESSAGES_H
+#ifndef RTS_MESSAGES_H
+#define RTS_MESSAGES_H
 
 #include <stdarg.h>
 
@@ -26,14 +26,15 @@
  * barf() invokes (*fatalInternalErrorFn)().  This function is not
  * expected to return.
  */
-extern void barf(const char *s, ...)
+void barf(const char *s, ...)
    GNUC3_ATTRIBUTE(__noreturn__);
 
-extern void vbarf(const char *s, va_list ap)
+void vbarf(const char *s, va_list ap)
    GNUC3_ATTRIBUTE(__noreturn__);
 
-extern void _assertFail(const char *filename, unsigned int linenum)
-   GNUC3_ATTRIBUTE(__noreturn__);
+// declared in Rts.h:
+// extern void _assertFail(const char *filename, unsigned int linenum)
+//    GNUC3_ATTRIBUTE(__noreturn__);
 
 /*
  * An error condition which is caused by and/or can be corrected by
@@ -41,10 +42,10 @@ extern void _assertFail(const char *filename, unsigned int linenum)
  *
  * errorBelch() invokes (*errorMsgFn)().
  */
-extern void errorBelch(const char *s, ...)
+void errorBelch(const char *s, ...)
    GNUC3_ATTRIBUTE(format (printf, 1, 2));
 
-extern void verrorBelch(const char *s, va_list ap);
+void verrorBelch(const char *s, va_list ap);
 
 /*
  * An error condition which is caused by and/or can be corrected by
@@ -55,10 +56,10 @@ extern void verrorBelch(const char *s, va_list ap);
  *
  * sysErrorBelch() invokes (*sysErrorMsgFn)().
  */
-extern void sysErrorBelch(const char *s, ...)
+void sysErrorBelch(const char *s, ...)
    GNUC3_ATTRIBUTE(format (printf, 1, 2));
 
-extern void vsysErrorBelch(const char *s, va_list ap);
+void vsysErrorBelch(const char *s, va_list ap);
 
 /*
  * A debugging message.  Debugging messages are generated either as a
@@ -67,10 +68,10 @@ extern void vsysErrorBelch(const char *s, va_list ap);
  *
  * debugBelch() invokes (*debugMsgFn)().
  */
-extern void debugBelch(const char *s, ...)
+void debugBelch(const char *s, ...)
    GNUC3_ATTRIBUTE(format (printf, 1, 2));
 
-extern void vdebugBelch(const char *s, va_list ap);
+void vdebugBelch(const char *s, va_list ap);
 
 
 /* Hooks for redirecting message generation: */
@@ -88,4 +89,4 @@ extern RtsMsgFunction rtsDebugMsgFn;
 extern RtsMsgFunction rtsErrorMsgFn;
 extern RtsMsgFunction rtsSysErrorMsgFn;
 
-#endif /* RTSMESSAGES_H */
+#endif /* RTS_MESSAGES_H */
diff --git a/includes/OSThreads.h b/includes/rts/OSThreads.h
similarity index 94%
rename from includes/OSThreads.h
rename to includes/rts/OSThreads.h
index f5c434fc28029088fe2d296ff9c6182e68c7d041..2d32136379b0fc8ff7040f8d5b07208ead3a1da4 100644
--- a/includes/OSThreads.h
+++ b/includes/rts/OSThreads.h
@@ -7,8 +7,8 @@
  * 
  * --------------------------------------------------------------------------*/
 
-#ifndef __OSTHREADS_H__
-#define __OSTHREADS_H__
+#ifndef RTS_OSTHREADS_H
+#define RTS_OSTHREADS_H
 
 #if defined(THREADED_RTS) /* to the end */
 
@@ -152,7 +152,7 @@ typedef HANDLE Mutex;
 // General thread operations
 //
 extern OSThreadId osThreadId      ( void );
-extern void shutdownThread        ( void );
+extern void shutdownThread        ( void )   GNUC3_ATTRIBUTE(__noreturn__);
 extern void yieldThread           ( void );
 
 typedef void OSThreadProcAttr OSThreadProc(void *);
@@ -198,4 +198,12 @@ void setThreadAffinity     (nat n, nat m);
 
 #endif /* defined(THREADED_RTS) */
 
-#endif /* __OSTHREADS_H__ */
+//
+// Support for forkOS (defined regardless of THREADED_RTS, but does
+// nothing when !THREADED_RTS).
+//
+#ifndef CMINUSMINUS
+int forkOS_createThread ( HsStablePtr entry );
+#endif
+
+#endif /* RTS_OSTHREADS_H */
diff --git a/includes/rts/Parallel.h b/includes/rts/Parallel.h
new file mode 100644
index 0000000000000000000000000000000000000000..b6759819b1cb152f4e136c62b645a4ab4c70e3f1
--- /dev/null
+++ b/includes/rts/Parallel.h
@@ -0,0 +1,14 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1998-2009
+ *
+ * Parallelism-related functionality
+ *
+ * -------------------------------------------------------------------------- */
+
+#ifndef RTS_PARALLEL_H
+#define RTS_PARALLEL_H
+
+StgInt newSpark (StgRegTable *reg, StgClosure *p);
+
+#endif /* RTS_PARALLEL_H */
diff --git a/includes/Signals.h b/includes/rts/Signals.h
similarity index 62%
rename from includes/Signals.h
rename to includes/rts/Signals.h
index a5907bbee95942bd9f56fd7dc42b48b54d8ed166..8d9e0fd4b7cc6e0882d60e85178f213034b75a90 100644
--- a/includes/Signals.h
+++ b/includes/rts/Signals.h
@@ -1,18 +1,21 @@
 /* -----------------------------------------------------------------------------
  *
- * (c) The GHC Team, 1998-2005
+ * (c) The GHC Team, 1998-2009
  *
  * RTS signal handling 
  *
  * ---------------------------------------------------------------------------*/
 
-#ifndef SIGNALS_H
-#define SIGNALS_H
+#ifndef RTS_SIGNALS_H
+#define RTS_SIGNALS_H
 
+/* NB. #included in Haskell code, no prototypes in here. */
+
+/* arguments to stg_sig_install() */
 #define STG_SIG_DFL   (-1)
 #define STG_SIG_IGN   (-2)
 #define STG_SIG_ERR   (-3)
 #define STG_SIG_HAN   (-4)
 #define STG_SIG_RST   (-5)
 
-#endif /* SIGNALS_H */
+#endif /* RTS_SIGNALS_H */
diff --git a/includes/SpinLock.h b/includes/rts/SpinLock.h
similarity index 96%
rename from includes/SpinLock.h
rename to includes/rts/SpinLock.h
index 76fcd4e00e17a134d92e349f70c0a0643668a07d..ea992a3457153edee9fc132f10fe5ebe058ff66a 100644
--- a/includes/SpinLock.h
+++ b/includes/rts/SpinLock.h
@@ -14,8 +14,8 @@
  *
  * -------------------------------------------------------------------------- */
 
-#ifndef SPINLOCK_H
-#define SPINLOCK_H
+#ifndef RTS_SPINLOCK_H
+#define RTS_SPINLOCK_H
  
 #if defined(THREADED_RTS)
 
@@ -101,5 +101,5 @@ INLINE_HEADER void initSpinLock(void * p STG_UNUSED)
 
 #endif /* THREADED_RTS */
 
-#endif /* SPINLOCK_H */
+#endif /* RTS_SPINLOCK_H */
 
diff --git a/includes/rts/Stable.h b/includes/rts/Stable.h
new file mode 100644
index 0000000000000000000000000000000000000000..95a3f9615669cc1f5ed2c584f9df2a24101e899d
--- /dev/null
+++ b/includes/rts/Stable.h
@@ -0,0 +1,35 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1998-2004
+ *
+ * Stable Pointers
+ *
+ * ---------------------------------------------------------------------------*/
+
+#ifndef RTS_STABLE_H
+#define RTS_STABLE_H
+
+EXTERN_INLINE StgPtr deRefStablePtr (StgStablePtr stable_ptr);
+StgStablePtr getStablePtr  (StgPtr p);
+
+/* -----------------------------------------------------------------------------
+   PRIVATE from here.
+   -------------------------------------------------------------------------- */
+
+typedef struct { 
+  StgPtr  addr;			/* Haskell object, free list, or NULL */
+  StgPtr  old;			/* old Haskell object, used during GC */
+  StgWord ref;			/* used for reference counting */
+  StgClosure *sn_obj;		/* the StableName object (or NULL) */
+} snEntry;
+
+extern DLL_IMPORT_RTS snEntry *stable_ptr_table;
+
+EXTERN_INLINE
+StgPtr deRefStablePtr(StgStablePtr sp)
+{
+    ASSERT(stable_ptr_table[(StgWord)sp].ref > 0);
+    return stable_ptr_table[(StgWord)sp].addr;
+}
+
+#endif /* RTS_STABLE_H */
diff --git a/includes/SchedAPI.h b/includes/rts/Threads.h
similarity index 54%
rename from includes/SchedAPI.h
rename to includes/rts/Threads.h
index b11437bda20123a1a821d03cf4de887b936d33bc..06a0ed11dc7ee843c6309bf07868a50c70fbad5e 100644
--- a/includes/SchedAPI.h
+++ b/includes/rts/Threads.h
@@ -1,18 +1,18 @@
 /* -----------------------------------------------------------------------------
  *
- * (c) The GHC Team 1998-2002
+ * (c) The GHC Team 1998-2009
  *
  * External API for the scheduler.  For most uses, the functions in
  * RtsAPI.h should be enough.
  *
  * ---------------------------------------------------------------------------*/
 
-#ifndef SCHEDAPI_H
-#define SCHEDAPI_H
+#ifndef RTS_THREADS_H
+#define RTS_THREADS_H
 
-/* 
- * Creating threads
- */
+// 
+// Creating threads
+//
 StgTSO *createThread (Capability *cap, nat stack_size);
 
 Capability *scheduleWaitThread (StgTSO *tso, /*out*/HaskellObj* ret,
@@ -24,4 +24,24 @@ StgTSO *createIOThread        (Capability *cap, nat stack_size,
 			       StgClosure *closure);
 StgTSO *createStrictIOThread  (Capability *cap, nat stack_size,  
 			       StgClosure *closure);
+
+// Suspending/resuming threads around foreign calls
+void *        suspendThread (StgRegTable *);
+StgRegTable * resumeThread  (void *);
+
+//
+// Thread operations from Threads.c
+//
+int    cmp_thread      (StgPtr tso1, StgPtr tso2);
+int    rts_getThreadId (StgPtr tso);
+pid_t  forkProcess     (HsStablePtr *entry);
+HsBool rtsSupportsBoundThreads (void);
+
+// The number of Capabilities
+extern unsigned int n_capabilities;
+
+#if !IN_STG_CODE
+extern Capability MainCapability;
 #endif
+
+#endif /* RTS_THREADS_H */
diff --git a/includes/rts/Timer.h b/includes/rts/Timer.h
new file mode 100644
index 0000000000000000000000000000000000000000..e3a5c2dc696c0a8520ffeb905c8c505df8232a15
--- /dev/null
+++ b/includes/rts/Timer.h
@@ -0,0 +1,15 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1995-2006
+ *
+ * Interface to the RTS timer signal (uses OS-dependent Ticker.h underneath)
+ *
+ * ---------------------------------------------------------------------------*/
+
+#ifndef RTS_TIMER_H
+#define RTS_TIMER_H
+
+void startTimer (void);
+void stopTimer  (void);
+
+#endif /* RTS_TIMER_H */
diff --git a/includes/RtsTypes.h b/includes/rts/Types.h
similarity index 89%
rename from includes/RtsTypes.h
rename to includes/rts/Types.h
index 79bbf1fccf0898311f4a8f796454b76b2cab867e..6f399e083d3697f774f1fbb1ee553abf60511bf8 100644
--- a/includes/RtsTypes.h
+++ b/includes/rts/Types.h
@@ -35,10 +35,8 @@ typedef enum {
     rtsTrue 
 } rtsBool;
 
-/* 
-   Types specific to the parallel runtime system.
-*/
-
-typedef ullong        rtsTime;
+typedef struct StgClosure_   StgClosure;
+typedef struct StgInfoTable_ StgInfoTable;
+typedef struct StgTSO_       StgTSO;
 
 #endif /* RTS_TYPES_H */
diff --git a/includes/StgProf.h b/includes/rts/prof/CCS.h
similarity index 99%
rename from includes/StgProf.h
rename to includes/rts/prof/CCS.h
index 9b3ce69a9f7d365b8b4e5f7d1e347b33695bf57a..3512930b7bd7b15c21ce483413c9ed7e81348798 100644
--- a/includes/StgProf.h
+++ b/includes/rts/prof/CCS.h
@@ -6,8 +6,8 @@
  *
  * ---------------------------------------------------------------------------*/
 
-#ifndef STGPROF_H
-#define STGPROF_H
+#ifndef RTS_PROF_CCS_H
+#define RTS_PROF_CCS_H
 
 /* -----------------------------------------------------------------------------
  * Data Structures 
@@ -234,5 +234,5 @@ extern CostCentreStack * RTS_VAR(CCS_LIST);         /* registered CCS list */
  
 #endif /* PROFILING */
 
-#endif /* STGPROF_H */
+#endif /* RTS_PROF_CCS_H */
 
diff --git a/includes/StgLdvProf.h b/includes/rts/prof/LDV.h
similarity index 96%
rename from includes/StgLdvProf.h
rename to includes/rts/prof/LDV.h
index 3c3df1c5fad7c793232137fcd5bf49012fffd8ea..c51b10647e97d24e9e3155e2dd4f0973e667c271 100644
--- a/includes/StgLdvProf.h
+++ b/includes/rts/prof/LDV.h
@@ -6,8 +6,8 @@
  *
  * ---------------------------------------------------------------------------*/
 
-#ifndef STGLDVPROF_H
-#define STGLDVPROF_H
+#ifndef RTS_PROF_LDV_H
+#define RTS_PROF_LDV_H
 
 #ifdef PROFILING
 
@@ -42,4 +42,5 @@
 #define LDV_RECORD_DEAD_FILL_SLOP_DYNAMIC(c)  /* nothing */
 
 #endif /* PROFILING */
+
 #endif /* STGLDVPROF_H */
diff --git a/includes/Block.h b/includes/rts/storage/Block.h
similarity index 98%
rename from includes/Block.h
rename to includes/rts/storage/Block.h
index ec894da02e9655fddc6cce3dde2c7877e994e87b..849f99f430c0ab1c4cb5c6273bbc1e2fcab5dbc0 100644
--- a/includes/Block.h
+++ b/includes/rts/storage/Block.h
@@ -6,8 +6,8 @@
  *
  * ---------------------------------------------------------------------------*/
 
-#ifndef BLOCK_H
-#define BLOCK_H
+#ifndef RTS_STORAGE_BLOCK_H
+#define RTS_STORAGE_BLOCK_H
 
 /* The actual block and megablock-size constants are defined in
  * includes/Constants.h, all constants here are derived from these.
@@ -268,4 +268,4 @@ round_up_to_mblocks(StgWord words)
 }
 
 #endif /* !CMINUSMINUS */
-#endif /* BLOCK_H */
+#endif /* RTS_STORAGE_BLOCK_H */
diff --git a/includes/rts/storage/ClosureMacros.h b/includes/rts/storage/ClosureMacros.h
new file mode 100644
index 0000000000000000000000000000000000000000..458960f3f7ccc95717629ac1278beca1a9024899
--- /dev/null
+++ b/includes/rts/storage/ClosureMacros.h
@@ -0,0 +1,395 @@
+/* ----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1998-2004
+ *
+ * Macros for building and manipulating closures
+ *
+ * -------------------------------------------------------------------------- */
+
+#ifndef RTS_STORAGE_CLOSUREMACROS_H
+#define RTS_STORAGE_CLOSUREMACROS_H
+
+/* -----------------------------------------------------------------------------
+   Info tables are slammed up against the entry code, and the label
+   for the info table is at the *end* of the table itself.  This
+   inline function adjusts an info pointer to point to the beginning
+   of the table, so we can use standard C structure indexing on it.
+
+   Note: this works for SRT info tables as long as you don't want to
+   access the SRT, since they are laid out the same with the SRT
+   pointer as the first word in the table.
+
+   NOTES ABOUT MANGLED C VS. MINI-INTERPRETER:
+
+   A couple of definitions:
+
+       "info pointer"    The first word of the closure.  Might point
+                         to either the end or the beginning of the
+			 info table, depending on whether we're using
+			 the mini interpretter or not.  GET_INFO(c)
+			 retrieves the info pointer of a closure.
+
+       "info table"      The info table structure associated with a
+                         closure.  This is always a pointer to the
+			 beginning of the structure, so we can
+			 use standard C structure indexing to pull out
+			 the fields.  get_itbl(c) returns a pointer to
+			 the info table for closure c.
+
+   An address of the form xxxx_info points to the end of the info
+   table or the beginning of the info table depending on whether we're
+   mangling or not respectively.  So, 
+
+         c->header.info = xxx_info 
+
+   makes absolute sense, whether mangling or not.
+ 
+   -------------------------------------------------------------------------- */
+
+#define SET_INFO(c,i) ((c)->header.info = (i))
+#define GET_INFO(c)   ((c)->header.info)
+#define GET_ENTRY(c)  (ENTRY_CODE(GET_INFO(c)))
+
+#define get_itbl(c)   (INFO_PTR_TO_STRUCT((c)->header.info))
+#define get_ret_itbl(c) (RET_INFO_PTR_TO_STRUCT((c)->header.info))
+#define get_fun_itbl(c) (FUN_INFO_PTR_TO_STRUCT((c)->header.info))
+#define get_thunk_itbl(c) (THUNK_INFO_PTR_TO_STRUCT((c)->header.info))
+#define get_con_itbl(c) (CON_INFO_PTR_TO_STRUCT((c)->header.info))
+
+#define GET_TAG(con) (get_itbl(con)->srt_bitmap)
+
+#ifdef TABLES_NEXT_TO_CODE
+#define INFO_PTR_TO_STRUCT(info) ((StgInfoTable *)(info) - 1)
+#define RET_INFO_PTR_TO_STRUCT(info) ((StgRetInfoTable *)(info) - 1)
+#define FUN_INFO_PTR_TO_STRUCT(info) ((StgFunInfoTable *)(info) - 1)
+#define THUNK_INFO_PTR_TO_STRUCT(info) ((StgThunkInfoTable *)(info) - 1)
+#define CON_INFO_PTR_TO_STRUCT(info) ((StgConInfoTable *)(info) - 1)
+#define itbl_to_fun_itbl(i) ((StgFunInfoTable *)(((StgInfoTable *)(i) + 1)) - 1)
+#define itbl_to_ret_itbl(i) ((StgRetInfoTable *)(((StgInfoTable *)(i) + 1)) - 1)
+#define itbl_to_thunk_itbl(i) ((StgThunkInfoTable *)(((StgInfoTable *)(i) + 1)) - 1)
+#define itbl_to_con_itbl(i) ((StgConInfoTable *)(((StgInfoTable *)(i) + 1)) - 1)
+#else
+#define INFO_PTR_TO_STRUCT(info) ((StgInfoTable *)info)
+#define RET_INFO_PTR_TO_STRUCT(info) ((StgRetInfoTable *)info)
+#define FUN_INFO_PTR_TO_STRUCT(info) ((StgFunInfoTable *)info)
+#define THUNK_INFO_PTR_TO_STRUCT(info) ((StgThunkInfoTable *)info)
+#define CON_INFO_PTR_TO_STRUCT(info) ((StgConInfoTable *)info)
+#define itbl_to_fun_itbl(i) ((StgFunInfoTable *)(i))
+#define itbl_to_ret_itbl(i) ((StgRetInfoTable *)(i))
+#define itbl_to_thunk_itbl(i) ((StgThunkInfoTable *)(i))
+#define itbl_to_con_itbl(i) ((StgConInfoTable *)(i))
+#endif
+
+/* -----------------------------------------------------------------------------
+   Macros for building closures
+   -------------------------------------------------------------------------- */
+
+#ifdef PROFILING
+#ifdef DEBUG_RETAINER
+/* 
+  For the sake of debugging, we take the safest way for the moment. Actually, this 
+  is useful to check the sanity of heap before beginning retainer profiling.
+  flip is defined in RetainerProfile.c, and declared as extern in RetainerProfile.h.
+  Note: change those functions building Haskell objects from C datatypes, i.e.,
+  all rts_mk???() functions in RtsAPI.c, as well.
+ */
+#define SET_PROF_HDR(c,ccs_)            \
+        ((c)->header.prof.ccs = ccs_, (c)->header.prof.hp.rs = (retainerSet *)((StgWord)NULL | flip))
+#else
+/*
+  For retainer profiling only: we do not have to set (c)->header.prof.hp.rs to
+  NULL | flip (flip is defined in RetainerProfile.c) because even when flip
+  is 1, rs is invalid and will be initialized to NULL | flip later when 
+  the closure *c is visited.
+ */
+/*
+#define SET_PROF_HDR(c,ccs_)            \
+        ((c)->header.prof.ccs = ccs_, (c)->header.prof.hp.rs = NULL)
+ */
+/*
+  The following macro works for both retainer profiling and LDV profiling:
+  for retainer profiling, ldvTime remains 0, so rs fields are initialized to 0.
+  See the invariants on ldvTime.
+ */
+#define SET_PROF_HDR(c,ccs_)            \
+        ((c)->header.prof.ccs = ccs_,   \
+        LDV_RECORD_CREATE((c)))
+#endif /* DEBUG_RETAINER */
+#else
+#define SET_PROF_HDR(c,ccs)
+#endif
+
+#define SET_HDR(c,_info,ccs)				\
+   {							\
+	(c)->header.info = _info;			\
+	SET_PROF_HDR((StgClosure *)(c),ccs);		\
+   }
+
+#define SET_ARR_HDR(c,info,costCentreStack,n_words)	\
+   SET_HDR(c,info,costCentreStack);			\
+   (c)->words = n_words;
+
+/* -----------------------------------------------------------------------------
+   How to get hold of the static link field for a static closure.
+   -------------------------------------------------------------------------- */
+
+/* These are hard-coded. */
+#define FUN_STATIC_LINK(p)   (&(p)->payload[0])
+#define THUNK_STATIC_LINK(p) (&(p)->payload[1])
+#define IND_STATIC_LINK(p)   (&(p)->payload[1])
+
+INLINE_HEADER StgClosure **
+STATIC_LINK(const StgInfoTable *info, StgClosure *p)
+{ 
+    switch (info->type) {
+    case THUNK_STATIC:
+	return THUNK_STATIC_LINK(p);
+    case FUN_STATIC:
+	return FUN_STATIC_LINK(p);
+    case IND_STATIC:
+	return IND_STATIC_LINK(p);
+    default:
+	return &(p)->payload[info->layout.payload.ptrs +
+			     info->layout.payload.nptrs];
+    }
+}
+
+#define STATIC_LINK2(info,p)							\
+   (*(StgClosure**)(&((p)->payload[info->layout.payload.ptrs +			\
+					info->layout.payload.nptrs + 1])))
+
+/* -----------------------------------------------------------------------------
+   INTLIKE and CHARLIKE closures.
+   -------------------------------------------------------------------------- */
+
+#define CHARLIKE_CLOSURE(n) ((P_)&stg_CHARLIKE_closure[(n)-MIN_CHARLIKE])
+#define INTLIKE_CLOSURE(n)  ((P_)&stg_INTLIKE_closure[(n)-MIN_INTLIKE])
+
+/* ----------------------------------------------------------------------------
+   Macros for untagging and retagging closure pointers
+   For more information look at the comments in Cmm.h
+   ------------------------------------------------------------------------- */
+
+static inline StgWord
+GET_CLOSURE_TAG(StgClosure * p)
+{
+    return (StgWord)p & TAG_MASK;
+}
+
+static inline StgClosure *
+UNTAG_CLOSURE(StgClosure * p)
+{
+    return (StgClosure*)((StgWord)p & ~TAG_MASK);
+}
+
+static inline StgClosure *
+TAG_CLOSURE(StgWord tag,StgClosure * p)
+{
+    return (StgClosure*)((StgWord)p | tag);
+}
+
+/* -----------------------------------------------------------------------------
+   Forwarding pointers
+   -------------------------------------------------------------------------- */
+
+#define IS_FORWARDING_PTR(p) ((((StgWord)p) & 1) != 0)
+#define MK_FORWARDING_PTR(p) (((StgWord)p) | 1)
+#define UN_FORWARDING_PTR(p) (((StgWord)p) - 1)
+
+/* -----------------------------------------------------------------------------
+   DEBUGGING predicates for pointers
+
+   LOOKS_LIKE_INFO_PTR(p)    returns False if p is definitely not an info ptr
+   LOOKS_LIKE_CLOSURE_PTR(p) returns False if p is definitely not a closure ptr
+
+   These macros are complete but not sound.  That is, they might
+   return false positives.  Do not rely on them to distinguish info
+   pointers from closure pointers, for example.
+
+   We don't use address-space predicates these days, for portability
+   reasons, and the fact that code/data can be scattered about the
+   address space in a dynamically-linked environment.  Our best option
+   is to look at the alleged info table and see whether it seems to
+   make sense...
+   -------------------------------------------------------------------------- */
+
+INLINE_HEADER rtsBool LOOKS_LIKE_INFO_PTR_NOT_NULL (StgWord p)
+{
+    StgInfoTable *info = INFO_PTR_TO_STRUCT(p);
+    return info->type != INVALID_OBJECT && info->type < N_CLOSURE_TYPES;
+}
+
+INLINE_HEADER rtsBool LOOKS_LIKE_INFO_PTR (StgWord p)
+{
+    return p && (IS_FORWARDING_PTR(p) || LOOKS_LIKE_INFO_PTR_NOT_NULL(p));
+}
+
+INLINE_HEADER rtsBool LOOKS_LIKE_CLOSURE_PTR (void *p)
+{
+    return LOOKS_LIKE_INFO_PTR((StgWord)(UNTAG_CLOSURE((StgClosure *)(p)))->header.info);
+}
+
+/* -----------------------------------------------------------------------------
+   Macros for calculating the size of a closure
+   -------------------------------------------------------------------------- */
+
+INLINE_HEADER StgOffset PAP_sizeW   ( nat n_args )
+{ return sizeofW(StgPAP) + n_args; }
+
+INLINE_HEADER StgOffset AP_sizeW   ( nat n_args )
+{ return sizeofW(StgAP) + n_args; }
+
+INLINE_HEADER StgOffset AP_STACK_sizeW ( nat size )
+{ return sizeofW(StgAP_STACK) + size; }
+
+INLINE_HEADER StgOffset CONSTR_sizeW( nat p, nat np )
+{ return sizeofW(StgHeader) + p + np; }
+
+INLINE_HEADER StgOffset THUNK_SELECTOR_sizeW ( void )
+{ return sizeofW(StgSelector); }
+
+INLINE_HEADER StgOffset BLACKHOLE_sizeW ( void )
+{ return sizeofW(StgHeader)+MIN_PAYLOAD_SIZE; }
+
+/* --------------------------------------------------------------------------
+   Sizes of closures
+   ------------------------------------------------------------------------*/
+
+INLINE_HEADER StgOffset sizeW_fromITBL( const StgInfoTable* itbl )
+{ return sizeofW(StgClosure)
+       + sizeofW(StgPtr)  * itbl->layout.payload.ptrs
+       + sizeofW(StgWord) * itbl->layout.payload.nptrs; }
+
+INLINE_HEADER StgOffset thunk_sizeW_fromITBL( const StgInfoTable* itbl )
+{ return sizeofW(StgThunk)
+       + sizeofW(StgPtr)  * itbl->layout.payload.ptrs
+       + sizeofW(StgWord) * itbl->layout.payload.nptrs; }
+
+INLINE_HEADER StgOffset ap_stack_sizeW( StgAP_STACK* x )
+{ return AP_STACK_sizeW(x->size); }
+
+INLINE_HEADER StgOffset ap_sizeW( StgAP* x )
+{ return AP_sizeW(x->n_args); }
+
+INLINE_HEADER StgOffset pap_sizeW( StgPAP* x )
+{ return PAP_sizeW(x->n_args); }
+
+INLINE_HEADER StgOffset arr_words_sizeW( StgArrWords* x )
+{ return sizeofW(StgArrWords) + x->words; }
+
+INLINE_HEADER StgOffset mut_arr_ptrs_sizeW( StgMutArrPtrs* x )
+{ return sizeofW(StgMutArrPtrs) + x->ptrs; }
+
+INLINE_HEADER StgWord tso_sizeW ( StgTSO *tso )
+{ return TSO_STRUCT_SIZEW + tso->stack_size; }
+
+INLINE_HEADER StgWord bco_sizeW ( StgBCO *bco )
+{ return bco->size; }
+
+INLINE_HEADER nat
+closure_sizeW_ (StgClosure *p, StgInfoTable *info)
+{
+    switch (info->type) {
+    case THUNK_0_1:
+    case THUNK_1_0:
+	return sizeofW(StgThunk) + 1;
+    case FUN_0_1:
+    case CONSTR_0_1:
+    case FUN_1_0:
+    case CONSTR_1_0:
+	return sizeofW(StgHeader) + 1;
+    case THUNK_0_2:
+    case THUNK_1_1:
+    case THUNK_2_0:
+	return sizeofW(StgThunk) + 2;
+    case FUN_0_2:
+    case CONSTR_0_2:
+    case FUN_1_1:
+    case CONSTR_1_1:
+    case FUN_2_0:
+    case CONSTR_2_0:
+	return sizeofW(StgHeader) + 2;
+    case THUNK:
+	return thunk_sizeW_fromITBL(info);
+    case THUNK_SELECTOR:
+	return THUNK_SELECTOR_sizeW();
+    case AP_STACK:
+	return ap_stack_sizeW((StgAP_STACK *)p);
+    case AP:
+	return ap_sizeW((StgAP *)p);
+    case PAP:
+	return pap_sizeW((StgPAP *)p);
+    case IND:
+    case IND_PERM:
+    case IND_OLDGEN:
+    case IND_OLDGEN_PERM:
+	return sizeofW(StgInd);
+    case ARR_WORDS:
+	return arr_words_sizeW((StgArrWords *)p);
+    case MUT_ARR_PTRS_CLEAN:
+    case MUT_ARR_PTRS_DIRTY:
+    case MUT_ARR_PTRS_FROZEN:
+    case MUT_ARR_PTRS_FROZEN0:
+	return mut_arr_ptrs_sizeW((StgMutArrPtrs*)p);
+    case TSO:
+	return tso_sizeW((StgTSO *)p);
+    case BCO:
+	return bco_sizeW((StgBCO *)p);
+    case TVAR_WATCH_QUEUE:
+        return sizeofW(StgTVarWatchQueue);
+    case TVAR:
+        return sizeofW(StgTVar);
+    case TREC_CHUNK:
+        return sizeofW(StgTRecChunk);
+    case TREC_HEADER:
+        return sizeofW(StgTRecHeader);
+    case ATOMIC_INVARIANT:
+        return sizeofW(StgAtomicInvariant);
+    case INVARIANT_CHECK_QUEUE:
+        return sizeofW(StgInvariantCheckQueue);
+    default:
+	return sizeW_fromITBL(info);
+    }
+}
+
+// The definitive way to find the size, in words, of a heap-allocated closure
+INLINE_HEADER nat
+closure_sizeW (StgClosure *p)
+{
+    return closure_sizeW_(p, get_itbl(p));
+}
+
+/* -----------------------------------------------------------------------------
+   Sizes of stack frames
+   -------------------------------------------------------------------------- */
+
+INLINE_HEADER StgWord stack_frame_sizeW( StgClosure *frame )
+{
+    StgRetInfoTable *info;
+
+    info = get_ret_itbl(frame);
+    switch (info->i.type) {
+
+    case RET_DYN:
+    {
+	StgRetDyn *dyn = (StgRetDyn *)frame;
+	return  sizeofW(StgRetDyn) + RET_DYN_BITMAP_SIZE + 
+	    RET_DYN_NONPTR_REGS_SIZE +
+	    RET_DYN_PTRS(dyn->liveness) + RET_DYN_NONPTRS(dyn->liveness);
+    }
+	    
+    case RET_FUN:
+	return sizeofW(StgRetFun) + ((StgRetFun *)frame)->size;
+
+    case RET_BIG:
+	return 1 + GET_LARGE_BITMAP(&info->i)->size;
+
+    case RET_BCO:
+	return 2 + BCO_BITMAP_SIZE((StgBCO *)((P_)frame)[1]);
+
+    default:
+	return 1 + BITMAP_SIZE(info->i.layout.bitmap);
+    }
+}
+
+#endif /* RTS_STORAGE_CLOSUREMACROS_H */
diff --git a/includes/ClosureTypes.h b/includes/rts/storage/ClosureTypes.h
similarity index 96%
rename from includes/ClosureTypes.h
rename to includes/rts/storage/ClosureTypes.h
index 99bd3060ff0d3926634f379c7736d3c8ea8cc6d0..3415d423a35487504d6df6311a5374d623cbc2f8 100644
--- a/includes/ClosureTypes.h
+++ b/includes/rts/storage/ClosureTypes.h
@@ -7,8 +7,8 @@
  *
  * -------------------------------------------------------------------------- */
 
-#ifndef CLOSURETYPES_H
-#define CLOSURETYPES_H
+#ifndef RTS_STORAGE_CLOSURETYPES_H
+#define RTS_STORAGE_CLOSURETYPES_H
 
 /* 
  * WARNING WARNING WARNING
@@ -93,4 +93,4 @@
 #define WHITEHOLE               69
 #define N_CLOSURE_TYPES         70
 
-#endif /* CLOSURETYPES_H */
+#endif /* RTS_STORAGE_CLOSURETYPES_H */
diff --git a/includes/Closures.h b/includes/rts/storage/Closures.h
similarity index 97%
rename from includes/Closures.h
rename to includes/rts/storage/Closures.h
index eb5d1ed89dea09d9782e69726d2fdaa482dbf604..6e06e57f3cf9bd0c537c394d054904d0492bc1ef 100644
--- a/includes/Closures.h
+++ b/includes/rts/storage/Closures.h
@@ -6,8 +6,8 @@
  *
  * -------------------------------------------------------------------------- */
 
-#ifndef CLOSURES_H
-#define CLOSURES_H
+#ifndef RTS_STORAGE_CLOSURES_H
+#define RTS_STORAGE_CLOSURES_H
 
 /*
  * The Layout of a closure header depends on which kind of system we're
@@ -51,14 +51,14 @@ typedef struct {
    -------------------------------------------------------------------------- */
 
 typedef struct {
-    const struct _StgInfoTable* info;
+    const StgInfoTable* info;
 #ifdef PROFILING
     StgProfHeader         prof;
 #endif
 } StgHeader;
 
 typedef struct {
-    const struct _StgInfoTable* info;
+    const StgInfoTable* info;
 #ifdef PROFILING
     StgProfHeader         prof;
 #endif
@@ -77,10 +77,10 @@ typedef struct {
 
 /* All closures follow the generic format */
 
-struct StgClosure_ {
+typedef struct StgClosure_ {
     StgHeader   header;
     struct StgClosure_ *payload[FLEXIBLE_ARRAY];
-};
+} *StgClosurePtr; // StgClosure defined in Rts.h
 
 typedef struct {
     StgThunkHeader  header;
@@ -124,7 +124,7 @@ typedef struct {
     StgHeader     header;
     StgClosure   *indirectee;
     StgClosure   *static_link;
-    struct _StgInfoTable *saved_info;
+    StgInfoTable *saved_info;
 } StgIndStatic;
 
 typedef struct {
@@ -273,7 +273,7 @@ typedef struct {
    -------------------------------------------------------------------------- */
 
 typedef struct {
-    const struct _StgInfoTable* info;
+    const StgInfoTable* info;
     StgWord        liveness;
     StgWord        ret_addr;
     StgClosure *   payload[FLEXIBLE_ARRAY];
@@ -287,7 +287,7 @@ typedef struct {
  * The stack frame size is also cached in the frame for convenience.
  */
 typedef struct {
-    const struct _StgInfoTable* info;
+    const StgInfoTable* info;
     StgWord        size;
     StgClosure *   fun;
     StgClosure *   payload[FLEXIBLE_ARRAY];
@@ -414,4 +414,4 @@ typedef struct {
   StgClosure    *alt_code;
 } StgCatchRetryFrame;
 
-#endif /* CLOSURES_H */
+#endif /* RTS_STORAGE_CLOSURES_H */
diff --git a/includes/StgFun.h b/includes/rts/storage/FunTypes.h
similarity index 92%
rename from includes/StgFun.h
rename to includes/rts/storage/FunTypes.h
index e6f9b1fe0ef07edfbf3680c42a42472fbea99d2e..402c913bcd9ab7fd3b135b7c942670e6025319e0 100644
--- a/includes/StgFun.h
+++ b/includes/rts/storage/FunTypes.h
@@ -1,11 +1,13 @@
 /* -----------------------------------------------------------------------------
+ *
  * (c) The GHC Team, 2002
  *
  * Things for functions.
+ *
  * ---------------------------------------------------------------------------*/
 
-#ifndef STGFUN_H
-#define STGFUN_H
+#ifndef RTS_STORAGE_FUNTYPES_H
+#define RTS_STORAGE_FUNTYPES_
 
 /* generic - function comes with a small bitmap */
 #define ARG_GEN      0   
@@ -49,4 +51,4 @@
 #define ARG_PPPPPPP  24
 #define ARG_PPPPPPPP 25
 
-#endif /* STGFUN_H */
+#endif /* RTS_STORAGE_FUNTYPES_H */
diff --git a/includes/rts/storage/GC.h b/includes/rts/storage/GC.h
new file mode 100644
index 0000000000000000000000000000000000000000..df4ba9d15334b2402fa9b33f62b90087cd6ced36
--- /dev/null
+++ b/includes/rts/storage/GC.h
@@ -0,0 +1,204 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1998-2004
+ *
+ * External Storage Manger Interface
+ *
+ * ---------------------------------------------------------------------------*/
+
+#ifndef RTS_STORAGE_GC_H
+#define RTS_STORAGE_GC_H
+
+#include <stddef.h>
+#include "rts/OSThreads.h"
+
+/* -----------------------------------------------------------------------------
+ * Generational GC
+ *
+ * We support an arbitrary number of generations, with an arbitrary number
+ * of steps per generation.  Notes (in no particular order):
+ *
+ *       - all generations except the oldest should have the same
+ *         number of steps.  Multiple steps gives objects a decent
+ *         chance to age before being promoted, and helps ensure that
+ *         we don't end up with too many thunks being updated in older
+ *         generations.
+ *
+ *       - the oldest generation has one step.  There's no point in aging
+ *         objects in the oldest generation.
+ *
+ *       - generation 0, step 0 (G0S0) is the allocation area.  It is given
+ *         a fixed set of blocks during initialisation, and these blocks
+ *         normally stay in G0S0.  In parallel execution, each
+ *         Capability has its own nursery.
+ *
+ *       - during garbage collection, each step which is an evacuation
+ *         destination (i.e. all steps except G0S0) is allocated a to-space.
+ *         evacuated objects are allocated into the step's to-space until
+ *         GC is finished, when the original step's contents may be freed
+ *         and replaced by the to-space.
+ *
+ *       - the mutable-list is per-generation (not per-step).  G0 doesn't 
+ *         have one (since every garbage collection collects at least G0).
+ * 
+ *       - block descriptors contain pointers to both the step and the
+ *         generation that the block belongs to, for convenience.
+ *
+ *       - static objects are stored in per-generation lists.  See GC.c for
+ *         details of how we collect CAFs in the generational scheme.
+ *
+ *       - large objects are per-step, and are promoted in the same way
+ *         as small objects, except that we may allocate large objects into
+ *         generation 1 initially.
+ *
+ * ------------------------------------------------------------------------- */
+
+typedef struct step_ {
+    unsigned int         no;		// step number in this generation
+    unsigned int         abs_no;	// absolute step number
+
+    struct generation_ * gen;		// generation this step belongs to
+    unsigned int         gen_no;        // generation number (cached)
+
+    bdescr *             blocks;	// blocks in this step
+    unsigned int         n_blocks;	// number of blocks
+    unsigned int         n_words;       // number of words
+
+    struct step_ *       to;		// destination step for live objects
+
+    bdescr *             large_objects;	 // large objects (doubly linked)
+    unsigned int         n_large_blocks; // no. of blocks used by large objs
+
+    StgTSO *             threads;       // threads in this step
+                                        // linked via global_link
+
+    // ------------------------------------
+    // Fields below are used during GC only
+
+    // During GC, if we are collecting this step, blocks and n_blocks
+    // are copied into the following two fields.  After GC, these blocks
+    // are freed.
+
+#if defined(THREADED_RTS)
+    char pad[128];                      // make sure the following is
+                                        // on a separate cache line.
+    SpinLock     sync_large_objects;    // lock for large_objects
+                                        //    and scavenged_large_objects
+#endif
+
+    int          mark;			// mark (not copy)? (old gen only)
+    int          compact;		// compact (not sweep)? (old gen only)
+
+    bdescr *     old_blocks;	        // bdescr of first from-space block
+    unsigned int n_old_blocks;		// number of blocks in from-space
+    unsigned int live_estimate;         // for sweeping: estimate of live data
+    
+    bdescr *     part_blocks;           // partially-full scanned blocks
+    unsigned int n_part_blocks;         // count of above
+
+    bdescr *     scavenged_large_objects;  // live large objs after GC (d-link)
+    unsigned int n_scavenged_large_blocks; // size (not count) of above
+
+    bdescr *     bitmap;  		// bitmap for compacting collection
+
+    StgTSO *     old_threads;
+
+} step;
+
+
+typedef struct generation_ {
+    unsigned int   no;			// generation number
+    step *         steps;		// steps
+    unsigned int   n_steps;		// number of steps
+    unsigned int   max_blocks;		// max blocks in step 0
+    bdescr        *mut_list;      	// mut objects in this gen (not G0)
+    
+    // stats information
+    unsigned int collections;
+    unsigned int par_collections;
+    unsigned int failed_promotions;
+
+    // temporary use during GC:
+    bdescr        *saved_mut_list;
+} generation;
+
+extern generation * generations;
+
+extern generation * g0;
+extern step * g0s0;
+extern generation * oldest_gen;
+extern step * all_steps;
+extern nat total_steps;
+
+/* -----------------------------------------------------------------------------
+   Generic allocation
+
+   StgPtr allocateInGen(generation *g, nat n)
+                                Allocates a chunk of contiguous store
+   				n words long in generation g,
+   				returning a pointer to the first word.
+   				Always succeeds.
+				
+   StgPtr allocate(nat n)       Equaivalent to allocateInGen(g0)
+				
+   StgPtr allocateLocal(Capability *cap, nat n)
+                                Allocates memory from the nursery in
+				the current Capability.  This can be
+				done without taking a global lock,
+                                unlike allocate().
+
+   StgPtr allocatePinned(nat n) Allocates a chunk of contiguous store
+   				n words long, which is at a fixed
+				address (won't be moved by GC).  
+				Returns a pointer to the first word.
+				Always succeeds.
+				
+				NOTE: the GC can't in general handle
+				pinned objects, so allocatePinned()
+				can only be used for ByteArrays at the
+				moment.
+
+				Don't forget to TICK_ALLOC_XXX(...)
+				after calling allocate or
+				allocatePinned, for the
+				benefit of the ticky-ticky profiler.
+
+   rtsBool doYouWantToGC(void)  Returns True if the storage manager is
+   				ready to perform a GC, False otherwise.
+
+   lnat  allocatedBytes(void)  Returns the number of bytes allocated
+                                via allocate() since the last GC.
+				Used in the reporting of statistics.
+
+   -------------------------------------------------------------------------- */
+
+StgPtr  allocate        ( lnat n );
+StgPtr  allocateInGen   ( generation *g, lnat n );
+StgPtr  allocateLocal   ( Capability *cap, lnat n );
+StgPtr  allocatePinned  ( lnat n );
+lnat    allocatedBytes  ( void );
+
+/* memory allocator for executable memory */
+void * allocateExec(unsigned int len, void **exec_addr);
+void   freeExec (void *p);
+
+/* -----------------------------------------------------------------------------
+   Performing Garbage Collection
+   -------------------------------------------------------------------------- */
+
+void performGC(void);
+void performMajorGC(void);
+
+/* -----------------------------------------------------------------------------
+   The CAF table - used to let us revert CAFs in GHCi
+   -------------------------------------------------------------------------- */
+
+void newCAF     (StgClosure*);
+void newDynCAF  (StgClosure *);
+void revertCAFs (void);
+
+/* set to disable CAF garbage collection in GHCi. */
+/* (needed when dynamic libraries are used). */
+extern rtsBool keepCAFs;
+
+#endif /* RTS_STORAGE_GC_H */
diff --git a/includes/InfoTables.h b/includes/rts/storage/InfoTables.h
similarity index 97%
rename from includes/InfoTables.h
rename to includes/rts/storage/InfoTables.h
index 0c6ab52745380e26de719a1616dc284eb3ede56c..4596ce2d750fd71cac5578a2501811c1401faa8d 100644
--- a/includes/InfoTables.h
+++ b/includes/rts/storage/InfoTables.h
@@ -6,8 +6,8 @@
  *
  * -------------------------------------------------------------------------- */
 
-#ifndef INFOTABLES_H
-#define INFOTABLES_H
+#ifndef RTS_STORAGE_INFOTABLES_H
+#define RTS_STORAGE_INFOTABLES_H
 
 /* ----------------------------------------------------------------------------
    Relative pointers
@@ -212,11 +212,7 @@ typedef union {
 /*
  * The "standard" part of an info table.  Every info table has this bit.
  */
-typedef struct _StgInfoTable {
-
-#ifndef TABLES_NEXT_TO_CODE
-    StgFunPtr       entry;	/* pointer to the entry code */
-#endif
+typedef struct StgInfoTable_ {
 
 #ifdef PROFILING
     StgProfInfo     prof;
@@ -236,7 +232,7 @@ typedef struct _StgInfoTable {
 #ifdef TABLES_NEXT_TO_CODE
     StgCode         code[FLEXIBLE_ARRAY];
 #endif
-} StgInfoTable;
+} *StgInfoTablePtr;
 
 
 /* -----------------------------------------------------------------------------
@@ -254,7 +250,7 @@ typedef struct _StgInfoTable {
       bitmap fields have also been omitted.
    -------------------------------------------------------------------------- */
 
-typedef struct _StgFunInfoExtraRev {
+typedef struct StgFunInfoExtraRev_ {
     OFFSET_FIELD ( slow_apply_offset ); /* apply to args on the stack */
     union { 
 	StgWord bitmap;
@@ -265,7 +261,7 @@ typedef struct _StgFunInfoExtraRev {
     StgHalfWord    arity;       /* function arity */
 } StgFunInfoExtraRev;
 
-typedef struct _StgFunInfoExtraFwd {
+typedef struct StgFunInfoExtraFwd_ {
     StgHalfWord    fun_type;    /* function type */
     StgHalfWord    arity;       /* function arity */
     StgSRT         *srt;	/* pointer to the SRT table */
@@ -313,7 +309,7 @@ typedef struct {
  * pointer iff srt_bitmap is zero.
  */
 
-typedef struct _StgThunkInfoTable {
+typedef struct StgThunkInfoTable_ {
 #if !defined(TABLES_NEXT_TO_CODE)
     StgInfoTable i;
 #endif
@@ -331,7 +327,7 @@ typedef struct _StgThunkInfoTable {
    Constructor info tables
    -------------------------------------------------------------------------- */
 
-typedef struct _StgConInfoTable {
+typedef struct StgConInfoTable_ {
 #if !defined(TABLES_NEXT_TO_CODE)
     StgInfoTable i;
 #endif
@@ -410,4 +406,5 @@ typedef struct _StgConInfoTable {
 #else
 #define GET_PROF_DESC(info) ((info)->prof.closure_desc)
 #endif
-#endif /* INFOTABLES_H */
+
+#endif /* RTS_STORAGE_INFOTABLES_H */
diff --git a/includes/Liveness.h b/includes/rts/storage/Liveness.h
similarity index 90%
rename from includes/Liveness.h
rename to includes/rts/storage/Liveness.h
index cc93cae34f9c92c020ed8697e2a1a735bfc3a188..66c82f3134d33f4a52c1fe967771d4b69fea19b3 100644
--- a/includes/Liveness.h
+++ b/includes/rts/storage/Liveness.h
@@ -11,8 +11,8 @@
  *
  * -------------------------------------------------------------------------- */
 
-#ifndef LIVENESS_H
-#define LIVENESS_H
+#ifndef RTS_STORAGE_LIVENESS_H
+#define RTS_STORAGE_LIVENESS_H
 
 #define NO_PTRS   0xff
 #define R1_PTR	  (NO_PTRS ^ (1<<0))
@@ -31,4 +31,4 @@
 #define RET_DYN_PTRS(l)    ((l)>>24 & 0xff)
 #define RET_DYN_LIVENESS(l) ((l) & 0xffff)
 
-#endif /* LIVENESS_H */
+#endif /* RTS_STORAGE_LIVENESS_H */
diff --git a/rts/sm/MBlock.h b/includes/rts/storage/MBlock.h
similarity index 98%
rename from rts/sm/MBlock.h
rename to includes/rts/storage/MBlock.h
index f9dddc3138d190461f146bd9619b20263da99e22..03396c1fd7e776f8d27f5b6d63131e10b226a69e 100644
--- a/rts/sm/MBlock.h
+++ b/includes/rts/storage/MBlock.h
@@ -9,12 +9,10 @@
  *
  * ---------------------------------------------------------------------------*/
 
-#ifndef MBLOCK_H
-#define MBLOCK_H
+#ifndef RTS_STORAGE_MBLOCK_H
+#define RTS_STORAGE_MBLOCK_H
 
-#include "GC.h"
-
-extern lnat RTS_VAR(mblocks_allocated);
+extern lnat mblocks_allocated;
 
 extern void initMBlocks(void);
 extern void * getMBlock(void);
@@ -205,4 +203,4 @@ StgBool HEAP_ALLOCED_GC(void *p)
 # error HEAP_ALLOCED not defined
 #endif
 
-#endif /* MBLOCK_H */
+#endif /* RTS_STORAGE_MBLOCK_H */
diff --git a/includes/SMPClosureOps.h b/includes/rts/storage/SMPClosureOps.h
similarity index 94%
rename from includes/SMPClosureOps.h
rename to includes/rts/storage/SMPClosureOps.h
index f46dbdefe8e7d4e9624bece53d2726e4663824cc..d5f7c3f295d966b5e009cf8bdc723387118f7a7e 100644
--- a/includes/SMPClosureOps.h
+++ b/includes/rts/storage/SMPClosureOps.h
@@ -6,8 +6,8 @@
  *
  * -------------------------------------------------------------------------- */
 
-#ifndef SMPCLOSUREOPS_H
-#define SMPCLOSUREOPS_H
+#ifndef RTS_STORAGE_SMPCLOSUREOPS_H
+#define RTS_STORAGE_SMPCLOSUREOPS_H
 
 #ifdef CMINUSMINUS
 
@@ -75,4 +75,4 @@ EXTERN_INLINE void unlockTSO(StgTSO *tso)
 
 #endif /* CMINUSMINUS */
 
-#endif /* SMPCLOSUREOPS_H */
+#endif /* RTS_STORAGE_SMPCLOSUREOPS_H */
diff --git a/includes/TSO.h b/includes/rts/storage/TSO.h
similarity index 90%
rename from includes/TSO.h
rename to includes/rts/storage/TSO.h
index af624f76667a14b86b5339056a3a3fb7306ad438..7cb245909f29e9426282afacad3f7ace4c6f8ee8 100644
--- a/includes/TSO.h
+++ b/includes/rts/storage/TSO.h
@@ -1,36 +1,13 @@
 /* -----------------------------------------------------------------------------
  *
- * (c) The GHC Team, 1998-1999
+ * (c) The GHC Team, 1998-2009
  *
  * The definitions for Thread State Objects.
  *
  * ---------------------------------------------------------------------------*/
 
-#ifndef TSO_H
-#define TSO_H
-
-#if DEBUG
-#define TSO_MAGIC 4321
-#endif
-
-typedef struct {
-  StgInt   pri;
-  StgInt   magic;
-  StgInt   sparkname;
-  rtsTime  startedat;
-  rtsBool  exported;
-  StgInt   basicblocks;
-  StgInt   allocs;
-  rtsTime  exectime;
-  rtsTime  fetchtime;
-  rtsTime  fetchcount;
-  rtsTime  blocktime;
-  StgInt   blockcount;
-  rtsTime  blockedat;
-  StgInt   globalsparks;
-  StgInt   localsparks;
-  rtsTime  clock;
-} StgTSOStatBuf;
+#ifndef RTS_STORAGE_TSO_H
+#define RTS_STORAGE_TSO_H
 
 /*
  * PROFILING info in a TSO
@@ -146,14 +123,14 @@ typedef struct StgTSO_ {
     StgPtr             sp;
     
     StgWord            stack[FLEXIBLE_ARRAY];
-} StgTSO;
+} *StgTSOPtr;
 
 /* -----------------------------------------------------------------------------
    functions
    -------------------------------------------------------------------------- */
 
-extern void dirty_TSO  (Capability *cap, StgTSO *tso);
-extern void setTSOLink (Capability *cap, StgTSO *tso, StgTSO *target);
+void dirty_TSO  (Capability *cap, StgTSO *tso);
+void setTSOLink (Capability *cap, StgTSO *tso, StgTSO *target);
 
 /* -----------------------------------------------------------------------------
    Invariants:
@@ -174,7 +151,7 @@ extern void setTSOLink (Capability *cap, StgTSO *tso, StgTSO *target);
         ----------------------------------------------------------------------
 	NotBlocked             NULL                 runnable_queue, or running
 	
-        BlockedOnBlackHole     the BLACKHOLE_BQ     the BLACKHOLE_BQ's queue
+        BlockedOnBlackHole     the BLACKHOLE        blackhole_queue
 	
         BlockedOnMVar          the MVAR             the MVAR's queue
 
@@ -226,4 +203,4 @@ extern StgTSO dummy_tso;
 /* this is the NIL ptr for a TSO queue (e.g. runnable queue) */
 #define END_TSO_QUEUE  ((StgTSO *)(void*)&stg_END_TSO_QUEUE_closure)
 
-#endif /* TSO_H */
+#endif /* RTS_STORAGE_TSO_H */
diff --git a/includes/StgDLL.h b/includes/stg/DLL.h
similarity index 100%
rename from includes/StgDLL.h
rename to includes/stg/DLL.h
diff --git a/includes/MachRegs.h b/includes/stg/MachRegs.h
similarity index 100%
rename from includes/MachRegs.h
rename to includes/stg/MachRegs.h
diff --git a/includes/StgMiscClosures.h b/includes/stg/MiscClosures.h
similarity index 99%
rename from includes/StgMiscClosures.h
rename to includes/stg/MiscClosures.h
index d5a03fcdd9227ee96642af39fc49a3ff732105fb..1591570780edb5ce6c8fb7a3217e9191e93d36cb 100644
--- a/includes/StgMiscClosures.h
+++ b/includes/stg/MiscClosures.h
@@ -253,7 +253,6 @@ RTS_INFO(stg_sel_5_upd_info);
 RTS_INFO(stg_sel_6_upd_info);
 RTS_INFO(stg_sel_7_upd_info);
 RTS_INFO(stg_sel_8_upd_info);
-RTS_INFO(stg_sel_8_upd_info);
 RTS_INFO(stg_sel_9_upd_info);
 RTS_INFO(stg_sel_10_upd_info);
 RTS_INFO(stg_sel_11_upd_info);
@@ -271,7 +270,6 @@ RTS_ENTRY(stg_sel_5_upd_entry);
 RTS_ENTRY(stg_sel_6_upd_entry);
 RTS_ENTRY(stg_sel_7_upd_entry);
 RTS_ENTRY(stg_sel_8_upd_entry);
-RTS_ENTRY(stg_sel_8_upd_entry);
 RTS_ENTRY(stg_sel_9_upd_entry);
 RTS_ENTRY(stg_sel_10_upd_entry);
 RTS_ENTRY(stg_sel_11_upd_entry);
@@ -517,7 +515,7 @@ RTS_FUN(orIntegerzh_fast);
 RTS_FUN(xorIntegerzh_fast);
 RTS_FUN(complementIntegerzh_fast);
 
-#ifdef SUPPORT_LONG_LONGS
+#if SIZEOF_HSINT == 4
 
 RTS_FUN(int64ToIntegerzh_fast);
 RTS_FUN(word64ToIntegerzh_fast);
diff --git a/includes/Regs.h b/includes/stg/Regs.h
similarity index 99%
rename from includes/Regs.h
rename to includes/stg/Regs.h
index beb71c9184fded30b85f2ead36d583cb29c5f087..fb26254d5a54d7769cf56b5afa05a6d75b78123a 100644
--- a/includes/Regs.h
+++ b/includes/stg/Regs.h
@@ -37,14 +37,9 @@ typedef union {
     StgWord        w;
     StgAddr        a;
     StgChar        c;
-    StgInt8        i8;
     StgFloat       f;
     StgInt         i;
     StgPtr         p;
-    StgClosurePtr  cl;
-    StgStackOffset offset;	/* unused? */
-    StgByteArray   b;
-    StgTSOPtr      t;
 } StgUnion;
 
 /* 
diff --git a/includes/SMP.h b/includes/stg/SMP.h
similarity index 91%
rename from includes/SMP.h
rename to includes/stg/SMP.h
index b7538424b9aa6d9050a55347ec81ccb3b1193661..5d9d80169b97fcb2e468756c1c7a744c34363d5e 100644
--- a/includes/SMP.h
+++ b/includes/stg/SMP.h
@@ -9,28 +9,17 @@
 #ifndef SMP_H
 #define SMP_H
 
-/* THREADED_RTS is currently not compatible with the following options:
- *
- *      PROFILING (but only 1 CPU supported)
- *      TICKY_TICKY
- *      Unregisterised builds are ok, but only 1 CPU supported.
- */
-
 #if defined(THREADED_RTS)
 
-#if defined(TICKY_TICKY)
-#error Build options incompatible with THREADED_RTS.
-#endif
-
 /* ----------------------------------------------------------------------------
    Atomic operations
    ------------------------------------------------------------------------- */
    
-#if !IN_STG_CODE
-// We only want write_barrier() declared in .hc files.  Defining the
-// other inline functions here causes type mismatch errors from gcc,
-// because the generated C code is assuming that there are no
-// prototypes in scope.
+#if !IN_STG_CODE || IN_STGCRUN
+// We only want the barriers, e.g. write_barrier(), declared in .hc
+// files.  Defining the other inline functions here causes type
+// mismatch errors from gcc, because the generated C code is assuming
+// that there are no prototypes in scope.
 
 /* 
  * The atomic exchange operation: xchg(p,w) exchanges the value
@@ -94,12 +83,8 @@ EXTERN_INLINE void load_load_barrier(void);
    Implementations
    ------------------------------------------------------------------------- */
 
-#if !IN_STG_CODE
+#if !IN_STG_CODE || IN_STGCRUN
 
-/* 
- * NB: the xchg instruction is implicitly locked, so we do not need
- * a lock prefix here. 
- */
 EXTERN_INLINE StgWord
 xchg(StgPtr p, StgWord w)
 {
@@ -107,6 +92,8 @@ xchg(StgPtr p, StgWord w)
 #if i386_HOST_ARCH || x86_64_HOST_ARCH
     result = w;
     __asm__ __volatile__ (
+        // NB: the xchg instruction is implicitly locked, so we do not
+        // need a lock prefix here.
  	  "xchg %1,%0"
           :"+r" (result), "+m" (*p)
           : /* no input-only operands */
diff --git a/includes/TailCalls.h b/includes/stg/TailCalls.h
similarity index 100%
rename from includes/TailCalls.h
rename to includes/stg/TailCalls.h
diff --git a/includes/TickyCounters.h b/includes/stg/Ticky.h
similarity index 98%
rename from includes/TickyCounters.h
rename to includes/stg/Ticky.h
index 38e84ef2aff7a88be8acfd13022679b70743d46b..fd7edf85c5bb27342d4db10f71b7856048b296b8 100644
--- a/includes/TickyCounters.h
+++ b/includes/stg/Ticky.h
@@ -3,7 +3,8 @@
  * (c) The GHC Team, 2007
  *
  * Declarations for counters used by ticky-ticky profiling.
- *----------------------------------------------------------------------------- */
+ *
+ * -------------------------------------------------------------------------- */
 
 
 #ifndef TICKYCOUNTERS_H
@@ -25,7 +26,6 @@
 #define EXTERN extern
 #endif
 
-
 /* Here are all the counter declarations: */
 
 EXTERN StgInt ENT_VIA_NODE_ctr INIT(0);
diff --git a/includes/StgTypes.h b/includes/stg/Types.h
similarity index 77%
rename from includes/StgTypes.h
rename to includes/stg/Types.h
index 7f2c08e5e232f32c7b8905ea55158767f684b099..227356c9eae2b354a9be1fe0012ed9d0162cc954 100644
--- a/includes/StgTypes.h
+++ b/includes/stg/Types.h
@@ -19,9 +19,8 @@
 	StgInt			Signed version of StgWord
 	StgAddr			Generic address type
 	
-	StgBool, StgVoid, StgClosurePtr, StgPtr, StgOffset, 
-	StgTSOPtr, StgForeignPtr, StgStackOffset, StgStackPtr,
-	StgCode, StgArray, StgByteArray, StgStablePtr, StgFunPtr,
+	StgBool, StgVoid, StgPtr, StgOffset, 
+	StgCode, StgStablePtr, StgFunPtr,
 	StgUnion.
 
  * WARNING: Keep this file, MachDeps.h, and HsFFI.h in synch!
@@ -58,23 +57,17 @@ typedef unsigned int             StgWord32;
 #error GHC untested on this architecture: sizeof(int) != 4
 #endif
 
-#ifdef SUPPORT_LONG_LONGS
-/* assume long long is 64 bits */
-# ifndef _MSC_VER
-typedef signed long long int   StgInt64;
-typedef unsigned long long int StgWord64;
-# else
-typedef __int64 StgInt64;
-typedef unsigned __int64 StgWord64;
-# endif
-#elif SIZEOF_LONG == 8
+#if SIZEOF_LONG == 8
 typedef signed   long          StgInt64;
 typedef unsigned long          StgWord64;
 #elif defined(__MSVC__)
 typedef __int64                StgInt64;
 typedef unsigned __int64       StgWord64;
+#elif SIZEOF_LONG_LONG == 8
+typedef signed long long int   StgInt64;
+typedef unsigned long long int StgWord64;
 #else
-#error GHC untested on this architecture: sizeof(void *) < 8 and no long longs.
+#error cannot find a way to define StgInt64
 #endif
 
 /*
@@ -106,40 +99,21 @@ typedef StgWord16          StgHalfWord;
 
 #define W_MASK  (sizeof(W_)-1)
 
-typedef void*              StgAddr;
-
 /*
  * Other commonly-used STG datatypes.
  */
 
+typedef void*              StgAddr;
 typedef StgWord32          StgChar;
 typedef int                StgBool;
-
 typedef float		   StgFloat;
 typedef double		   StgDouble;
-                           
-typedef void               StgVoid;
-                           
-typedef struct StgClosure_ StgClosure;
-typedef StgClosure*        StgClosurePtr;
-typedef StgWord*           StgPtr;           /* pointer into closure       */
+typedef StgWord*           StgPtr;           /* heap or stack pointer */
 typedef StgWord volatile*  StgVolatilePtr;   /* pointer to volatile word   */
 typedef StgWord            StgOffset;        /* byte offset within closure */
-                           
-typedef struct StgTSO_*    StgTSOPtr;
-
-typedef void*              StgForeignPtr;
-
-typedef StgInt             StgStackOffset;   /* offset in words! */
-
-typedef StgWord*           StgStackPtr;
-
-typedef StgWord8 	   StgCode;  	    /* close enough */
-
-typedef StgPtr*            StgArray;        /* the goods of an Array# */
-typedef char*		   StgByteArray;    /* the goods of a ByteArray# */
-
+typedef StgWord8 	   StgCode;  	     /* close enough */
 typedef void*		   StgStablePtr;
+typedef StgWord8*          StgByteArray;
 
 /*
   Types for the generated C functions
diff --git a/rts/Adjustor.c b/rts/Adjustor.c
index e93f361aee9611fa6b0ba892c1be0b5d690deef3..0f09743a02ee9040a54e8705daca47a17a0abbb4 100644
--- a/rts/Adjustor.c
+++ b/rts/Adjustor.c
@@ -38,9 +38,9 @@ Haskell side.
 
 #include "PosixSource.h"
 #include "Rts.h"
-#include "RtsExternal.h"
+
 #include "RtsUtils.h"
-#include <stdlib.h>
+#include "Stable.h"
 
 #if defined(USE_LIBFFI_FOR_ADJUSTORS)
 
diff --git a/rts/Arena.c b/rts/Arena.c
index fcdc6cce147210ab6be6b9547bff19e4ad22391b..7fc49f44d75b2827d1a2705c0bc19f0d6ca84a56 100644
--- a/rts/Arena.c
+++ b/rts/Arena.c
@@ -18,12 +18,12 @@
    which most allocations are small.
    -------------------------------------------------------------------------- */
 
+#include "PosixSource.h"
 #include "Rts.h"
+
 #include "RtsUtils.h"
 #include "Arena.h"
 
-#include <stdlib.h>
-
 // Each arena struct is allocated using malloc().
 struct _Arena {
     bdescr *current;
diff --git a/rts/AwaitEvent.h b/rts/AwaitEvent.h
index e03cb4444e71f2c3e7dbfa2d23f1960c9a4bc9a6..758e81a288a66bf47af917e84fc1b310b9443f82 100644
--- a/rts/AwaitEvent.h
+++ b/rts/AwaitEvent.h
@@ -21,4 +21,4 @@ void awaitEvent(rtsBool wait);  /* In posix/Select.c or
 				 * win32/AwaitEvent.c */
 #endif
 
-#endif /* SELECT_H */
+#endif /* AWAITEVENT_H */
diff --git a/rts/Capability.c b/rts/Capability.c
index 02308d4c200c5b8a6893a3fa1cee2757de458cb4..ddaba699a36615da472423187a9134e268a71b15 100644
--- a/rts/Capability.c
+++ b/rts/Capability.c
@@ -18,28 +18,27 @@
 
 #include "PosixSource.h"
 #include "Rts.h"
-#include "RtsUtils.h"
-#include "RtsFlags.h"
-#include "STM.h"
-#include "OSThreads.h"
+
 #include "Capability.h"
 #include "Schedule.h"
 #include "Sparks.h"
 #include "Trace.h"
-#include "GC.h"
+#include "sm/GC.h" // for gcWorkerThread()
+#include "STM.h"
+#include "RtsUtils.h"
 
 // one global capability, this is the Capability for non-threaded
 // builds, and for +RTS -N1
 Capability MainCapability;
 
-nat n_capabilities;
+nat n_capabilities = 0;
 Capability *capabilities = NULL;
 
 // Holds the Capability which last became free.  This is used so that
 // an in-call has a chance of quickly finding a free Capability.
 // Maintaining a global free list of Capabilities would require global
 // locking, so we don't do that.
-Capability *last_free_capability;
+Capability *last_free_capability = NULL;
 
 /* GC indicator, in scope for the scheduler, init'ed to false */
 volatile StgWord waiting_for_gc = 0;
@@ -235,8 +234,8 @@ initCapability( Capability *cap, nat i )
 #endif
 
     cap->f.stgEagerBlackholeInfo = (W_)&__stg_EAGER_BLACKHOLE_info;
-    cap->f.stgGCEnter1     = (F_)__stg_gc_enter_1;
-    cap->f.stgGCFun        = (F_)__stg_gc_fun;
+    cap->f.stgGCEnter1     = (StgFunPtr)__stg_gc_enter_1;
+    cap->f.stgGCFun        = (StgFunPtr)__stg_gc_fun;
 
     cap->mut_lists  = stgMallocBytes(sizeof(bdescr *) *
 				     RtsFlags.GcFlags.generations,
diff --git a/rts/Capability.h b/rts/Capability.h
index d2fcc5ed7bfdb90449773560938618a59cc18a72..fb199e2a3e32882d2a57f18b4e7f4eaa37eec3a2 100644
--- a/rts/Capability.h
+++ b/rts/Capability.h
@@ -4,26 +4,21 @@
  *
  * Capabilities
  *
- * The notion of a capability is used when operating in multi-threaded
- * environments (which the THREADED_RTS build of the RTS does), to
- * hold all the state an OS thread/task needs to run Haskell code:
- * its STG registers, a pointer to its  TSO, a nursery etc. During
- * STG execution, a pointer to the capabilitity is kept in a 
- * register (BaseReg).
+ * A Capability holds all the state an OS thread/task needs to run
+ * Haskell code: its STG registers, a pointer to its TSO, a nursery
+ * etc. During STG execution, a pointer to the Capabilitity is kept in
+ * a register (BaseReg).
  *
- * Only in an THREADED_RTS build will there be multiple capabilities,
- * in the non-threaded builds there is one global capability, namely
+ * Only in a THREADED_RTS build will there be multiple capabilities,
+ * in the non-threaded RTS there is one global capability, called
  * MainCapability.
  *
- * This header file contains the functions for working with capabilities.
- * (the main, and only, consumer of this interface is the scheduler).
- * 
  * --------------------------------------------------------------------------*/
 
 #ifndef CAPABILITY_H
 #define CAPABILITY_H
 
-#include "RtsFlags.h"
+#include "sm/GC.h" // for evac_fn
 #include "Task.h"
 #include "Sparks.h"
 
@@ -174,14 +169,14 @@ INLINE_HEADER void releaseCapability_ (Capability* cap STG_UNUSED,
                                        rtsBool always_wakeup STG_UNUSED) {};
 #endif
 
-#if !IN_STG_CODE
-// one global capability
-extern Capability MainCapability; 
-#endif
+// declared in includes/rts/Threads.h:
+// extern Capability MainCapability; 
+
+// declared in includes/rts/Threads.h:
+// extern nat n_capabilities;
 
 // Array of all the capabilities
 //
-extern nat n_capabilities;
 extern Capability *capabilities;
 
 // The Capability that was last free.  Used as a good guess for where
@@ -281,7 +276,7 @@ INLINE_HEADER void contextSwitchCapability(Capability *cap);
 // Free all capabilities
 void freeCapabilities (void);
 
-// FOr the GC:
+// For the GC:
 void markSomeCapabilities (evac_fn evac, void *user, nat i0, nat delta, 
                            rtsBool prune_sparks);
 void markCapabilities (evac_fn evac, void *user);
diff --git a/rts/Disassembler.c b/rts/Disassembler.c
index 19c121f2f1ad8c97da89fd1e8cf037da7512f526..fff3fe9d7632bce75fc24e009f7fc714be2b23e7 100644
--- a/rts/Disassembler.c
+++ b/rts/Disassembler.c
@@ -13,12 +13,10 @@
 #include "PosixSource.h"
 #include "Rts.h"
 #include "RtsAPI.h"
+#include "rts/Bytecodes.h"
+
 #include "RtsUtils.h"
-#include "Closures.h"
-#include "TSO.h"
 #include "Schedule.h"
-
-#include "Bytecodes.h"
 #include "Printer.h"
 #include "Disassembler.h"
 #include "Interpreter.h"
diff --git a/rts/FrontPanel.c b/rts/FrontPanel.c
index 2ce91e2c6511d0eda98d34df0edddff7d734bdc5..163a7c08ca2af7b65198f0d560585ae580c865c7 100644
--- a/rts/FrontPanel.c
+++ b/rts/FrontPanel.c
@@ -12,11 +12,10 @@
 /* #include "PosixSource.h" */
 
 #include "Rts.h"
+
 #include "RtsUtils.h"
-#include "MBlock.h"
 #include "FrontPanel.h"
 #include "Stats.h"
-#include "RtsFlags.h"
 #include "Schedule.h"
 
 #include <gtk/gtk.h>
diff --git a/rts/Globals.c b/rts/Globals.c
index a0d0788335c14f4e75fe656de9ba075c62197a6f..15b10130a8a08a01c462039dab79a818eb363834 100644
--- a/rts/Globals.c
+++ b/rts/Globals.c
@@ -9,8 +9,11 @@
  *
  * ---------------------------------------------------------------------------*/
 
+#include "PosixSource.h"
 #include "Rts.h"
-#include "RtsGlobals.h"
+
+#include "Globals.h"
+#include "Stable.h"
 
 static StgStablePtr typeableStore      = 0;
 static StgStablePtr signalHandlerStore = 0;
diff --git a/rts/Globals.h b/rts/Globals.h
new file mode 100644
index 0000000000000000000000000000000000000000..bc68904f78177334855e9d5df1ed8cdb4cfa1dcf
--- /dev/null
+++ b/rts/Globals.h
@@ -0,0 +1,19 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 2006-2009
+ *
+ * The RTS stores some "global" values on behalf of libraries, so that
+ * some libraries can ensure that certain top-level things are shared
+ * even when multiple versions of the library are loaded.  e.g. see
+ * Data.Typeable and GHC.Conc.
+ *
+ * ---------------------------------------------------------------------------*/
+
+#ifndef GLOBALS_H
+#define GLOBALS_H
+
+void initGlobalStore(void);
+void exitGlobalStore(void);
+
+#endif
+
diff --git a/rts/Hash.c b/rts/Hash.c
index 033ccb3e73a2c9701c59bf250b7adab508375bbd..09d0a06808dd83a8ddfcc98f32239cb268e3fbba 100644
--- a/rts/Hash.c
+++ b/rts/Hash.c
@@ -10,10 +10,10 @@
 
 #include "PosixSource.h"
 #include "Rts.h"
+
 #include "Hash.h"
 #include "RtsUtils.h"
 
-#include <stdlib.h>
 #include <string.h>
 
 #define HSEGSIZE    1024    /* Size of a single hash table segment */
diff --git a/rts/Hpc.c b/rts/Hpc.c
index a6e854ba830f0ec02c6759eb93abe5b1ebd4556d..cf75a05847a5cb8ec7708224a8fb17d49a815827 100644
--- a/rts/Hpc.c
+++ b/rts/Hpc.c
@@ -2,16 +2,16 @@
  * (c)2006 Galois Connections, Inc.
  */ 
 
+#include "PosixSource.h"
+#include "Rts.h"
+
+#include "Trace.h"
+
 #include <stdio.h>
 #include <ctype.h>
-#include <stdlib.h>
 #include <string.h>
 #include <assert.h>
 
-#include "Rts.h"
-#include "Hpc.h"
-#include "Trace.h"
-
 #ifdef HAVE_SYS_TYPES_H
 #include <sys/types.h>
 #endif
@@ -49,7 +49,8 @@ int totalTixes = 0;		// total number of tix boxes.
 
 static char *tixFilename;
 
-static void failure(char *msg) {
+static void GNU_ATTRIBUTE(__noreturn__)
+failure(char *msg) {
   debugTrace(DEBUG_hpc,"hpc failure: %s\n",msg);
   fprintf(stderr,"Hpc failure: %s\n",msg);
   if (tixFilename) {
diff --git a/rts/Hpc.h b/rts/Hpc.h
deleted file mode 100644
index a0ff40b06c5a3f77062fd0b98b18ab7c2ba99408..0000000000000000000000000000000000000000
--- a/rts/Hpc.h
+++ /dev/null
@@ -1,10 +0,0 @@
-#ifndef HPC_H
-#define HPC_H
-
-extern void startupHpc(void);
-extern void exitHpc(void);
-
-#endif /* HPC_H */
-
-
-
diff --git a/rts/HsFFI.c b/rts/HsFFI.c
index 350bcfbdec88e25ffc8320f6a3d36492af2a0635..57f91b198e4193e1b4e629828073989942ea6c5a 100644
--- a/rts/HsFFI.c
+++ b/rts/HsFFI.c
@@ -6,9 +6,12 @@
  *
  * ---------------------------------------------------------------------------*/
 
+#include "PosixSource.h"
 #include "HsFFI.h"
 #include "Rts.h"
 
+#include "Stable.h"
+
 // hs_init and hs_exit are defined in RtsStartup.c
 
 void
diff --git a/rts/Inlines.c b/rts/Inlines.c
index 5d2be70c63217cc8264a442727fbaed8e8f01df1..88cbdd2f44b584f42f707dc4de69b14973e5ec98 100644
--- a/rts/Inlines.c
+++ b/rts/Inlines.c
@@ -2,4 +2,5 @@
 // compiled for real here, just in case the definition was not inlined
 // at some call site:
 #define KEEP_INLINES
+#include "PosixSource.h"
 #include "Rts.h"
diff --git a/rts/Interpreter.c b/rts/Interpreter.c
index 91e500b8eed51c025d5adcd9fda3396b63521b8a..d047876d21d2c12fc198686fe384a688b183ce4c 100644
--- a/rts/Interpreter.c
+++ b/rts/Interpreter.c
@@ -7,21 +7,20 @@
 #include "PosixSource.h"
 #include "Rts.h"
 #include "RtsAPI.h"
+#include "rts/Bytecodes.h"
+
+// internal headers
+#include "sm/Storage.h"
 #include "RtsUtils.h"
-#include "Closures.h"
-#include "TSO.h"
 #include "Schedule.h"
-#include "RtsFlags.h"
-#include "LdvProfile.h"
 #include "Updates.h"
 #include "Sanity.h"
-#include "Liveness.h"
 #include "Prelude.h"
-
-#include "Bytecodes.h"
+#include "Stable.h"
 #include "Printer.h"
 #include "Disassembler.h"
 #include "Interpreter.h"
+#include "ThreadPaused.h"
 
 #include <string.h>     /* for memcpy */
 #ifdef HAVE_ERRNO_H
diff --git a/rts/LdvProfile.c b/rts/LdvProfile.c
index eab3ec3e4b945a5c2ed3c4649c66b73f99c53a38..c97187a9a933101921a16342d57b9edb591e5920 100644
--- a/rts/LdvProfile.c
+++ b/rts/LdvProfile.c
@@ -9,9 +9,9 @@
 
 #ifdef PROFILING
 
+#include "PosixSource.h"
 #include "Rts.h"
-#include "LdvProfile.h"
-#include "RtsFlags.h"
+
 #include "Profiling.h"
 #include "Stats.h"
 #include "RtsUtils.h"
diff --git a/rts/Linker.c b/rts/Linker.c
index 162ada872c58c29e589b937c677f698a632694e7..7db88cb62553ade95004f00e978b012911396ba6 100644
--- a/rts/Linker.c
+++ b/rts/Linker.c
@@ -18,17 +18,19 @@
 #endif
 
 #include "Rts.h"
-#include "RtsFlags.h"
 #include "HsFFI.h"
+
+#include "sm/Storage.h"
 #include "Hash.h"
-#include "Linker.h"
 #include "LinkerInternals.h"
 #include "RtsUtils.h"
-#include "Schedule.h"
-#include "Sparks.h"
-#include "RtsGlobals.h"
-#include "Timer.h"
 #include "Trace.h"
+#include "StgPrimFloat.h" // for __int_encodeFloat etc.
+#include "Stable.h"
+
+#if !defined(mingw32_HOST_OS)
+#include "posix/Signals.h"
+#endif
 
 #ifdef HAVE_SYS_TYPES_H
 #include <sys/types.h>
@@ -230,8 +232,8 @@ typedef struct _RtsSymbolVal {
 #if !defined (mingw32_HOST_OS)
 #define RTS_POSIX_ONLY_SYMBOLS                  \
       SymI_HasProto(shutdownHaskellAndSignal)	\
-      SymI_NeedsProto(lockFile)                 \
-      SymI_NeedsProto(unlockFile)               \
+      SymI_HasProto(lockFile)                   \
+      SymI_HasProto(unlockFile)                 \
       SymI_HasProto(signal_handlers)		\
       SymI_HasProto(stg_sig_install)		\
       SymI_NeedsProto(nocldstop)
@@ -613,7 +615,6 @@ typedef struct _RtsSymbolVal {
       SymI_HasProto(forkProcess)			\
       SymI_HasProto(forkOS_createThread)		\
       SymI_HasProto(freeHaskellFunctionPtr)		\
-      SymI_HasProto(freeStablePtr)		        \
       SymI_HasProto(getOrSetTypeableStore)		\
       SymI_HasProto(getOrSetSignalHandlerStore)		\
       SymI_HasProto(genSymZh)				\
@@ -635,15 +636,7 @@ typedef struct _RtsSymbolVal {
       SymI_HasProto(getApStackValzh_fast)               \
       SymI_HasProto(getSparkzh_fast)                    \
       SymI_HasProto(isCurrentThreadBoundzh_fast)	\
-      SymI_HasProto(isDoubleDenormalized)		\
-      SymI_HasProto(isDoubleInfinite)			\
-      SymI_HasProto(isDoubleNaN)			\
-      SymI_HasProto(isDoubleNegativeZero)		\
       SymI_HasProto(isEmptyMVarzh_fast)			\
-      SymI_HasProto(isFloatDenormalized)		\
-      SymI_HasProto(isFloatInfinite)			\
-      SymI_HasProto(isFloatNaN)				\
-      SymI_HasProto(isFloatNegativeZero)		\
       SymI_HasProto(killThreadzh_fast)			\
       SymI_HasProto(loadObj)          			\
       SymI_HasProto(insertStableSymbol) 		\
@@ -674,7 +667,6 @@ typedef struct _RtsSymbolVal {
       SymI_HasProto(raiseIOzh_fast)			\
       SymI_HasProto(readTVarzh_fast)			\
       SymI_HasProto(readTVarIOzh_fast)			\
-      SymI_HasProto(resetNonBlockingFd)			\
       SymI_HasProto(resumeThread)			\
       SymI_HasProto(resolveObjs)                        \
       SymI_HasProto(retryzh_fast)                       \
@@ -735,7 +727,6 @@ typedef struct _RtsSymbolVal {
       SymI_HasProto(stackOverflow)			\
       SymI_HasProto(stg_CAF_BLACKHOLE_info)		\
       SymI_HasProto(__stg_EAGER_BLACKHOLE_info)		\
-      SymI_HasProto(awakenBlockedQueue)			\
       SymI_HasProto(startTimer)                         \
       SymI_HasProto(stg_CHARLIKE_closure)		\
       SymI_HasProto(stg_MVAR_CLEAN_info)		\
diff --git a/rts/Main.c b/rts/Main.c
index 58d3f37919980a49f8a6059045db4e14c6f4f109..c1b028ff1bb40f55fd51e4dd33097de77a423464 100644
--- a/rts/Main.c
+++ b/rts/Main.c
@@ -11,6 +11,7 @@
  *
  * ---------------------------------------------------------------------------*/
 
+#include "PosixSource.h"
 #include "Rts.h"
 #include "RtsMain.h"
 
diff --git a/rts/Papi.c b/rts/Papi.c
index d95e26c8e7d09cdcaaea262a5ac77af9ef12bf47..e62fc9595c6a7c967a4f00520f1dd215de5b9a82 100644
--- a/rts/Papi.c
+++ b/rts/Papi.c
@@ -16,12 +16,12 @@
 
 #include <papi.h>
 
-#include "Papi.h"
+#include "PosixSource.h"
 #include "Rts.h"
+
 #include "RtsUtils.h"
 #include "Stats.h"
-#include "RtsFlags.h"
-#include "OSThreads.h"
+#include "Papi.h"
 
 // used to protect the aggregated counters
 #ifdef THREADED_RTS
diff --git a/rts/Papi.h b/rts/Papi.h
index fc92a913d5d08b80048105db475c906de57d7387..8f58f6dfc9fc9df9db79f367fc508caad25a481b 100644
--- a/rts/Papi.h
+++ b/rts/Papi.h
@@ -5,6 +5,9 @@
  *
  * ---------------------------------------------------------------------------*/
 
+#ifndef PAPI_H
+#define PAPI_H
+
 /* Check the error value of a PAPI call, reporting an error, if needed */
 extern int papi_error;
 
@@ -25,3 +28,5 @@ void papi_stop_gc1_count(void);
 // events and aggregate them into the main GC counters.
 void papi_thread_start_gc1_count(int event_set);
 void papi_thread_stop_gc1_count(int event_set);
+
+#endif /* PAPI_H */
diff --git a/rts/PosixSource.h b/rts/PosixSource.h
index a938f9bc0f97009b33e6d7a6d41a9d4ad2221a5d..cd1aeea34cfbf715fc9e5713869b83b90fcbe7fc 100644
--- a/rts/PosixSource.h
+++ b/rts/PosixSource.h
@@ -11,8 +11,9 @@
 
 #define _POSIX_SOURCE   1
 #define _POSIX_C_SOURCE 199506L
-#define _ISOC9X_SOURCE
+#define _XOPEN_SOURCE   500
+#define _ISOC99_SOURCE
 
-/* Let's be ISO C9X too... */
+/* Let's be ISO C99 too... */
 
 #endif /* POSIXSOURCE_H */
diff --git a/rts/Printer.c b/rts/Printer.c
index a0040a5d4664997808eaf1b99dfe4a1e78ec08a6..ee91777854ee2bef502768791c4dd750f9d43541 100644
--- a/rts/Printer.c
+++ b/rts/Printer.c
@@ -8,18 +8,16 @@
 
 #include "PosixSource.h"
 #include "Rts.h"
+#include "rts/Bytecodes.h"  /* for InstrPtr */
+
 #include "Printer.h"
 #include "RtsUtils.h"
 
 #ifdef DEBUG
 
-#include "RtsFlags.h"
-#include "MBlock.h"
-#include "Bytecodes.h"  /* for InstrPtr */
 #include "Disassembler.h"
 #include "Apply.h"
 
-#include <stdlib.h>
 #include <string.h>
 
 /* --------------------------------------------------------------------------
@@ -37,7 +35,6 @@ static rtsBool lookup_name   ( char *name, StgWord *result );
 static void    enZcode       ( char *in, char *out );
 #endif
 static char    unZcode       ( char ch );
-const char *   lookupGHCName ( void *addr );
 static void    printZcoded   ( const char *raw );
 
 /* --------------------------------------------------------------------------
@@ -186,7 +183,7 @@ printClosure( StgClosure *obj )
 
     case AP:
         {
-	    StgAP* ap = stgCast(StgAP*,obj);
+	    StgAP* ap = (StgAP*)obj;
             StgWord i;
             debugBelch("AP("); printPtr((StgPtr)ap->fun);
             for (i = 0; i < ap->n_args; ++i) {
@@ -199,7 +196,7 @@ printClosure( StgClosure *obj )
 
     case PAP:
         {
-	    StgPAP* pap = stgCast(StgPAP*,obj);
+	    StgPAP* pap = (StgPAP*)obj;
             StgWord i;
             debugBelch("PAP/%d(",pap->arity); 
 	    printPtr((StgPtr)pap->fun);
@@ -213,7 +210,7 @@ printClosure( StgClosure *obj )
 
     case AP_STACK:
         {
-	    StgAP_STACK* ap = stgCast(StgAP_STACK*,obj);
+	    StgAP_STACK* ap = (StgAP_STACK*)obj;
             StgWord i;
             debugBelch("AP_STACK("); printPtr((StgPtr)ap->fun);
             for (i = 0; i < ap->size; ++i) {
@@ -226,31 +223,31 @@ printClosure( StgClosure *obj )
 
     case IND:
             debugBelch("IND("); 
-            printPtr((StgPtr)stgCast(StgInd*,obj)->indirectee);
+            printPtr((StgPtr)((StgInd*)obj)->indirectee);
             debugBelch(")\n"); 
             break;
 
     case IND_OLDGEN:
             debugBelch("IND_OLDGEN("); 
-            printPtr((StgPtr)stgCast(StgInd*,obj)->indirectee);
+            printPtr((StgPtr)((StgInd*)obj)->indirectee);
             debugBelch(")\n"); 
             break;
 
     case IND_PERM:
             debugBelch("IND("); 
-            printPtr((StgPtr)stgCast(StgInd*,obj)->indirectee);
+            printPtr((StgPtr)((StgInd*)obj)->indirectee);
             debugBelch(")\n"); 
             break;
 
     case IND_OLDGEN_PERM:
             debugBelch("IND_OLDGEN_PERM("); 
-            printPtr((StgPtr)stgCast(StgInd*,obj)->indirectee);
+            printPtr((StgPtr)((StgInd*)obj)->indirectee);
             debugBelch(")\n"); 
             break;
 
     case IND_STATIC:
             debugBelch("IND_STATIC("); 
-            printPtr((StgPtr)stgCast(StgInd*,obj)->indirectee);
+            printPtr((StgPtr)((StgInd*)obj)->indirectee);
             debugBelch(")\n"); 
             break;
 
@@ -264,7 +261,7 @@ printClosure( StgClosure *obj )
 
     case UPDATE_FRAME:
         {
-            StgUpdateFrame* u = stgCast(StgUpdateFrame*,obj);
+            StgUpdateFrame* u = (StgUpdateFrame*)obj;
             debugBelch("UPDATE_FRAME(");
             printPtr((StgPtr)GET_INFO(u));
             debugBelch(",");
@@ -275,7 +272,7 @@ printClosure( StgClosure *obj )
 
     case CATCH_FRAME:
         {
-            StgCatchFrame* u = stgCast(StgCatchFrame*,obj);
+            StgCatchFrame* u = (StgCatchFrame*)obj;
             debugBelch("CATCH_FRAME(");
             printPtr((StgPtr)GET_INFO(u));
             debugBelch(",");
@@ -286,7 +283,7 @@ printClosure( StgClosure *obj )
 
     case STOP_FRAME:
         {
-            StgStopFrame* u = stgCast(StgStopFrame*,obj);
+            StgStopFrame* u = (StgStopFrame*)obj;
             debugBelch("STOP_FRAME(");
             printPtr((StgPtr)GET_INFO(u));
             debugBelch(")\n"); 
diff --git a/rts/Printer.h b/rts/Printer.h
index 689c2f8d4a7bc883748098c97b955dde9489d0c0..52c1c3eb213e8568fffb147fd08a8e86dd1e763b 100644
--- a/rts/Printer.h
+++ b/rts/Printer.h
@@ -15,8 +15,8 @@ extern void   	   printObj        ( StgClosure *obj );
 #ifdef DEBUG
 extern void        prettyPrintClosure (StgClosure *obj);
 extern void   	   printClosure    ( StgClosure *obj );
-extern StgStackPtr printStackObj   ( StgStackPtr sp );
-extern void        printStackChunk ( StgStackPtr sp, StgStackPtr spLim );
+extern StgPtr      printStackObj   ( StgPtr sp );
+extern void        printStackChunk ( StgPtr sp, StgPtr spLim );
 extern void        printTSO        ( StgTSO *tso );
 
 void   	           info_hdr_type   ( StgClosure *closure, char *res );
diff --git a/rts/ProfHeap.c b/rts/ProfHeap.c
index 36d4eb5f6f0192f119a139ebe0bdd5141f12ccbe..8d9843893b67dcd748dca95d5817d8c09a14992a 100644
--- a/rts/ProfHeap.c
+++ b/rts/ProfHeap.c
@@ -8,20 +8,17 @@
 
 #include "PosixSource.h"
 #include "Rts.h"
+
 #include "RtsUtils.h"
-#include "RtsFlags.h"
 #include "Profiling.h"
 #include "ProfHeap.h"
 #include "Stats.h"
 #include "Hash.h"
 #include "RetainerProfile.h"
-#include "LdvProfile.h"
 #include "Arena.h"
 #include "Printer.h"
 
 #include <string.h>
-#include <stdlib.h>
-#include <math.h>
 
 /* -----------------------------------------------------------------------------
  * era stores the current time period.  It is the same as the
@@ -95,6 +92,8 @@ static void aggregateCensusInfo( void );
 
 static void dumpCensus( Census *census );
 
+static rtsBool closureSatisfiesConstraints( StgClosure* p );
+
 /* ----------------------------------------------------------------------------
    Closure Type Profiling;
    ------------------------------------------------------------------------- */
@@ -615,7 +614,6 @@ fprint_ccs(FILE *fp, CostCentreStack *ccs, nat max_length)
     }
     fprintf(fp, "%s", buf);
 }
-#endif /* PROFILING */
 
 rtsBool
 strMatchesSelector( char* str, char* sel )
@@ -641,11 +639,13 @@ strMatchesSelector( char* str, char* sel )
    }
 }
 
+#endif /* PROFILING */
+
 /* -----------------------------------------------------------------------------
  * Figure out whether a closure should be counted in this census, by
  * testing against all the specified constraints.
  * -------------------------------------------------------------------------- */
-rtsBool
+static rtsBool
 closureSatisfiesConstraints( StgClosure* p )
 {
 #if !defined(PROFILING)
@@ -1012,7 +1012,7 @@ heapCensusChain( Census *census, bdescr *bd )
 		
 	    case ARR_WORDS:
 		prim = rtsTrue;
-		size = arr_words_sizeW(stgCast(StgArrWords*,p));
+		size = arr_words_sizeW((StgArrWords*)p);
 		break;
 		
 	    case MUT_ARR_PTRS_CLEAN:
diff --git a/rts/ProfHeap.h b/rts/ProfHeap.h
index 0251416762786cf44c12131ddfe179089f5996ca..a3da424d2423818b059525fa9b4fcf2eb6410246 100644
--- a/rts/ProfHeap.h
+++ b/rts/ProfHeap.h
@@ -12,7 +12,6 @@
 extern void    heapCensus( void );
 extern nat     initHeapProfiling( void );
 extern void    endHeapProfiling( void );
-extern rtsBool closureSatisfiesConstraints( StgClosure* p );
 extern void    LDV_recordDead( StgClosure *c, nat size );
 extern rtsBool strMatchesSelector( char* str, char* sel );
 
diff --git a/rts/Profiling.c b/rts/Profiling.c
index d729c8a09e5fff16d291a8f220cf34a796df005b..0769b529c091990bf5a14da017caf0357335ddbc 100644
--- a/rts/Profiling.c
+++ b/rts/Profiling.c
@@ -10,15 +10,13 @@
 
 #include "PosixSource.h"
 #include "Rts.h"
+
 #include "RtsUtils.h"
-#include "RtsFlags.h"
 #include "Profiling.h"
 #include "Proftimer.h"
-#include "Timer.h"
 #include "ProfHeap.h"
 #include "Arena.h"
 #include "RetainerProfile.h"
-#include "LdvProfile.h"
 
 #include <string.h>
 
diff --git a/rts/Proftimer.c b/rts/Proftimer.c
index 32e5c560737317881e24a9738b64b0d2b7e03c16..dfcc7096258ef06372eb016985a3c175f29dbb86 100644
--- a/rts/Proftimer.c
+++ b/rts/Proftimer.c
@@ -7,14 +7,15 @@
  * ---------------------------------------------------------------------------*/
 
 #include "PosixSource.h"
-
 #include "Rts.h"
+
 #include "Profiling.h"
-#include "Timer.h"
 #include "Proftimer.h"
-#include "RtsFlags.h"
 
+#ifdef PROFILING
 static rtsBool do_prof_ticks = rtsFalse;       // enable profiling ticks
+#endif
+
 static rtsBool do_heap_prof_ticks = rtsFalse;  // enable heap profiling ticks
 
 // Number of ticks until next heap census
@@ -23,6 +24,8 @@ static int ticks_to_heap_profile;
 // Time for a heap profile on the next context switch
 rtsBool performHeapProfile;
 
+#ifdef PROFILING
+
 void
 stopProfTimer( void )
 {
@@ -35,6 +38,8 @@ startProfTimer( void )
     do_prof_ticks = rtsTrue;
 }
 
+#endif
+
 void
 stopHeapProfTimer( void )
 {
diff --git a/rts/Proftimer.h b/rts/Proftimer.h
index c837b855f9143ed5f7d52b6df435460480e24559..1379792d660f4005b99ceb9fb9970983d74fb050 100644
--- a/rts/Proftimer.h
+++ b/rts/Proftimer.h
@@ -12,8 +12,11 @@
 extern void initProfTimer      ( void );
 extern void handleProfTick     ( void );
 
+#ifdef PROFILING
 extern void stopProfTimer      ( void );
 extern void startProfTimer     ( void );
+#endif
+
 extern void stopHeapProfTimer  ( void );
 extern void startHeapProfTimer ( void );
 
diff --git a/rts/RaiseAsync.c b/rts/RaiseAsync.c
index 2ff916a70ad6c2f94a9d523c382285aaf338b883..39c973bd1de8b706eeb22af55de333defbb90fa3 100644
--- a/rts/RaiseAsync.c
+++ b/rts/RaiseAsync.c
@@ -8,17 +8,17 @@
 
 #include "PosixSource.h"
 #include "Rts.h"
+
+#include "sm/Storage.h"
 #include "Threads.h"
 #include "Trace.h"
 #include "RaiseAsync.h"
-#include "SMP.h"
 #include "Schedule.h"
-#include "LdvProfile.h"
 #include "Updates.h"
 #include "STM.h"
 #include "Sanity.h"
 #include "Profiling.h"
-#include "EventLog.h"
+#include "eventlog/EventLog.h"
 #if defined(mingw32_HOST_OS)
 #include "win32/IOManager.h"
 #endif
diff --git a/rts/RetainerProfile.c b/rts/RetainerProfile.c
index 2bd213ad3de683c9f258152b94f1e2aa59c5a95d..fa12637ef635d3791220b96c56cdaa51e7668ee5 100644
--- a/rts/RetainerProfile.c
+++ b/rts/RetainerProfile.c
@@ -16,13 +16,14 @@
 #define INLINE inline
 #endif
 
+#include "PosixSource.h"
 #include "Rts.h"
+
 #include "RtsUtils.h"
 #include "RetainerProfile.h"
 #include "RetainerSet.h"
 #include "Schedule.h"
 #include "Printer.h"
-#include "RtsFlags.h"
 #include "Weak.h"
 #include "Sanity.h"
 #include "Profiling.h"
diff --git a/rts/RetainerProfile.h b/rts/RetainerProfile.h
index f33c079f040f3345ac8247dfbd5209cb588c6f10..5f4503c84bd08fd3e8a7496e7b4011f5f4019b5b 100644
--- a/rts/RetainerProfile.h
+++ b/rts/RetainerProfile.h
@@ -20,7 +20,7 @@ extern void  printRetainer         ( FILE *, retainer );
 extern void  retainerProfile       ( void );
 extern void resetStaticObjectForRetainerProfiling( StgClosure *static_objects );
 
-extern StgWord RTS_VAR(flip);
+extern StgWord flip;
 
 // extract the retainer set field from c
 #define RSET(c)   ((c)->header.prof.hp.rs)
diff --git a/rts/RetainerSet.c b/rts/RetainerSet.c
index 201412b95842585f72d73d9b1b6d370e13398497..5e9b37c04ce81d0a1aaacbaa7f4078b088b4ac5b 100644
--- a/rts/RetainerSet.c
+++ b/rts/RetainerSet.c
@@ -9,15 +9,15 @@
 
 #ifdef PROFILING
 
+#include "PosixSource.h"
 #include "Rts.h"
-#include "RtsFlags.h"
+
 #include "Stats.h"
 #include "RtsUtils.h"
 #include "RetainerSet.h"
 #include "Arena.h"
 #include "Profiling.h"
 
-#include <stdlib.h>
 #include <string.h>
 
 #define HASH_TABLE_SIZE 255
diff --git a/rts/RtsAPI.c b/rts/RtsAPI.c
index df4315fdb82183bcc3d9979f5a02389363e96671..3eecab14d4eb65a3a0d35ea34e3d133dd9388aba 100644
--- a/rts/RtsAPI.c
+++ b/rts/RtsAPI.c
@@ -8,18 +8,15 @@
 
 #include "PosixSource.h"
 #include "Rts.h"
-#include "OSThreads.h"
 #include "RtsAPI.h"
-#include "SchedAPI.h"
-#include "RtsFlags.h"
+#include "HsFFI.h"
+
 #include "RtsUtils.h"
 #include "Prelude.h"
 #include "Schedule.h"
 #include "Capability.h"
 #include "Stable.h"
 
-#include <stdlib.h>
-
 /* ----------------------------------------------------------------------------
    Building Haskell objects from C datatypes.
 
diff --git a/rts/RtsFlags.c b/rts/RtsFlags.c
index 856137119c2d9e5d07dc6811862de390f5d9a423..1204c593565b19f522ba14c6c1df6e2c561c6610 100644
--- a/rts/RtsFlags.c
+++ b/rts/RtsFlags.c
@@ -9,7 +9,7 @@
 
 #include "PosixSource.h"
 #include "Rts.h"
-#include "RtsFlags.h"
+
 #include "RtsUtils.h"
 #include "Profiling.h"
 
@@ -17,7 +17,6 @@
 #include <ctype.h>
 #endif
 
-#include <stdlib.h>
 #include <string.h>
 
 // Flag Structure
@@ -1272,7 +1271,7 @@ decode(const char *s)
     return (I_)m;
 }
 
-static void
+static void GNU_ATTRIBUTE(__noreturn__)
 bad_option(const char *s)
 {
   errorBelch("bad RTS option: %s", s);
diff --git a/rts/RtsMain.c b/rts/RtsMain.c
index 438110a1797d678b2ccd07507b21f0b40ecdd97f..b6cf546aea88b87a6fa22c52b271583f0865d80e 100644
--- a/rts/RtsMain.c
+++ b/rts/RtsMain.c
@@ -11,8 +11,7 @@
 #include "PosixSource.h"
 #include "Rts.h"
 #include "RtsAPI.h"
-#include "SchedAPI.h"
-#include "RtsFlags.h"
+
 #include "RtsUtils.h"
 #include "RtsMain.h"
 #include "Prelude.h"
@@ -20,7 +19,6 @@
 #if defined(mingw32_HOST_OS)
 #include "win32/seh_excn.h"
 #endif
-#include <stdlib.h>
 
 #ifdef DEBUG
 # include "Printer.h"   /* for printing        */
diff --git a/rts/RtsMessages.c b/rts/RtsMessages.c
index c263a2cf579d2612e43c76bb0c695ca28b6a69fe..ef5c5a226cbcdcce199297e894bfd363c7b6470a 100644
--- a/rts/RtsMessages.c
+++ b/rts/RtsMessages.c
@@ -128,7 +128,7 @@ isGUIApp(void)
 #define xstr(s) str(s)
 #define str(s) #s
 
-void
+void GNU_ATTRIBUTE(__noreturn__)
 rtsFatalInternalErrorFn(const char *s, va_list ap)
 {
 #if defined(cygwin32_HOST_OS) || defined (mingw32_HOST_OS)
diff --git a/rts/RtsSignals.h b/rts/RtsSignals.h
index e130fb4281ac698e2b6c9b4fb90e958ab783c0a6..601a46b79c4b20f46d2c644a780fc4f9701d98f2 100644
--- a/rts/RtsSignals.h
+++ b/rts/RtsSignals.h
@@ -6,8 +6,8 @@
  *
  * ---------------------------------------------------------------------------*/
 
-#ifndef RTS_SIGNALS_H
-#define RTS_SIGNALS_H
+#ifndef RTSSIGNALS_H
+#define RTSSIGNALS_H
 
 #if !defined(mingw32_HOST_OS)
 
@@ -17,12 +17,11 @@
 
 #include "win32/ConsoleHandler.h"
 
-#else /* PAR */
+#else
 
 #define signals_pending() (rtsFalse)
 
-#endif /* PAR */
-
+#endif
 
 #if RTS_USER_SIGNALS
 
@@ -78,4 +77,4 @@ extern void markSignalHandlers (evac_fn evac, void *user);
 
 #endif /* RTS_USER_SIGNALS */
 
-#endif /* RTS_SIGNALS_H */
+#endif /* RTSSIGNALS_H */
diff --git a/rts/RtsStartup.c b/rts/RtsStartup.c
index c21aac767cd97a1b536f403028591554dfae31b6..2c1c5549f154010e0e6e085fd0c5f4cee85cfd51 100644
--- a/rts/RtsStartup.c
+++ b/rts/RtsStartup.c
@@ -7,42 +7,38 @@
  * ---------------------------------------------------------------------------*/
 
 // PAPI uses caddr_t, which is not POSIX
-// #include "PosixSource.h"
+#ifndef USE_PAPI
+#include "PosixSource.h"
+#endif
 
 #include "Rts.h"
 #include "RtsAPI.h"
+#include "HsFFI.h"
+
+#include "sm/Storage.h"
 #include "RtsUtils.h"
-#include "RtsFlags.h"  
-#include "OSThreads.h"
 #include "Schedule.h"   /* initScheduler */
 #include "Stats.h"      /* initStats */
 #include "STM.h"        /* initSTM */
-#include "Signals.h"
 #include "RtsSignals.h"
-#include "ThrIOManager.h"
-#include "Timer.h"      /* startTimer, stopTimer */
 #include "Weak.h"
 #include "Ticky.h"
 #include "StgRun.h"
 #include "Prelude.h"		/* fixupRTStoPreludeRefs */
-#include "HsFFI.h"
-#include "Linker.h"
 #include "ThreadLabels.h"
-#include "BlockAlloc.h"
+#include "sm/BlockAlloc.h"
 #include "Trace.h"
-#include "RtsGlobals.h"
 #include "Stable.h"
-#include "Hpc.h"
-#include "FileLock.h"
-#include "EventLog.h"
+#include "eventlog/EventLog.h"
 #include "Hash.h"
+#include "Profiling.h"
+#include "Timer.h"
+#include "Globals.h"
 
 #if defined(RTS_GTK_FRONTPANEL)
 #include "FrontPanel.h"
 #endif
 
-# include "Profiling.h"
-
 #if defined(PROFILING)
 # include "ProfHeap.h"
 # include "RetainerProfile.h"
@@ -52,14 +48,11 @@
 #include "win32/AsyncIO.h"
 #endif
 
-#include <stdlib.h>
-
-#ifdef HAVE_TERMIOS_H
-#include <termios.h>
-#endif
-#ifdef HAVE_SIGNAL_H
-#include <signal.h>
+#if !defined(mingw32_HOST_OS)
+#include "posix/TTY.h"
+#include "posix/FileLock.h"
 #endif
+
 #ifdef HAVE_UNISTD_H
 #include <unistd.h>
 #endif
@@ -74,26 +67,6 @@
 // Count of how many outstanding hs_init()s there have been.
 static int hs_init_count = 0;
 
-// Here we save the terminal settings on the standard file
-// descriptors, if we need to change them (eg. to support NoBuffering
-// input).
-static void *saved_termios[3] = {NULL,NULL,NULL};
-
-void*
-__hscore_get_saved_termios(int fd)
-{
-  return (0 <= fd && fd < (int)(sizeof(saved_termios) / sizeof(*saved_termios))) ?
-    saved_termios[fd] : NULL;
-}
-
-void
-__hscore_set_saved_termios(int fd, void* ts)
-{
-  if (0 <= fd && fd < (int)(sizeof(saved_termios) / sizeof(*saved_termios))) {
-    saved_termios[fd] = ts;
-  }
-}
-
 /* -----------------------------------------------------------------------------
    Initialise floating point unit on x86 (currently disabled. why?)
    (see comment in ghc/compiler/nativeGen/MachInstrs.lhs).
@@ -292,7 +265,7 @@ startupHaskell(int argc, char *argv[], void (*init_root)(void))
 /* The init functions use an explicit stack... 
  */
 #define INIT_STACK_BLOCKS  4
-static F_ *init_stack = NULL;
+static StgFunPtr *init_stack = NULL;
 
 void
 hs_add_root(void (*init_root)(void))
@@ -311,10 +284,10 @@ hs_add_root(void (*init_root)(void))
        to the last occupied word */
     init_sp = INIT_STACK_BLOCKS*BLOCK_SIZE_W;
     bd = allocGroup_lock(INIT_STACK_BLOCKS);
-    init_stack = (F_ *)bd->start;
-    init_stack[--init_sp] = (F_)stg_init_finish;
+    init_stack = (StgFunPtr *)bd->start;
+    init_stack[--init_sp] = (StgFunPtr)stg_init_finish;
     if (init_root != NULL) {
-	init_stack[--init_sp] = (F_)init_root;
+	init_stack[--init_sp] = (StgFunPtr)init_root;
     }
     
     cap->r.rSp = (P_)(init_stack + init_sp);
@@ -391,30 +364,9 @@ hs_exit_(rtsBool wait_foreign)
     stopTimer();
     exitTimer();
 
-    /* reset the standard file descriptors to blocking mode */
-    resetNonBlockingFd(0);
-    resetNonBlockingFd(1);
-    resetNonBlockingFd(2);
-
-#if HAVE_TERMIOS_H
-    // Reset the terminal settings on the standard file descriptors,
-    // if we changed them.  See System.Posix.Internals.tcSetAttr for
-    // more details, including the reason we termporarily disable
-    // SIGTTOU here.
-    { 
-	int fd;
-	sigset_t sigset, old_sigset;
-	sigemptyset(&sigset);
-	sigaddset(&sigset, SIGTTOU);
-	sigprocmask(SIG_BLOCK, &sigset, &old_sigset);
-	for (fd = 0; fd <= 2; fd++) {
-	    struct termios* ts = (struct termios*)__hscore_get_saved_termios(fd);
-	    if (ts != NULL) {
-		tcsetattr(fd,TCSANOW,ts);
-	    }
-	}
-	sigprocmask(SIG_SETMASK, &old_sigset, NULL);
-    }
+    // set the terminal settings back to what they were
+#if !defined(mingw32_HOST_OS)    
+    resetTerminalSettings();
 #endif
 
     // uninstall signal handlers
diff --git a/rts/RtsUtils.c b/rts/RtsUtils.c
index ed082a31f439dca3e96afb626bfa526db6e3d5e2..1953e1e838e84198240e7fe26eb1531c89cdea8e 100644
--- a/rts/RtsUtils.c
+++ b/rts/RtsUtils.c
@@ -7,10 +7,9 @@
  * ---------------------------------------------------------------------------*/
 
 #include "PosixSource.h"
-
 #include "Rts.h"
 #include "RtsAPI.h"
-#include "RtsFlags.h"
+
 #include "RtsUtils.h"
 #include "Ticky.h"
 #include "Schedule.h"
@@ -283,22 +282,6 @@ heapOverflow(void)
     }
 }
 
-/* -----------------------------------------------------------------------------
-   Out-of-line strlen.
-
-   Used in addr2Integer because the C compiler on x86 chokes on
-   strlen, trying to inline it with not enough registers available.
-   -------------------------------------------------------------------------- */
-
-nat stg_strlen(char *s)
-{
-   char *p = s;
-
-   while (*p) p++;
-   return p-s;
-}
-
-
 /* -----------------------------------------------------------------------------
    genSym stuff, used by GHC itself for its splitting unique supply.
 
@@ -342,44 +325,6 @@ time_str(void)
     return nowstr;
 }
 
-/* -----------------------------------------------------------------------------
- * Reset a file handle to blocking mode.  We do this for the standard
- * file descriptors before exiting, because the shell doesn't always
- * clean up for us.
- * -------------------------------------------------------------------------- */
-
-#if !defined(mingw32_HOST_OS)
-void
-resetNonBlockingFd(int fd)
-{
-  long fd_flags;
-
-  /* clear the non-blocking flag on this file descriptor */
-  fd_flags = fcntl(fd, F_GETFL);
-  if (fd_flags & O_NONBLOCK) {
-    fcntl(fd, F_SETFL, fd_flags & ~O_NONBLOCK);
-  }
-}
-
-void
-setNonBlockingFd(int fd)
-{
-  long fd_flags;
-
-  /* clear the non-blocking flag on this file descriptor */
-  fd_flags = fcntl(fd, F_GETFL);
-  if (!(fd_flags & O_NONBLOCK)) {
-    fcntl(fd, F_SETFL, fd_flags | O_NONBLOCK);
-  }
-}
-#else
-/* Stub defns -- async / non-blocking IO is not done 
- * via O_NONBLOCK and select() under Win32. 
- */
-void resetNonBlockingFd(int fd STG_UNUSED) {}
-void setNonBlockingFd(int fd STG_UNUSED) {}
-#endif
-
 /* -----------------------------------------------------------------------------
    Print large numbers, with punctuation.
    -------------------------------------------------------------------------- */
diff --git a/rts/RtsUtils.h b/rts/RtsUtils.h
index 11a2826596e2c99d4eb0d690df9b9790b9ac8262..14856bf94338461d2876bb34116e85a7285fec5d 100644
--- a/rts/RtsUtils.h
+++ b/rts/RtsUtils.h
@@ -1,6 +1,6 @@
 /* -----------------------------------------------------------------------------
  *
- * (c) The GHC Team, 1998-2005
+ * (c) The GHC Team, 1998-2009
  *
  * General utility functions used in the RTS.
  *
@@ -13,42 +13,44 @@
  * (Checked) dynamic allocation
  * -------------------------------------------------------------------------- */
 
-extern void initAllocator(void);
-extern void shutdownAllocator(void);
+void initAllocator(void);
+void shutdownAllocator(void);
 
-extern void *stgMallocBytes(int n, char *msg)
+void *stgMallocBytes(int n, char *msg)
     GNUC3_ATTRIBUTE(__malloc__);
 
-extern void *stgReallocBytes(void *p, int n, char *msg);
+void *stgReallocBytes(void *p, int n, char *msg);
 
-extern void *stgCallocBytes(int n, int m, char *msg)
+void *stgCallocBytes(int n, int m, char *msg)
      GNUC3_ATTRIBUTE(__malloc__);
 
-extern void stgFree(void* p);
+void stgFree(void* p);
 
 /* -----------------------------------------------------------------------------
  * Misc other utilities
  * -------------------------------------------------------------------------- */
 
-extern void heapOverflow(void);
+void heapOverflow(void);
 
-extern void setNonBlockingFd(int fd);
-extern void resetNonBlockingFd(int fd);
-
-extern nat stg_strlen(char *str);
-
-extern char *time_str(void);
-extern char *ullong_format_string(ullong, char *, rtsBool);
+char *time_str(void);
+char *ullong_format_string(ullong, char *, rtsBool);
 
 #ifdef DEBUG
-extern void heapCheckFail( void );
+void heapCheckFail( void );
 #endif
 
-extern void* __hscore_get_saved_termios(int fd);
-extern void __hscore_set_saved_termios(int fd, void* ts);
+// XXX shouldn't be here
+void* __hscore_get_saved_termios(int fd);
+void  __hscore_set_saved_termios(int fd, void* ts);
 
 void printRtsInfo(void);
 
+HsInt genSymZh(void);
+HsInt resetGenSymZh(void);
+
+/* Alternate to raise(3) for threaded rts, for OpenBSD */
+int genericRaise(int sig);
+
 int rts_isProfiled(void);
 
 #endif /* RTSUTILS_H */
diff --git a/rts/STM.c b/rts/STM.c
index b9be955f55dfb5e8ebb94ee03e40ec03d1b40f3f..723f77acb6aaa6ae10d81b88f0d8d229612c06ea 100644
--- a/rts/STM.c
+++ b/rts/STM.c
@@ -74,25 +74,24 @@
  * (d) release the locks on the TVars, writing updates to them in the case of a 
  * commit, (e) unlock the STM.
  *
- * Queues of waiting threads hang off the first_watch_queue_entry field of each
- * TVar.  This may only be manipulated when holding that TVar's lock.  In
- * particular, when a thread is putting itself to sleep, it mustn't release
- * the TVar's lock until it has added itself to the wait queue and marked its
- * TSO as BlockedOnSTM -- this makes sure that other threads will know to wake it.
+ * Queues of waiting threads hang off the first_watch_queue_entry
+ * field of each TVar.  This may only be manipulated when holding that
+ * TVar's lock.  In particular, when a thread is putting itself to
+ * sleep, it mustn't release the TVar's lock until it has added itself
+ * to the wait queue and marked its TSO as BlockedOnSTM -- this makes
+ * sure that other threads will know to wake it.
  *
  * ---------------------------------------------------------------------------*/
 
 #include "PosixSource.h"
 #include "Rts.h"
-#include "RtsFlags.h"
+
 #include "RtsUtils.h"
-#include "Storage.h"
 #include "Schedule.h"
-#include "SMP.h"
 #include "STM.h"
 #include "Trace.h"
+#include "Threads.h"
 
-#include <stdlib.h>
 #include <stdio.h>
 
 #define TRUE 1
diff --git a/includes/STM.h b/rts/STM.h
similarity index 100%
rename from includes/STM.h
rename to rts/STM.h
diff --git a/rts/Sanity.c b/rts/Sanity.c
index 02d81ed7cee8cd354b0fa76723ae31fb50f96fd2..d666d57f3b3e5723686f88bf463ab8394daf5403 100644
--- a/rts/Sanity.c
+++ b/rts/Sanity.c
@@ -18,14 +18,13 @@
 
 #ifdef DEBUG                                                   /* whole file */
 
-#include "RtsFlags.h"
 #include "RtsUtils.h"
-#include "BlockAlloc.h"
+#include "sm/Storage.h"
+#include "sm/BlockAlloc.h"
 #include "Sanity.h"
-#include "MBlock.h"
-#include "Storage.h"
 #include "Schedule.h"
 #include "Apply.h"
+#include "Printer.h"
 
 /* -----------------------------------------------------------------------------
    Forward decls.
diff --git a/rts/Sanity.h b/rts/Sanity.h
index 48f3383714625e26f3401d02f9c25443bca0f520..3020246a70156c6997879b49278d7306e21433c2 100644
--- a/rts/Sanity.h
+++ b/rts/Sanity.h
@@ -7,6 +7,7 @@
  * ---------------------------------------------------------------------------*/
 
 #ifndef SANITY_H
+#define SANITY_H
 
 #ifdef DEBUG
 
@@ -39,4 +40,3 @@ extern rtsBool isBlackhole( StgTSO* tso, StgClosure* p );
 #endif /* DEBUG */
  
 #endif /* SANITY_H */
-
diff --git a/rts/Schedule.c b/rts/Schedule.c
index c840c7864435f8eb86a70e8f4e8bbc1b4f2db386..b3d523e111488b5830b650b656970889f045aced 100644
--- a/rts/Schedule.c
+++ b/rts/Schedule.c
@@ -9,34 +9,25 @@
 #include "PosixSource.h"
 #define KEEP_LOCKCLOSURE
 #include "Rts.h"
-#include "SchedAPI.h"
+
+#include "sm/Storage.h"
 #include "RtsUtils.h"
-#include "RtsFlags.h"
-#include "OSThreads.h"
-#include "Storage.h"
 #include "StgRun.h"
-#include "Hooks.h"
 #include "Schedule.h"
-#include "StgMiscClosures.h"
 #include "Interpreter.h"
 #include "Printer.h"
 #include "RtsSignals.h"
 #include "Sanity.h"
 #include "Stats.h"
 #include "STM.h"
-#include "Timer.h"
 #include "Prelude.h"
 #include "ThreadLabels.h"
-#include "LdvProfile.h"
 #include "Updates.h"
 #include "Proftimer.h"
 #include "ProfHeap.h"
-#include "GC.h"
 #include "Weak.h"
-#include "EventLog.h"
-
-/* PARALLEL_HASKELL includes go here */
-
+#include "eventlog/EventLog.h"
+#include "sm/GC.h" // waitForGcThreads, releaseGCThreads, N
 #include "Sparks.h"
 #include "Capability.h"
 #include "Task.h"
@@ -47,7 +38,8 @@
 #include "Trace.h"
 #include "RaiseAsync.h"
 #include "Threads.h"
-#include "ThrIOManager.h"
+#include "Timer.h"
+#include "ThreadPaused.h"
 
 #ifdef HAVE_SYS_TYPES_H
 #include <sys/types.h>
@@ -64,12 +56,6 @@
 #include <errno.h>
 #endif
 
-// Turn off inlining when debugging - it obfuscates things
-#ifdef DEBUG
-# undef  STATIC_INLINE
-# define STATIC_INLINE static
-#endif
-
 /* -----------------------------------------------------------------------------
  * Global variables
  * -------------------------------------------------------------------------- */
@@ -2383,7 +2369,9 @@ interruptStgRts(void)
 {
     sched_state = SCHED_INTERRUPTING;
     setContextSwitches();
+#if defined(THREADED_RTS)
     wakeUpRts();
+#endif
 }
 
 /* -----------------------------------------------------------------------------
@@ -2399,16 +2387,15 @@ interruptStgRts(void)
    will have interrupted any blocking system call in progress anyway.
    -------------------------------------------------------------------------- */
 
-void
-wakeUpRts(void)
-{
 #if defined(THREADED_RTS)
+void wakeUpRts(void)
+{
     // This forces the IO Manager thread to wakeup, which will
     // in turn ensure that some OS thread wakes up and runs the
     // scheduler loop, which will cause a GC and deadlock check.
     ioManagerWakeup();
-#endif
 }
+#endif
 
 /* -----------------------------------------------------------------------------
  * checkBlackHoles()
diff --git a/rts/Schedule.h b/rts/Schedule.h
index 0e18168755b0c1df65f3c38bdd5500bfca629456..378bd68c66219041440a8404805dd432b13d460d 100644
--- a/rts/Schedule.h
+++ b/rts/Schedule.h
@@ -10,9 +10,9 @@
 #ifndef SCHEDULE_H
 #define SCHEDULE_H
 
-#include "OSThreads.h"
+#include "rts/OSThreads.h"
 #include "Capability.h"
-#include "EventLog.h"
+#include "eventlog/EventLog.h"
 
 /* initScheduler(), exitScheduler()
  * Called from STG :  no
@@ -30,28 +30,13 @@ void scheduleThread (Capability *cap, StgTSO *tso);
 // the desired Capability).
 void scheduleThreadOn(Capability *cap, StgWord cpu, StgTSO *tso);
 
-/* awakenBlockedQueue()
- *
- * Takes a pointer to the beginning of a blocked TSO queue, and
- * wakes up the entire queue.
- * Called from STG :  yes
- * Locks assumed   :  none
- */
-void awakenBlockedQueue (Capability *cap, StgTSO *tso);
-
 /* wakeUpRts()
  * 
  * Causes an OS thread to wake up and run the scheduler, if necessary.
  */
+#if defined(THREADED_RTS)
 void wakeUpRts(void);
-
-/* unblockOne()
- *
- * Put the specified thread on the run queue of the given Capability.
- * Called from STG :  yes
- * Locks assumed   :  we own the Capability.
- */
-StgTSO * unblockOne (Capability *cap, StgTSO *tso);
+#endif
 
 /* raiseExceptionHelper */
 StgWord raiseExceptionHelper (StgRegTable *reg, StgTSO *tso, StgClosure *exception);
@@ -69,11 +54,6 @@ StgWord findRetryFrameHelper (StgTSO *tso);
 void OSThreadProcAttr workerStart(Task *task);
 #endif
 
-char   *info_type(StgClosure *closure);    // dummy
-char   *info_type_by_ip(StgInfoTable *ip); // dummy
-void    awaken_blocked_queue(StgTSO *q);
-void    initThread(StgTSO *tso, nat stack_size);
-
 /* The state of the scheduler.  This is used to control the sequence
  * of events during shutdown, and when the runtime is interrupted
  * using ^C.
@@ -82,7 +62,7 @@ void    initThread(StgTSO *tso, nat stack_size);
 #define SCHED_INTERRUPTING  1  /* ^C detected, before threads are deleted */
 #define SCHED_SHUTTING_DOWN 2  /* final shutdown */
 
-extern volatile StgWord RTS_VAR(sched_state);
+extern volatile StgWord sched_state;
 
 /* 
  * flag that tracks whether we have done any execution in this time slice.
@@ -105,10 +85,10 @@ extern volatile StgWord recent_activity;
  *
  * In GranSim we have one run/blocked_queue per PE.
  */
-extern  StgTSO *RTS_VAR(blackhole_queue);
+extern  StgTSO *blackhole_queue;
 #if !defined(THREADED_RTS)
-extern  StgTSO *RTS_VAR(blocked_queue_hd), *RTS_VAR(blocked_queue_tl);
-extern  StgTSO *RTS_VAR(sleeping_queue);
+extern  StgTSO *blocked_queue_hd, *blocked_queue_tl;
+extern  StgTSO *sleeping_queue;
 #endif
 
 /* Set to rtsTrue if there are threads on the blackhole_queue, and
@@ -123,27 +103,15 @@ extern rtsBool blackholes_need_checking;
 extern rtsBool heap_overflow;
 
 #if defined(THREADED_RTS)
-extern Mutex RTS_VAR(sched_mutex);
+extern Mutex sched_mutex;
 #endif
 
-SchedulerStatus rts_mainLazyIO(HaskellObj p, /*out*/HaskellObj *ret);
-
 /* Called by shutdown_handler(). */
 void interruptStgRts (void);
 
-nat  run_queue_len (void);
-
 void resurrectThreads (StgTSO *);
 void performPendingThrowTos (StgTSO *);
 
-void printAllThreads(void);
-
-/* debugging only 
- */
-#ifdef DEBUG
-void print_bq (StgClosure *node);
-#endif
-
 /* -----------------------------------------------------------------------------
  * Some convenient macros/inline functions...
  */
diff --git a/rts/Sparks.c b/rts/Sparks.c
index 0fe8b61b81665be44e5c39d79ffe3eefcea0606f..ff4beb7fd58db9897de1aa14ba8b463a2074b58d 100644
--- a/rts/Sparks.c
+++ b/rts/Sparks.c
@@ -8,17 +8,11 @@
 
 #include "PosixSource.h"
 #include "Rts.h"
-#include "Storage.h"
+
 #include "Schedule.h"
-#include "SchedAPI.h"
-#include "RtsFlags.h"
 #include "RtsUtils.h"
-#include "ParTicky.h"
 #include "Trace.h"
 #include "Prelude.h"
-
-#include "SMP.h" // for cas
-
 #include "Sparks.h"
 
 #if defined(THREADED_RTS)
@@ -128,8 +122,6 @@ pruneSparkQueue (evac_fn evac, void *user, Capability *cap)
     StgWord botInd,oldBotInd,currInd; // indices in array (always < size)
     const StgInfoTable *info;
     
-    PAR_TICKY_MARK_SPARK_QUEUE_START();
-    
     n = 0;
     pruned_sparks = 0;
     
@@ -246,8 +238,6 @@ pruneSparkQueue (evac_fn evac, void *user, Capability *cap)
     pool->bottom = (oldBotInd <= botInd) ? botInd : (botInd + pool->size); 
     // first free place we did not use (corrected by wraparound)
 
-    PAR_TICKY_MARK_SPARK_QUEUE_END(n);
-
     debugTrace(DEBUG_sched, "pruned %d sparks", pruned_sparks);
     
     debugTrace(DEBUG_sched,
@@ -296,7 +286,8 @@ traverseSparkQueue (evac_fn evac, void *user, Capability *cap)
  *
  * Could be called after GC, before Cap. release, from scheduler. 
  * -------------------------------------------------------------------------- */
-void balanceSparkPoolsCaps(nat n_caps, Capability caps[]);
+void balanceSparkPoolsCaps(nat n_caps, Capability caps[])
+   GNUC3_ATTRIBUTE(__noreturn__);
 
 void balanceSparkPoolsCaps(nat n_caps STG_UNUSED, 
                            Capability caps[] STG_UNUSED) {
diff --git a/rts/Stable.c b/rts/Stable.c
index 97796b89a79b843fb59d69d8e51de2d1a58256cd..b427c94965da2800397f0f7e944181654c873e5f 100644
--- a/rts/Stable.c
+++ b/rts/Stable.c
@@ -8,13 +8,10 @@
 
 #include "PosixSource.h"
 #include "Rts.h"
+#include "RtsAPI.h"
+
 #include "Hash.h"
 #include "RtsUtils.h"
-#include "OSThreads.h"
-#include "Storage.h"
-#include "RtsAPI.h"
-#include "RtsFlags.h"
-#include "OSThreads.h"
 #include "Trace.h"
 #include "Stable.h"
 
@@ -83,6 +80,8 @@ static unsigned int SPT_size = 0;
 static Mutex stable_mutex;
 #endif
 
+static void enlargeStablePtrTable(void);
+
 /* This hash table maps Haskell objects to stable names, so that every
  * call to lookupStableName on a given object will return the same
  * stable name.
@@ -300,7 +299,7 @@ freeStablePtr(StgStablePtr sp)
     RELEASE_LOCK(&stable_mutex);
 }
 
-void
+static void
 enlargeStablePtrTable(void)
 {
   nat old_SPT_size = SPT_size;
diff --git a/rts/Stable.h b/rts/Stable.h
new file mode 100644
index 0000000000000000000000000000000000000000..258a6bea802dbb533d7c882bffe62b2cb31ce358
--- /dev/null
+++ b/rts/Stable.h
@@ -0,0 +1,34 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1998-2004
+ *
+ * Stable Pointers: A stable pointer is represented as an index into
+ * the stable pointer table.
+ *
+ * StgStablePtr used to be a synonym for StgWord, but stable pointers
+ * are guaranteed to be void* on the C-side, so we have to do some
+ * occasional casting. Size is not a matter, because StgWord is always
+ * the same size as a void*.
+ *
+ * ---------------------------------------------------------------------------*/
+
+#ifndef STABLE_H
+#define STABLE_H
+
+#include "sm/GC.h" // for evac_fn below
+
+void         freeStablePtr (StgStablePtr sp);
+
+void    initStablePtrTable    ( void );
+void    exitStablePtrTable    ( void );
+StgWord lookupStableName      ( StgPtr p );
+
+void    markStablePtrTable    ( evac_fn evac, void *user );
+void    threadStablePtrTable  ( evac_fn evac, void *user );
+void    gcStablePtrTable      ( void );
+void    updateStablePtrTable  ( rtsBool full );
+
+void    stablePtrPreGC        ( void );
+void    stablePtrPostGC       ( void );
+
+#endif /* STABLE_H */
diff --git a/rts/Stats.c b/rts/Stats.c
index ae3d8431576434ee52c98426e3f2997530e312bb..6029745368f6a967377e8ed47e7cf8884ff8b247 100644
--- a/rts/Stats.c
+++ b/rts/Stats.c
@@ -6,17 +6,16 @@
  *
  * ---------------------------------------------------------------------------*/
 
+#include "PosixSource.h"
 #include "Rts.h"
-#include "RtsFlags.h"
+
 #include "RtsUtils.h"
-#include "MBlock.h"
-#include "Storage.h"
 #include "Schedule.h"
 #include "Stats.h"
-#include "ParTicky.h"                       /* ToDo: move into Rts.h */
 #include "Profiling.h"
 #include "GetTime.h"
-#include "GC.h"
+#include "sm/Storage.h"
+#include "sm/GC.h" // gc_alloc_block_sync, whitehole_spin
 
 #if USE_PAPI
 #include "Papi.h"
@@ -506,21 +505,11 @@ stat_endHeapCensus(void)
    -------------------------------------------------------------------------- */
 
 #ifdef DEBUG
-#define TICK_VAR(arity) \
-  extern StgInt SLOW_CALLS_##arity; \
-  extern StgInt RIGHT_ARITY_##arity; \
-  extern StgInt TAGGED_PTR_##arity;
-
 #define TICK_VAR_INI(arity) \
   StgInt SLOW_CALLS_##arity = 1; \
   StgInt RIGHT_ARITY_##arity = 1; \
   StgInt TAGGED_PTR_##arity = 0;
 
-extern StgInt TOTAL_CALLS;
-
-TICK_VAR(1)
-TICK_VAR(2)
-
 TICK_VAR_INI(1)
 TICK_VAR_INI(2)
 
diff --git a/rts/Stats.h b/rts/Stats.h
index bd39ced57df624115a5ec2c21a5fb2388efafc85..4b98739fe227692b9de8e4c76ec16c2a5803b881 100644
--- a/rts/Stats.h
+++ b/rts/Stats.h
@@ -51,7 +51,6 @@ double    mut_user_time_during_heap_census(void);
 #endif /* PROFILING */
 
 void      statDescribeGens( void );
-HsInt64   getAllocations( void );
 
 Ticks stat_getElapsedGCTime(void);
 Ticks stat_getElapsedTime(void);
diff --git a/rts/StgCRun.c b/rts/StgCRun.c
index a211da3577bd12abc1a6542a9a7839bb3963f39e..92ce6ebb19742699e34949b33fd5503d9ac6a083 100644
--- a/rts/StgCRun.c
+++ b/rts/StgCRun.c
@@ -34,7 +34,6 @@
 
 #include "PosixSource.h"
 
-
 /*
  * We define the following (unused) global register variables, because for
  * some reason gcc generates sub-optimal code for StgRun() on the Alpha
@@ -70,9 +69,8 @@ register double fake_f9 __asm__("$f9");
 #define IN_STGCRUN 1
 #include "Stg.h"
 #include "Rts.h"
+
 #include "StgRun.h"
-#include "RtsFlags.h"
-#include "OSThreads.h"
 #include "Capability.h"
 
 #ifdef DEBUG
diff --git a/rts/StgPrimFloat.c b/rts/StgPrimFloat.c
index 5987aa95891a9e23dfac03b81f1a75105caa0e4b..e523f328c33b14f804acc29fb5ca7088b025e6fd 100644
--- a/rts/StgPrimFloat.c
+++ b/rts/StgPrimFloat.c
@@ -10,7 +10,12 @@
 #include "PosixSource.h"
 #include "Rts.h"
 
+#include "StgPrimFloat.h"
+
 #include <math.h>
+#include <float.h>
+
+#define IEEE_FLOATING_POINT 1
 
 /*
  * Encoding and decoding Doubles.  Code based on the HBC code
@@ -218,252 +223,3 @@ __decodeFloat_Int (I_ *man, I_ *exp, StgFloat flt)
     }
 }
 
-union stg_ieee754_flt
-{
-   float f;
-   struct {
-
-#if WORDS_BIGENDIAN
-	unsigned int negative:1;
-	unsigned int exponent:8;
-	unsigned int mantissa:23;
-#else
-	unsigned int mantissa:23;
-	unsigned int exponent:8;
-	unsigned int negative:1;
-#endif
-   } ieee;
-   struct {
-
-#if WORDS_BIGENDIAN
-	unsigned int negative:1;
-	unsigned int exponent:8;
-	unsigned int quiet_nan:1;
-	unsigned int mantissa:22;
-#else
-	unsigned int mantissa:22;
-	unsigned int quiet_nan:1;
-	unsigned int exponent:8;
-	unsigned int negative:1;
-#endif
-   } ieee_nan;
-};
-
-/*
- 
- To recap, here's the representation of a double precision
- IEEE floating point number:
-
- sign         63           sign bit (0==positive, 1==negative)
- exponent     62-52        exponent (biased by 1023)
- fraction     51-0         fraction (bits to right of binary point)
-*/
-
-union stg_ieee754_dbl
-{
-   double d;
-   struct {
-
-#if WORDS_BIGENDIAN
-	unsigned int negative:1;
-	unsigned int exponent:11;
-	unsigned int mantissa0:20;
-	unsigned int mantissa1:32;
-#else
-#if FLOAT_WORDS_BIGENDIAN
-	unsigned int mantissa0:20;
-	unsigned int exponent:11;
-	unsigned int negative:1;
-	unsigned int mantissa1:32;
-#else
-	unsigned int mantissa1:32;
-	unsigned int mantissa0:20;
-	unsigned int exponent:11;
-	unsigned int negative:1;
-#endif
-#endif
-   } ieee;
-    /* This format makes it easier to see if a NaN is a signalling NaN.  */
-   struct {
-
-#if WORDS_BIGENDIAN
-	unsigned int negative:1;
-	unsigned int exponent:11;
-	unsigned int quiet_nan:1;
-	unsigned int mantissa0:19;
-	unsigned int mantissa1:32;
-#else
-#if FLOAT_WORDS_BIGENDIAN
-	unsigned int mantissa0:19;
-	unsigned int quiet_nan:1;
-	unsigned int exponent:11;
-	unsigned int negative:1;
-	unsigned int mantissa1:32;
-#else
-	unsigned int mantissa1:32;
-	unsigned int mantissa0:19;
-	unsigned int quiet_nan:1;
-	unsigned int exponent:11;
-	unsigned int negative:1;
-#endif
-#endif
-   } ieee_nan;
-};
-
-/*
- * Predicates for testing for extreme IEEE fp values. Used
- * by the bytecode evaluator and the Prelude.
- *
- */ 
-
-/* In case you don't suppport IEEE, you'll just get dummy defs.. */
-#ifdef IEEE_FLOATING_POINT
-
-StgInt
-isDoubleNaN(StgDouble d)
-{
-  union stg_ieee754_dbl u;
-  
-  u.d = d;
-
-  return (
-    u.ieee.exponent  == 2047 /* 2^11 - 1 */ &&  /* Is the exponent all ones? */
-    (u.ieee.mantissa0 != 0 || u.ieee.mantissa1 != 0)
-    	/* and the mantissa non-zero? */
-    );
-}
-
-StgInt
-isDoubleInfinite(StgDouble d)
-{
-    union stg_ieee754_dbl u;
-
-    u.d = d;
-
-    /* Inf iff exponent is all ones, mantissa all zeros */
-    return (
-        u.ieee.exponent  == 2047 /* 2^11 - 1 */ &&
-	u.ieee.mantissa0 == 0 		        &&
-	u.ieee.mantissa1 == 0
-      );
-}
-
-StgInt
-isDoubleDenormalized(StgDouble d) 
-{
-    union stg_ieee754_dbl u;
-
-    u.d = d;
-
-    /* A (single/double/quad) precision floating point number
-       is denormalised iff:
-        - exponent is zero
-	- mantissa is non-zero.
-        - (don't care about setting of sign bit.)
-
-    */
-    return (  
-	u.ieee.exponent  == 0 &&
-	(u.ieee.mantissa0 != 0 ||
-	 u.ieee.mantissa1 != 0)
-      );
-	 
-}
-
-StgInt
-isDoubleNegativeZero(StgDouble d) 
-{
-    union stg_ieee754_dbl u;
-
-    u.d = d;
-    /* sign (bit 63) set (only) => negative zero */
-
-    return (
-    	u.ieee.negative  == 1 &&
-	u.ieee.exponent  == 0 &&
-	u.ieee.mantissa0 == 0 &&
-	u.ieee.mantissa1 == 0);
-}
-
-/* Same tests, this time for StgFloats. */
-
-/*
- To recap, here's the representation of a single precision
- IEEE floating point number:
-
- sign         31           sign bit (0 == positive, 1 == negative)
- exponent     30-23        exponent (biased by 127)
- fraction     22-0         fraction (bits to right of binary point)
-*/
-
-
-StgInt
-isFloatNaN(StgFloat f)
-{
-    union stg_ieee754_flt u;
-    u.f = f;
-
-   /* Floating point NaN iff exponent is all ones, mantissa is
-      non-zero (but see below.) */
-   return (
-   	u.ieee.exponent == 255 /* 2^8 - 1 */ &&
-	u.ieee.mantissa != 0);
-}
-
-StgInt
-isFloatInfinite(StgFloat f)
-{
-    union stg_ieee754_flt u;
-    u.f = f;
-  
-    /* A float is Inf iff exponent is max (all ones),
-       and mantissa is min(all zeros.) */
-    return (
-    	u.ieee.exponent == 255 /* 2^8 - 1 */ &&
-	u.ieee.mantissa == 0);
-}
-
-StgInt
-isFloatDenormalized(StgFloat f)
-{
-    union stg_ieee754_flt u;
-    u.f = f;
-
-    /* A (single/double/quad) precision floating point number
-       is denormalised iff:
-        - exponent is zero
-	- mantissa is non-zero.
-        - (don't care about setting of sign bit.)
-
-    */
-    return (
-    	u.ieee.exponent == 0 &&
-	u.ieee.mantissa != 0);
-}
-
-StgInt
-isFloatNegativeZero(StgFloat f) 
-{
-    union stg_ieee754_flt u;
-    u.f = f;
-
-    /* sign (bit 31) set (only) => negative zero */
-    return (
-	u.ieee.negative      &&
-	u.ieee.exponent == 0 &&
-	u.ieee.mantissa == 0);
-}
-
-#else /* ! IEEE_FLOATING_POINT */
-
-/* Dummy definitions of predicates - they all return false */
-StgInt isDoubleNaN(d) StgDouble d; { return 0; }
-StgInt isDoubleInfinite(d) StgDouble d; { return 0; }
-StgInt isDoubleDenormalized(d) StgDouble d; { return 0; }
-StgInt isDoubleNegativeZero(d) StgDouble d; { return 0; }
-StgInt isFloatNaN(f) StgFloat f; { return 0; }
-StgInt isFloatInfinite(f) StgFloat f; { return 0; }
-StgInt isFloatDenormalized(f) StgFloat f; { return 0; }
-StgInt isFloatNegativeZero(f) StgFloat f; { return 0; }
-
-#endif /* ! IEEE_FLOATING_POINT */
diff --git a/rts/StgPrimFloat.h b/rts/StgPrimFloat.h
new file mode 100644
index 0000000000000000000000000000000000000000..5de8360efdceb0054e5f5291efdeb9765b22debd
--- /dev/null
+++ b/rts/StgPrimFloat.h
@@ -0,0 +1,21 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1998-2009
+ *
+ * Primitive floating-point operations
+ *
+ * ---------------------------------------------------------------------------*/
+
+#ifndef STGPRIMFLOAT_H
+#define STGPRIMFLOAT_H
+
+/* grimy low-level support functions defined in StgPrimFloat.c */
+extern void      __decodeDouble_2Int (I_ *man_sign, W_ *man_high, W_ *man_low, I_ *exp, StgDouble dbl);
+extern void      __decodeFloat_Int (I_ *man, I_ *exp, StgFloat flt);
+extern StgDouble __2Int_encodeDouble (I_ j_high, I_ j_low, I_ e);
+extern StgDouble __int_encodeDouble (I_ j, I_ e);
+extern StgDouble __word_encodeDouble (W_ j, I_ e);
+extern StgFloat  __int_encodeFloat (I_ j, I_ e);
+extern StgFloat  __word_encodeFloat (W_ j, I_ e);
+
+#endif /* STGPRIMFLOAT_H */
diff --git a/rts/Task.c b/rts/Task.c
index af94a8aabed967b0101ceb523f75923d799ff5fa..9a8ebd69630b3864cb46bd27a7191e3328678509 100644
--- a/rts/Task.c
+++ b/rts/Task.c
@@ -8,14 +8,13 @@
  * 
  * -------------------------------------------------------------------------*/
 
+#include "PosixSource.h"
 #include "Rts.h"
+
 #include "RtsUtils.h"
-#include "OSThreads.h"
 #include "Task.h"
 #include "Capability.h"
 #include "Stats.h"
-#include "RtsFlags.h"
-#include "Storage.h"
 #include "Schedule.h"
 #include "Hash.h"
 #include "Trace.h"
@@ -258,15 +257,15 @@ taskTimeStamp (Task *task USED_IF_THREADS)
 #endif
 }
 
+#if defined(THREADED_RTS)
+
 void
 workerTaskStop (Task *task)
 {
-#if defined(THREADED_RTS)
     OSThreadId id;
     id = osThreadId();
     ASSERT(task->id == id);
     ASSERT(myTask() == task);
-#endif
 
     task->cap = NULL;
     taskTimeStamp(task);
@@ -280,12 +279,7 @@ workerTaskStop (Task *task)
     RELEASE_LOCK(&sched_mutex);
 }
 
-void
-resetTaskManagerAfterFork (void)
-{
-    // TODO!
-    taskCount = 0;
-}
+#endif
 
 #if defined(THREADED_RTS)
 
diff --git a/rts/Task.h b/rts/Task.h
index 590dd679b3ca45cd57a9b72c5d2577236a501064..c11afb584c3a65895bcba962582513c0d5a7a494 100644
--- a/rts/Task.h
+++ b/rts/Task.h
@@ -20,26 +20,14 @@
    Task, and OS threads that enter the Haskell RTS for the purposes of
    making a call-in are also Tasks.
    
-   The relationship between the number of tasks and capabilities, and
-   the runtime build (-threaded, -smp etc.) is summarised by the
-   following table:
+   In the THREADED_RTS build, multiple Tasks may all be running
+   Haskell code simultaneously. A task relinquishes its Capability
+   when it is asked to evaluate an external (C) call.
 
-     build        Tasks   Capabilities
-     ---------------------------------
-     normal         1          1
-     -threaded      N          N
-
-   The non-threaded build has a single Task and a single global
-   Capability.
-   
-   The THREADED_RTS build allows multiple tasks and mulitple Capabilities.
-   Multiple Tasks may all be running Haskell code simultaneously. A task
-   relinquishes its Capability when it is asked to evaluate an external
-   (C) call.
-
-   In general, there may be multiple Tasks for an OS thread.  This
-   happens if one Task makes a foreign call from Haskell, and
-   subsequently calls back in to create a new bound thread.
+   In general, there may be multiple Tasks associated with a given OS
+   thread.  A second Task is created when one Task makes a foreign
+   call from Haskell, and subsequently calls back in to Haskell,
+   creating a new bound thread.
 
    A particular Task structure can belong to more than one OS thread
    over its lifetime.  This is to avoid creating an unbounded number
@@ -190,7 +178,10 @@ INLINE_HEADER void taskEnter (Task *task);
 // mainly for stats-gathering purposes.
 // Requires: sched_mutex.
 //
+#if defined(THREADED_RTS)
+// In the non-threaded RTS, tasks never stop.
 void workerTaskStop (Task *task);
+#endif
 
 // Record the time spent in this Task.
 // This is called by workerTaskStop() but not by boundTaskExiting(),
@@ -207,12 +198,6 @@ void discardTask (Task *task);
 //
 INLINE_HEADER Task *myTask (void);
 
-// After a fork, the tasks are not carried into the child process, so
-// we must tell the task manager.
-// Requires: sched_mutex.
-//
-void resetTaskManagerAfterFork (void);
-
 #if defined(THREADED_RTS)
 
 // Workers are attached to the supplied Capability.  This Capability
diff --git a/rts/ThrIOManager.h b/rts/ThrIOManager.h
deleted file mode 100644
index eeccc6c42092739a1037be1fc86da1452e16512b..0000000000000000000000000000000000000000
--- a/rts/ThrIOManager.h
+++ /dev/null
@@ -1,15 +0,0 @@
-/* -----------------------------------------------------------------------------
- *
- * (c) The GHC Team 1998-2006
- *
- * Communicating with the IO manager thread (see GHC.Conc).
- * Posix implementation in posix/Signals.c
- * Win32 implementation in win32/ThrIOManager.c
- *
- * -------------------------------------------------------------------------*/
-
-#if defined(THREADED_RTS)
-void ioManagerWakeup (void);
-void ioManagerDie (void);
-void ioManagerStart (void);
-#endif
diff --git a/rts/ThreadLabels.c b/rts/ThreadLabels.c
index 6919e1ae383efdb3e7feb26a8316dedaa8cae8d9..aa620f0b9e2d14954435e875c7cd787742295942 100644
--- a/rts/ThreadLabels.c
+++ b/rts/ThreadLabels.c
@@ -9,6 +9,7 @@
 
 #include "PosixSource.h"
 #include "Rts.h"
+
 #include "ThreadLabels.h"
 #include "RtsUtils.h"
 #include "Hash.h"
diff --git a/rts/ThreadLabels.h b/rts/ThreadLabels.h
index 59eb321cb7c45fa83c29de81c6f677f20c18447f..60289ef3066420b301e5bb5627f0cbcee147a5cb 100644
--- a/rts/ThreadLabels.h
+++ b/rts/ThreadLabels.h
@@ -7,8 +7,8 @@
  *
  * ---------------------------------------------------------------------------*/
 
-#ifndef __THREADLABELS_H__
-#define __THREADLABELS_H__
+#ifndef THREADLABELS_H
+#define THREADLABELS_H
 
 #if defined(DEBUG)
 void    initThreadLabelTable (void);
@@ -19,4 +19,4 @@ void    removeThreadLabel    (StgWord key);
 void    labelThread          (StgPtr tso, char *label);
 #endif
 
-#endif /* __THREADLABELS_H__ */
+#endif /* THREADLABELS_H */
diff --git a/rts/ThreadPaused.c b/rts/ThreadPaused.c
index 4882ab2374b148c603661378a8c58c9b2c989816..58c30e330e1a147abe5066c0a1a049a1aa9f7f30 100644
--- a/rts/ThreadPaused.c
+++ b/rts/ThreadPaused.c
@@ -6,13 +6,14 @@
  *
  * ---------------------------------------------------------------------------*/
 
+// #include "PosixSource.h"
 #include "Rts.h"
-#include "Storage.h"
-#include "LdvProfile.h"
+
+#include "ThreadPaused.h"
+#include "sm/Storage.h"
 #include "Updates.h"
 #include "RaiseAsync.h"
 #include "Trace.h"
-#include "RtsFlags.h"
 
 #include <string.h> // for memmove()
 
@@ -141,23 +142,23 @@ stackSqueeze(StgTSO *tso, StgPtr bottom)
     // <empty> indicates unused
     //
     {
-	void *sp;
-	void *gap_start, *next_gap_start, *gap_end;
+	StgWord8 *sp;
+	StgWord8 *gap_start, *next_gap_start, *gap_end;
 	nat chunk_size;
 
-	next_gap_start = (void *)((unsigned char*)gap + sizeof(StgUpdateFrame));
+	next_gap_start = (StgWord8*)gap + sizeof(StgUpdateFrame);
 	sp = next_gap_start;
 
 	while ((StgPtr)gap > tso->sp) {
 
 	    // we're working in *bytes* now...
 	    gap_start = next_gap_start;
-	    gap_end = (void*) ((unsigned char*)gap_start - gap->gap_size * sizeof(W_));
+	    gap_end = gap_start - gap->gap_size * sizeof(W_);
 
 	    gap = gap->next_gap;
-	    next_gap_start = (void *)((unsigned char*)gap + sizeof(StgUpdateFrame));
+	    next_gap_start = (StgWord8*)gap + sizeof(StgUpdateFrame);
 
-	    chunk_size = (unsigned char*)gap_end - (unsigned char*)next_gap_start;
+	    chunk_size = gap_end - next_gap_start;
 	    sp -= chunk_size;
 	    memmove(sp, next_gap_start, chunk_size);
 	}
diff --git a/rts/ThreadPaused.h b/rts/ThreadPaused.h
new file mode 100644
index 0000000000000000000000000000000000000000..60cded28853e14f7c6759de34b57ffe42fdd3a74
--- /dev/null
+++ b/rts/ThreadPaused.h
@@ -0,0 +1,14 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team 1998-2006
+ *
+ * Tidying up a thread when it stops running
+ *
+ * ---------------------------------------------------------------------------*/
+
+#ifndef THREADPAUSED_H
+#define THREADPAUSED_H
+
+void threadPaused ( Capability *cap, StgTSO * );
+
+#endif /* THREADPAUSED_H */
diff --git a/rts/Threads.c b/rts/Threads.c
index 1d871a58562e479b87d6219ccda52ad47f2c3541..28820c8d440e8e1d59cb2daec2d180e6e674889c 100644
--- a/rts/Threads.c
+++ b/rts/Threads.c
@@ -8,10 +8,8 @@
 
 #include "PosixSource.h"
 #include "Rts.h"
-#include "SchedAPI.h"
-#include "Storage.h"
+
 #include "Threads.h"
-#include "RtsFlags.h"
 #include "STM.h"
 #include "Schedule.h"
 #include "Trace.h"
diff --git a/rts/Threads.h b/rts/Threads.h
index f6d2dfd11a5d4c5adb8fc417f48bdfd2728365d2..d17235a2cbfaff890fc7d185adf8682da8c6e0a1 100644
--- a/rts/Threads.h
+++ b/rts/Threads.h
@@ -25,13 +25,6 @@ void printThreadBlockage (StgTSO *tso);
 void printThreadStatus (StgTSO *t);
 void printAllThreads (void);
 void printThreadQueue (StgTSO *t);
-# if defined(PARALLEL_HASKELL)
-void print_bq (StgClosure *node);
-void print_bqe (StgBlockingQueueElement *bqe);
-nat  run_queue_len (void);
-# elif defined(GRAN)
-void print_bq (StgClosure *node);
-# endif
 #endif
 
 #endif /* THREADS_H */
diff --git a/rts/Ticky.c b/rts/Ticky.c
index d319d18f09e2ba186f6726a1197f9aab4a789cd5..4737f4a5778651ef2499cf57fc6416d3eece2677 100644
--- a/rts/Ticky.c
+++ b/rts/Ticky.c
@@ -11,8 +11,8 @@
 #define TICKY_C			/* define those variables */
 #include "PosixSource.h"
 #include "Rts.h"
+
 #include "TickyCounters.h"
-#include "RtsFlags.h"
 #include "Ticky.h"
 
 /* -----------------------------------------------------------------------------
diff --git a/rts/Ticky.h b/rts/Ticky.h
index 21765e4bbb621a99293520fa21dff641def793b0..c8da50a9406a914f4a42917cd10ce15e43919e1e 100644
--- a/rts/Ticky.h
+++ b/rts/Ticky.h
@@ -6,4 +6,9 @@
  *
  * ---------------------------------------------------------------------------*/
 
-extern void PrintTickyInfo(void);
+#ifndef TICKY_H
+#define TICKY_H
+
+void PrintTickyInfo(void);
+
+#endif /* TICKY_H */
diff --git a/rts/Timer.c b/rts/Timer.c
index 96ea5e221128f0cbb394b33be527f88e87f402e9..8c178a076bab249cef90d2c7bee8225a70e93331 100644
--- a/rts/Timer.c
+++ b/rts/Timer.c
@@ -14,12 +14,13 @@
  * on platform-specific services to install and run the timers.
  *
  */
+
+#include "PosixSource.h"
 #include "Rts.h"
-#include "RtsFlags.h"
+
+#include "Timer.h"
 #include "Proftimer.h"
-#include "Storage.h"
 #include "Schedule.h"
-#include "Timer.h"
 #include "Ticker.h"
 #include "Capability.h"
 #include "RtsSignals.h"
diff --git a/rts/Timer.h b/rts/Timer.h
index 59b695cac27d8eebdff8a28d7642a391f5eba12c..c679a5881a1f6dfa9472d19b668c2497e6d1ca45 100644
--- a/rts/Timer.h
+++ b/rts/Timer.h
@@ -9,9 +9,7 @@
 #ifndef TIMER_H
 #define TIMER_H
 
-extern void initTimer(void);
-extern void startTimer(void);
-extern void stopTimer(void);
-extern void exitTimer(void);
+void initTimer (void);
+void exitTimer (void);
 
 #endif /* TIMER_H */
diff --git a/rts/Trace.c b/rts/Trace.c
index 63d4816585ec72951bc66cc0866dcf50404b9587..0a47ea38608c1b579847529045f9387c9d1aac9d 100644
--- a/rts/Trace.c
+++ b/rts/Trace.c
@@ -8,10 +8,12 @@
 
 #ifdef DEBUG
 
+// external headers
 #include "Rts.h"
-#include "OSThreads.h"
+#include "rts/Flags.h"
+
+// internal headers
 #include "Trace.h"
-#include "RtsFlags.h"
 #include "GetTime.h"
 #include "Stats.h"
 
diff --git a/rts/Updates.cmm b/rts/Updates.cmm
index 4043da05a555ca246abcf5b144f73ec64ac82778..fadf63b8575c850476daa519f6c4d53ec26cd6ef 100644
--- a/rts/Updates.cmm
+++ b/rts/Updates.cmm
@@ -12,8 +12,9 @@
 
 
 #include "Cmm.h"
+#include "rts/prof/LDV.h"
+
 #include "Updates.h"
-#include "StgLdvProf.h"
 
 /* on entry to the update code
    (1) R1 points to the closure being returned
diff --git a/rts/parallel/WSDeque.c b/rts/WSDeque.c
similarity index 99%
rename from rts/parallel/WSDeque.c
rename to rts/WSDeque.c
index acecb85e5f8fe8fe0360652c0ec037f8b8669a76..090a5497098986976b13261b210e2b3a4592c454 100644
--- a/rts/parallel/WSDeque.c
+++ b/rts/WSDeque.c
@@ -38,10 +38,11 @@
  * 
  * ---------------------------------------------------------------------------*/
 
+#include "PosixSource.h"
 #include "Rts.h"
+
 #include "RtsUtils.h"
 #include "WSDeque.h"
-#include "SMP.h" // for cas
 
 #define CASTOP(addr,old,new) ((old) == cas(((StgPtr)addr),(old),(new)))
 
diff --git a/rts/parallel/WSDeque.h b/rts/WSDeque.h
similarity index 100%
rename from rts/parallel/WSDeque.h
rename to rts/WSDeque.h
diff --git a/rts/Weak.c b/rts/Weak.c
index 17150f6b3cc7357d04aee573cb3a1d3d04bb9530..f5c3a62bda18b41e5c2435d6cd48c0087bb82919 100644
--- a/rts/Weak.c
+++ b/rts/Weak.c
@@ -8,13 +8,12 @@
 
 #include "PosixSource.h"
 #include "Rts.h"
+#include "RtsAPI.h"
+
 #include "RtsUtils.h"
-#include "SchedAPI.h"
-#include "RtsFlags.h"
 #include "Weak.h"
 #include "Schedule.h"
 #include "Prelude.h"
-#include "RtsAPI.h"
 #include "Trace.h"
 
 // ForeignPtrs with C finalizers rely on weak pointers inside weak_ptr_list
@@ -26,7 +25,7 @@ StgWeak *weak_ptr_list;
 rtsBool running_finalizers = rtsFalse;
 
 void
-runCFinalizer(StgVoid *fn, StgVoid *ptr, StgVoid *env, StgWord flag)
+runCFinalizer(void *fn, void *ptr, void *env, StgWord flag)
 {
     if (flag)
 	((void (*)(void *, void *))fn)(env, ptr);
@@ -47,9 +46,9 @@ runAllCFinalizers(StgWeak *list)
 	farr = (StgArrWords *)UNTAG_CLOSURE(w->cfinalizer);
 
 	if ((StgClosure *)farr != &stg_NO_FINALIZER_closure)
-	    runCFinalizer((StgVoid *)farr->payload[0],
-	                  (StgVoid *)farr->payload[1],
-	                  (StgVoid *)farr->payload[2],
+	    runCFinalizer((void *)farr->payload[0],
+	                  (void *)farr->payload[1],
+	                  (void *)farr->payload[2],
 	                  farr->payload[3]);
     }
 
@@ -97,9 +96,9 @@ scheduleFinalizers(Capability *cap, StgWeak *list)
 	farr = (StgArrWords *)UNTAG_CLOSURE(w->cfinalizer);
 
 	if ((StgClosure *)farr != &stg_NO_FINALIZER_closure)
-	    runCFinalizer((StgVoid *)farr->payload[0],
-	                  (StgVoid *)farr->payload[1],
-	                  (StgVoid *)farr->payload[2],
+	    runCFinalizer((void *)farr->payload[0],
+	                  (void *)farr->payload[1],
+	                  (void *)farr->payload[2],
 	                  farr->payload[3]);
 
 #ifdef PROFILING
diff --git a/rts/Weak.h b/rts/Weak.h
index 8fccae2a63201bcb7278c96e74c5275db05efd5c..4f60bf9ebd30e1d91165ca57875a6a0e1ef79859 100644
--- a/rts/Weak.h
+++ b/rts/Weak.h
@@ -12,10 +12,12 @@
 #include "Capability.h"
 
 extern rtsBool running_finalizers;
+extern StgWeak * weak_ptr_list;
 
-void runCFinalizer(StgVoid *fn, StgVoid *ptr, StgVoid *env, StgWord flag);
+void runCFinalizer(void *fn, void *ptr, void *env, StgWord flag);
 void runAllCFinalizers(StgWeak *w);
 void scheduleFinalizers(Capability *cap, StgWeak *w);
 void markWeakList(void);
 
-#endif
+#endif /* WEAK_H */
+
diff --git a/rts/eventlog/EventLog.c b/rts/eventlog/EventLog.c
index 098934c1babfdafa6d60c8fcc5adb2b06bf0ea7e..7f93d73d002e59e3ae6d769e7b5aa0bb8c9732f7 100644
--- a/rts/eventlog/EventLog.c
+++ b/rts/eventlog/EventLog.c
@@ -8,12 +8,15 @@
 
 #ifdef EVENTLOG
 
+#include "PosixSource.h"
 #include "Rts.h"
+
 #include "EventLog.h"
 #include "Capability.h"
 #include "Trace.h"
 #include "RtsUtils.h"
 #include "Stats.h"
+
 #include <string.h> 
 #include <stdio.h>
 
diff --git a/rts/eventlog/EventLog.h b/rts/eventlog/EventLog.h
index c2511b986bdb3e165a14d4ac8ce37291e21de7cc..10903709d8d2599b61cc03d8e972a0f52808395f 100644
--- a/rts/eventlog/EventLog.h
+++ b/rts/eventlog/EventLog.h
@@ -9,8 +9,8 @@
 #ifndef EVENTLOG_H
 #define EVENTLOG_H
 
+#include "rts/EventLogFormat.h"
 #include "Capability.h"
-#include "EventLogFormat.h"
 
 #ifdef EVENTLOG
 
diff --git a/rts/ghc.mk b/rts/ghc.mk
index a7bfd12e58fc69589a96673fddd8e7e2933e6f25..4bb41182844a904db07e8cced3ec699e88f0b972 100644
--- a/rts/ghc.mk
+++ b/rts/ghc.mk
@@ -72,7 +72,7 @@ rts/dist/build/sm/Scav_thr.c : rts/sm/Scav.c
 	"$(MKDIRHIER)" $(dir $@)
 	cp $< $@
 
-rts_H_FILES = $(wildcard $(GHC_INCLUDE_DIR)/*.h) $(wildcard rts/*.h)
+rts_H_FILES = $(wildcard includes/*.h) $(wildcard rts/*.h)
 
 # collect the -l flags that we need to link the rts dyn lib.
 rts/libs.depend : $(GHC_PKG_INPLACE)
@@ -130,27 +130,26 @@ $(foreach way,$(rts_WAYS),$(eval $(call build-rts-way,$(way))))
 #-----------------------------------------------------------------------------
 # Flags for compiling every file
 
-# gcc provides lots of useful warnings if you ask it.
-# This is a pretty good list to start with - use a # to comment out
-# any you don't like.
-WARNING_OPTS += -Wall
-WARNING_OPTS += -W
+# We like plenty of warnings.
+WARNING_OPTS += -Wall -Wextra
 WARNING_OPTS += -Wstrict-prototypes 
 WARNING_OPTS += -Wmissing-prototypes 
 WARNING_OPTS += -Wmissing-declarations
 WARNING_OPTS += -Winline
 WARNING_OPTS += -Waggregate-return
-#WARNING_OPTS += -Wpointer-arith
+WARNING_OPTS += -Wpointer-arith
+WARNING_OPTS += -Wmissing-noreturn
+WARNING_OPTS += -Wcast-align
+WARNING_OPTS += -Wnested-externs
+WARNING_OPTS += -Wredundant-decls 
+
+# These ones are hard to avoid:
+#WARNING_OPTS += -Wconversion
 #WARNING_OPTS += -Wbad-function-cast
-#WARNING_OPTS += -Wcast-align
-#WARNING_OPTS += -Wnested-externs
 #WARNING_OPTS += -Wshadow
 #WARNING_OPTS += -Wcast-qual
-#WARNING_OPTS += -Wno-unused 
-#WARNING_OPTS += -Wredundant-decls 
-#WARNING_OPTS += -Wconversion
 
-STANDARD_OPTS += -I$(GHC_INCLUDE_DIR) -I$(GHC_RTS_DIR) -Irts/parallel -Irts/sm -Irts/eventlog
+STANDARD_OPTS += -Iincludes -Irts
 # COMPILING_RTS is only used when building Win32 DLL support.
 STANDARD_OPTS += -DCOMPILING_RTS
 
@@ -205,6 +204,8 @@ rts_HC_OPTS += -dcmm-lint
 # upd_evacee() assigments get moved before the object copy.
 rts_CC_OPTS += -fno-strict-aliasing
 
+rts_CC_OPTS += -fno-common
+
 ifeq "$(BeConservative)" "YES"
 rts_CC_OPTS += -DBE_CONSERVATIVE
 endif
@@ -244,14 +245,14 @@ RtsUtils_CC_OPTS += -DGhcEnableTablesNextToCode=$(DQ)$(GhcEnableTablesNextToCode
 
 # ffi.h triggers prototype warnings, so disable them here:
 Interpreter_CC_OPTS += -Wno-strict-prototypes
-Adjustor_CC_OPTS += -Wno-strict-prototypes
-sm/Storage_CC_OPTS += -Wno-strict-prototypes
+Adjustor_CC_OPTS    += -Wno-strict-prototypes
+sm/Storage_CC_OPTS  += -Wno-strict-prototypes
+
+# inlining warnings happen in Compact
+sm/Compact_CC_OPTS += -Wno-inline
 
-StgCRun_CC_OPTS += -w
-Typeable_CC_OPTS += -w
 RetainerProfile_CC_OPTS += -w
 RetainerSet_CC_OPTS += -Wno-format
-sm/Compact_CC_OPTS += -w
 # On Windows:
 win32/ConsoleHandler_CC_OPTS += -w
 win32/ThrIOManager_CC_OPTS += -w
@@ -271,8 +272,8 @@ sm/Evac_thr_HC_OPTS += -optc-funroll-loops
 
 # These files are just copies of sm/Evac.c and sm/Scav.c respectively,
 # but compiled with -DPARALLEL_GC.
-sm/Evac_thr_HC_OPTS += -optc-DPARALLEL_GC
-sm/Scav_thr_HC_OPTS += -optc-DPARALLEL_GC
+sm/Evac_thr_HC_OPTS += -optc-DPARALLEL_GC -Irts/sm
+sm/Scav_thr_HC_OPTS += -optc-DPARALLEL_GC -Irts/sm
 
 #-----------------------------------------------------------------------------
 # Add PAPI library if needed
diff --git a/rts/hooks/FlagDefaults.c b/rts/hooks/FlagDefaults.c
index 393d39bc39fc4c63a4f3690980f3b1a023a07fef..ce1666f06d9537256e39c6166bf294be0ac2e01d 100644
--- a/rts/hooks/FlagDefaults.c
+++ b/rts/hooks/FlagDefaults.c
@@ -4,6 +4,7 @@
  *
  * ---------------------------------------------------------------------------*/
 
+#include "PosixSource.h"
 #include "Rts.h"
 
 void
diff --git a/rts/hooks/MallocFail.c b/rts/hooks/MallocFail.c
index 1218d1d8d0122d392d4df48cb83193a3c7deef37..41c0d2a8556c09a98f6e0ded5d7e76697305bc85 100644
--- a/rts/hooks/MallocFail.c
+++ b/rts/hooks/MallocFail.c
@@ -4,6 +4,7 @@
  *
  * ---------------------------------------------------------------------------*/
 
+#include "PosixSource.h"
 #include "Rts.h"
 
 #include <stdio.h>
diff --git a/rts/hooks/OnExit.c b/rts/hooks/OnExit.c
index dd4c3b4bb043f65b10ea631ca5ac8f6e44cd4a8d..e8019c046b27f309e0119e6afec4b771ea8fdd59 100644
--- a/rts/hooks/OnExit.c
+++ b/rts/hooks/OnExit.c
@@ -4,6 +4,7 @@
  *
  * ---------------------------------------------------------------------------*/
 
+#include "PosixSource.h"
 #include "Rts.h"
 
 /* Note: by the time this hook has been called, Haskell land
diff --git a/rts/hooks/OutOfHeap.c b/rts/hooks/OutOfHeap.c
index e9982377644ac5060f42114257f12dad5e523020..1945c51802095a85b2adefef0c6105ff0b5a73cd 100644
--- a/rts/hooks/OutOfHeap.c
+++ b/rts/hooks/OutOfHeap.c
@@ -4,6 +4,7 @@
  *
  * ---------------------------------------------------------------------------*/
 
+#include "PosixSource.h"
 #include "Rts.h"
 #include <stdio.h>
 
diff --git a/rts/hooks/RtsOpts.c b/rts/hooks/RtsOpts.c
index b934b05f1b72f2164dc85d18f98360a604f4c458..2aae37246ea190c5edbe80472a3dd31814474ffb 100644
--- a/rts/hooks/RtsOpts.c
+++ b/rts/hooks/RtsOpts.c
@@ -4,6 +4,7 @@
  *
  * ---------------------------------------------------------------------------*/
 
+#include "PosixSource.h"
 #include "Rts.h"
 
 #include <stdlib.h>
diff --git a/rts/hooks/StackOverflow.c b/rts/hooks/StackOverflow.c
index a395a3a1a53b66cb671483e91948845a913190ca..0a1a23a65b36f79fd59c7770e7966751c77168e4 100644
--- a/rts/hooks/StackOverflow.c
+++ b/rts/hooks/StackOverflow.c
@@ -4,6 +4,7 @@
  *
  * ---------------------------------------------------------------------------*/
 
+#include "PosixSource.h"
 #include "Rts.h"
 
 #include <stdio.h>
diff --git a/rts/parallel/GranSim.c b/rts/parallel/GranSim.c
index 1b26bb9dff47b952d9ba30c64607f7702cb2e2a0..7f7ad4424fbdfa3fc5dc84610776e297af29c0d8 100644
--- a/rts/parallel/GranSim.c
+++ b/rts/parallel/GranSim.c
@@ -1,5 +1,5 @@
 /* 
-   Time-stamp: <2006-10-19 15:12:58 simonmar>
+   Time-stamp: <2009-07-06 21:48:36 simonmar>
 
    Variables and functions specific to GranSim the parallelism simulator
    for GPH.
@@ -40,6 +40,8 @@
 //@node Includes, Prototypes and externs, GranSim specific code, GranSim specific code
 //@subsection Includes
 
+#if defined(GRAN)
+
 #include "Rts.h"
 #include "RtsFlags.h"
 #include "RtsUtils.h"
@@ -58,8 +60,6 @@
 //@node Prototypes and externs, Constants and Variables, Includes, GranSim specific code
 //@subsection Prototypes and externs
 
-#if defined(GRAN)
-
 /* Prototypes */
 static inline PEs      ga_to_proc(StgWord);
 static inline rtsBool  any_idle(void);
diff --git a/rts/posix/FileLock.c b/rts/posix/FileLock.c
index 26e9de4dc2a866c95e8334535ffbcc37088dbb88..a6052c73819aee5ff2c4fb729638a6be1cbc2d76 100644
--- a/rts/posix/FileLock.c
+++ b/rts/posix/FileLock.c
@@ -6,11 +6,12 @@
  *
  * ---------------------------------------------------------------------------*/
  
+#include "PosixSource.h"
 #include "Rts.h"
-#include "Hash.h"
+
 #include "FileLock.h"
+#include "Hash.h"
 #include "RtsUtils.h"
-#include "OSThreads.h"
 
 #include <unistd.h>
 #include <sys/stat.h>
diff --git a/rts/posix/FileLock.h b/rts/posix/FileLock.h
new file mode 100644
index 0000000000000000000000000000000000000000..2edee5ba6e796554018d0fa1f64b149fe8a95afc
--- /dev/null
+++ b/rts/posix/FileLock.h
@@ -0,0 +1,15 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 2007
+ *
+ * File locking support as required by Haskell 98
+ *
+ * ---------------------------------------------------------------------------*/
+
+#ifndef POSIX_FILELOCK_H
+#define POSIX_FILELOCK_H
+
+void initFileLocking(void);
+void freeFileLocking(void);
+
+#endif /* POSIX_FILELOCK_H */
diff --git a/rts/posix/Itimer.c b/rts/posix/Itimer.c
index eb26cd3699bd11a30900e9c1a093d676b5173a89..3a09e804b5570e66c3bd7adb250c78b0e2831d4a 100644
--- a/rts/posix/Itimer.c
+++ b/rts/posix/Itimer.c
@@ -16,15 +16,15 @@
  * Hence, we use the old-fashioned @setitimer@ that just about everyone seems
  * to support.  So much for standards.
  */
+
+#include "PosixSource.h"
 #include "Rts.h"
-#include "RtsFlags.h"
-#include "Timer.h"
+
 #include "Ticker.h"
-#include "posix/Itimer.h"
+#include "Itimer.h"
 #include "Proftimer.h"
-#include "Storage.h"
 #include "Schedule.h"
-#include "posix/Select.h"
+#include "Select.h"
 
 /* As recommended in the autoconf manual */
 # ifdef TIME_WITH_SYS_TIME
@@ -229,31 +229,6 @@ exitTicker(void)
 #endif
 }
 
-#if 0
-/* Currently unused */
-void
-block_vtalrm_signal(void)
-{
-    sigset_t signals;
-    
-    sigemptyset(&signals);
-    sigaddset(&signals, ITIMER_SIGNAL);
-
-    (void) sigprocmask(SIG_BLOCK, &signals, NULL);
-}
-
-void
-unblock_vtalrm_signal(void)
-{
-    sigset_t signals;
-    
-    sigemptyset(&signals);
-    sigaddset(&signals, ITIMER_SIGNAL);
-
-    (void) sigprocmask(SIG_UNBLOCK, &signals, NULL);
-}
-#endif
-
 /* gettimeofday() takes around 1us on our 500MHz PIII.  Since we're
  * only calling it 50 times/s, it shouldn't have any great impact.
  */
diff --git a/rts/posix/Itimer.h b/rts/posix/Itimer.h
index 09d01bde54e48f9e862a095ad3ccb500f2914e3a..4cae9357101982c519a2773acb28f3cfd125e6cc 100644
--- a/rts/posix/Itimer.h
+++ b/rts/posix/Itimer.h
@@ -10,10 +10,5 @@
 #define ITIMER_H
 
 extern lnat getourtimeofday   ( void );
-#if 0
-/* unused */
-extern void block_vtalrm_signal       ( void );
-extern void unblock_vtalrm_signal     ( void );
-#endif
 
 #endif /* ITIMER_H */
diff --git a/rts/posix/OSMem.c b/rts/posix/OSMem.c
index 51737ad65057f0c33e866d121cf76b131970eee4..0a372560227027f4b79c6e0d1c4dde71a97f789d 100644
--- a/rts/posix/OSMem.c
+++ b/rts/posix/OSMem.c
@@ -6,12 +6,12 @@
  *
  * ---------------------------------------------------------------------------*/
 
-/* This is non-posix compliant. */
-/* #include "PosixSource.h" */
+// This is non-posix compliant.
+// #include "PosixSource.h"
 
 #include "Rts.h"
-#include "OSMem.h"
-#include "RtsFlags.h"
+
+#include "sm/OSMem.h"
 
 #ifdef HAVE_UNISTD_H
 #include <unistd.h>
@@ -138,7 +138,7 @@ static void *
 gen_map_mblocks (lnat size)
 {
     int slop;
-    void *ret;
+    StgWord8 *ret;
 
     // Try to map a larger block, and take the aligned portion from
     // it (unmap the rest).
diff --git a/rts/posix/OSThreads.c b/rts/posix/OSThreads.c
index 324701d40d84f3a981e8aa67a98a438633fa881e..a813eebf9ee8924437bb76e8f03f48964c81fc89 100644
--- a/rts/posix/OSThreads.c
+++ b/rts/posix/OSThreads.c
@@ -13,9 +13,10 @@
 #define _GNU_SOURCE
 #endif
 
+#include "PosixSource.h"
 #include "Rts.h"
+
 #if defined(THREADED_RTS)
-#include "OSThreads.h"
 #include "RtsUtils.h"
 #include "Task.h"
 
diff --git a/rts/posix/Select.c b/rts/posix/Select.c
index 32dca96cd8c7c1f389bad938213570438ce68806..46db4054bb6917eba1d45d9abd5ae02d6830ac95 100644
--- a/rts/posix/Select.c
+++ b/rts/posix/Select.c
@@ -6,19 +6,16 @@
  *
  * ---------------------------------------------------------------------------*/
 
-/* we're outside the realms of POSIX here... */
-/* #include "PosixSource.h" */
-
+#include "PosixSource.h"
 #include "Rts.h"
-#include "Storage.h"
+
+#include "Signals.h"
 #include "Schedule.h"
 #include "RtsUtils.h"
-#include "RtsFlags.h"
-#include "Timer.h"
 #include "Itimer.h"
-#include "Signals.h"
 #include "Capability.h"
-#include "posix/Select.h"
+#include "Select.h"
+#include "AwaitEvent.h"
 
 # ifdef HAVE_SYS_TYPES_H
 #  include <sys/types.h>
diff --git a/rts/posix/Select.h b/rts/posix/Select.h
index 88255629744e9914e4d51da31f4a18ef72b0eca1..e92a4bc889b6fc2f7ce01eae0158e300a3c7b9e2 100644
--- a/rts/posix/Select.h
+++ b/rts/posix/Select.h
@@ -6,21 +6,12 @@
  *
  * -------------------------------------------------------------------------*/
 
-#ifndef SELECT_H
-#define SELECT_H
+#ifndef POSIX_SELECT_H
+#define POSIX_SELECT_H
 
 #if !defined(THREADED_RTS)
 /* In Select.c */
-extern lnat RTS_VAR(timestamp);
-
-/* awaitEvent(rtsBool wait)
- *
- * Checks for blocked threads that need to be woken.
- *
- * Called from STG :  NO
- * Locks assumed   :  sched_mutex
- */
-void awaitEvent(rtsBool wait);  /* In Select.c */
+extern lnat timestamp;
 #endif
 
-#endif /* SELECT_H */
+#endif /* POSIX_SELECT_H */
diff --git a/rts/posix/Signals.c b/rts/posix/Signals.c
index ace58c2f982364936385615d693549e6acb4ba91..660065734b84ff07dbc8442edb9603092f1897fe 100644
--- a/rts/posix/Signals.c
+++ b/rts/posix/Signals.c
@@ -6,18 +6,15 @@
  *
  * ---------------------------------------------------------------------------*/
 
-/* This is non-Posix-compliant.
-   #include "PosixSource.h" 
-*/
+#include "PosixSource.h" 
 #include "Rts.h"
-#include "SchedAPI.h"
+
 #include "Schedule.h"
 #include "RtsSignals.h"
-#include "posix/Signals.h"
+#include "Signals.h"
 #include "RtsUtils.h"
-#include "RtsFlags.h"
 #include "Prelude.h"
-#include "ThrIOManager.h"
+#include "Stable.h"
 
 #ifdef alpha_HOST_ARCH
 # if defined(linux_HOST_OS)
diff --git a/rts/posix/TTY.c b/rts/posix/TTY.c
new file mode 100644
index 0000000000000000000000000000000000000000..d39ef37b864754ca72c7774342ecdf9b4da03afb
--- /dev/null
+++ b/rts/posix/TTY.c
@@ -0,0 +1,65 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1998-2009
+ *
+ * TTY-related functionality
+ *
+ * ---------------------------------------------------------------------------*/
+
+#include "PosixSource.h"
+#include "Rts.h"
+
+#include "RtsUtils.h" // __hscore_get/set prototypes
+#include "TTY.h"
+
+#ifdef HAVE_TERMIOS_H
+#include <termios.h>
+#endif
+#ifdef HAVE_SIGNAL_H
+#include <signal.h>
+#endif
+
+// Here we save the terminal settings on the standard file
+// descriptors, if we need to change them (eg. to support NoBuffering
+// input).
+static void *saved_termios[3] = {NULL,NULL,NULL};
+
+void*
+__hscore_get_saved_termios(int fd)
+{
+  return (0 <= fd && fd < (int)(sizeof(saved_termios) / sizeof(*saved_termios))) ?
+    saved_termios[fd] : NULL;
+}
+
+void
+__hscore_set_saved_termios(int fd, void* ts)
+{
+  if (0 <= fd && fd < (int)(sizeof(saved_termios) / sizeof(*saved_termios))) {
+    saved_termios[fd] = ts;
+  }
+}
+
+void
+resetTerminalSettings (void)
+{
+#if HAVE_TERMIOS_H
+    // Reset the terminal settings on the standard file descriptors,
+    // if we changed them.  See System.Posix.Internals.tcSetAttr for
+    // more details, including the reason we termporarily disable
+    // SIGTTOU here.
+    { 
+	int fd;
+	sigset_t sigset, old_sigset;
+	sigemptyset(&sigset);
+	sigaddset(&sigset, SIGTTOU);
+	sigprocmask(SIG_BLOCK, &sigset, &old_sigset);
+	for (fd = 0; fd <= 2; fd++) {
+	    struct termios* ts = (struct termios*)__hscore_get_saved_termios(fd);
+	    if (ts != NULL) {
+		tcsetattr(fd,TCSANOW,ts);
+	    }
+	}
+	sigprocmask(SIG_SETMASK, &old_sigset, NULL);
+    }
+#endif
+}
diff --git a/rts/posix/TTY.h b/rts/posix/TTY.h
new file mode 100644
index 0000000000000000000000000000000000000000..f291d30de4c8d59263c875573071579fae4d68a1
--- /dev/null
+++ b/rts/posix/TTY.h
@@ -0,0 +1,15 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1998-2009
+ *
+ * TTY-related functionality
+ *
+ * ---------------------------------------------------------------------------*/
+
+#ifndef POSIX_TTY_H
+#define POSIX_TTY_H
+
+void resetTerminalSettings (void);
+
+#endif /* POSIX_TTY_H */
+
diff --git a/rts/sm/BlockAlloc.c b/rts/sm/BlockAlloc.c
index 280ebfc7dbdfb94bbe3f5757771572916b34fe8d..bf7a55e7a96d9d52a418d6495168b7a35c5481e5 100644
--- a/rts/sm/BlockAlloc.c
+++ b/rts/sm/BlockAlloc.c
@@ -17,11 +17,10 @@
 
 #include "PosixSource.h"
 #include "Rts.h"
-#include "RtsFlags.h"
+
+#include "Storage.h"
 #include "RtsUtils.h"
 #include "BlockAlloc.h"
-#include "MBlock.h"
-#include "Storage.h"
 
 #include <string.h>
 
@@ -284,7 +283,7 @@ alloc_mega_group (nat mblocks)
     {
         // we take our chunk off the end here.
         nat best_mblocks  = BLOCKS_TO_MBLOCKS(best->blocks);
-        bd = FIRST_BDESCR(MBLOCK_ROUND_DOWN(best) + 
+        bd = FIRST_BDESCR((StgWord8*)MBLOCK_ROUND_DOWN(best) + 
                           (best_mblocks-mblocks)*MBLOCK_SIZE);
 
         best->blocks = MBLOCK_GROUP_BLOCKS(best_mblocks - mblocks);
@@ -415,7 +414,8 @@ coalesce_mblocks (bdescr *p)
     q = p->link;
     if (q != NULL && 
         MBLOCK_ROUND_DOWN(q) == 
-        MBLOCK_ROUND_DOWN(p) + BLOCKS_TO_MBLOCKS(p->blocks) * MBLOCK_SIZE) {
+        (StgWord8*)MBLOCK_ROUND_DOWN(p) + 
+        BLOCKS_TO_MBLOCKS(p->blocks) * MBLOCK_SIZE) {
         // can coalesce
         p->blocks  = MBLOCK_GROUP_BLOCKS(BLOCKS_TO_MBLOCKS(p->blocks) +
                                          BLOCKS_TO_MBLOCKS(q->blocks));
@@ -610,20 +610,21 @@ splitBlockGroup (bdescr *bd, nat blocks)
 static void
 initMBlock(void *mblock)
 {
-  bdescr *bd;
-  void *block;
-
-  /* the first few Bdescr's in a block are unused, so we don't want to
-   * put them all on the free list.
-   */
-  block = FIRST_BLOCK(mblock);
-  bd    = FIRST_BDESCR(mblock);
-
-  /* Initialise the start field of each block descriptor
-   */
-  for (; block <= LAST_BLOCK(mblock); bd += 1, block += BLOCK_SIZE) {
-    bd->start = block;
-  }
+    bdescr *bd;
+    StgWord8 *block;
+
+    /* the first few Bdescr's in a block are unused, so we don't want to
+     * put them all on the free list.
+     */
+    block = FIRST_BLOCK(mblock);
+    bd    = FIRST_BDESCR(mblock);
+    
+    /* Initialise the start field of each block descriptor
+     */
+    for (; block <= (StgWord8*)LAST_BLOCK(mblock); bd += 1, 
+             block += BLOCK_SIZE) {
+        bd->start = (void*)block;
+    }
 }
 
 /* -----------------------------------------------------------------------------
@@ -708,7 +709,7 @@ checkFreeListSanity(void)
         if (bd->link != NULL)
         {
             ASSERT (MBLOCK_ROUND_DOWN(bd->link) != 
-                    MBLOCK_ROUND_DOWN(bd) + 
+                    (StgWord8*)MBLOCK_ROUND_DOWN(bd) + 
                     BLOCKS_TO_MBLOCKS(bd->blocks) * MBLOCK_SIZE);
         }
     }
@@ -758,7 +759,8 @@ reportUnmarkedBlocks (void)
                 debugBelch("  %p\n",bd);
             }
             if (bd->blocks >= BLOCKS_PER_MBLOCK) {
-                mblock += (BLOCKS_TO_MBLOCKS(bd->blocks) - 1) * MBLOCK_SIZE;
+                mblock = (StgWord8*)mblock +
+                    (BLOCKS_TO_MBLOCKS(bd->blocks) - 1) * MBLOCK_SIZE;
                 break;
             } else {
                 bd += bd->blocks;
diff --git a/rts/sm/Compact.c b/rts/sm/Compact.c
index 9c13253c3271942e9df583d64ef789a2202e0430..892364dfa500b68db5c4b51173da2a5bdbeba190 100644
--- a/rts/sm/Compact.c
+++ b/rts/sm/Compact.c
@@ -13,16 +13,18 @@
 
 #include "PosixSource.h"
 #include "Rts.h"
+
+#include "Storage.h"
 #include "RtsUtils.h"
-#include "RtsFlags.h"
-#include "OSThreads.h"
 #include "BlockAlloc.h"
-#include "MBlock.h"
 #include "GC.h"
 #include "Compact.h"
 #include "Schedule.h"
 #include "Apply.h"
 #include "Trace.h"
+#include "Weak.h"
+#include "MarkWeak.h"
+#include "Stable.h"
 
 // Turn off inlining when debugging - it obfuscates things
 #ifdef DEBUG
@@ -166,7 +168,7 @@ loop:
     case 1:
     {
         StgWord r = *(StgPtr)(q-1);
-        ASSERT(LOOKS_LIKE_INFO_PTR(UNTAG_CLOSURE((StgClosure *)r)));
+        ASSERT(LOOKS_LIKE_INFO_PTR((StgWord)UNTAG_CLOSURE((StgClosure *)r)));
         return r;
     }
     case 2:
@@ -929,7 +931,7 @@ update_bkwd_compact( step *stp )
 
             iptr = get_threaded_info(p);
 	    unthread(p, (StgWord)free + GET_CLOSURE_TAG((StgClosure *)iptr));
-	    ASSERT(LOOKS_LIKE_INFO_PTR(((StgClosure *)p)->header.info));
+	    ASSERT(LOOKS_LIKE_INFO_PTR((StgWord)((StgClosure *)p)->header.info));
 	    info = get_itbl((StgClosure *)p);
 	    size = closure_sizeW_((StgClosure *)p,info);
 
diff --git a/rts/sm/Compact.h b/rts/sm/Compact.h
index 40622c56828155a7d69d19192ae7d47526ad9aba..7a237ac362cdf490c131016e147d17407f8c03b1 100644
--- a/rts/sm/Compact.h
+++ b/rts/sm/Compact.h
@@ -11,8 +11,8 @@
  *
  * ---------------------------------------------------------------------------*/
 
-#ifndef GCCOMPACT_H
-#define GCCOMPACT_H
+#ifndef SM_COMPACT_H
+#define SM_COMPACT_H
 
 INLINE_HEADER rtsBool
 mark_stack_empty(void)
@@ -76,4 +76,4 @@ is_marked(StgPtr p, bdescr *bd)
 
 extern void compact (StgClosure *static_objects);
 
-#endif /* GCCOMPACT_H */
+#endif /* SM_COMPACT_H */
diff --git a/rts/sm/Evac.c b/rts/sm/Evac.c
index 5935f9069e403b1e2ab89e53d03105d72feba8aa..9e6d0f1783d304aae3649defb15df7e886dd3532 100644
--- a/rts/sm/Evac.c
+++ b/rts/sm/Evac.c
@@ -11,16 +11,16 @@
  *
  * ---------------------------------------------------------------------------*/
 
+#include "PosixSource.h"
 #include "Rts.h"
-#include "Storage.h"
-#include "MBlock.h"
+
 #include "Evac.h"
+#include "Storage.h"
 #include "GC.h"
 #include "GCThread.h"
 #include "GCUtils.h"
 #include "Compact.h"
 #include "Prelude.h"
-#include "LdvProfile.h"
 #include "Trace.h"
 
 #if defined(PROF_SPIN) && defined(THREADED_RTS) && defined(PARALLEL_GC)
diff --git a/rts/sm/Evac.h b/rts/sm/Evac.h
index e6ef02cfcc97528b07d9b3904710b769a33ff662..78d024f3e99409549d73fa78cf77846828056868 100644
--- a/rts/sm/Evac.h
+++ b/rts/sm/Evac.h
@@ -11,6 +11,9 @@
  *
  * ---------------------------------------------------------------------------*/
 
+#ifndef SM_EVAC_H
+#define SM_EVAC_H
+
 // Use a register argument for evacuate, if available.
 // Earlier, the regparm attribute was used whenever __GNUC__ >= 2, but this
 // generated warnings on PPC. So the use is restricted further.
@@ -31,3 +34,6 @@ REGPARM1 void evacuate  (StgClosure **p);
 REGPARM1 void evacuate1 (StgClosure **p);
 
 extern lnat thunk_selector_depth;
+
+#endif /* SM_EVAC_H */
+
diff --git a/rts/sm/GC.c b/rts/sm/GC.c
index 88b11aae88cf1fa13b4a73051938bed0b923ed31..02fd6d91610b363f3c09018c13b2ef0972b7c73f 100644
--- a/rts/sm/GC.c
+++ b/rts/sm/GC.c
@@ -11,28 +11,23 @@
  *
  * ---------------------------------------------------------------------------*/
 
-// #include "PosixSource.h"
+#include "PosixSource.h"
 #include "Rts.h"
-#include "RtsFlags.h"
+#include "HsFFI.h"
+
+#include "Storage.h"
 #include "RtsUtils.h"
 #include "Apply.h"
-#include "OSThreads.h"
-#include "LdvProfile.h"
 #include "Updates.h"
 #include "Stats.h"
 #include "Schedule.h"
 #include "Sanity.h"
 #include "BlockAlloc.h"
-#include "MBlock.h"
 #include "ProfHeap.h"
-#include "SchedAPI.h"
 #include "Weak.h"
 #include "Prelude.h"
-#include "ParTicky.h"		// ToDo: move into Rts.h
 #include "RtsSignals.h"
 #include "STM.h"
-#include "HsFFI.h"
-#include "Linker.h"
 #if defined(RTS_GTK_FRONTPANEL)
 #include "FrontPanel.h"
 #endif
@@ -40,6 +35,7 @@
 #include "RetainerProfile.h"
 #include "RaiseAsync.h"
 #include "Papi.h"
+#include "Stable.h"
 
 #include "GC.h"
 #include "GCThread.h"
@@ -1112,10 +1108,11 @@ gcWorkerThread (Capability *cap)
 
 #endif
 
+#if defined(THREADED_RTS)
+
 void
 waitForGcThreads (Capability *cap USED_IF_THREADS)
 {
-#if defined(THREADED_RTS)
     nat n_threads = RtsFlags.ParFlags.nNodes;
     nat me = cap->no;
     nat i, j;
@@ -1141,9 +1138,10 @@ waitForGcThreads (Capability *cap USED_IF_THREADS)
             if (!retry) break;
         }
     }
-#endif
 }
 
+#endif // THREADED_RTS
+
 static void
 start_gc_threads (void)
 {
@@ -1185,10 +1183,10 @@ shutdown_gc_threads (nat n_threads USED_IF_THREADS, nat me USED_IF_THREADS)
 #endif
 }
 
+#if defined(THREADED_RTS)
 void
 releaseGCThreads (Capability *cap USED_IF_THREADS)
 {
-#if defined(THREADED_RTS)
     nat n_threads = RtsFlags.ParFlags.nNodes;
     nat me = cap->no;
     nat i;
@@ -1201,8 +1199,8 @@ releaseGCThreads (Capability *cap USED_IF_THREADS)
         ACQUIRE_SPIN_LOCK(&gc_threads[i]->gc_spin);
         RELEASE_SPIN_LOCK(&gc_threads[i]->mut_spin);
     }
-#endif
 }
+#endif
 
 /* ----------------------------------------------------------------------------
    Initialise a generation that is to be collected 
diff --git a/rts/sm/GC.h b/rts/sm/GC.h
index fb4381dc0b150cedf4e7e54d7bc70cef3b15255b..920b464bbbca237de9b1171ae7a46ae6cdea7e6d 100644
--- a/rts/sm/GC.h
+++ b/rts/sm/GC.h
@@ -11,8 +11,15 @@
  *
  * ---------------------------------------------------------------------------*/
 
-#ifndef GC_H
-#define GC_H
+#ifndef SM_GC_H
+#define SM_GC_H
+
+void GarbageCollect(rtsBool force_major_gc, nat gc_type, Capability *cap);
+
+typedef void (*evac_fn)(void *user, StgClosure **root);
+
+StgClosure * isAlive      ( StgClosure *p );
+void         markCAFs     ( evac_fn evac, void *user );
 
 extern nat N;
 extern rtsBool major_gc;
@@ -45,9 +52,12 @@ extern StgWord64 whitehole_spin;
 void gcWorkerThread (Capability *cap);
 void initGcThreads (void);
 void freeGcThreads (void);
+
+#if defined(THREADED_RTS)
 void waitForGcThreads (Capability *cap);
 void releaseGCThreads (Capability *cap);
+#endif
 
 #define WORK_UNIT_WORDS 128
 
-#endif /* GC_H */
+#endif /* SM_GC_H */
diff --git a/rts/sm/GCAux.c b/rts/sm/GCAux.c
index c1ff54123dda8db7c2b99738661b27d41f20ff47..404e9bbcbcc981697d60221a669148649b0b30fd 100644
--- a/rts/sm/GCAux.c
+++ b/rts/sm/GCAux.c
@@ -7,10 +7,11 @@
  *
  * ---------------------------------------------------------------------------*/
 
+#include "PosixSource.h"
 #include "Rts.h"
-#include "Storage.h"
-#include "MBlock.h"
+
 #include "GC.h"
+#include "Storage.h"
 #include "Compact.h"
 #include "Task.h"
 #include "Capability.h"
diff --git a/rts/sm/GCThread.h b/rts/sm/GCThread.h
index f91092b7ea977f143ecf903f8ab1d1f4fb03c763..9188a20a9a26a3b3a8fd23ea6514bab90e2f35c4 100644
--- a/rts/sm/GCThread.h
+++ b/rts/sm/GCThread.h
@@ -11,10 +11,9 @@
  *
  * ---------------------------------------------------------------------------*/
 
-#ifndef GCTHREAD_H
-#define GCTHREAD_H
+#ifndef SM_GCTHREAD_H
+#define SM_GCTHREAD_H
 
-#include "OSThreads.h"
 #include "WSDeque.h"
 
 /* -----------------------------------------------------------------------------
@@ -271,5 +270,5 @@ extern StgWord8 the_gc_thread[];
 
 #endif
 
-#endif // GCTHREAD_H
+#endif // SM_GCTHREAD_H
 
diff --git a/rts/sm/GCUtils.c b/rts/sm/GCUtils.c
index 57cede7d43390dc4605404792b1d212278e61fd6..6c6f10e01f2b6fdc51261cf47dbdbe75c487b273 100644
--- a/rts/sm/GCUtils.c
+++ b/rts/sm/GCUtils.c
@@ -11,8 +11,9 @@
  *
  * ---------------------------------------------------------------------------*/
 
+#include "PosixSource.h"
 #include "Rts.h"
-#include "RtsFlags.h"
+
 #include "Storage.h"
 #include "GC.h"
 #include "GCThread.h"
@@ -101,6 +102,7 @@ grab_local_todo_block (step_workspace *ws)
     return NULL;
 }
 
+#if defined(THREADED_RTS)
 bdescr *
 steal_todo_block (nat s)
 {
@@ -117,6 +119,7 @@ steal_todo_block (nat s)
     }
     return NULL;
 }
+#endif
 
 void
 push_scanned_block (bdescr *bd, step_workspace *ws)
diff --git a/rts/sm/GCUtils.h b/rts/sm/GCUtils.h
index e71f4374fb012c9dc88fce914761312ea9c4a50f..d68ce7876f0fbfa5680ab9e7b9ed8cd55bb7870b 100644
--- a/rts/sm/GCUtils.h
+++ b/rts/sm/GCUtils.h
@@ -11,7 +11,8 @@
  *
  * --------------------------------------------------------------------------*/
 
-#include "SMP.h"
+#ifndef SM_GCUTILS_H
+#define SM_GCUTILS_H
 
 bdescr *allocBlock_sync(void);
 void    freeChain_sync(bdescr *bd);
@@ -21,7 +22,9 @@ StgPtr  todo_block_full      (nat size, step_workspace *ws);
 StgPtr  alloc_todo_block     (step_workspace *ws, nat size);
 
 bdescr *grab_local_todo_block  (step_workspace *ws);
+#if defined(THREADED_RTS)
 bdescr *steal_todo_block       (nat s);
+#endif
 
 // Returns true if a block is partially full.  This predicate is used to try
 // to re-use partial blocks wherever possible, and to reduce wastage.
@@ -55,3 +58,5 @@ recordMutableGen_GC (StgClosure *p, nat gen_no)
     }
     *bd->free++ = (StgWord)p;
 }
+
+#endif /* SM_GCUTILS_H */
diff --git a/rts/sm/MBlock.c b/rts/sm/MBlock.c
index b3fa13b0bca2c170cf644890f57a8610fb33e4df..996b2c9ae9f79d3bacba56e908e81572d51e8f51 100644
--- a/rts/sm/MBlock.c
+++ b/rts/sm/MBlock.c
@@ -9,10 +9,9 @@
  * ---------------------------------------------------------------------------*/
 
 #include "PosixSource.h"
-
 #include "Rts.h"
+
 #include "RtsUtils.h"
-#include "MBlock.h"
 #include "BlockAlloc.h"
 #include "Trace.h"
 #include "OSMem.h"
@@ -235,7 +234,7 @@ getMBlocks(nat n)
     
     // fill in the table
     for (i = 0; i < n; i++) {
-        markHeapAlloced( ret + i * MBLOCK_SIZE );
+        markHeapAlloced( (StgWord8*)ret + i * MBLOCK_SIZE );
     }
     
     mblocks_allocated += n;
diff --git a/rts/sm/MarkWeak.c b/rts/sm/MarkWeak.c
index 78c84ed77aaa8da0a4a13c15f7e5cdf66c855064..4f0a7a451ba30dd4383dbb3287361dc137a602a9 100644
--- a/rts/sm/MarkWeak.c
+++ b/rts/sm/MarkWeak.c
@@ -11,14 +11,16 @@
  *
  * ---------------------------------------------------------------------------*/
 
+#include "PosixSource.h"
 #include "Rts.h"
-#include "Storage.h"
+
 #include "MarkWeak.h"
 #include "GC.h"
 #include "GCThread.h"
 #include "Evac.h"
 #include "Trace.h"
 #include "Schedule.h"
+#include "Weak.h"
 
 /* -----------------------------------------------------------------------------
    Weak Pointers
diff --git a/rts/sm/MarkWeak.h b/rts/sm/MarkWeak.h
index 7b3a806857c6a5f9ed064eb2151fab1427d290d4..2647a22eec8b9735adc5a6f790d77408d6bf915f 100644
--- a/rts/sm/MarkWeak.h
+++ b/rts/sm/MarkWeak.h
@@ -11,6 +11,9 @@
  *
  * ---------------------------------------------------------------------------*/
 
+#ifndef SM_MARKWEAK_H
+#define SM_MARKWEAK_H
+
 extern StgWeak *old_weak_ptr_list;
 extern StgTSO *resurrected_threads;
 extern StgTSO *exception_threads;
@@ -19,3 +22,5 @@ void    initWeakForGC          ( void );
 rtsBool traverseWeakPtrList    ( void );
 void    markWeakPtrList        ( void );
 rtsBool traverseBlackholeQueue ( void );
+
+#endif /* SM_MARKWEAK_H */
diff --git a/rts/sm/OSMem.h b/rts/sm/OSMem.h
index dcf8eb73dff15df7252bcb630124ba352424b3ee..3dbca23d11c5b01bd49fafc55b8bb74cf9d5bdf8 100644
--- a/rts/sm/OSMem.h
+++ b/rts/sm/OSMem.h
@@ -6,8 +6,13 @@
  *
  * ---------------------------------------------------------------------------*/
 
+#ifndef SM_OSMEM_H
+#define SM_OSMEM_H
+
 void osMemInit(void);
 void *osGetMBlocks(nat n);
 void osFreeAllMBlocks(void);
 lnat getPageSize (void);
 void setExecutable (void *p, lnat len, rtsBool exec);
+
+#endif /* SM_OSMEM_H */
diff --git a/rts/sm/README b/rts/sm/README
deleted file mode 100644
index 61cb7d2c06f85c77cf4a2bcdab2eace52ec69e7a..0000000000000000000000000000000000000000
--- a/rts/sm/README
+++ /dev/null
@@ -1,11 +0,0 @@
-The Storage Manager
-===================
-
-This directory contains the storage manager and garbage collector.
-The interfaces exported from here are:
-
-  Storage.h (in ../includes)
-  Block.h (in ../includes)
-  GC.h
-  Arena.h
-  BlockAlloc.h
diff --git a/rts/sm/Scav.c b/rts/sm/Scav.c
index b850423244e0f0b17c51b305bab359c6d7b7ac56..9ebd4c5597de06446af7ea1c777cfda07c6a0be7 100644
--- a/rts/sm/Scav.c
+++ b/rts/sm/Scav.c
@@ -11,10 +11,10 @@
  *
  * ---------------------------------------------------------------------------*/
 
+#include "PosixSource.h"
 #include "Rts.h"
-#include "RtsFlags.h"
+
 #include "Storage.h"
-#include "MBlock.h"
 #include "GC.h"
 #include "GCThread.h"
 #include "GCUtils.h"
@@ -23,7 +23,6 @@
 #include "Scav.h"
 #include "Apply.h"
 #include "Trace.h"
-#include "LdvProfile.h"
 #include "Sanity.h"
 #include "Capability.h"
 
diff --git a/rts/sm/Scav.h b/rts/sm/Scav.h
index df774cd63d6b0ab7308dc91787c4567b3e9983be..10b9ffde401756483ed1ebddac525d457fce0458 100644
--- a/rts/sm/Scav.h
+++ b/rts/sm/Scav.h
@@ -11,6 +11,9 @@
  *
  * ---------------------------------------------------------------------------*/
 
+#ifndef SM_SCAV_H
+#define SM_SCAV_H
+
 void    scavenge_loop (void);
 void    scavenge_mutable_list (bdescr *bd, generation *gen);
 void    scavenge_capability_mut_lists (Capability *cap);
@@ -20,3 +23,6 @@ void    scavenge_loop1 (void);
 void    scavenge_mutable_list1 (bdescr *bd, generation *gen);
 void    scavenge_capability_mut_Lists1 (Capability *cap);
 #endif
+
+#endif /* SM_SCAV_H */
+
diff --git a/rts/sm/Storage.c b/rts/sm/Storage.c
index d14e58856f10aa5a99617d1085e1367ccb0e3a05..97615e9d1b5b0df06c9c4179b1a4081238b21413 100644
--- a/rts/sm/Storage.c
+++ b/rts/sm/Storage.c
@@ -13,18 +13,15 @@
 
 #include "PosixSource.h"
 #include "Rts.h"
+
+#include "Storage.h"
 #include "RtsUtils.h"
-#include "RtsFlags.h"
 #include "Stats.h"
-#include "Hooks.h"
 #include "BlockAlloc.h"
-#include "MBlock.h"
 #include "Weak.h"
 #include "Sanity.h"
 #include "Arena.h"
-#include "OSThreads.h"
 #include "Capability.h"
-#include "Storage.h"
 #include "Schedule.h"
 #include "RetainerProfile.h"	// for counting memory blocks (memInventory)
 #include "OSMem.h"
@@ -32,7 +29,6 @@
 #include "GC.h"
 #include "Evac.h"
 
-#include <stdlib.h>
 #include <string.h>
 
 #include "ffi.h"
@@ -71,6 +67,7 @@ step *nurseries         = NULL; /* array of nurseries, >1 only if THREADED_RTS *
 Mutex sm_mutex;
 #endif
 
+static void allocNurseries ( void );
 
 static void
 initStep (step *stp, int g, int s)
@@ -440,7 +437,7 @@ assignNurseriesToCapabilities (void)
 #endif
 }
 
-void
+static void
 allocNurseries( void )
 { 
     nat i;
diff --git a/rts/sm/Storage.h b/rts/sm/Storage.h
new file mode 100644
index 0000000000000000000000000000000000000000..c6aa45e16274cf861b983644cd016a2fea200146
--- /dev/null
+++ b/rts/sm/Storage.h
@@ -0,0 +1,169 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1998-2009
+ *
+ * External Storage Manger Interface
+ *
+ * ---------------------------------------------------------------------------*/
+
+#ifndef SM_STORAGE_H
+#define SM_STORAGE_H
+
+/* -----------------------------------------------------------------------------
+   Initialisation / De-initialisation
+   -------------------------------------------------------------------------- */
+
+void initStorage(void);
+void exitStorage(void);
+void freeStorage(void);
+
+/* -----------------------------------------------------------------------------
+   Storage manager state
+   -------------------------------------------------------------------------- */
+
+extern bdescr * pinned_object_block;
+
+extern nat alloc_blocks;
+extern nat alloc_blocks_lim;
+
+INLINE_HEADER rtsBool
+doYouWantToGC( void )
+{
+  return (alloc_blocks >= alloc_blocks_lim);
+}
+
+/* for splitting blocks groups in two */
+bdescr * splitLargeBlock (bdescr *bd, nat blocks);
+
+/* -----------------------------------------------------------------------------
+   Generational garbage collection support
+
+   recordMutable(StgPtr p)       Informs the garbage collector that a
+				 previously immutable object has
+				 become (permanently) mutable.  Used
+				 by thawArray and similar.
+
+   updateWithIndirection(p1,p2)  Updates the object at p1 with an
+				 indirection pointing to p2.  This is
+				 normally called for objects in an old
+				 generation (>0) when they are updated.
+
+   updateWithPermIndirection(p1,p2)  As above but uses a permanent indir.
+
+   -------------------------------------------------------------------------- */
+
+/*
+ * Storage manager mutex
+ */
+#if defined(THREADED_RTS)
+extern Mutex sm_mutex;
+#endif
+
+#if defined(THREADED_RTS)
+#define ACQUIRE_SM_LOCK   ACQUIRE_LOCK(&sm_mutex);
+#define RELEASE_SM_LOCK   RELEASE_LOCK(&sm_mutex);
+#define ASSERT_SM_LOCK()  ASSERT_LOCK_HELD(&sm_mutex);
+#else
+#define ACQUIRE_SM_LOCK
+#define RELEASE_SM_LOCK
+#define ASSERT_SM_LOCK()
+#endif
+
+INLINE_HEADER void
+recordMutableGen(StgClosure *p, nat gen_no)
+{
+    bdescr *bd;
+
+    bd = generations[gen_no].mut_list;
+    if (bd->free >= bd->start + BLOCK_SIZE_W) {
+	bdescr *new_bd;
+	new_bd = allocBlock();
+	new_bd->link = bd;
+	bd = new_bd;
+	generations[gen_no].mut_list = bd;
+    }
+    *bd->free++ = (StgWord)p;
+
+}
+
+INLINE_HEADER void
+recordMutableGenLock(StgClosure *p, nat gen_no)
+{
+    ACQUIRE_SM_LOCK;
+    recordMutableGen(p,gen_no);
+    RELEASE_SM_LOCK;
+}
+
+INLINE_HEADER void
+recordMutable(StgClosure *p)
+{
+    bdescr *bd;
+    ASSERT(closure_MUTABLE(p));
+    bd = Bdescr((P_)p);
+    if (bd->gen_no > 0) recordMutableGen(p, bd->gen_no);
+}
+
+INLINE_HEADER void
+recordMutableLock(StgClosure *p)
+{
+    ACQUIRE_SM_LOCK;
+    recordMutable(p);
+    RELEASE_SM_LOCK;
+}
+
+/* -----------------------------------------------------------------------------
+   This is the write barrier for MUT_VARs, a.k.a. IORefs.  A
+   MUT_VAR_CLEAN object is not on the mutable list; a MUT_VAR_DIRTY
+   is.  When written to, a MUT_VAR_CLEAN turns into a MUT_VAR_DIRTY
+   and is put on the mutable list.
+   -------------------------------------------------------------------------- */
+
+void dirty_MUT_VAR(StgRegTable *reg, StgClosure *p);
+
+/* -----------------------------------------------------------------------------
+   Similarly, the write barrier for MVARs
+   -------------------------------------------------------------------------- */
+
+void dirty_MVAR(StgRegTable *reg, StgClosure *p);
+
+/* -----------------------------------------------------------------------------
+   Nursery manipulation
+   -------------------------------------------------------------------------- */
+
+void     resetNurseries       ( void );
+void     resizeNurseries      ( nat blocks );
+void     resizeNurseriesFixed ( nat blocks );
+lnat     countNurseryBlocks   ( void );
+
+/* -----------------------------------------------------------------------------
+   Stats 'n' DEBUG stuff
+   -------------------------------------------------------------------------- */
+
+extern ullong total_allocated;
+
+lnat    calcAllocated  (void);
+lnat    calcLiveBlocks (void);
+lnat    calcLiveWords  (void);
+lnat    countOccupied  (bdescr *bd);
+lnat    calcNeeded     (void);
+HsInt64 getAllocations (void);
+
+#if defined(DEBUG)
+void    memInventory       (rtsBool show);
+void    checkSanity        (void);
+nat     countBlocks        (bdescr *);
+void    checkNurserySanity (step *stp);
+#endif
+
+/* ----------------------------------------------------------------------------
+   Storage manager internal APIs and globals
+   ------------------------------------------------------------------------- */
+
+#define END_OF_STATIC_LIST ((StgClosure*)1)
+
+void move_TSO  (StgTSO *src, StgTSO *dest);
+
+extern StgClosure * caf_list;
+extern StgClosure * revertible_caf_list;
+
+#endif /* SM_STORAGE_H */
diff --git a/rts/sm/Sweep.c b/rts/sm/Sweep.c
index 444c3d51116487f56ed33757c655dc46f2dcbc83..b6574024ebe673492a1c4e6df09563791b548365 100644
--- a/rts/sm/Sweep.c
+++ b/rts/sm/Sweep.c
@@ -11,9 +11,11 @@
  *
  * ---------------------------------------------------------------------------*/
 
+#include "PosixSource.h"
 #include "Rts.h"
+
+#include "Storage.h"
 #include "Sweep.h"
-#include "Block.h"
 #include "Trace.h"
 
 void
diff --git a/rts/sm/Sweep.h b/rts/sm/Sweep.h
index e7904a90a81b04f31a5acca882da7f9f1c8c1e62..562a934faaa83c3ff4733881adebd926e40d3bbc 100644
--- a/rts/sm/Sweep.h
+++ b/rts/sm/Sweep.h
@@ -1,6 +1,6 @@
 /* -----------------------------------------------------------------------------
  *
- * (c) The GHC Team 2008 
+ * (c) The GHC Team 2008
  *
  * Simple mark/sweep, collecting whole blocks.
  *
@@ -11,4 +11,9 @@
  *
  * ---------------------------------------------------------------------------*/
 
+#ifndef SM_SWEEP_H
+#define SM_SWEEP_H
+
 void sweep(step *gen);
+
+#endif /* SM_SWEEP_H */
diff --git a/rts/win32/AsyncIO.h b/rts/win32/AsyncIO.h
index ffbe71e1afa4615b1f616f5e4ccf15148c6374c2..27669ce0b44ccc62178cc917a02b13ff8b94baa9 100644
--- a/rts/win32/AsyncIO.h
+++ b/rts/win32/AsyncIO.h
@@ -4,8 +4,10 @@
  *
  * (c) sof, 2002-2003.
  */
-#ifndef __ASYNCHIO_H__
-#define __ASYNCHIO_H__
+
+#ifndef WIN32_ASYNCHIO_H
+#define WIN32_ASYNCHIO_H
+
 extern unsigned int
 addIORequest(int   fd,
 	     int   forWriting,
@@ -22,4 +24,4 @@ extern int awaitRequests(rtsBool wait);
 extern void abandonRequestWait(void);
 extern void resetAbandonRequestWait(void);
 
-#endif /* __ASYNCHIO_H__ */
+#endif /* WIN32_ASYNCHIO_H */
diff --git a/rts/win32/ConsoleHandler.h b/rts/win32/ConsoleHandler.h
index 33fa065733cb590b02e90eb91e88d78c425b2510..0d09a67b94f8743806a8a05e5f8077477dce1edc 100644
--- a/rts/win32/ConsoleHandler.h
+++ b/rts/win32/ConsoleHandler.h
@@ -2,8 +2,8 @@
  * Console control handler support.
  *
  */
-#ifndef __CONSOLEHANDLER_H__
-#define __CONSOLEHANDLER_H__
+#ifndef WIN32_CONSOLEHANDLER_H
+#define WIN32_CONSOLEHANDLER_H
 
 /*
  * Console control handlers lets an application handle Ctrl+C, Ctrl+Break etc.
@@ -51,14 +51,6 @@ extern StgInt stg_pending_events;
  */
 extern void startSignalHandlers(Capability *cap);
 
-/*
- * Function: handleSignalsInThisThread()
- * 
- * Have current (OS) thread assume responsibility of handling console events/signals.
- * Currently not used (by the console event handling code.)
- */
-extern void handleSignalsInThisThread(void);
-
 /*
  * Function: rts_waitConsoleHandlerCompletion()
  *
@@ -69,4 +61,4 @@ extern int rts_waitConsoleHandlerCompletion(void);
 
 #endif /* THREADED_RTS */
 
-#endif /* __CONSOLEHANDLER_H__ */
+#endif /* Win32_CONSOLEHANDLER_H */
diff --git a/rts/win32/IOManager.h b/rts/win32/IOManager.h
index 7379ce3b16308792fb263913e969730dbfdc0bb6..145a1e549b42621a487b9b8575cd5542d98eb813 100644
--- a/rts/win32/IOManager.h
+++ b/rts/win32/IOManager.h
@@ -4,8 +4,9 @@
  *
  * (c) sof, 2002-2003
  */
-#ifndef __IOMANAGER_H__
-#define __IOMANAGER_H__
+
+#ifndef WIN32_IOMANAGER_H
+#define WIN32_IOMANAGER_H
 
 #include <windows.h>
 
@@ -103,4 +104,4 @@ extern int AddProcRequest ( void*          proc,
 
 extern void abandonWorkRequest ( int reqID );
 
-#endif /* __IOMANAGER_H__ */
+#endif /* WIN32_IOMANAGER_H */
diff --git a/rts/win32/ThrIOManager.c b/rts/win32/ThrIOManager.c
index c52928c3a22760fd4209ae9cb204f4f49c2e16d0..e62b33d9d31ab49af43732eda9586a95b676f874 100644
--- a/rts/win32/ThrIOManager.c
+++ b/rts/win32/ThrIOManager.c
@@ -8,7 +8,7 @@
  * ---------------------------------------------------------------------------*/
 
 #include "Rts.h"
-#include "ThrIOManager.h"
+#include "IOManager.h"
 #include "Prelude.h"
 #include <windows.h>
 
diff --git a/rts/win32/WorkQueue.h b/rts/win32/WorkQueue.h
index bde82a3a77e7f8dcbbce5910a3e5a1f66db05ce5..3ed2385ec94261462b581cacfb0abdabee9ec280 100644
--- a/rts/win32/WorkQueue.h
+++ b/rts/win32/WorkQueue.h
@@ -5,8 +5,9 @@
  * (c) sof, 2002-2003
  *
  */
-#ifndef __WORKQUEUE_H__
-#define __WORKQUEUE_H__
+
+#ifndef WIN32_WORKQUEUE_H
+#define WIN32_WORKQUEUE_H
 #include <windows.h>
 
 /* This is a fixed-size queue. */
@@ -34,4 +35,4 @@ extern BOOL       GetWork            ( WorkQueue* pq, void** ppw );
 extern BOOL       FetchWork          ( WorkQueue* pq, void** ppw );
 extern int        SubmitWork         ( WorkQueue* pq, void*   pw );
 
-#endif /* __WORKQUEUE_H__ */
+#endif /* WIN32_WORKQUEUE_H */
diff --git a/rts/win32/seh_excn.h b/rts/win32/seh_excn.h
index 410d4308717b21f2f55172cd2fe02ddf6513117a..9d67fb405a75c8603da90134e11c9748e69eba77 100644
--- a/rts/win32/seh_excn.h
+++ b/rts/win32/seh_excn.h
@@ -1,5 +1,6 @@
-#ifndef __SEH_EXCN_H__
-#define __SEH_EXCN_H__
+#ifndef WIN32_SEH_EXCN_H
+#define WIN32_SEH_EXCN_H
+
 #include <stdio.h>
 #include <stdlib.h>
 
@@ -87,5 +88,5 @@ catchDivZero(struct _EXCEPTION_RECORD*,
 #error Don't know what sort of Windows system this is
 #endif
 
-#endif /* __SEH_EXCN_H__ */
+#endif /* WIN32_SEH_EXCN_H */
 
diff --git a/utils/genapply/GenApply.hs b/utils/genapply/GenApply.hs
index eb29e2d4ef34c264446c1c865d4336963a162b3f..765bfb3be6ec37c267993c01dc7c3ae161335598 100644
--- a/utils/genapply/GenApply.hs
+++ b/utils/genapply/GenApply.hs
@@ -8,8 +8,8 @@
 module Main(main) where
 
 #include "../../includes/ghcconfig.h"
-#include "../../includes/MachRegs.h"
-#include "../../includes/Constants.h"
+#include "../../includes/stg/MachRegs.h"
+#include "../../includes/rts/Constants.h"
 
 -- Needed for TAG_BITS
 #include "../../includes/MachDeps.h"