Commit f0fcc41d authored by Simon Marlow's avatar Simon Marlow
Browse files

Revert "Per-thread allocation counters and limits"

Problems were found on 32-bit platforms, I'll commit again when I have a fix.

This reverts the following commits:
   54b31f74
   b0534f78
parent 5141baf7
......@@ -988,12 +988,9 @@ lowerSafeForeignCall dflags block
id <- newTemp (bWord dflags)
new_base <- newTemp (cmmRegType dflags (CmmGlobal BaseReg))
let (caller_save, caller_load) = callerSaveVolatileRegs dflags
load_tso <- newTemp (gcWord dflags)
load_stack <- newTemp (gcWord dflags)
tso <- newTemp (gcWord dflags)
cn <- newTemp (bWord dflags)
bdfree <- newTemp (bWord dflags)
bdstart <- newTemp (bWord dflags)
let suspend = saveThreadState dflags tso cn <*>
let suspend = saveThreadState dflags <*>
caller_save <*>
mkMiddle (callSuspendThread dflags id intrbl)
midCall = mkUnsafeCall tgt res args
......@@ -1002,7 +999,7 @@ lowerSafeForeignCall dflags block
-- might now have a different Capability!
mkAssign (CmmGlobal BaseReg) (CmmReg (CmmLocal new_base)) <*>
caller_load <*>
loadThreadState dflags tso load_stack cn bdfree bdstart
loadThreadState dflags load_tso load_stack
(_, regs, copyout) =
copyOutOflow dflags NativeReturn Jump (Young succ)
......
......@@ -7,15 +7,12 @@
-----------------------------------------------------------------------------
module StgCmmForeign (
cgForeignCall,
cgForeignCall, loadThreadState, saveThreadState,
emitPrimCall, emitCCall,
emitForeignCall, -- For CmmParse
emitSaveThreadState,
saveThreadState,
emitLoadThreadState,
loadThreadState,
emitOpenNursery,
emitCloseNursery,
emitSaveThreadState, -- will be needed by the Cmm parser
emitLoadThreadState, -- ditto
emitCloseNursery, emitOpenNursery
) where
#include "HsVersions.h"
......@@ -267,215 +264,94 @@ maybe_assign_temp e = do
-- This stuff can't be done in suspendThread/resumeThread, because it
-- refers to global registers which aren't available in the C world.
saveThreadState :: DynFlags -> CmmAGraph
saveThreadState dflags =
-- CurrentTSO->stackobj->sp = Sp;
mkStore (cmmOffset dflags (CmmLoad (cmmOffset dflags stgCurrentTSO (tso_stackobj dflags)) (bWord dflags)) (stack_SP dflags)) stgSp
<*> closeNursery dflags
-- and save the current cost centre stack in the TSO when profiling:
<*> if gopt Opt_SccProfilingOn dflags then
mkStore (cmmOffset dflags stgCurrentTSO (tso_CCCS dflags)) curCCS
else mkNop
emitSaveThreadState :: FCode ()
emitSaveThreadState = do
dflags <- getDynFlags
tso <- newTemp (gcWord dflags)
cn <- newTemp (bWord dflags)
emit $ saveThreadState dflags tso cn
-- saveThreadState must be usable from the stack layout pass, where we
-- don't have FCode. Therefore it takes LocalRegs as arguments, so
-- the caller can create these.
saveThreadState :: DynFlags -> LocalReg -> LocalReg -> CmmAGraph
saveThreadState dflags tso cn =
catAGraphs [
-- tso = CurrentTSO;
mkAssign (CmmLocal tso) stgCurrentTSO,
-- tso->stackobj->sp = Sp;
mkStore (cmmOffset dflags (CmmLoad (cmmOffset dflags (CmmReg (CmmLocal tso)) (tso_stackobj dflags)) (bWord dflags)) (stack_SP dflags)) stgSp,
closeNursery dflags tso cn,
-- and save the current cost centre stack in the TSO when profiling:
if gopt Opt_SccProfilingOn dflags then
mkStore (cmmOffset dflags (CmmReg (CmmLocal tso)) (tso_CCCS dflags)) curCCS
else mkNop
]
emit (saveThreadState dflags)
emitCloseNursery :: FCode ()
emitCloseNursery = do
dflags <- getDynFlags
tso <- newTemp (gcWord dflags)
cn <- newTemp (bWord dflags)
emit $ mkAssign (CmmLocal tso) stgCurrentTSO <*>
closeNursery dflags tso cn
{-
Closing the nursery corresponds to the following code:
tso = CurrentTSO;
cn = CurrentNuresry;
df <- getDynFlags
emit (closeNursery df)
// Update the allocation limit for the current thread. We don't
// check to see whether it has overflowed at this point, that check is
// made when we run out of space in the current heap block (stg_gc_noregs)
// and in the scheduler when context switching (schedulePostRunThread).
tso->alloc_limit -= Hp + WDS(1) - cn->start;
-- CurrentNursery->free = Hp+1;
closeNursery :: DynFlags -> CmmAGraph
closeNursery dflags = mkStore (nursery_bdescr_free dflags) (cmmOffsetW dflags stgHp 1)
// Set cn->free to the next unoccupied word in the block
cn->free = Hp + WDS(1);
-}
closeNursery :: DynFlags -> LocalReg -> LocalReg -> CmmAGraph
closeNursery df tso cn =
let
tsoreg = CmmLocal tso
cnreg = CmmLocal cn
in
loadThreadState :: DynFlags -> LocalReg -> LocalReg -> CmmAGraph
loadThreadState dflags tso stack = do
catAGraphs [
mkAssign cnreg stgCurrentNursery,
let alloc =
CmmMachOp (mo_wordSub df)
[ cmmOffsetW df stgHp 1
, CmmLoad (nursery_bdescr_start df cnreg) (bWord df)
]
alloc_limit = cmmOffset df (CmmReg tsoreg) (tso_alloc_limit df)
in
-- tso->alloc_limit += alloc
mkStore alloc_limit (CmmMachOp (mo_wordSub df)
[ CmmLoad alloc_limit b64
, alloc ]),
-- CurrentNursery->free = Hp+1;
mkStore (nursery_bdescr_free df cnreg) (cmmOffsetW df stgHp 1)
]
-- tso = CurrentTSO;
mkAssign (CmmLocal tso) stgCurrentTSO,
-- stack = tso->stackobj;
mkAssign (CmmLocal stack) (CmmLoad (cmmOffset dflags (CmmReg (CmmLocal tso)) (tso_stackobj dflags)) (bWord dflags)),
-- Sp = stack->sp;
mkAssign sp (CmmLoad (cmmOffset dflags (CmmReg (CmmLocal stack)) (stack_SP dflags)) (bWord dflags)),
-- SpLim = stack->stack + RESERVED_STACK_WORDS;
mkAssign spLim (cmmOffsetW dflags (cmmOffset dflags (CmmReg (CmmLocal stack)) (stack_STACK dflags))
(rESERVED_STACK_WORDS dflags)),
-- HpAlloc = 0;
-- HpAlloc is assumed to be set to non-zero only by a failed
-- a heap check, see HeapStackCheck.cmm:GC_GENERIC
mkAssign hpAlloc (zeroExpr dflags),
openNursery dflags,
-- and load the current cost centre stack from the TSO when profiling:
if gopt Opt_SccProfilingOn dflags then
storeCurCCS
(CmmLoad (cmmOffset dflags (CmmReg (CmmLocal tso)) (tso_CCCS dflags)) (ccsType dflags))
else mkNop]
emitLoadThreadState :: FCode ()
emitLoadThreadState = do
dflags <- getDynFlags
tso <- newTemp (gcWord dflags)
stack <- newTemp (gcWord dflags)
cn <- newTemp (bWord dflags)
bdfree <- newTemp (bWord dflags)
bdstart <- newTemp (bWord dflags)
emit $ loadThreadState dflags tso stack cn bdfree bdstart
-- loadThreadState must be usable from the stack layout pass, where we
-- don't have FCode. Therefore it takes LocalRegs as arguments, so
-- the caller can create these.
loadThreadState :: DynFlags
-> LocalReg -> LocalReg -> LocalReg -> LocalReg -> LocalReg
-> CmmAGraph
loadThreadState dflags tso stack cn bdfree bdstart =
catAGraphs [
-- tso = CurrentTSO;
mkAssign (CmmLocal tso) stgCurrentTSO,
-- stack = tso->stackobj;
mkAssign (CmmLocal stack) (CmmLoad (cmmOffset dflags (CmmReg (CmmLocal tso)) (tso_stackobj dflags)) (bWord dflags)),
-- Sp = stack->sp;
mkAssign sp (CmmLoad (cmmOffset dflags (CmmReg (CmmLocal stack)) (stack_SP dflags)) (bWord dflags)),
-- SpLim = stack->stack + RESERVED_STACK_WORDS;
mkAssign spLim (cmmOffsetW dflags (cmmOffset dflags (CmmReg (CmmLocal stack)) (stack_STACK dflags))
(rESERVED_STACK_WORDS dflags)),
-- HpAlloc = 0;
-- HpAlloc is assumed to be set to non-zero only by a failed
-- a heap check, see HeapStackCheck.cmm:GC_GENERIC
mkAssign hpAlloc (zeroExpr dflags),
openNursery dflags tso cn bdfree bdstart,
-- and load the current cost centre stack from the TSO when profiling:
if gopt Opt_SccProfilingOn dflags
then storeCurCCS
(CmmLoad (cmmOffset dflags (CmmReg (CmmLocal tso))
(tso_CCCS dflags)) (ccsType dflags))
else mkNop
]
load_tso <- newTemp (gcWord dflags)
load_stack <- newTemp (gcWord dflags)
emit $ loadThreadState dflags load_tso load_stack
emitOpenNursery :: FCode ()
emitOpenNursery = do
dflags <- getDynFlags
tso <- newTemp (gcWord dflags)
cn <- newTemp (bWord dflags)
bdfree <- newTemp (bWord dflags)
bdstart <- newTemp (bWord dflags)
emit $ mkAssign (CmmLocal tso) stgCurrentTSO <*>
openNursery dflags tso cn bdfree bdstart
{-
Opening the nursery corresponds to the following code:
tso = CurrentTSO;
cn = CurrentNursery;
bdfree = CurrentNuresry->free;
bdstart = CurrentNuresry->start;
// We *add* the currently occupied portion of the nursery block to
// the allocation limit, because we will subtract it again in
// closeNursery.
tso->alloc_limit += bdfree - bdstart;
// Set Hp to the last occupied word of the heap block. Why not the
// next unocupied word? Doing it this way means that we get to use
// an offset of zero more often, which might lead to slightly smaller
// code on some architectures.
Hp = bdfree - WDS(1);
// Set HpLim to the end of the current nursery block (note that this block
// might be a block group, consisting of several adjacent blocks.
HpLim = bdstart + CurrentNursery->blocks*BLOCK_SIZE_W - 1;
-}
openNursery :: DynFlags
-> LocalReg -> LocalReg -> LocalReg -> LocalReg
-> CmmAGraph
openNursery df tso cn bdfree bdstart =
let
tsoreg = CmmLocal tso
cnreg = CmmLocal cn
bdfreereg = CmmLocal bdfree
bdstartreg = CmmLocal bdstart
in
catAGraphs [
mkAssign cnreg stgCurrentNursery,
mkAssign bdfreereg (CmmLoad (nursery_bdescr_free df cnreg) (bWord df)),
mkAssign bdstartreg (CmmLoad (nursery_bdescr_start df cnreg) (bWord df)),
-- alloc = bd->free - bd->start
let alloc =
CmmMachOp (mo_wordSub df) [CmmReg bdfreereg, CmmReg bdstartreg]
alloc_limit = cmmOffset df (CmmReg tsoreg) (tso_alloc_limit df)
in
-- tso->alloc_limit += alloc
mkStore alloc_limit (CmmMachOp (mo_wordAdd df)
[ CmmLoad alloc_limit b64
, alloc ]),
-- Hp = CurrentNursery->free - 1;
mkAssign hp (cmmOffsetW df (CmmReg bdfreereg) (-1)),
-- HpLim = CurrentNursery->start +
-- CurrentNursery->blocks*BLOCK_SIZE_W - 1;
mkAssign hpLim
(cmmOffsetExpr df
(CmmReg bdstartreg)
(cmmOffset df
(CmmMachOp (mo_wordMul df) [
CmmMachOp (MO_SS_Conv W32 (wordWidth df))
[CmmLoad (nursery_bdescr_blocks df cnreg) b32],
mkIntExpr df (bLOCK_SIZE df)
])
(-1)
)
)
df <- getDynFlags
emit (openNursery df)
openNursery :: DynFlags -> CmmAGraph
openNursery dflags = catAGraphs [
-- Hp = CurrentNursery->free - 1;
mkAssign hp (cmmOffsetW dflags (CmmLoad (nursery_bdescr_free dflags) (bWord dflags)) (-1)),
-- HpLim = CurrentNursery->start +
-- CurrentNursery->blocks*BLOCK_SIZE_W - 1;
mkAssign hpLim
(cmmOffsetExpr dflags
(CmmLoad (nursery_bdescr_start dflags) (bWord dflags))
(cmmOffset dflags
(CmmMachOp (mo_wordMul dflags) [
CmmMachOp (MO_SS_Conv W32 (wordWidth dflags))
[CmmLoad (nursery_bdescr_blocks dflags) b32],
mkIntExpr dflags (bLOCK_SIZE dflags)
])
(-1)
)
)
]
nursery_bdescr_free, nursery_bdescr_start, nursery_bdescr_blocks
:: DynFlags -> CmmReg -> CmmExpr
nursery_bdescr_free dflags cn =
cmmOffset dflags (CmmReg cn) (oFFSET_bdescr_free dflags)
nursery_bdescr_start dflags cn =
cmmOffset dflags (CmmReg cn) (oFFSET_bdescr_start dflags)
nursery_bdescr_blocks dflags cn =
cmmOffset dflags (CmmReg cn) (oFFSET_bdescr_blocks dflags)
nursery_bdescr_free, nursery_bdescr_start, nursery_bdescr_blocks :: DynFlags -> CmmExpr
nursery_bdescr_free dflags = cmmOffset dflags stgCurrentNursery (oFFSET_bdescr_free dflags)
nursery_bdescr_start dflags = cmmOffset dflags stgCurrentNursery (oFFSET_bdescr_start dflags)
nursery_bdescr_blocks dflags = cmmOffset dflags stgCurrentNursery (oFFSET_bdescr_blocks dflags)
tso_stackobj, tso_CCCS, tso_alloc_limit, stack_STACK, stack_SP :: DynFlags -> ByteOff
tso_stackobj, tso_CCCS, stack_STACK, stack_SP :: DynFlags -> ByteOff
tso_stackobj dflags = closureField dflags (oFFSET_StgTSO_stackobj dflags)
tso_alloc_limit dflags = closureField dflags (oFFSET_StgTSO_alloc_limit dflags)
tso_CCCS dflags = closureField dflags (oFFSET_StgTSO_cccs dflags)
stack_STACK dflags = closureField dflags (oFFSET_StgStack_stack dflags)
stack_SP dflags = closureField dflags (oFFSET_StgStack_sp dflags)
......
......@@ -741,8 +741,10 @@ globalRegMaybe CurrentTSO = Just (RealRegSingle REG_CurrentTSO)
# ifdef REG_CurrentNursery
globalRegMaybe CurrentNursery = Just (RealRegSingle REG_CurrentNursery)
# endif
#endif
globalRegMaybe _ = Nothing
#else
globalRegMaybe = panic "globalRegMaybe not defined for this platform"
#endif
freeReg :: RegNo -> FastBool
......
......@@ -274,12 +274,6 @@
*/
#define TSO_SQUEEZED 128
/*
* Enables the AllocationLimitExceeded exception when the thread's
* allocation limit goes negative.
*/
#define TSO_ALLOC_LIMIT 256
/*
* The number of times we spin in a spin lock before yielding (see
* #3758). To tune this value, use the benchmark in #3758: run the
......
......@@ -56,14 +56,6 @@ struct GC_FLAGS {
rtsBool doIdleGC;
StgWord heapBase; /* address to ask the OS for memory */
StgWord allocLimitGrace; /* units: *blocks*
* After an AllocationLimitExceeded
* exception has been raised, how much
* extra space is given to the thread
* to handle the exception before we
* raise it again.
*/
};
struct DEBUG_FLAGS {
......
......@@ -42,12 +42,8 @@ StgRegTable * resumeThread (void *);
//
// Thread operations from Threads.c
//
int cmp_thread (StgPtr tso1, StgPtr tso2);
int rts_getThreadId (StgPtr tso);
HsInt64 rts_getThreadAllocationCounter (StgPtr tso);
void rts_setThreadAllocationCounter (StgPtr tso, HsInt64 i);
void rts_enableThreadAllocationLimit (StgPtr tso);
void rts_disableThreadAllocationLimit (StgPtr tso);
int cmp_thread (StgPtr tso1, StgPtr tso2);
int rts_getThreadId (StgPtr tso);
#if !defined(mingw32_HOST_OS)
pid_t forkProcess (HsStablePtr *entry);
......
......@@ -145,18 +145,15 @@ typedef struct StgTSO_ {
*/
struct StgBlockingQueue_ *bq;
/*
* The allocation limit for this thread, which is updated as the
* thread allocates. If the value drops below zero, and
* TSO_ALLOC_LIMIT is set in flags, we raise an exception in the
* thread, and give the thread a little more space to handle the
* exception before we raise the exception again.
*
* This is an integer, because we might update it in a place where
* it isn't convenient to raise the exception, so we want it to
* stay negative until we get around to checking it.
*/
StgInt64 alloc_limit; /* in bytes */
#ifdef TICKY_TICKY
/* TICKY-specific stuff would go here. */
#endif
#ifdef PROFILING
StgTSOProfInfo prof;
#endif
#ifdef mingw32_HOST_OS
StgWord32 saved_winerror;
#endif
/*
* sum of the sizes of all stack chunks (in words), used to decide
......@@ -171,16 +168,6 @@ typedef struct StgTSO_ {
*/
StgWord32 tot_stack_size;
#ifdef TICKY_TICKY
/* TICKY-specific stuff would go here. */
#endif
#ifdef PROFILING
StgTSOProfInfo prof;
#endif
#ifdef mingw32_HOST_OS
StgWord32 saved_winerror;
#endif
} *StgTSOPtr;
typedef struct StgStack_ {
......
......@@ -48,7 +48,6 @@ module Control.Exception (
NestedAtomically(..),
BlockedIndefinitelyOnMVar(..),
BlockedIndefinitelyOnSTM(..),
AllocationLimitExceeded(..),
Deadlock(..),
NoMethodError(..),
PatternMatchFail(..),
......
......@@ -31,7 +31,6 @@ module Control.Exception.Base (
NestedAtomically(..),
BlockedIndefinitelyOnMVar(..),
BlockedIndefinitelyOnSTM(..),
AllocationLimitExceeded(..),
Deadlock(..),
NoMethodError(..),
PatternMatchFail(..),
......
......@@ -60,12 +60,6 @@ module GHC.Conc
, threadWaitWriteSTM
, closeFdWith
-- * Allocation counter and limit
, setAllocationCounter
, getAllocationCounter
, enableAllocationLimit
, disableAllocationLimit
-- * TVars
, STM(..)
, atomically
......
......@@ -61,12 +61,6 @@ module GHC.Conc.Sync
, threadStatus
, threadCapability
-- * Allocation counter and quota
, setAllocationCounter
, getAllocationCounter
, enableAllocationLimit
, disableAllocationLimit
-- * TVars
, STM(..)
, atomically
......@@ -183,92 +177,16 @@ instance Eq ThreadId where
instance Ord ThreadId where
compare = cmpThread
-- | Every thread has an allocation counter that tracks how much
-- memory has been allocated by the thread. The counter is
-- initialized to zero, and 'setAllocationCounter' sets the current
-- value. The allocation counter counts *down*, so in the absence of
-- a call to 'setAllocationCounter' its value is the negation of the
-- number of bytes of memory allocated by the thread.
--
-- There are two things that you can do with this counter:
--
-- * Use it as a simple profiling mechanism, with
-- 'getAllocationCounter'.
--
-- * Use it as a resource limit. See 'enableAllocationLimit'.
--
-- Allocation accounting is accurate only to about 4Kbytes.
--
setAllocationCounter :: Int64 -> IO ()
setAllocationCounter i = do
ThreadId t <- myThreadId
rts_setThreadAllocationCounter t i
-- | Return the current value of the allocation counter for the
-- current thread.
getAllocationCounter :: IO Int64
getAllocationCounter = do
ThreadId t <- myThreadId
rts_getThreadAllocationCounter t
-- | Enables the allocation counter to be treated as a limit for the
-- current thread. When the allocation limit is enabled, if the
-- allocation counter counts down below zero, the thread will be sent
-- the 'AllocationLimitExceeded' asynchronous exception. When this
-- happens, the counter is reinitialised (by default
-- to 100K, but tunable with the @+RTS -xq@ option) so that it can handle
-- the exception and perform any necessary clean up. If it exhausts
-- this additional allowance, another 'AllocationLimitExceeded' exception
-- is sent, and so forth.
--
-- Note that memory allocation is unrelated to /live memory/, also
-- known as /heap residency/. A thread can allocate a large amount of
-- memory and retain anything between none and all of it. It is
-- better to think of the allocation limit as a limit on
-- /CPU time/, rather than a limit on memory.
--
-- Compared to using timeouts, allocation limits don't count time
-- spent blocked or in foreign calls.
--
enableAllocationLimit :: IO ()
enableAllocationLimit = do
ThreadId t <- myThreadId
rts_enableThreadAllocationLimit t
-- | Disable allocation limit processing for the current thread.
disableAllocationLimit :: IO ()
disableAllocationLimit = do
ThreadId t <- myThreadId
rts_disableThreadAllocationLimit t
-- We cannot do these operations safely on another thread, because on
-- a 32-bit machine we cannot do atomic operations on a 64-bit value.
-- Therefore, we only expose APIs that allow getting and setting the
-- limit of the current thread.
foreign import ccall unsafe "rts_setThreadAllocationCounter"
rts_setThreadAllocationCounter :: ThreadId# -> Int64 -> IO ()
foreign import ccall unsafe "rts_getThreadAllocationCounter"
rts_getThreadAllocationCounter :: ThreadId# -> IO Int64
foreign import ccall unsafe "rts_enableThreadAllocationLimit"
rts_enableThreadAllocationLimit :: ThreadId# -> IO ()
foreign import ccall unsafe "rts_disableThreadAllocationLimit"
rts_disableThreadAllocationLimit :: ThreadId# -> IO ()
{- |
Creates a new thread to run the 'IO' computation passed as the
Sparks off a new thread to run the 'IO' computation passed as the
first argument, and returns the 'ThreadId' of the newly created
thread.
The new thread will be a lightweight, /unbound/ thread. Foreign calls
made by this thread are not guaranteed to be made by any particular OS
thread; if you need foreign calls to be made by a particular OS
thread, then use 'Control.Concurrent.forkOS' instead.
The new thread will be a lightweight thread; if you want to use a foreign
library that uses thread-local storage, use 'Control.Concurrent.forkOS' instead.
The new thread inherits the /masked/ state of the parent (see
'Control.Exception.mask').
GHC note: the new thread inherits the /masked/ state of the parent
(see 'Control.Exception.mask').
The newly created thread has an exception handler that discards the
exceptions 'BlockedIndefinitelyOnMVar', 'BlockedIndefinitelyOnSTM', and
......
......@@ -22,7 +22,6 @@ module GHC.IO.Exception (
BlockedIndefinitelyOnMVar(..), blockedIndefinitelyOnMVar,
BlockedIndefinitelyOnSTM(..), blockedIndefinitelyOnSTM,
Deadlock(..),
AllocationLimitExceeded(..), allocationLimitExceeded,
AssertionFailed(..),
SomeAsyncException(..),
......@@ -100,23 +99,6 @@ instance Show Deadlock where
-----
-- |This thread has exceeded its allocation limit. See
-- 'GHC.Conc.setAllocationCounter' and
-- 'GHC.Conc.enableAllocationLimit'.
data AllocationLimitExceeded = AllocationLimitExceeded
deriving Typeable
instance Exception AllocationLimitExceeded
instance Show AllocationLimitExceeded where
showsPrec _ AllocationLimitExceeded =
showString "allocation limit exceeded"
allocationLimitExceeded :: SomeException -- for the RTS
allocationLimitExceeded = toException AllocationLimitExceeded
-----
-- |'assert' was applied to 'False'.
data AssertionFailed = AssertionFailed String
deriving Typeable
......@@ -193,8 +175,7 @@ data ArrayException
instance Exception ArrayException