Commit d70b19bf authored by Simon Marlow's avatar Simon Marlow

Per-thread allocation counters and limits

This reverts commit f0fcc41d.

New changes: now works on 32-bit platforms too.  I added some basic
support for 64-bit subtraction and comparison operations to the x86
NCG.
parent c774b28f
......@@ -992,9 +992,12 @@ lowerSafeForeignCall dflags block
id <- newTemp (bWord dflags)
new_base <- newTemp (cmmRegType dflags (CmmGlobal BaseReg))
let (caller_save, caller_load) = callerSaveVolatileRegs dflags
load_tso <- newTemp (gcWord dflags)
load_stack <- newTemp (gcWord dflags)
let suspend = saveThreadState dflags <*>
tso <- newTemp (gcWord dflags)
cn <- newTemp (bWord dflags)
bdfree <- newTemp (bWord dflags)
bdstart <- newTemp (bWord dflags)
let suspend = saveThreadState dflags tso cn <*>
caller_save <*>
mkMiddle (callSuspendThread dflags id intrbl)
midCall = mkUnsafeCall tgt res args
......@@ -1003,7 +1006,7 @@ lowerSafeForeignCall dflags block
-- might now have a different Capability!
mkAssign (CmmGlobal BaseReg) (CmmReg (CmmLocal new_base)) <*>
caller_load <*>
loadThreadState dflags load_tso load_stack
loadThreadState dflags tso load_stack cn bdfree bdstart
(_, regs, copyout) =
copyOutOflow dflags NativeReturn Jump (Young succ)
......
......@@ -3,7 +3,7 @@
module CmmMachOp
( MachOp(..)
, pprMachOp, isCommutableMachOp, isAssociativeMachOp
, isComparisonMachOp, machOpResultType
, isComparisonMachOp, maybeIntComparison, machOpResultType
, machOpArgReps, maybeInvertComparison
-- MachOp builders
......@@ -11,9 +11,11 @@ module CmmMachOp
, mo_wordSRem, mo_wordSNeg, mo_wordUQuot, mo_wordURem
, mo_wordSGe, mo_wordSLe, mo_wordSGt, mo_wordSLt, mo_wordUGe
, mo_wordULe, mo_wordUGt, mo_wordULt
, mo_wordAnd, mo_wordOr, mo_wordXor, mo_wordNot, mo_wordShl, mo_wordSShr, mo_wordUShr
, mo_wordAnd, mo_wordOr, mo_wordXor, mo_wordNot
, mo_wordShl, mo_wordSShr, mo_wordUShr
, mo_u_8To32, mo_s_8To32, mo_u_16To32, mo_s_16To32
, mo_u_8ToWord, mo_s_8ToWord, mo_u_16ToWord, mo_s_16ToWord, mo_u_32ToWord, mo_s_32ToWord
, mo_u_8ToWord, mo_s_8ToWord, mo_u_16ToWord, mo_s_16ToWord
, mo_u_32ToWord, mo_s_32ToWord
, mo_32To8, mo_32To16, mo_WordTo8, mo_WordTo16, mo_WordTo32, mo_WordTo64
-- CallishMachOp
......@@ -260,6 +262,7 @@ isAssociativeMachOp mop =
MO_Xor {} -> True
_other -> False
-- ----------------------------------------------------------------------------
-- isComparisonMachOp
......@@ -290,6 +293,25 @@ isComparisonMachOp mop =
MO_F_Lt {} -> True
_other -> False
{- |
Returns @Just w@ if the operation is an integer comparison with width
@w@, or @Nothing@ otherwise.
-}
maybeIntComparison :: MachOp -> Maybe Width
maybeIntComparison mop =
case mop of
MO_Eq w -> Just w
MO_Ne w -> Just w
MO_S_Ge w -> Just w
MO_S_Le w -> Just w
MO_S_Gt w -> Just w
MO_S_Lt w -> Just w
MO_U_Ge w -> Just w
MO_U_Le w -> Just w
MO_U_Gt w -> Just w
MO_U_Lt w -> Just w
_ -> Nothing
-- -----------------------------------------------------------------------------
-- Inverting conditions
......
......@@ -9,12 +9,15 @@
-----------------------------------------------------------------------------
module StgCmmForeign (
cgForeignCall, loadThreadState, saveThreadState,
cgForeignCall,
emitPrimCall, emitCCall,
emitForeignCall, -- For CmmParse
emitSaveThreadState, -- will be needed by the Cmm parser
emitLoadThreadState, -- ditto
emitCloseNursery, emitOpenNursery
emitSaveThreadState,
saveThreadState,
emitLoadThreadState,
loadThreadState,
emitOpenNursery,
emitCloseNursery,
) where
#include "HsVersions.h"
......@@ -271,94 +274,221 @@ maybe_assign_temp e = do
-- This stuff can't be done in suspendThread/resumeThread, because it
-- refers to global registers which aren't available in the C world.
saveThreadState :: DynFlags -> CmmAGraph
saveThreadState dflags =
-- CurrentTSO->stackobj->sp = Sp;
mkStore (cmmOffset dflags (CmmLoad (cmmOffset dflags stgCurrentTSO (tso_stackobj dflags)) (bWord dflags)) (stack_SP dflags)) stgSp
<*> closeNursery dflags
-- and save the current cost centre stack in the TSO when profiling:
<*> if gopt Opt_SccProfilingOn dflags then
mkStore (cmmOffset dflags stgCurrentTSO (tso_CCCS dflags)) curCCS
else mkNop
emitSaveThreadState :: FCode ()
emitSaveThreadState = do
dflags <- getDynFlags
emit (saveThreadState dflags)
tso <- newTemp (gcWord dflags)
cn <- newTemp (bWord dflags)
emit $ saveThreadState dflags tso cn
-- saveThreadState must be usable from the stack layout pass, where we
-- don't have FCode. Therefore it takes LocalRegs as arguments, so
-- the caller can create these.
saveThreadState :: DynFlags -> LocalReg -> LocalReg -> CmmAGraph
saveThreadState dflags tso cn =
catAGraphs [
-- tso = CurrentTSO;
mkAssign (CmmLocal tso) stgCurrentTSO,
-- tso->stackobj->sp = Sp;
mkStore (cmmOffset dflags (CmmLoad (cmmOffset dflags (CmmReg (CmmLocal tso)) (tso_stackobj dflags)) (bWord dflags)) (stack_SP dflags)) stgSp,
closeNursery dflags tso cn,
-- and save the current cost centre stack in the TSO when profiling:
if gopt Opt_SccProfilingOn dflags then
mkStore (cmmOffset dflags (CmmReg (CmmLocal tso)) (tso_CCCS dflags)) curCCS
else mkNop
]
emitCloseNursery :: FCode ()
emitCloseNursery = do
df <- getDynFlags
emit (closeNursery df)
dflags <- getDynFlags
tso <- newTemp (gcWord dflags)
cn <- newTemp (bWord dflags)
emit $ mkAssign (CmmLocal tso) stgCurrentTSO <*>
closeNursery dflags tso cn
{-
Closing the nursery corresponds to the following code:
tso = CurrentTSO;
cn = CurrentNuresry;
-- CurrentNursery->free = Hp+1;
closeNursery :: DynFlags -> CmmAGraph
closeNursery dflags = mkStore (nursery_bdescr_free dflags) (cmmOffsetW dflags stgHp 1)
// Update the allocation limit for the current thread. We don't
// check to see whether it has overflowed at this point, that check is
// made when we run out of space in the current heap block (stg_gc_noregs)
// and in the scheduler when context switching (schedulePostRunThread).
tso->alloc_limit -= Hp + WDS(1) - cn->start;
loadThreadState :: DynFlags -> LocalReg -> LocalReg -> CmmAGraph
loadThreadState dflags tso stack = do
// Set cn->free to the next unoccupied word in the block
cn->free = Hp + WDS(1);
-}
closeNursery :: DynFlags -> LocalReg -> LocalReg -> CmmAGraph
closeNursery df tso cn =
let
tsoreg = CmmLocal tso
cnreg = CmmLocal cn
in
catAGraphs [
-- tso = CurrentTSO;
mkAssign (CmmLocal tso) stgCurrentTSO,
-- stack = tso->stackobj;
mkAssign (CmmLocal stack) (CmmLoad (cmmOffset dflags (CmmReg (CmmLocal tso)) (tso_stackobj dflags)) (bWord dflags)),
-- Sp = stack->sp;
mkAssign sp (CmmLoad (cmmOffset dflags (CmmReg (CmmLocal stack)) (stack_SP dflags)) (bWord dflags)),
-- SpLim = stack->stack + RESERVED_STACK_WORDS;
mkAssign spLim (cmmOffsetW dflags (cmmOffset dflags (CmmReg (CmmLocal stack)) (stack_STACK dflags))
(rESERVED_STACK_WORDS dflags)),
-- HpAlloc = 0;
-- HpAlloc is assumed to be set to non-zero only by a failed
-- a heap check, see HeapStackCheck.cmm:GC_GENERIC
mkAssign hpAlloc (zeroExpr dflags),
openNursery dflags,
-- and load the current cost centre stack from the TSO when profiling:
if gopt Opt_SccProfilingOn dflags then
storeCurCCS
(CmmLoad (cmmOffset dflags (CmmReg (CmmLocal tso)) (tso_CCCS dflags)) (ccsType dflags))
else mkNop]
mkAssign cnreg stgCurrentNursery,
-- CurrentNursery->free = Hp+1;
mkStore (nursery_bdescr_free df cnreg) (cmmOffsetW df stgHp 1),
let alloc =
CmmMachOp (mo_wordSub df)
[ cmmOffsetW df stgHp 1
, CmmLoad (nursery_bdescr_start df cnreg) (bWord df)
]
alloc_limit = cmmOffset df (CmmReg tsoreg) (tso_alloc_limit df)
in
-- tso->alloc_limit += alloc
mkStore alloc_limit (CmmMachOp (MO_Sub W64)
[ CmmLoad alloc_limit b64
, CmmMachOp (mo_WordTo64 df) [alloc] ])
]
emitLoadThreadState :: FCode ()
emitLoadThreadState = do
dflags <- getDynFlags
load_tso <- newTemp (gcWord dflags)
load_stack <- newTemp (gcWord dflags)
emit $ loadThreadState dflags load_tso load_stack
tso <- newTemp (gcWord dflags)
stack <- newTemp (gcWord dflags)
cn <- newTemp (bWord dflags)
bdfree <- newTemp (bWord dflags)
bdstart <- newTemp (bWord dflags)
emit $ loadThreadState dflags tso stack cn bdfree bdstart
-- loadThreadState must be usable from the stack layout pass, where we
-- don't have FCode. Therefore it takes LocalRegs as arguments, so
-- the caller can create these.
loadThreadState :: DynFlags
-> LocalReg -> LocalReg -> LocalReg -> LocalReg -> LocalReg
-> CmmAGraph
loadThreadState dflags tso stack cn bdfree bdstart =
catAGraphs [
-- tso = CurrentTSO;
mkAssign (CmmLocal tso) stgCurrentTSO,
-- stack = tso->stackobj;
mkAssign (CmmLocal stack) (CmmLoad (cmmOffset dflags (CmmReg (CmmLocal tso)) (tso_stackobj dflags)) (bWord dflags)),
-- Sp = stack->sp;
mkAssign sp (CmmLoad (cmmOffset dflags (CmmReg (CmmLocal stack)) (stack_SP dflags)) (bWord dflags)),
-- SpLim = stack->stack + RESERVED_STACK_WORDS;
mkAssign spLim (cmmOffsetW dflags (cmmOffset dflags (CmmReg (CmmLocal stack)) (stack_STACK dflags))
(rESERVED_STACK_WORDS dflags)),
-- HpAlloc = 0;
-- HpAlloc is assumed to be set to non-zero only by a failed
-- a heap check, see HeapStackCheck.cmm:GC_GENERIC
mkAssign hpAlloc (zeroExpr dflags),
openNursery dflags tso cn bdfree bdstart,
-- and load the current cost centre stack from the TSO when profiling:
if gopt Opt_SccProfilingOn dflags
then storeCurCCS
(CmmLoad (cmmOffset dflags (CmmReg (CmmLocal tso))
(tso_CCCS dflags)) (ccsType dflags))
else mkNop
]
emitOpenNursery :: FCode ()
emitOpenNursery = do
df <- getDynFlags
emit (openNursery df)
openNursery :: DynFlags -> CmmAGraph
openNursery dflags = catAGraphs [
-- Hp = CurrentNursery->free - 1;
mkAssign hp (cmmOffsetW dflags (CmmLoad (nursery_bdescr_free dflags) (bWord dflags)) (-1)),
-- HpLim = CurrentNursery->start +
-- CurrentNursery->blocks*BLOCK_SIZE_W - 1;
mkAssign hpLim
(cmmOffsetExpr dflags
(CmmLoad (nursery_bdescr_start dflags) (bWord dflags))
(cmmOffset dflags
(CmmMachOp (mo_wordMul dflags) [
CmmMachOp (MO_SS_Conv W32 (wordWidth dflags))
[CmmLoad (nursery_bdescr_blocks dflags) b32],
mkIntExpr dflags (bLOCK_SIZE dflags)
])
(-1)
)
)
dflags <- getDynFlags
tso <- newTemp (gcWord dflags)
cn <- newTemp (bWord dflags)
bdfree <- newTemp (bWord dflags)
bdstart <- newTemp (bWord dflags)
emit $ mkAssign (CmmLocal tso) stgCurrentTSO <*>
openNursery dflags tso cn bdfree bdstart
{-
Opening the nursery corresponds to the following code:
tso = CurrentTSO;
cn = CurrentNursery;
bdfree = CurrentNuresry->free;
bdstart = CurrentNuresry->start;
// We *add* the currently occupied portion of the nursery block to
// the allocation limit, because we will subtract it again in
// closeNursery.
tso->alloc_limit += bdfree - bdstart;
// Set Hp to the last occupied word of the heap block. Why not the
// next unocupied word? Doing it this way means that we get to use
// an offset of zero more often, which might lead to slightly smaller
// code on some architectures.
Hp = bdfree - WDS(1);
// Set HpLim to the end of the current nursery block (note that this block
// might be a block group, consisting of several adjacent blocks.
HpLim = bdstart + CurrentNursery->blocks*BLOCK_SIZE_W - 1;
-}
openNursery :: DynFlags
-> LocalReg -> LocalReg -> LocalReg -> LocalReg
-> CmmAGraph
openNursery df tso cn bdfree bdstart =
let
tsoreg = CmmLocal tso
cnreg = CmmLocal cn
bdfreereg = CmmLocal bdfree
bdstartreg = CmmLocal bdstart
in
-- These assignments are carefully ordered to reduce register
-- pressure and generate not completely awful code on x86. To see
-- what code we generate, look at the assembly for
-- stg_returnToStackTop in rts/StgStartup.cmm.
catAGraphs [
mkAssign cnreg stgCurrentNursery,
mkAssign bdfreereg (CmmLoad (nursery_bdescr_free df cnreg) (bWord df)),
-- Hp = CurrentNursery->free - 1;
mkAssign hp (cmmOffsetW df (CmmReg bdfreereg) (-1)),
mkAssign bdstartreg (CmmLoad (nursery_bdescr_start df cnreg) (bWord df)),
-- HpLim = CurrentNursery->start +
-- CurrentNursery->blocks*BLOCK_SIZE_W - 1;
mkAssign hpLim
(cmmOffsetExpr df
(CmmReg bdstartreg)
(cmmOffset df
(CmmMachOp (mo_wordMul df) [
CmmMachOp (MO_SS_Conv W32 (wordWidth df))
[CmmLoad (nursery_bdescr_blocks df cnreg) b32],
mkIntExpr df (bLOCK_SIZE df)
])
(-1)
)
),
-- alloc = bd->free - bd->start
let alloc =
CmmMachOp (mo_wordSub df) [CmmReg bdfreereg, CmmReg bdstartreg]
alloc_limit = cmmOffset df (CmmReg tsoreg) (tso_alloc_limit df)
in
-- tso->alloc_limit += alloc
mkStore alloc_limit (CmmMachOp (MO_Add W64)
[ CmmLoad alloc_limit b64
, CmmMachOp (mo_WordTo64 df) [alloc] ])
]
nursery_bdescr_free, nursery_bdescr_start, nursery_bdescr_blocks :: DynFlags -> CmmExpr
nursery_bdescr_free dflags = cmmOffset dflags stgCurrentNursery (oFFSET_bdescr_free dflags)
nursery_bdescr_start dflags = cmmOffset dflags stgCurrentNursery (oFFSET_bdescr_start dflags)
nursery_bdescr_blocks dflags = cmmOffset dflags stgCurrentNursery (oFFSET_bdescr_blocks dflags)
nursery_bdescr_free, nursery_bdescr_start, nursery_bdescr_blocks
:: DynFlags -> CmmReg -> CmmExpr
nursery_bdescr_free dflags cn =
cmmOffset dflags (CmmReg cn) (oFFSET_bdescr_free dflags)
nursery_bdescr_start dflags cn =
cmmOffset dflags (CmmReg cn) (oFFSET_bdescr_start dflags)
nursery_bdescr_blocks dflags cn =
cmmOffset dflags (CmmReg cn) (oFFSET_bdescr_blocks dflags)
tso_stackobj, tso_CCCS, stack_STACK, stack_SP :: DynFlags -> ByteOff
tso_stackobj, tso_CCCS, tso_alloc_limit, stack_STACK, stack_SP :: DynFlags -> ByteOff
tso_stackobj dflags = closureField dflags (oFFSET_StgTSO_stackobj dflags)
tso_alloc_limit dflags = closureField dflags (oFFSET_StgTSO_alloc_limit dflags)
tso_CCCS dflags = closureField dflags (oFFSET_StgTSO_cccs dflags)
stack_STACK dflags = closureField dflags (oFFSET_StgStack_stack dflags)
stack_SP dflags = closureField dflags (oFFSET_StgStack_sp dflags)
......
......@@ -391,6 +391,21 @@ iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
ADC II32 (OpReg r2hi) (OpReg rhi) ]
return (ChildCode64 code rlo)
iselExpr64 (CmmMachOp (MO_Sub _) [e1,e2]) = do
ChildCode64 code1 r1lo <- iselExpr64 e1
ChildCode64 code2 r2lo <- iselExpr64 e2
(rlo,rhi) <- getNewRegPairNat II32
let
r1hi = getHiVRegFromLo r1lo
r2hi = getHiVRegFromLo r2lo
code = code1 `appOL`
code2 `appOL`
toOL [ MOV II32 (OpReg r1lo) (OpReg rlo),
SUB II32 (OpReg r2lo) (OpReg rlo),
MOV II32 (OpReg r1hi) (OpReg rhi),
SBB II32 (OpReg r2hi) (OpReg rhi) ]
return (ChildCode64 code rlo)
iselExpr64 (CmmMachOp (MO_UU_Conv _ W64) [expr]) = do
fn <- getAnyReg expr
r_dst_lo <- getNewRegNat II32
......@@ -1272,24 +1287,23 @@ getCondCode (CmmMachOp mop [x, y])
MO_F_Lt W64 -> condFltCode LTT x y
MO_F_Le W64 -> condFltCode LE x y
MO_Eq _ -> condIntCode EQQ x y
MO_Ne _ -> condIntCode NE x y
MO_S_Gt _ -> condIntCode GTT x y
MO_S_Ge _ -> condIntCode GE x y
MO_S_Lt _ -> condIntCode LTT x y
MO_S_Le _ -> condIntCode LE x y
MO_U_Gt _ -> condIntCode GU x y
MO_U_Ge _ -> condIntCode GEU x y
MO_U_Lt _ -> condIntCode LU x y
MO_U_Le _ -> condIntCode LEU x y
_other -> pprPanic "getCondCode(x86,x86_64)" (ppr (CmmMachOp mop [x,y]))
_ -> condIntCode (machOpToCond mop) x y
getCondCode other = pprPanic "getCondCode(2)(x86,x86_64)" (ppr other)
machOpToCond :: MachOp -> Cond
machOpToCond mo = case mo of
MO_Eq _ -> EQQ
MO_Ne _ -> NE
MO_S_Gt _ -> GTT
MO_S_Ge _ -> GE
MO_S_Lt _ -> LTT
MO_S_Le _ -> LE
MO_U_Gt _ -> GU
MO_U_Ge _ -> GEU
MO_U_Lt _ -> LU
MO_U_Le _ -> LEU
_other -> pprPanic "machOpToCond" (pprMachOp mo)
-- @cond(Int|Flt)Code@: Turn a boolean expression into a condition, to be
......@@ -1538,7 +1552,31 @@ genCondJump
-> CmmExpr -- the condition on which to branch
-> NatM InstrBlock
genCondJump id bool = do
genCondJump id expr = do
is32Bit <- is32BitPlatform
genCondJump' is32Bit id expr
genCondJump' :: Bool -> BlockId -> CmmExpr -> NatM InstrBlock
-- 64-bit integer comparisons on 32-bit
genCondJump' is32Bit true (CmmMachOp mop [e1,e2])
| is32Bit, Just W64 <- maybeIntComparison mop = do
ChildCode64 code1 r1_lo <- iselExpr64 e1
ChildCode64 code2 r2_lo <- iselExpr64 e2
let r1_hi = getHiVRegFromLo r1_lo
r2_hi = getHiVRegFromLo r2_lo
cond = machOpToCond mop
Just cond' = maybeFlipCond cond
false <- getBlockIdNat
return $ code1 `appOL` code2 `appOL` toOL [
CMP II32 (OpReg r2_hi) (OpReg r1_hi),
JXX cond true,
JXX cond' false,
CMP II32 (OpReg r2_lo) (OpReg r1_lo),
JXX cond true,
NEWBLOCK false ]
genCondJump' _ id bool = do
CondCode is_float cond cond_code <- getCondCode bool
use_sse2 <- sse2Enabled
if not is_float || not use_sse2
......@@ -1569,7 +1607,6 @@ genCondJump id bool = do
]
return (cond_code `appOL` code)
-- -----------------------------------------------------------------------------
-- Generating C calls
......
......@@ -196,6 +196,7 @@ data Instr
| ADD Size Operand Operand
| ADC Size Operand Operand
| SUB Size Operand Operand
| SBB Size Operand Operand
| MUL Size Operand Operand
| MUL2 Size Operand -- %edx:%eax = operand * %rax
......@@ -365,6 +366,7 @@ x86_regUsageOfInstr platform instr
ADD _ src dst -> usageRM src dst
ADC _ src dst -> usageRM src dst
SUB _ src dst -> usageRM src dst
SBB _ src dst -> usageRM src dst
IMUL _ src dst -> usageRM src dst
IMUL2 _ src -> mkRU (eax:use_R src []) [eax,edx]
MUL _ src dst -> usageRM src dst
......@@ -543,6 +545,7 @@ x86_patchRegsOfInstr instr env
ADD sz src dst -> patch2 (ADD sz) src dst
ADC sz src dst -> patch2 (ADC sz) src dst
SUB sz src dst -> patch2 (SUB sz) src dst
SBB sz src dst -> patch2 (SBB sz) src dst
IMUL sz src dst -> patch2 (IMUL sz) src dst
IMUL2 sz src -> patch1 (IMUL2 sz) src
MUL sz src dst -> patch2 (MUL sz) src dst
......
......@@ -570,11 +570,10 @@ pprInstr (ADD size (OpImm (ImmInt (-1))) dst)
= pprSizeOp (sLit "dec") size dst
pprInstr (ADD size (OpImm (ImmInt 1)) dst)
= pprSizeOp (sLit "inc") size dst
pprInstr (ADD size src dst)
= pprSizeOpOp (sLit "add") size src dst
pprInstr (ADC size src dst)
= pprSizeOpOp (sLit "adc") size src dst
pprInstr (ADD size src dst) = pprSizeOpOp (sLit "add") size src dst
pprInstr (ADC size src dst) = pprSizeOpOp (sLit "adc") size src dst
pprInstr (SUB size src dst) = pprSizeOpOp (sLit "sub") size src dst
pprInstr (SBB size src dst) = pprSizeOpOp (sLit "sbb") size src dst
pprInstr (IMUL size op1 op2) = pprSizeOpOp (sLit "imul") size op1 op2
pprInstr (ADD_CC size src dst)
......
......@@ -276,6 +276,12 @@
*/
#define TSO_SQUEEZED 128
/*
* Enables the AllocationLimitExceeded exception when the thread's
* allocation limit goes negative.
*/
#define TSO_ALLOC_LIMIT 256
/*
* The number of times we spin in a spin lock before yielding (see
* #3758). To tune this value, use the benchmark in #3758: run the
......
......@@ -56,6 +56,14 @@ struct GC_FLAGS {
rtsBool doIdleGC;
StgWord heapBase; /* address to ask the OS for memory */
StgWord allocLimitGrace; /* units: *blocks*
* After an AllocationLimitExceeded
* exception has been raised, how much
* extra space is given to the thread
* to handle the exception before we
* raise it again.
*/
};
struct DEBUG_FLAGS {
......
......@@ -42,8 +42,12 @@ StgRegTable * resumeThread (void *);
//
// Thread operations from Threads.c
//
int cmp_thread (StgPtr tso1, StgPtr tso2);
int rts_getThreadId (StgPtr tso);
int cmp_thread (StgPtr tso1, StgPtr tso2);
int rts_getThreadId (StgPtr tso);
HsInt64 rts_getThreadAllocationCounter (StgPtr tso);
void rts_setThreadAllocationCounter (StgPtr tso, HsInt64 i);
void rts_enableThreadAllocationLimit (StgPtr tso);
void rts_disableThreadAllocationLimit (StgPtr tso);
#if !defined(mingw32_HOST_OS)
pid_t forkProcess (HsStablePtr *entry);
......
......@@ -145,15 +145,18 @@ typedef struct StgTSO_ {
*/
struct StgBlockingQueue_ *bq;
#ifdef TICKY_TICKY
/* TICKY-specific stuff would go here. */
#endif
#ifdef PROFILING
StgTSOProfInfo prof;
#endif
#ifdef mingw32_HOST_OS
StgWord32 saved_winerror;
#endif
/*
* The allocation limit for this thread, which is updated as the
* thread allocates. If the value drops below zero, and
* TSO_ALLOC_LIMIT is set in flags, we raise an exception in the
* thread, and give the thread a little more space to handle the
* exception before we raise the exception again.
*
* This is an integer, because we might update it in a place where
* it isn't convenient to raise the exception, so we want it to
* stay negative until we get around to checking it.
*/
StgInt64 alloc_limit; /* in bytes */
/*
* sum of the sizes of all stack chunks (in words), used to decide
......@@ -168,6 +171,16 @@ typedef struct StgTSO_ {
*/
StgWord32 tot_stack_size;
#ifdef TICKY_TICKY
/* TICKY-specific stuff would go here. */
#endif
#ifdef PROFILING
StgTSOProfInfo prof;
#endif
#ifdef mingw32_HOST_OS
StgWord32 saved_winerror;
#endif
} *StgTSOPtr;
typedef struct StgStack_ {
......
......@@ -48,6 +48,7 @@ module Control.Exception (
NestedAtomically(..),
BlockedIndefinitelyOnMVar(..),
BlockedIndefinitelyOnSTM(..),
AllocationLimitExceeded(..),
Deadlock(..),
NoMethodError(..),
PatternMatchFail(..),
......
......@@ -31,6 +31,7 @@ module Control.Exception.Base (
NestedAtomically(..),
BlockedIndefinitelyOnMVar(..),
BlockedIndefinitelyOnSTM(..),
AllocationLimitExceeded(..),
Deadlock(..),
NoMethodError(..),
PatternMatchFail(..),
......
......@@ -59,6 +59,12 @@ module GHC.Conc
, threadWaitWriteSTM
, closeFdWith
-- * Allocation counter and limit
, setAllocationCounter
, getAllocationCounter
, enableAllocationLimit
, disableAllocationLimit
-- * TVars
, STM(..)
, atomically
......
......@@ -60,6 +60,12 @@ module GHC.Conc.Sync
, threadStatus
, threadCapability
-- * Allocation counter and quota
, setAllocationCounter
, getAllocationCounter
, enableAllocationLimit
, disableAllocationLimit
-- * TVars
, STM(..)
, atomically
......@@ -171,16 +177,92 @@ instance Eq ThreadId where
instance Ord ThreadId where
compare = cmpThread
-- | Every thread has an allocation counter that tracks how much
-- memory has been allocated by the thread. The counter is
-- initialized to zero, and 'setAllocationCounter' sets the current
-- value. The allocation counter counts *down*, so in the absence of
-- a call to 'setAllocationCounter' its value is the negation of the
-- number of bytes of memory allocated by the thread.
--
-- There are two things that you can do with this counter:
--
-- * Use it as a simple profiling mechanism, with
-- 'getAllocationCounter'.
--
-- * Use it as a resource limit. See 'enableAllocationLimit'.
--
-- Allocation accounting is accurate only to about 4Kbytes.
--
setAllocationCounter :: Int64 -> IO ()
setAllocationCounter i = do
ThreadId t <- myThreadId
rts_setThreadAllocationCounter t i
-- | Return the current value of the allocation counter for the
-- current thread.
getAllocationCounter :: IO Int64
getAllocationCounter = do
ThreadId t <- myThreadId
rts_getThreadAllocationCounter t
-- | Enables the allocation counter to be treated as a limit for the
-- current thread. When the allocation limit is enabled, if the
-- allocation counter counts down below zero, the thread will be sent
-- the 'AllocationLimitExceeded' asynchronous exception. When this
-- happens, the counter is reinitialised (by default
-- to 100K, but tunable with the @+RTS -xq@ option) so that it can handle
-- the exception and perform any necessary clean up. If it exhausts
-- this additional allowance, another 'AllocationLimitExceeded' exception
-- is sent, and so forth.
--
-- Note that memory allocation is unrelated to /live memory/, also
-- known as /heap residency/. A thread can allocate a large amount of
-- memory and retain anything between none and all of it. It is