Commit 20cbb016 authored by Ben Gamari's avatar Ben Gamari Committed by Ben Gamari

Improve accuracy of get/setAllocationCounter

Summary:
get/setAllocationCounter didn't take into account allocations in the
current block. This was known at the time, but it turns out to be
important to have more accuracy when using these in a fine-grained
way.

Test Plan:
New unit test to test incrementally larger allocaitons.  Before I got
results like this:

```
+0
+0
+0
+0
+0
+4096
+0
+0
+0
+0
+0
+4064
+0
+0
+4088
+4056
+0
+0
+0
+4088
+4096
+4056
+4096
```

Notice how the results aren't always monotonically increasing.  After
this patch:

```
+344
+416
+488
+560
+632
+704
+776
+848
+920
+992
+1064
+1136
+1208
+1280
+1352
+1424
+1496
+1568
+1640
+1712
+1784
+1856
+1928
+2000
+2072
+2144
```

Reviewers: hvr, erikd, simonmar, jrtc27, trommler

Reviewed By: simonmar

Subscribers: trommler, jrtc27, rwbarton, thomie, carter

Differential Revision: https://phabricator.haskell.org/D4363
parent d27336ed
......@@ -404,8 +404,8 @@ Opening the nursery corresponds to the following code:
@
tso = CurrentTSO;
cn = CurrentNursery;
bdfree = CurrentNuresry->free;
bdstart = CurrentNuresry->start;
bdfree = CurrentNursery->free;
bdstart = CurrentNursery->start;
// We *add* the currently occupied portion of the nursery block to
// the allocation limit, because we will subtract it again in
......
......@@ -2942,6 +2942,20 @@ primop TraceMarkerOp "traceMarker#" GenPrimOp
has_side_effects = True
out_of_line = True
primop GetThreadAllocationCounter "getThreadAllocationCounter#" GenPrimOp
State# RealWorld -> (# State# RealWorld, INT64 #)
{ Retrieves the allocation counter for the current thread. }
with
has_side_effects = True
out_of_line = True
primop SetThreadAllocationCounter "setThreadAllocationCounter#" GenPrimOp
INT64 -> State# RealWorld -> State# RealWorld
{ Sets the allocation counter for the current thread to the given value. }
with
has_side_effects = True
out_of_line = True
------------------------------------------------------------------------
section "Safe coercions"
------------------------------------------------------------------------
......
......@@ -161,9 +161,11 @@
/* TO_W_(n) converts n to W_ type from a smaller type */
#if SIZEOF_W == 4
#define TO_I64(x) %sx64(x)
#define TO_W_(x) %sx32(x)
#define HALF_W_(x) %lobits16(x)
#elif SIZEOF_W == 8
#define TO_I64(x) (x)
#define TO_W_(x) %sx64(x)
#define HALF_W_(x) %lobits32(x)
#endif
......
......@@ -43,8 +43,6 @@ StgRegTable * resumeThread (void *);
//
int cmp_thread (StgPtr tso1, StgPtr tso2);
int rts_getThreadId (StgPtr tso);
HsInt64 rts_getThreadAllocationCounter (StgPtr tso);
void rts_setThreadAllocationCounter (StgPtr tso, HsInt64 i);
void rts_enableThreadAllocationLimit (StgPtr tso);
void rts_disableThreadAllocationLimit (StgPtr tso);
......
......@@ -468,6 +468,9 @@ RTS_FUN_DECL(stg_traceCcszh);
RTS_FUN_DECL(stg_clearCCSzh);
RTS_FUN_DECL(stg_traceEventzh);
RTS_FUN_DECL(stg_traceMarkerzh);
RTS_FUN_DECL(stg_getThreadAllocationCounterzh);
RTS_FUN_DECL(stg_setThreadAllocationCounterzh);
/* Other misc stuff */
// See wiki:Commentary/Compiler/Backends/PprC#Prototypes
......
......@@ -105,6 +105,7 @@ import Data.Maybe
import GHC.Base
import {-# SOURCE #-} GHC.IO.Handle ( hFlush )
import {-# SOURCE #-} GHC.IO.Handle.FD ( stdout )
import GHC.Int
import GHC.IO
import GHC.IO.Encoding.UTF8
import GHC.IO.Exception
......@@ -194,18 +195,16 @@ instance Ord ThreadId where
--
-- @since 4.8.0.0
setAllocationCounter :: Int64 -> IO ()
setAllocationCounter i = do
ThreadId t <- myThreadId
rts_setThreadAllocationCounter t i
setAllocationCounter (I64# i) = IO $ \s ->
case setThreadAllocationCounter# i s of s' -> (# s', () #)
-- | Return the current value of the allocation counter for the
-- current thread.
--
-- @since 4.8.0.0
getAllocationCounter :: IO Int64
getAllocationCounter = do
ThreadId t <- myThreadId
rts_getThreadAllocationCounter t
getAllocationCounter = IO $ \s ->
case getThreadAllocationCounter# s of (# s', ctr #) -> (# s', I64# ctr #)
-- | Enables the allocation counter to be treated as a limit for the
-- current thread. When the allocation limit is enabled, if the
......@@ -242,16 +241,6 @@ disableAllocationLimit = do
ThreadId t <- myThreadId
rts_disableThreadAllocationLimit t
-- We cannot do these operations safely on another thread, because on
-- a 32-bit machine we cannot do atomic operations on a 64-bit value.
-- Therefore, we only expose APIs that allow getting and setting the
-- limit of the current thread.
foreign import ccall unsafe "rts_setThreadAllocationCounter"
rts_setThreadAllocationCounter :: ThreadId# -> Int64 -> IO ()
foreign import ccall unsafe "rts_getThreadAllocationCounter"
rts_getThreadAllocationCounter :: ThreadId# -> IO Int64
foreign import ccall unsafe "rts_enableThreadAllocationLimit"
rts_enableThreadAllocationLimit :: ThreadId# -> IO ()
......
......@@ -2491,3 +2491,23 @@ stg_traceMarkerzh ( W_ msg )
return ();
}
stg_getThreadAllocationCounterzh ()
{
// Account for the allocation in the current block
W_ offset;
offset = Hp - bdescr_start(CurrentNursery);
return (StgTSO_alloc_limit(CurrentTSO) - TO_I64(offset));
}
stg_setThreadAllocationCounterzh ( I64 counter )
{
// Allocation in the current block will be subtracted by
// getThreadAllocationCounter#, so we have to offset any existing
// allocation here. See also openNursery/closeNursery in
// compiler/codeGen/StgCmmForeign.hs.
W_ offset;
offset = Hp - bdescr_start(CurrentNursery);
StgTSO_alloc_limit(CurrentTSO) = counter + TO_I64(offset);
return ();
}
......@@ -743,8 +743,6 @@
SymI_HasProto(rts_isProfiled) \
SymI_HasProto(rts_isDynamic) \
SymI_HasProto(rts_setInCallCapability) \
SymI_HasProto(rts_getThreadAllocationCounter) \
SymI_HasProto(rts_setThreadAllocationCounter) \
SymI_HasProto(rts_enableThreadAllocationLimit) \
SymI_HasProto(rts_disableThreadAllocationLimit) \
SymI_HasProto(rts_setMainThread) \
......@@ -895,6 +893,8 @@
SymI_HasProto(stg_traceCcszh) \
SymI_HasProto(stg_traceEventzh) \
SymI_HasProto(stg_traceMarkerzh) \
SymI_HasProto(stg_getThreadAllocationCounterzh) \
SymI_HasProto(stg_setThreadAllocationCounterzh) \
SymI_HasProto(getMonotonicNSec) \
SymI_HasProto(lockFile) \
SymI_HasProto(unlockFile) \
......
......@@ -165,19 +165,8 @@ rts_getThreadId(StgPtr tso)
}
/* ---------------------------------------------------------------------------
* Getting & setting the thread allocation limit
* Enabling and disabling the thread allocation limit
* ------------------------------------------------------------------------ */
HsInt64 rts_getThreadAllocationCounter(StgPtr tso)
{
// NB. doesn't take into account allocation in the current nursery
// block, so it might be off by up to 4k.
return PK_Int64((W_*)&(((StgTSO *)tso)->alloc_limit));
}
void rts_setThreadAllocationCounter(StgPtr tso, HsInt64 i)
{
ASSIGN_Int64((W_*)&(((StgTSO *)tso)->alloc_limit), i);
}
void rts_enableThreadAllocationLimit(StgPtr tso)
{
......
......@@ -392,3 +392,9 @@ test('T14702', [ ignore_stdout
test('T14900', normal, compile_and_run, ['-package ghc-compact'])
test('InternalCounters', normal, run_command,
['$MAKE -s --no-print-directory InternalCounters'])
test('alloccounter1', normal, compile_and_run,
[
# avoid allocating stack chunks, which counts as
# allocation and messes up the results:
'-with-rtsopts=-k1m'
])
module Main where
import Control.Exception
import Control.Monad
import Data.List
import System.Mem
main = do
let
testAlloc n = do
let start = 999999
setAllocationCounter start
evaluate (last [1..n])
c <- getAllocationCounter
-- print (start - c)
return (start - c)
results <- forM [1..1000] testAlloc
print (sort results == results)
-- results better be in ascending order
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment