Commit 41230e26 authored by Daniel Gröber (dxld)'s avatar Daniel Gröber (dxld) Committed by Marge Bot
Browse files

Zero out pinned block alignment slop when profiling

The heap profiler currently cannot traverse pinned blocks because of
alignment slop. This used to just be a minor annoyance as the whole block
is accounted into a special cost center rather than the respective object's
CCS, cf. #7275. However for the new root profiler we would like to be able
to visit _every_ closure on the heap. We need to do this so we can get rid
of the current 'flip' bit hack in the heap traversal code.

Since info pointers are always non-zero we can in principle skip all the
slop in the profiler if we can rely on it being zeroed. This assumption
caused problems in the past though, commit a586b33f ("rts: Correct
handling of LARGE ARR_WORDS in LDV profiler"), part of !1118, tried to use
the same trick for BF_LARGE objects but neglected to take into account that
shrink*Array# functions don't ensure that slop is zeroed when not
compiling with profiling.

Later, commit 0c114c65 ("Handle large ARR_WORDS in heap census", fixing
&#35;17572) had to work around this. That workaround will no longer be needed,
as we will only be assuming slop is zeroed when profiling is on.

This commit also reduces the amount of slop we introduce in the first
place by calculating the needed alignment before doing the allocation for
small objects where we know the next available address. For large objects
we don't know how much alignment we'll have to do yet since those details
are hidden behind the allocateMightFail function so there we continue to
allocate the maximum additional words we'll need to do the alignment.

So we don't have to duplicate all this logic in the cmm code we pull it
into the RTS allocatePinned function instead.

Metric Decrease:
    T7257
    haddock.Cabal
    haddock.base
parent 7b41f21b
......@@ -170,10 +170,13 @@ extern generation * oldest_gen;
Allocates memory from the nursery in
the current Capability.
StgPtr allocatePinned(Capability *cap, W_ n)
StgPtr allocatePinned(Capability *cap, W_ n, W_ alignment, W_ align_off)
Allocates a chunk of contiguous store
n words long, which is at a fixed
address (won't be moved by GC).
address (won't be moved by GC). The
word at the byte offset 'align_off'
will be aligned to 'alignment', which
must be a power of two.
Returns a pointer to the first word.
Always succeeds.
......@@ -191,7 +194,7 @@ extern generation * oldest_gen;
StgPtr allocate ( Capability *cap, W_ n );
StgPtr allocateMightFail ( Capability *cap, W_ n );
StgPtr allocatePinned ( Capability *cap, W_ n );
StgPtr allocatePinned ( Capability *cap, W_ n, W_ alignment, W_ align_off);
/* memory allocator for executable memory */
typedef void* AdjustorWritable;
......
......@@ -89,22 +89,15 @@ stg_newPinnedByteArrayzh ( W_ n )
/* When we actually allocate memory, we need to allow space for the
header: */
bytes = bytes + SIZEOF_StgArrBytes;
/* And we want to align to BA_ALIGN bytes, so we need to allow space
to shift up to BA_ALIGN - 1 bytes: */
bytes = bytes + BA_ALIGN - 1;
/* Now we convert to a number of words: */
words = ROUNDUP_BYTES_TO_WDS(bytes);
("ptr" p) = ccall allocatePinned(MyCapability() "ptr", words);
("ptr" p) = ccall allocatePinned(MyCapability() "ptr", words, BA_ALIGN, SIZEOF_StgArrBytes);
if (p == NULL) {
jump stg_raisezh(base_GHCziIOziException_heapOverflow_closure);
}
TICK_ALLOC_PRIM(SIZEOF_StgArrBytes,WDS(payload_words),0);
/* Now we need to move p forward so that the payload is aligned
to BA_ALIGN bytes: */
p = p + ((-p - SIZEOF_StgArrBytes) & BA_MASK);
/* No write barrier needed since this is a new allocation. */
SET_HDR(p, stg_ARR_WORDS_info, CCCS);
StgArrBytes_bytes(p) = n;
......@@ -121,7 +114,7 @@ stg_newAlignedPinnedByteArrayzh ( W_ n, W_ alignment )
/* we always supply at least word-aligned memory, so there's no
need to allow extra space for alignment if the requirement is less
than a word. This also prevents mischief with alignment == 0. */
if (alignment <= SIZEOF_W) { alignment = 1; }
if (alignment <= SIZEOF_W) { alignment = SIZEOF_W; }
bytes = n;
......@@ -131,23 +124,15 @@ stg_newAlignedPinnedByteArrayzh ( W_ n, W_ alignment )
/* When we actually allocate memory, we need to allow space for the
header: */
bytes = bytes + SIZEOF_StgArrBytes;
/* And we want to align to <alignment> bytes, so we need to allow space
to shift up to <alignment - 1> bytes: */
bytes = bytes + alignment - 1;
/* Now we convert to a number of words: */
words = ROUNDUP_BYTES_TO_WDS(bytes);
("ptr" p) = ccall allocatePinned(MyCapability() "ptr", words);
("ptr" p) = ccall allocatePinned(MyCapability() "ptr", words, alignment, SIZEOF_StgArrBytes);
if (p == NULL) {
jump stg_raisezh(base_GHCziIOziException_heapOverflow_closure);
}
TICK_ALLOC_PRIM(SIZEOF_StgArrBytes,WDS(payload_words),0);
/* Now we need to move p forward so that the payload is aligned
to <alignment> bytes. Note that we are assuming that
<alignment> is a power of 2, which is technically not guaranteed */
p = p + ((-p - SIZEOF_StgArrBytes) & (alignment - 1));
/* No write barrier needed since this is a new allocation. */
SET_HDR(p, stg_ARR_WORDS_info, CCCS);
StgArrBytes_bytes(p) = n;
......
......@@ -1059,6 +1059,24 @@ allocateMightFail (Capability *cap, W_ n)
return p;
}
/**
 * Calculate the number of words we need to add to 'p' so it satisfies the
 * alignment constraint '(p + off) & (align-1) == 0'.
 *
 * 'align' must be a power of two; the result is expressed in words (the
 * byte distance divided by sizeof(W_)).
 */
#define ALIGN_WITH_OFF_W(p, align, off) \
    (((-((uintptr_t)(p)) - (uintptr_t)(off)) & ((uintptr_t)(align)-1)) / sizeof(W_))
/**
 * When profiling we zero the space used for alignment. This allows us to
 * traverse pinned blocks in the heap profiler (a zero word where an info
 * pointer would be tells the profiler to skip slop, since info pointers
 * are never zero).
 *
 * 'len' is a word count; it is scaled to bytes for memset. When not
 * profiling this compiles to nothing but still evaluates-by-name each
 * argument exactly once via (void) casts, so callers get identical
 * diagnostics in both configurations.
 */
#if defined(PROFILING)
#define MEMSET_IF_PROFILING_W(p, val, len) \
    memset((p), (val), (len) * sizeof(W_))
#else
#define MEMSET_IF_PROFILING_W(p, val, len) \
    do { (void)(p); (void)(val); (void)(len); } while(0)
#endif
/* ---------------------------------------------------------------------------
Allocate a fixed/pinned object.
......@@ -1084,29 +1102,48 @@ allocateMightFail (Capability *cap, W_ n)
------------------------------------------------------------------------- */
StgPtr
allocatePinned (Capability *cap, W_ n)
allocatePinned (Capability *cap, W_ n, W_ alignment, W_ align_off)
{
StgPtr p;
bdescr *bd;
// Alignment and offset have to be a power of two
ASSERT(alignment && !(alignment & (alignment - 1)));
ASSERT(alignment >= sizeof(W_));
ASSERT(align_off && !(align_off & (align_off - 1)));
ASSERT(align_off >= sizeof(W_));
// If the request is for a large object, then allocate()
// will give us a pinned object anyway.
if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) {
p = allocateMightFail(cap, n);
// For large objects we don't bother optimizing the number of words
// allocated for alignment reasons. Here we just allocate the maximum
// number of extra words we could possibly need to satisfy the alignment
// constraint.
p = allocateMightFail(cap, n + ROUNDUP_BYTES_TO_WDS(alignment)-1);
if (p == NULL) {
return NULL;
} else {
Bdescr(p)->flags |= BF_PINNED;
W_ off = ALIGN_WITH_OFF_W(p, alignment, align_off);
MEMSET_IF_PROFILING_W(p, 0, off);
p += off;
MEMSET_IF_PROFILING_W(p + n, 0, alignment - off - 1);
return p;
}
}
accountAllocation(cap, n);
bd = cap->pinned_object_block;
W_ off = 0;
if(bd)
off = ALIGN_WITH_OFF_W(bd->free, alignment, align_off);
// If we don't have a block of pinned objects yet, or the current
// one isn't large enough to hold the new object, get a new one.
if (bd == NULL || (bd->free + n) > (bd->start + BLOCK_SIZE_W)) {
if (bd == NULL || (bd->free + off + n) > (bd->start + BLOCK_SIZE_W)) {
// stash the old block on cap->pinned_object_blocks. On the
// next GC cycle these objects will be moved to
......@@ -1158,11 +1195,19 @@ allocatePinned (Capability *cap, W_ n)
// the next GC the BF_EVACUATED flag will be cleared, and the
// block will be promoted as usual (if anything in it is
// live).
off = ALIGN_WITH_OFF_W(bd->free, alignment, align_off);
}
p = bd->free;
MEMSET_IF_PROFILING_W(p, 0, off);
n += off;
accountAllocation(cap, n);
bd->free += n;
return p;
return p + off;
}
/* -----------------------------------------------------------------------------
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment