Commit f5974c88 authored by Ben Gamari, committed by Ben Gamari

rts: Make MBLOCK_SPACE_SIZE dynamic

Previously this was introduced in D524 as a compile-time constant.
Sadly, this isn't flexible enough to allow for environments where
ulimits restrict the maximum address space size (see, for instance,
#10877).

Consequently, we are forced to make this dynamic. In principle this
shouldn't be so terrible, as we can place both the beginning and end
addresses within the same cache line, likely incurring only an
additional instruction or so in HEAP_ALLOCED.

Test Plan: validate

Reviewers: austin, simonmar

Reviewed By: simonmar

Subscribers: thomie

Differential Revision: https://phabricator.haskell.org/D1353

GHC Trac Issues: #10877
parent 39b71e81
@@ -377,22 +377,22 @@ void setExecutable (void *p, W_ len, rtsBool exec)
 #ifdef USE_LARGE_ADDRESS_SPACE
 static void *
-osTryReserveHeapMemory (void *hint)
+osTryReserveHeapMemory (W_ len, void *hint)
 {
     void *base, *top;
     void *start, *end;
 
-    /* We try to allocate MBLOCK_SPACE_SIZE + MBLOCK_SIZE,
+    /* We try to allocate len + MBLOCK_SIZE,
        because we need memory which is MBLOCK_SIZE aligned,
        and then we discard what we don't need */
 
-    base = my_mmap(hint, MBLOCK_SPACE_SIZE + MBLOCK_SIZE, MEM_RESERVE);
-    top = (void*)((W_)base + MBLOCK_SPACE_SIZE + MBLOCK_SIZE);
+    base = my_mmap(hint, len + MBLOCK_SIZE, MEM_RESERVE);
+    top = (void*)((W_)base + len + MBLOCK_SIZE);
 
     if (((W_)base & MBLOCK_MASK) != 0) {
         start = MBLOCK_ROUND_UP(base);
         end = MBLOCK_ROUND_DOWN(top);
-        ASSERT(((W_)end - (W_)start) == MBLOCK_SPACE_SIZE);
+        ASSERT(((W_)end - (W_)start) == len);
 
         if (munmap(base, (W_)start-(W_)base) < 0) {
             sysErrorBelch("unable to release slop before heap");
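An editorial aside: the pattern in this hunk (reserve len + MBLOCK_SIZE, then trim the unaligned slop) is the standard way to obtain alignment stronger than the OS page size. A self-contained sketch of the same trick, assuming Linux-style mmap flags; the 1 MiB ALIGN and the function name are illustrative, not the RTS's:

#include <stdint.h>
#include <sys/mman.h>

#define ALIGN ((size_t)1 << 20)   /* stand-in for MBLOCK_SIZE (1 MiB) */

/* Reserve `len` bytes aligned to ALIGN: over-reserve by ALIGN bytes,
 * then munmap the unaligned slop at both ends. */
static void *reserve_aligned(size_t len)
{
    char *base = mmap(NULL, len + ALIGN, PROT_NONE,
                      MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0);
    if (base == MAP_FAILED)
        return NULL;

    char *start = (char *)(((uintptr_t)base + ALIGN - 1)
                           & ~(uintptr_t)(ALIGN - 1));
    char *top   = base + len + ALIGN;

    if (start != base)                     /* slop before the aligned start */
        munmap(base, (size_t)(start - base));
    if (start + len != top)                /* slop after the end */
        munmap(start + len, (size_t)(top - (start + len)));
    return start;                          /* ALIGN-aligned, len bytes long */
}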
@@ -407,7 +407,7 @@ osTryReserveHeapMemory (void *hint)
     return start;
 }
 
-void *osReserveHeapMemory(void)
+void *osReserveHeapMemory(W_ len)
 {
     int attempt;
     void *at;
@@ -425,8 +425,8 @@ void *osReserveHeapMemory(void)
     attempt = 0;
     do {
-        at = osTryReserveHeapMemory((void*)((W_)8 * (1 << 30) +
-                                            attempt * BLOCK_SIZE));
+        void *hint = (void*)((W_)8 * (1 << 30) + attempt * BLOCK_SIZE);
+        at = osTryReserveHeapMemory(len, hint);
     } while ((W_)at < ((W_)8 * (1 << 30)));
 
     return at;
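For context: mmap treats its address argument as a hint, so the reservation may land somewhere other than requested, and the loop above therefore bumps the hint until the OS returns an address at or above 8 GiB. A standalone sketch of that probing pattern, with hypothetical names (try_reserve is a stand-in for osTryReserveHeapMemory):

#include <stdint.h>
#include <stddef.h>
#include <sys/mman.h>

/* Hypothetical stand-in for osTryReserveHeapMemory: ask for `len` bytes
 * near `hint`; the kernel is free to place the mapping elsewhere. */
static void *try_reserve(uintptr_t hint, size_t len)
{
    return mmap((void *)hint, len, PROT_NONE,
                MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0);
}

/* Bump the hint until the mapping actually lands at or above 8 GiB,
 * keeping the heap clear of the low end of the address space.
 * (Error handling elided: MAP_FAILED is (void*)-1 and would also
 * terminate the loop, so real code must check for it.) */
static void *reserve_above_8gib(size_t len, size_t step)
{
    const uintptr_t floor_addr = (uintptr_t)8 << 30;  /* 8 GiB */
    uintptr_t attempt = 0;
    void *at;
    do {
        at = try_reserve(floor_addr + attempt * step, len);
        attempt++;
    } while ((uintptr_t)at < floor_addr);
    return at;
}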
@@ -467,7 +467,8 @@ void osReleaseHeapMemory(void)
 {
     int r;
 
-    r = munmap((void*)mblock_address_space_begin, MBLOCK_SPACE_SIZE);
+    r = munmap((void*)mblock_address_space.begin,
+               mblock_address_space.end - mblock_address_space.begin);
     if(r < 0)
         sysErrorBelch("unable to release address space");
 }
@@ -34,12 +34,12 @@
    On 64-bit machines, we have two possibilities. One is to request
    a single chunk of address space that we deem "large enough"
-   (currently 1TB, could easily be extended to, say 16TB or more).
-   Memory from that chunk is GC memory, everything else is not. This
-   case is tricky in that it requires support from the OS to allocate
-   address space without allocating memory (in practice, all modern
-   OSes do this). It's also tricky in that it is the only case where
-   a successful HEAP_ALLOCED(p) check can trigger a segfault when
+   (currently 1TB or the ulimit size, whichever is smaller, although this could
+   easily be extended to, say 16TB or more). Memory from that chunk is GC
+   memory, everything else is not. This case is tricky in that it requires
+   support from the OS to allocate address space without allocating memory (in
+   practice, all modern OSes do this). It's also tricky in that it is the only
+   case where a successful HEAP_ALLOCED(p) check can trigger a segfault when
    accessing p (and for debugging purposes, it will).
 
    Alternatively, the older implementation caches one 12-bit block map
@@ -51,16 +51,14 @@
 #ifdef USE_LARGE_ADDRESS_SPACE
 
-extern W_ mblock_address_space_begin;
-#if aarch64_HOST_ARCH
-# define MBLOCK_SPACE_SIZE ((StgWord)1 << 38) /* 1/4 TB */
-#else
-# define MBLOCK_SPACE_SIZE ((StgWord)1 << 40) /* 1 TB */
-#endif
+struct mblock_address_range {
+    W_ begin, end;
+    W_ padding[6]; // ensure nothing else inhabits this cache line
+} ATTRIBUTE_ALIGNED(64);
+extern struct mblock_address_range mblock_address_space;
 
-# define HEAP_ALLOCED(p) ((W_)(p) >= mblock_address_space_begin && \
-                          (W_)(p) < (mblock_address_space_begin + \
-                                     MBLOCK_SPACE_SIZE))
+# define HEAP_ALLOCED(p) ((W_)(p) >= mblock_address_space.begin && \
+                          (W_)(p) < (mblock_address_space.end))
 # define HEAP_ALLOCED_GC(p) HEAP_ALLOCED(p)
 
 #elif SIZEOF_VOID_P == 4
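A note on why the commit message expects only an instruction or so of extra cost: HEAP_ALLOCED now reads two words instead of one, but since the struct is 64-byte aligned with begin and end adjacent, both loads hit the same cache line. A minimal sketch of the resulting check (illustrative names, GCC/Clang attribute syntax assumed):

#include <stdint.h>

/* Both bounds live in one 64-byte line, so a HEAP_ALLOCED-style test
 * costs two loads from the same cache line plus two compares. */
struct addr_range {
    uintptr_t begin, end;
    uintptr_t padding[6];          /* fill the rest of the cache line */
} __attribute__((aligned(64)));

static struct addr_range heap_range;

static inline int in_heap(const void *p)
{
    uintptr_t w = (uintptr_t)p;
    return w >= heap_range.begin && w < heap_range.end;
}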
@@ -96,7 +96,12 @@ typedef struct free_list {
 static free_list *free_list_head;
 static W_ mblock_high_watermark;
-W_ mblock_address_space_begin = 0;
+/*
+ * it is quite important that these are in the same cache line as they
+ * are both needed by HEAP_ALLOCED. Moreover, we need to ensure that they
+ * don't share a cache line with anything else to prevent false sharing.
+ */
+struct mblock_address_range mblock_address_space = { 0, 0, {} };
 
 static void *getAllocatedMBlock(free_list **start_iter, W_ startingAt)
 {
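As a sanity check on the padding arithmetic (editorial, not part of the patch): two word-sized fields plus six words of padding come to exactly 64 bytes on a 64-bit platform, one typical cache line, which a C11 static assertion can pin down:

#include <stdint.h>
#include <assert.h>

struct range_like {
    uint64_t begin, end;      /* W_ is one machine word: 8 bytes here */
    uint64_t padding[6];      /* 2 + 6 words = 64 bytes in total */
} __attribute__((aligned(64)));

/* Refuses to compile if the struct ever stops filling exactly one
 * 64-byte cache line. */
static_assert(sizeof(struct range_like) == 64,
              "range must occupy exactly one cache line");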
@@ -131,7 +136,7 @@ void * getFirstMBlock(void **state STG_UNUSED)
     casted_state = &fake_state;
     *casted_state = free_list_head;
 
-    return getAllocatedMBlock(casted_state, mblock_address_space_begin);
+    return getAllocatedMBlock(casted_state, mblock_address_space.begin);
 }
 
 void * getNextMBlock(void **state STG_UNUSED, void *mblock)
@@ -190,8 +195,7 @@ static void *getFreshMBlocks(nat n)
     W_ size = MBLOCK_SIZE * (W_)n;
     void *addr = (void*)mblock_high_watermark;
 
-    if (mblock_high_watermark + size >
-        mblock_address_space_begin + MBLOCK_SPACE_SIZE)
+    if (mblock_high_watermark + size > mblock_address_space.end)
     {
         // whoa, 1 TB of heap?
         errorBelch("out of memory");
@@ -611,7 +615,8 @@ freeAllMBlocks(void)
     osReleaseHeapMemory();
 
-    mblock_address_space_begin = (W_)-1;
+    mblock_address_space.begin = (W_)-1;
+    mblock_address_space.end = (W_)-1;
     mblock_high_watermark = (W_)-1;
 #else
     osFreeAllMBlocks();
@@ -634,9 +639,16 @@ initMBlocks(void)
 #ifdef USE_LARGE_ADDRESS_SPACE
     {
-        void *addr = osReserveHeapMemory();
+        W_ size;
+#if aarch64_HOST_ARCH
+        size = (W_)1 << 38; // 1/4 TByte
+#else
+        size = (W_)1 << 40; // 1 TByte
+#endif
+        void *addr = osReserveHeapMemory(size);
 
-        mblock_address_space_begin = (W_)addr;
+        mblock_address_space.begin = (W_)addr;
+        mblock_address_space.end = (W_)addr + size;
         mblock_high_watermark = (W_)addr;
     }
 #elif SIZEOF_VOID_P == 8
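The hunk above picks a fixed size per architecture; the ulimit awareness described in the commit message comes from how the reservation reacts when the OS refuses that much address space. One plausible way to respect RLIMIT_AS up front is sketched below; this is an assumption for illustration, not the code the commit actually uses:

#include <stdint.h>
#include <sys/resource.h>

/* Clamp the desired reservation to the address-space ulimit, if any,
 * so the OS isn't asked to reserve more than the process may ever map. */
static uint64_t clamp_to_as_limit(uint64_t desired)
{
    struct rlimit rl;
    if (getrlimit(RLIMIT_AS, &rl) == 0 &&
        rl.rlim_cur != RLIM_INFINITY &&
        (uint64_t)rl.rlim_cur < desired)
        return (uint64_t)rl.rlim_cur;
    return desired;
}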
@@ -29,13 +29,13 @@ void setExecutable (void *p, W_ len, rtsBool exec);
    we will ever need, which keeps everything nice and consecutive.
 */
 
-// Reserve the large address space blob, and return the address that
-// the OS has chosen for it. It is not safe to access the memory
-// pointed to by the return value, until that memory is committed
-// using osCommitMemory().
+// Reserve the large address space blob of the given size, and return the
+// address that the OS has chosen for it. It is not safe to access the memory
+// pointed to by the return value, until that memory is committed using
+// osCommitMemory().
 //
 // This function is called once when the block allocator is initialized.
-void *osReserveHeapMemory(void);
+void *osReserveHeapMemory(W_ len);
 
 // Commit (allocate memory for) a piece of address space, which must
 // be within the previously reserved space After this call, it is safe
@@ -429,11 +429,11 @@ void setExecutable (void *p, W_ len, rtsBool exec)
 static void* heap_base = NULL;
 
-void *osReserveHeapMemory (void)
+void *osReserveHeapMemory (W_ len)
 {
     void *start;
 
-    heap_base = VirtualAlloc(NULL, MBLOCK_SPACE_SIZE + MBLOCK_SIZE,
+    heap_base = VirtualAlloc(NULL, len + MBLOCK_SIZE,
                              MEM_RESERVE, PAGE_READWRITE);
     if (heap_base == NULL) {
         if (GetLastError() == ERROR_NOT_ENOUGH_MEMORY) {
@@ -441,7 +441,7 @@ void *osReserveHeapMemory (void)
         } else {
             sysErrorBelch(
                 "osReserveHeapMemory: VirtualAlloc MEM_RESERVE %llu bytes failed",
-                MBLOCK_SPACE_SIZE + MBLOCK_SIZE);
+                len + MBLOCK_SIZE);
         }
         stg_exit(EXIT_FAILURE);
     }
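For reference, the reserve-then-commit split that osReserveHeapMemory and osCommitMemory implement maps directly onto the Win32 API: MEM_RESERVE claims address space without backing it, and a later MEM_COMMIT makes a sub-range usable. A minimal sketch (illustrative, not the RTS code):

#include <windows.h>

/* Reserve a large region without committing memory, then commit just
 * the pages that are actually needed. */
static void *reserve_then_commit(SIZE_T reserve_len, SIZE_T commit_len)
{
    void *base = VirtualAlloc(NULL, reserve_len,
                              MEM_RESERVE, PAGE_READWRITE);
    if (base == NULL)
        return NULL;   /* reservation failed, e.g. not enough room */

    /* Committing may still fail even though the reservation succeeded. */
    if (VirtualAlloc(base, commit_len, MEM_COMMIT, PAGE_READWRITE) == NULL) {
        VirtualFree(base, 0, MEM_RELEASE);
        return NULL;
    }
    return base;   /* first commit_len bytes are now readable/writable */
}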