Commit c6759080 authored by Ben Gamari, committed by Marge Bot

rts: Make m32 allocator per-ObjectCode

macOS Catalina is finally going to force our hand by forbidding writable
executable mappings. Unfortunately, this is quite incompatible with the
current global m32 allocator, which mixes symbols from various objects
in a single page. The problem here is that some of these symbols may not
yet be resolved (e.g. may not yet have had their relocations applied), as
resolution happens lazily, and therefore we cannot yet make the section
read-only and hence executable.

The easiest way around this is to simply create one m32 allocator per
ObjectCode. This may slightly increase fragmentation for short-running
programs but I suspect will actually improve fragmentation for programs
doing lots of loading/unloading since we can always free all of the
pages allocated to an object when it is unloaded (although this ability
will only be implemented in a later patch).
parent 70b62c97
Pipeline #12034 passed with stages
in 429 minutes and 27 seconds
......@@ -501,9 +501,6 @@ initLinker_ (int retain_cafs)
mmap_32bit_base = (void*)RtsFlags.MiscFlags.linkerMemBase;
}
if (RTS_LINKER_USE_MMAP)
m32_allocator_init();
#if defined(OBJFORMAT_PEi386)
initLinker_PEi386();
#endif
......@@ -1233,6 +1230,7 @@ void freeObjectCode (ObjectCode *oc)
ocDeinit_ELF(oc);
#endif
m32_allocator_free(oc->m32);
stgFree(oc->fileName);
stgFree(oc->archiveMemberName);
......@@ -1311,6 +1309,10 @@ mkOc( pathchar *path, char *image, int imageSize,
/* chain it onto the list of objects */
oc->next = NULL;
#if RTS_LINKER_USE_MMAP
oc->m32 = m32_allocator_new();
#endif
IF_DEBUG(linker, debugBelch("mkOc: done\n"));
return oc;
}
......@@ -1629,6 +1631,8 @@ int ocTryLoad (ObjectCode* oc) {
# endif
if (!r) { return r; }
m32_allocator_flush(oc->m32);
// run init/init_array/ctors/mod_init_func
IF_DEBUG(linker, debugBelch("ocTryLoad: ocRunInit start\n"));
......
......@@ -10,6 +10,7 @@
#include "Rts.h"
#include "Hash.h"
#include "linker/M32Alloc.h"
#if RTS_LINKER_USE_MMAP
#include <sys/mman.h>
......@@ -244,6 +245,11 @@ typedef struct _ObjectCode {
require extra information.*/
HashTable *extraInfos;
#if RTS_LINKER_USE_MMAP == 1
/* The m32 allocator used for allocating small sections
* and symbol extras during loading */
m32_allocator *m32;
#endif
} ObjectCode;
#define OC_INFORMATIVE_FILENAME(OC) \
......
......@@ -778,7 +778,7 @@ ocGetNames_ELF ( ObjectCode* oc )
// (i.e. we cannot map the secions separately), or if the section
// size is small.
else if (!oc->imageMapped || size < getPageSize() / 3) {
start = m32_alloc(size, 8);
start = m32_alloc(oc->m32, size, 8);
if (start == NULL) goto fail;
memcpy(start, oc->image + offset, size);
alloc = SECTION_M32;
......
......@@ -616,9 +616,6 @@ fail:
#endif
}
if (RTS_LINKER_USE_MMAP)
m32_allocator_flush();
DEBUG_LOG("done\n");
return retcode;
}
......
......@@ -8,6 +8,7 @@
#include "Rts.h"
#include "sm/OSMem.h"
#include "RtsUtils.h"
#include "linker/M32Alloc.h"
#include "LinkerInternals.h"
......@@ -123,12 +124,9 @@ struct m32_alloc_t {
* Currently an allocator is just a set of pages being filled. The maximum
* number of pages can be configured with M32_MAX_PAGES.
*/
struct m32_allocator_t {
   struct m32_alloc_t pages[M32_MAX_PAGES];
};
/**
* Wrapper for `unmap` that handles error cases.
......@@ -150,23 +148,37 @@ munmapForLinker (void * addr, size_t size)
* This is the real implementation. There is another dummy implementation below.
* See the note titled "Compile Time Trickery" at the top of this file.
*/
void
m32_allocator_init(void)
m32_allocator *
m32_allocator_new()
{
memset(&alloc, 0, sizeof(struct m32_allocator_t));
// Preallocate the initial M32_MAX_PAGES to ensure that they don't
// fragment the memory.
size_t pgsz = getPageSize();
char* bigchunk = mmapForLinker(pgsz * M32_MAX_PAGES,MAP_ANONYMOUS,-1,0);
if (bigchunk == NULL)
barf("m32_allocator_init: Failed to map");
m32_allocator *alloc =
stgMallocBytes(sizeof(m32_allocator), "m32_new_allocator");
memset(alloc, 0, sizeof(struct m32_allocator_t));
// Preallocate the initial M32_MAX_PAGES to ensure that they don't
// fragment the memory.
size_t pgsz = getPageSize();
char* bigchunk = mmapForLinker(pgsz * M32_MAX_PAGES,MAP_ANONYMOUS,-1,0);
if (bigchunk == NULL)
barf("m32_allocator_init: Failed to map");
int i;
for (i=0; i<M32_MAX_PAGES; i++) {
alloc->pages[i].base_addr = bigchunk + i*pgsz;
*((uintptr_t*)alloc->pages[i].base_addr) = 1;
alloc->pages[i].current_size = M32_REFCOUNT_BYTES;
}
return alloc;
}
int i;
for (i=0; i<M32_MAX_PAGES; i++) {
alloc.pages[i].base_addr = bigchunk + i*pgsz;
*((uintptr_t*)alloc.pages[i].base_addr) = 1;
alloc.pages[i].current_size = M32_REFCOUNT_BYTES;
}
/**
 * Free an m32_allocator. Note that this doesn't free the pages
 * allocated using the allocator. This must be done separately with m32_free.
 */
void m32_allocator_free(m32_allocator *alloc)
{
// Drop the allocator's own references to its pages first; pages that
// still hold live allocations are released later when m32_free brings
// their refcount to zero.
m32_allocator_flush(alloc);
stgFree(alloc);
}
/**
......@@ -193,10 +205,10 @@ m32_free_internal(void * addr) {
* See the note titled "Compile Time Trickery" at the top of this file.
*/
void
m32_allocator_flush(void) {
m32_allocator_flush(m32_allocator *alloc) {
int i;
for (i=0; i<M32_MAX_PAGES; i++) {
void * addr = __sync_fetch_and_and(&alloc.pages[i].base_addr, 0x0);
void * addr = __sync_fetch_and_and(&alloc->pages[i].base_addr, 0x0);
if (addr != 0) {
m32_free_internal(addr);
}
......@@ -243,7 +255,7 @@ m32_free(void *addr, size_t size)
* See the note titled "Compile Time Trickery" at the top of this file.
*/
void *
m32_alloc(size_t size, size_t alignment)
m32_alloc(struct m32_allocator_t *alloc, size_t size, size_t alignment)
{
size_t pgsz = getPageSize();
......@@ -259,7 +271,7 @@ m32_alloc(size_t size, size_t alignment)
int i;
for (i=0; i<M32_MAX_PAGES; i++) {
// empty page
if (alloc.pages[i].base_addr == 0) {
if (alloc->pages[i].base_addr == 0) {
empty = empty == -1 ? i : empty;
continue;
}
......@@ -268,21 +280,21 @@ m32_alloc(size_t size, size_t alignment)
// few bytes left to allocate and we don't get to use or free them
// until we use up all the "filling" pages. This will unnecessarily
// allocate new pages and fragment the address space.
if (*((uintptr_t*)(alloc.pages[i].base_addr)) == 1) {
alloc.pages[i].current_size = M32_REFCOUNT_BYTES;
if (*((uintptr_t*)(alloc->pages[i].base_addr)) == 1) {
alloc->pages[i].current_size = M32_REFCOUNT_BYTES;
}
// page can contain the buffer?
size_t alsize = ROUND_UP(alloc.pages[i].current_size, alignment);
size_t alsize = ROUND_UP(alloc->pages[i].current_size, alignment);
if (size <= pgsz - alsize) {
void * addr = (char*)alloc.pages[i].base_addr + alsize;
alloc.pages[i].current_size = alsize + size;
void * addr = (char*)alloc->pages[i].base_addr + alsize;
alloc->pages[i].current_size = alsize + size;
// increment the counter atomically
__sync_fetch_and_add((uintptr_t*)alloc.pages[i].base_addr, 1);
__sync_fetch_and_add((uintptr_t*)alloc->pages[i].base_addr, 1);
return addr;
}
// most filled?
if (most_filled == -1
|| alloc.pages[most_filled].current_size < alloc.pages[i].current_size)
|| alloc->pages[most_filled].current_size < alloc->pages[i].current_size)
{
most_filled = i;
}
......@@ -290,9 +302,9 @@ m32_alloc(size_t size, size_t alignment)
// If we haven't found an empty page, flush the most filled one
if (empty == -1) {
m32_free_internal(alloc.pages[most_filled].base_addr);
alloc.pages[most_filled].base_addr = 0;
alloc.pages[most_filled].current_size = 0;
m32_free_internal(alloc->pages[most_filled].base_addr);
alloc->pages[most_filled].base_addr = 0;
alloc->pages[most_filled].current_size = 0;
empty = most_filled;
}
......@@ -301,9 +313,9 @@ m32_alloc(size_t size, size_t alignment)
if (addr == NULL) {
return NULL;
}
alloc.pages[empty].base_addr = addr;
alloc->pages[empty].base_addr = addr;
// Add M32_REFCOUNT_BYTES bytes for the counter + padding
alloc.pages[empty].current_size =
alloc->pages[empty].current_size =
size+ROUND_UP(M32_REFCOUNT_BYTES,alignment);
// Initialize the counter:
// 1 for the allocator + 1 for the returned allocated memory
......@@ -317,14 +329,19 @@ m32_alloc(size_t size, size_t alignment)
// they are, there is a bug at the call site.
// See the note titled "Compile Time Trickery" at the top of this file.
void
m32_allocator_init(void)
m32_allocator *
m32_allocator_new(void)
{
barf("%s: RTS_LINKER_USE_MMAP is %d", __func__, RTS_LINKER_USE_MMAP);
}
/* Dummy implementation: compiled only when RTS_LINKER_USE_MMAP is 0, in
 * which case the m32 allocator must never be used. */
void m32_allocator_free(m32_allocator *alloc)
{
barf("%s: RTS_LINKER_USE_MMAP is %d", __func__, RTS_LINKER_USE_MMAP);
}
void
m32_allocator_flush(void)
m32_flush(void)
{
barf("%s: RTS_LINKER_USE_MMAP is %d", __func__, RTS_LINKER_USE_MMAP);
}
......
......@@ -26,12 +26,17 @@
#define M32_NO_RETURN GNUC3_ATTRIBUTE(__noreturn__)
#endif
/* Opaque handle to a per-ObjectCode m32 allocator; the definition lives
 * in M32Alloc.c. */
struct m32_allocator_t;
typedef struct m32_allocator_t m32_allocator;

m32_allocator *m32_allocator_new(void) M32_NO_RETURN;
void m32_allocator_free(m32_allocator *alloc) M32_NO_RETURN;
void m32_allocator_flush(m32_allocator *alloc) M32_NO_RETURN;
void m32_free(void *addr, size_t size) M32_NO_RETURN;
void * m32_alloc(m32_allocator *alloc, size_t size, size_t alignment) M32_NO_RETURN;
#include "EndPrivate.h"
......@@ -53,6 +53,7 @@
int ocAllocateExtras(ObjectCode* oc, int count, int first, int bssSize)
{
void* oldImage = oc->image;
const size_t extras_size = sizeof(SymbolExtra) * count;
if (count > 0 || bssSize > 0) {
if (!RTS_LINKER_USE_MMAP) {
......@@ -64,7 +65,7 @@ int ocAllocateExtras(ObjectCode* oc, int count, int first, int bssSize)
oc->image -= misalignment;
oc->image = stgReallocBytes( oc->image,
misalignment +
aligned + sizeof (SymbolExtra) * count,
aligned + extras_size,
"ocAllocateExtras" );
oc->image += misalignment;
......@@ -73,7 +74,7 @@ int ocAllocateExtras(ObjectCode* oc, int count, int first, int bssSize)
/* Keep image, bssExtras and symbol_extras contiguous */
size_t n = roundUpToPage(oc->fileSize);
bssSize = roundUpToAlign(bssSize, 8);
size_t allocated_size = n + bssSize + (sizeof(SymbolExtra) * count);
size_t allocated_size = n + bssSize + extras_size;
void *new = mmapForLinker(allocated_size, MAP_ANONYMOUS, -1, 0);
if (new) {
memcpy(new, oc->image, oc->fileSize);
......@@ -92,13 +93,13 @@ int ocAllocateExtras(ObjectCode* oc, int count, int first, int bssSize)
return 0;
}
} else {
oc->symbol_extras = m32_alloc(sizeof(SymbolExtra) * count, 8);
oc->symbol_extras = m32_alloc(oc->m32, extras_size, 8);
if (oc->symbol_extras == NULL) return 0;
}
}
if (oc->symbol_extras != NULL) {
memset( oc->symbol_extras, 0, sizeof (SymbolExtra) * count );
memset( oc->symbol_extras, 0, extras_size );
}
// ObjectCodeFormatInfo contains computed addresses based on offset to
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment