Commit 04e83666 authored by Simon Marlow

ELF/x86_64: map object file sections separately into the low 2GB

On 64-bit ELF we need to link object files into the low 2GB due to the
small memory model.  Previously we would map the entire object file
using MAP_32BIT, but the object file can consist of 75% or more
symbols, which only need to be present during linking, so this is
wasteful.  In our particular application, we're already running out of
space here.

This patch changes the way we load object files on ELF platforms so
that the object is first mapped above the 2GB boundary, parsed, and
then the important sections are re-mapped into the low 2GB area.

Test Plan:
validate
(also needs testing on OS X & Windows, preferably 32 & 64 bit)

Reviewers: Phyx, trommler, bgamari, austin

Subscribers: hsyl20, thomie, bgamari

Differential Revision: https://phabricator.haskell.org/D975
parent 20e30d5f
......@@ -40,15 +40,21 @@
static void checkAddress (HashTable *addrs, void *addr)
{
ObjectCode *oc;
int i;
if (!lookupHashTable(addrs, (W_)addr)) {
insertHashTable(addrs, (W_)addr, addr);
for (oc = unloaded_objects; oc; oc = oc->next) {
if ((W_)addr >= (W_)oc->image &&
(W_)addr < (W_)oc->image + oc->fileSize) {
oc->referenced = 1;
break;
for (i = 0; i < oc->n_sections; i++) {
if (oc->sections[i].kind != SECTIONKIND_OTHER) {
if ((W_)addr >= (W_)oc->sections[i].start &&
(W_)addr < (W_)oc->sections[i].start
+ oc->sections[i].size) {
oc->referenced = 1;
return;
}
}
}
}
}
......
......@@ -174,7 +174,7 @@ typedef void (*init_t) (int argc, char **argv, char **env);
static HsInt isAlreadyLoaded( pathchar *path );
static HsInt loadOc( ObjectCode* oc );
static ObjectCode* mkOc( pathchar *path, char *image, int imageSize,
char *archiveMemberName
rtsBool mapped, char *archiveMemberName
#ifndef USE_MMAP
#ifdef darwin_HOST_OS
, int misalignment
......@@ -219,7 +219,7 @@ static int ocVerifyImage_ELF ( ObjectCode* oc );
static int ocGetNames_ELF ( ObjectCode* oc );
static int ocResolve_ELF ( ObjectCode* oc );
static int ocRunInit_ELF ( ObjectCode* oc );
#if defined(powerpc_HOST_ARCH) || defined(x86_64_HOST_ARCH) || defined(arm_HOST_ARCH)
#if NEED_SYMBOL_EXTRAS
static int ocAllocateSymbolExtras_ELF ( ObjectCode* oc );
#endif
#elif defined(OBJFORMAT_PEi386)
......@@ -254,7 +254,7 @@ static int ocRunInit_MachO ( ObjectCode* oc );
#ifndef USE_MMAP
static int machoGetMisalignment( FILE * );
#endif
#if defined(powerpc_HOST_ARCH) || defined(x86_64_HOST_ARCH)
#if NEED_SYMBOL_EXTRAS
static int ocAllocateSymbolExtras_MachO ( ObjectCode* oc );
#endif
#ifdef powerpc_HOST_ARCH
......@@ -264,6 +264,34 @@ static void machoInitSymbolsWithoutUnderscore( void );
static void freeProddableBlocks (ObjectCode *oc);
#ifdef USE_MMAP
/**
* One slot of the allocator's "filling" list: a page that small objects are
* currently being packed into. A slot whose base_addr is 0 is unused.
*/
struct m32_alloc_t {
void * base_addr; // Start of the page; the page's object counter lives here
unsigned int current_size; // Bytes already reserved in the page (counter and padding included)
};
// Number of slots in the "filling" list, i.e. how many partially filled pages
// the allocator tracks at once.
#define M32_MAX_PAGES 32
/**
* Allocator
*
* Currently an allocator is just a set of pages being filled. The maximum
* number of pages can be configured with M32_MAX_PAGES.
*
* Note that the typedef name m32_allocator is a *pointer* to this structure.
*/
typedef struct m32_allocator_t {
struct m32_alloc_t pages[M32_MAX_PAGES];
} * m32_allocator;
// We use a global memory allocator
static struct m32_allocator_t allocator;
// Forward declaration of the structure and of the initializer, which is
// defined further down in the file.
struct m32_allocator_t;
static void m32_allocator_init(struct m32_allocator_t *m32);
#endif
/* on x86_64 we have a problem with relocating symbol references in
* code that was compiled without -fPIC. By default, the small memory
* model is used, which assumes that symbol references can fit in a
......@@ -1730,6 +1758,8 @@ initLinker_ (int retain_cafs)
addDLLHandle(WSTR("*.exe"), GetModuleHandle(NULL));
#endif
m32_allocator_init(&allocator);
IF_DEBUG(linker, debugBelch("initLinker: done\n"));
return;
}
......@@ -2194,21 +2224,42 @@ void ghci_enquire ( char* addr )
#ifdef USE_MMAP
// Round x up to the nearest multiple of size. size must be a power of two.
// Both arguments are fully parenthesized so that expression arguments
// (e.g. `a | b`, `1 << n`) are evaluated as a unit. Note that size is
// evaluated twice, so it must not have side effects.
#define ROUND_UP(x,size) (((x) + (size) - 1) & ~((size) - 1))
// Round x down to the nearest multiple of size. size must be a power of two.
#define ROUND_DOWN(x,size) ((x) & ~((size) - 1))
static StgWord getPageSize(void)
{
static StgWord pagesize = 0;
if (pagesize == 0) {
return sysconf(_SC_PAGESIZE);
} else {
return pagesize;
}
}
/* Round size up to the next multiple of the system page size. */
static StgWord roundUpToPage (StgWord size)
{
    const StgWord page = getPageSize();
    return ROUND_UP(size, page);
}
#ifdef OBJFORMAT_ELF
/* Round size down to the previous multiple of the system page size. */
static StgWord roundDownToPage (StgWord size)
{
    const StgWord page = getPageSize();
    return ROUND_DOWN(size, page);
}
#endif
//
// Returns NULL on failure.
//
static void * mmapForLinker (size_t bytes, nat flags, int fd)
static void * mmapForLinker (size_t bytes, nat flags, int fd, int offset)
{
void *map_addr = NULL;
void *result;
int pagesize;
StgWord size;
static nat fixed = 0;
IF_DEBUG(linker, debugBelch("mmapForLinker: start\n"));
pagesize = getpagesize();
size = ROUND_UP(bytes, pagesize);
size = roundUpToPage(bytes);
#if !defined(ALWAYS_PIC) && defined(x86_64_HOST_ARCH)
mmap_again:
......@@ -2227,7 +2278,7 @@ mmap_again:
result = mmap(map_addr, size,
PROT_EXEC|PROT_READ|PROT_WRITE,
MAP_PRIVATE|TRY_MAP_32BIT|fixed|flags, fd, 0);
MAP_PRIVATE|TRY_MAP_32BIT|fixed|flags, fd, offset);
if (result == MAP_FAILED) {
sysErrorBelch("mmap %" FMT_Word " bytes at %p",(W_)size,map_addr);
......@@ -2289,6 +2340,211 @@ mmap_again:
return result;
}
/*
Note [M32 Allocator]
~~~~~~~~~~~~~~~~~~~~
A memory allocator that allocates only pages in the 32-bit range (lower 2GB).
This is useful on 64-bit platforms to ensure that addresses of allocated
objects can be referenced with a 32-bit relative offset.
Initially, the linker used `mmap` to allocate a page per object. Hence it
wasted a lot of space for small objects (see #9314). With this allocator, we
try to fill pages as much as we can for small objects.
How does it work?
-----------------
For small objects, a Word64 counter is added at the beginning of the page they
are stored in. It indicates the number of objects that are still alive in the
page. When the counter drops down to zero, the page is freed. The counter is
atomically decremented, hence the deallocation is thread-safe.
During the allocation phase, the allocator keeps track of some pages that are
not totally filled: the number of pages in the "filling" list is configurable
with M32_MAX_PAGES. Allocation consists in finding some place in one of these
pages or starting a new one, then increasing the page counter. If none of the
pages in the "filling" list has enough free space, the most filled one is
flushed (see below) and a new one is allocated.
The allocator holds a reference on pages in the "filling" list: the counter in
these pages is 1+n where n is the current number of objects allocated in the
page. Hence allocated objects can be freed while the allocator is using
(filling) the page. Flushing a page consists in decreasing its counter and
removing it from the "filling" list. By extension, flushing the allocator
consists in flushing all the pages in the "filling" list. Don't forget to
flush the allocator at the end of the allocation phase in order to avoid space
leaks!
Large objects are objects that are larger than a page (minus the bytes required
for the counter and the optional padding). These objects are allocated into
their own set of pages. We can differentiate large and small objects from
their address: large objects are aligned on page size while small objects never
are (because of the space reserved for the page's object counter).
For large objects, the remaining space at the end of the last page is left
unused by the allocator. It can be used with care as it will be freed with the
associated large object. GHC linker uses this feature/hack, hence changing the
implementation of the M32 allocator must be done with care (i.e. do not try to
improve the allocator to avoid wasting this space without modifying the linker
code accordingly).
Object allocation is *not* thread-safe (however it could be done easily with a
lock in the allocator structure). Object deallocation is thread-safe.
*/
/****************************************************************************
* M32 ALLOCATOR (see Note [M32 Allocator])
***************************************************************************/
/**
 * Wrapper for `munmap` that handles error cases.
 *
 * Unmaps `size` bytes starting at `addr`. A failure is reported through
 * sysErrorBelch; execution continues afterwards.
 */
static void munmapForLinker (void * addr, size_t size)
{
    if (munmap(addr, size) == -1) {
        // Should we abort here?
        sysErrorBelch("munmap");
    }
}
/**
 * Reset the allocator to its initial state: every byte of the structure is
 * zeroed, so all slots of the "filling" list start out unused.
 */
static void m32_allocator_init(m32_allocator m32) {
    memset(m32, 0, sizeof *m32);
}
/**
 * Drop one reference from the page starting at `addr`, unmapping the page
 * once the count reaches zero. The 64-bit counter at the very start of the
 * page is decremented atomically. `addr` must be the *base address* of the
 * page.
 *
 * This is an internal helper; callers should normally go through `m32_free`.
 */
static void m32_free_internal(void * addr) {
    uint64_t *counter = (uint64_t *)addr;
    if (__sync_sub_and_fetch(counter, 1) != 0) {
        // Other references are still outstanding; keep the page.
        return;
    }
    munmapForLinker(addr, getPageSize());
}
/**
 * Give up the allocator's own reference on every page of the "filling" list.
 * Call this once no further allocations are expected, so that pages which
 * are only being kept around for filling are not held unnecessarily.
 *
 * Each slot's page address is atomically swapped out for 0 before the
 * reference is released.
 */
static void m32_allocator_flush(m32_allocator m32) {
    struct m32_alloc_t *slot = m32->pages;
    struct m32_alloc_t *const end = slot + M32_MAX_PAGES;

    for (; slot != end; slot++) {
        void * page = __sync_fetch_and_and(&slot->base_addr, 0x0);
        if (page == 0) {
            // unused slot
            continue;
        }
        m32_free_internal(page);
    }
}
// Return true if an object of the given size and alignment is too big to
// share a page with the 8-byte object counter (plus alignment padding), and
// therefore gets its own dedicated set of pages.
#define m32_is_large_object(size,alignment) \
((size) >= getPageSize() - ROUND_UP(8,(alignment)))
// Return true if the address belongs to an object that has its own dedicated
// set of pages, i.e. the address is page-aligned. The argument is
// parenthesized so the cast applies to the whole expression passed in.
#define m32_is_large_object_addr(addr) \
((uintptr_t)(addr) % getPageSize() == 0)
/**
 * Release the memory belonging to an object.
 *
 * A page-aligned address denotes a "large" object: its pages (`size` rounded
 * up to whole pages) are unmapped directly. Any other address denotes a
 * "small" object: the counter of the page containing it is decremented, and
 * the page itself only goes away once all of its objects have been freed.
 */
static void m32_free(void *addr, unsigned int size) {
    const uintptr_t offset = (uintptr_t) addr % getPageSize();

    if (offset != 0) {
        // small object: step back to the start of its page
        m32_free_internal((void*)((uintptr_t)addr - offset));
        return;
    }

    // large object
    munmapForLinker(addr,ROUND_UP(size,getPageSize()));
}
/**
 * Hand out `size` bytes from the allocator, aligned to `alignment`.
 *
 * Requests classified as large by m32_is_large_object get a mapping of their
 * own straight from mmapForLinker. Anything smaller is carved out of one of
 * the pages on the "filling" list; when no listed page has room, a fresh page
 * is mapped into a free slot, after first flushing the most filled page if
 * every slot is occupied.
 *
 * Returns NULL when a new page is needed and mapping it fails.
 */
static void *
m32_alloc(m32_allocator m32, unsigned int size,
          unsigned int alignment) {
    unsigned int pgsz = (unsigned int)getPageSize();

    // large object: dedicated mapping, nothing to track
    if (m32_is_large_object(size,alignment)) {
        return mmapForLinker(size,MAP_ANONYMOUS,-1,0);
    }

    // small object: look for a page on the filling list that has room
    int free_slot = -1;   // first unused slot seen, if any
    int fullest = -1;     // occupied slot with the largest current_size
    int idx;
    for (idx = 0; idx < M32_MAX_PAGES; idx++) {
        struct m32_alloc_t *pg = &m32->pages[idx];

        if (pg->base_addr == 0) {
            if (free_slot == -1) {
                free_slot = idx;
            }
            continue;
        }

        // does the object fit after aligning the fill mark?
        unsigned int start = ROUND_UP(pg->current_size, alignment);
        if (size <= pgsz - start) {
            pg->current_size = start + size;
            // one more live object in this page
            __sync_fetch_and_add((uint64_t*)pg->base_addr, 1);
            return (char*)pg->base_addr + start;
        }

        if (fullest == -1
            || m32->pages[fullest].current_size < pg->current_size) {
            fullest = idx;
        }
    }

    // No unused slot: drop the allocator's reference on the most filled
    // page and reuse its slot.
    if (free_slot == -1) {
        m32_free_internal(m32->pages[fullest].base_addr);
        m32->pages[fullest].base_addr = 0;
        m32->pages[fullest].current_size = 0;
        free_slot = fullest;
    }

    // Start a new page
    void * fresh = mmapForLinker(pgsz,MAP_ANONYMOUS,-1,0);
    if (fresh == NULL) {
        return NULL;
    }

    // The first 8 bytes hold the counter; pad up to the requested alignment.
    unsigned int header = ROUND_UP(8,alignment);
    m32->pages[free_slot].base_addr = fresh;
    m32->pages[free_slot].current_size = size + header;
    // Counter starts at 2: one reference for the allocator itself, one for
    // the object being returned.
    *((uint64_t*)fresh) = 2;
    return (char*)fresh + header;
}
/****************************************************************************
* END (M32 ALLOCATOR)
***************************************************************************/
#endif // USE_MMAP
/*
......@@ -2329,6 +2585,39 @@ static void freeOcStablePtrs (ObjectCode *oc)
oc->stable_ptrs = NULL;
}
/*
 * Release the raw object-file image held in oc->image and reset oc->image
 * and oc->fileSize.
 *
 * With USE_MMAP, an image flagged as mapped (oc->imageMapped) is unmapped
 * via munmapForLinker, so that a failing munmap is reported instead of being
 * silently ignored; otherwise the image is released with stgFree. On Windows
 * the image is released with VirtualFree and the global `indirects` list is
 * freed as well. On all other configurations the image is released with
 * stgFree.
 */
static void
freePreloadObjectFile (ObjectCode *oc)
{
#ifdef USE_MMAP
    if (oc->imageMapped) {
        munmapForLinker(oc->image, oc->fileSize);
    } else {
        stgFree(oc->image);
    }
#elif defined(mingw32_HOST_OS)
    VirtualFree(oc->image - PEi386_IMAGE_OFFSET, 0, MEM_RELEASE);
    IndirectAddr *ia, *ia_next;
    ia = indirects;
    while (ia != NULL) {
        ia_next = ia->next;
        stgFree(ia);
        ia = ia_next;
    }
    indirects = NULL;
#else
    stgFree(oc->image);
#endif
    oc->image = NULL;
    oc->fileSize = 0;
}
/*
* freeObjectCode() releases all the pieces of an ObjectCode. It is called by
......@@ -2337,67 +2626,52 @@ static void freeOcStablePtrs (ObjectCode *oc)
*/
void freeObjectCode (ObjectCode *oc)
{
freePreloadObjectFile(oc);
if (oc->symbols != NULL) {
stgFree(oc->symbols);
oc->symbols = NULL;
}
{
Section *s, *nexts;
for (s = oc->sections; s != NULL; s = nexts) {
nexts = s->next;
stgFree(s);
if (oc->sections != NULL) {
int i;
for (i=0; i < oc->n_sections; i++) {
if (oc->sections[i].start != NULL) {
switch(oc->sections[i].alloc){
#ifdef USE_MMAP
case SECTION_MMAP:
munmap(oc->sections[i].mapped_start,
oc->sections[i].mapped_size);
break;
case SECTION_M32:
m32_free(oc->sections[i].start,
oc->sections[i].size);
break;
#endif
case SECTION_MALLOC:
stgFree(oc->sections[i].start);
break;
default:
break;
}
}
}
stgFree(oc->sections);
}
freeProddableBlocks(oc);
/* Free symbol_extras. On x86_64 Windows, symbol_extras are allocated
* alongside the image, so we don't need to free. */
#if NEED_SYMBOL_EXTRAS && (!defined(x86_64_HOST_ARCH) || !defined(mingw32_HOST_OS))
#ifdef USE_MMAP
int pagesize, size, r;
pagesize = getpagesize();
size = ROUND_UP(oc->fileSize, pagesize);
r = munmap(oc->image, size);
if (r == -1) {
sysErrorBelch("munmap");
}
#if defined(powerpc_HOST_ARCH) || defined(x86_64_HOST_ARCH) || defined(arm_HOST_ARCH)
#if !defined(x86_64_HOST_ARCH) || !defined(mingw32_HOST_OS)
if (!USE_CONTIGUOUS_MMAP && oc->symbol_extras != NULL)
{
munmap(oc->symbol_extras,
ROUND_UP(sizeof(SymbolExtra) * oc->n_symbol_extras, pagesize));
m32_free(oc->symbol_extras, sizeof(SymbolExtra) * oc->n_symbol_extras);
}
#endif
#endif
#else
#ifndef mingw32_HOST_OS
stgFree(oc->image);
#else
VirtualFree(oc->image - PEi386_IMAGE_OFFSET, 0, MEM_RELEASE);
IndirectAddr *ia, *ia_next;
ia = indirects;
while (ia != NULL) {
ia_next = ia->next;
stgFree(ia);
ia = ia_next;
}
indirects = NULL;
#endif
#if defined(powerpc_HOST_ARCH) || defined(x86_64_HOST_ARCH) || defined(arm_HOST_ARCH)
#if !defined(x86_64_HOST_ARCH) || !defined(mingw32_HOST_OS)
#else // !USE_MMAP
stgFree(oc->symbol_extras);
#endif
#endif
#endif
stgFree(oc->fileName);
......@@ -2408,7 +2682,7 @@ void freeObjectCode (ObjectCode *oc)
static ObjectCode*
mkOc( pathchar *path, char *image, int imageSize,
char *archiveMemberName
rtsBool mapped, char *archiveMemberName
#ifndef USE_MMAP
#ifdef darwin_HOST_OS
, int misalignment
......@@ -2444,12 +2718,14 @@ mkOc( pathchar *path, char *image, int imageSize,
oc->fileSize = imageSize;
oc->symbols = NULL;
oc->n_sections = 0;
oc->sections = NULL;
oc->proddables = NULL;
oc->stable_ptrs = NULL;
#if powerpc_HOST_ARCH || x86_64_HOST_ARCH || arm_HOST_ARCH
#if NEED_SYMBOL_EXTRAS
oc->symbol_extras = NULL;
#endif
oc->imageMapped = mapped;
#ifndef USE_MMAP
#ifdef darwin_HOST_OS
......@@ -2788,16 +3064,7 @@ static HsInt loadArchive_ (pathchar *path)
IF_DEBUG(linker, debugBelch("loadArchive: Member is an object file...loading...\n"));
/* We can't mmap from the archive directly, as object
files need to be 8-byte aligned but files in .ar
archives are 2-byte aligned. When possible we use mmap
to get some anonymous memory, as on 64-bit platforms if
we use malloc then we can be given memory above 2^32.
In the mmap case we're probably wasting lots of space;
we could do better. */
#if defined(USE_MMAP)
image = mmapForLinker(memberSize, MAP_ANONYMOUS, -1);
#elif defined(mingw32_HOST_OS)
#if defined(mingw32_HOST_OS)
// TODO: We would like to use allocateExec here, but allocateExec
// cannot currently allocate blocks large enough.
image = allocateImageAndTrampolines(path, fileName,
......@@ -2806,11 +3073,16 @@ static HsInt loadArchive_ (pathchar *path)
#endif
memberSize);
#elif defined(darwin_HOST_OS)
#if defined(USE_MMAP)
image = mmapForLinker(memberSize, MAP_ANONYMOUS, -1, 0);
#else
/* See loadObj() */
misalignment = machoGetMisalignment(f);
image = stgMallocBytes(memberSize + misalignment, "loadArchive(image)");
image += misalignment;
#else
#endif // USE_MMAP
#else // not windows or darwin
image = stgMallocBytes(memberSize, "loadArchive(image)");
#endif
......@@ -2867,11 +3139,9 @@ static HsInt loadArchive_ (pathchar *path)
sprintf(archiveMemberName, "%" PATH_FMT "(%.*s)",
path, (int)thisFileNameSize, fileName);
oc = mkOc(path, image, memberSize, archiveMemberName
#ifndef USE_MMAP
#ifdef darwin_HOST_OS
oc = mkOc(path, image, memberSize, rtsFalse, archiveMemberName
#if !defined(USE_MMAP) && defined(darwin_HOST_OS)
, misalignment
#endif
#endif
);
......@@ -2892,7 +3162,7 @@ static HsInt loadArchive_ (pathchar *path)
}
IF_DEBUG(linker, debugBelch("loadArchive: Found GNU-variant file index\n"));
#ifdef USE_MMAP
gnuFileIndex = mmapForLinker(memberSize + 1, MAP_ANONYMOUS, -1);
gnuFileIndex = mmapForLinker(memberSize + 1, MAP_ANONYMOUS, -1, 0);
#else
gnuFileIndex = stgMallocBytes(memberSize + 1, "loadArchive(image)");
#endif
......@@ -2942,6 +3212,10 @@ static HsInt loadArchive_ (pathchar *path)
#endif
}
#ifdef USE_MMAP
m32_allocator_flush(&allocator);
#endif
IF_DEBUG(linker, debugBelch("loadArchive: done\n"));
return 1;
}
......@@ -2954,78 +3228,65 @@ HsInt loadArchive (pathchar *path)
return r;
}
/* -----------------------------------------------------------------------------
* Load an obj (populate the global symbol table, but don't resolve yet)
*
* Returns: 1 if ok, 0 on error.
*/
static HsInt loadObj_ (pathchar *path)
//
// Load the object file into memory. This will not be its final resting place,
// as on 64-bit platforms we need to map its segments into the low 2GB of the
// address space, properly aligned.
//
static ObjectCode *
preloadObjectFile (pathchar *path)
{
ObjectCode* oc;
char *image;
int fileSize;
struct_stat st;
int r;
#ifdef USE_MMAP
int fd;
#else
FILE *f;
# if defined(darwin_HOST_OS)
void *image;
ObjectCode *oc;
#if !defined(USE_MMAP) && defined(darwin_HOST_OS)
int misalignment;
# endif
#endif
IF_DEBUG(linker, debugBelch("loadObj %" PATH_FMT "\n", path));
/* debugBelch("loadObj %s\n", path ); */
/* Check that we haven't already loaded this object.
Ignore requests to load multiple times */
if (isAlreadyLoaded(path)) {
IF_DEBUG(linker,
debugBelch("ignoring repeated load of %" PATH_FMT "\n", path));
return 1; /* success */
}
r = pathstat(path, &st);
if (r == -1) {
IF_DEBUG(linker, debugBelch("File doesn't exist\n"));
return 0;
errorBelch("loadObj: %" PATH_FMT ": file doesn't exist", path);
return NULL;
}
fileSize = st.st_size;
#ifdef USE_MMAP
/* On many architectures malloc'd memory isn't executable, so we need to use mmap. */
int fd;
/* On many architectures malloc'd memory isn't executable, so we need to use
* mmap. */
#if defined(openbsd_HOST_OS)
/* coverity[toctou] */
fd = open(path, O_RDONLY, S_IRUSR);
#else
/* coverity[toctou] */
fd = open(path, O_RDONLY);
#endif
if (fd == -1) {
errorBelch("loadObj: can't open `%s'", path);
return 0;
errorBelch("loadObj: can't open %s", path);
return NULL;
}
image = mmapForLinker(fileSize, 0, fd);
image = mmap(NULL, fileSize, PROT_READ|PROT_WRITE|PROT_EXEC,
MAP_PRIVATE, fd, 0);
// not 32-bit yet, we'll remap later
close(fd);
if (image == NULL) {
return 0;
}
#else /* !USE_MMAP */
FILE *f;
/* load the image into memory */
/* coverity[toctou] */
f = pathopen(path, WSTR("rb"));
if (!f) {
errorBelch("loadObj: can't read `%" PATH_FMT "'", path);
return 0;
return NULL;
}
# if defined(mingw32_HOST_OS)
# if defined(mingw32_HOST_OS)
// TODO: We would like to use allocateExec here, but allocateExec
// cannot currently allocate blocks large enough.
image = allocateImageAndTrampolines(path, "itself",
......@@ -3035,9 +3296,11 @@ static HsInt loadObj_ (pathchar *path)
fileSize);
if (image == NULL) {
fclose(f);
return 0;
return NULL;
}
# elif defined(darwin_HOST_OS)
// In a Mach-O .o file, all sections can and will be misaligned
// if the total size of the headers is not a multiple of the
// desired alignment. This is fine for .o files that only serve
......@@ -3050,30 +3313,57 @@ static HsInt loadObj_ (pathchar *path)
misalignment = machoGetMisalignment(f);
image = stgMallocBytes(fileSize + misalignment, "loadObj(image)");
image += misalignment;
# else
# else /* !defined(mingw32_HOST_OS) */
image = stgMallocBytes(fileSize, "loadObj(image)");
# endif
{
int n;
n = fread ( image, 1, fileSize, f );
fclose(f);
if (n != fileSize) {
errorBelch("loadObj: error whilst reading `%" PATH_FMT "'", path);
stgFree(image);
return 0;
}
#endif
int n;
n = fread ( image, 1, fileSize, f );
fclose(f);
if (n != fileSize) {
errorBelch("loadObj: error whilst reading `%" PATH_FMT "'", path);
stgFree(image);
return NULL;
}
#endif /* USE_MMAP */
oc = mkOc(path, image, fileSize, NULL
#ifndef USE_MMAP
#ifdef darwin_HOST_OS
oc = mkOc(path, image, fileSize, rtsTrue, NULL
#if !defined(USE_MMAP) && defined(darwin_HOST_OS)
, misalignment
#endif
#endif
);
return oc;
}
/* -----------------------------------------------------------------------------