Commit c34a4b98 authored by Ömer Sinan Ağacan's avatar Ömer Sinan Ağacan Committed by Marge Bot

Fix and enable object unloading in GHCi

Fixes #16525 by tracking dependencies between object file symbols and
marking symbol liveness during garbage collection

See Note [Object unloading] in CheckUnload.c for details.
parent 584058dd
......@@ -1150,15 +1150,15 @@ unload_wkr hsc_env keep_linkables pls@LoaderState{..} = do
where
unloadObjs :: Linkable -> IO ()
unloadObjs lnk
-- The RTS's PEi386 linker currently doesn't support unloading.
| isWindowsHost = return ()
| hostIsDynamic = return ()
-- We don't do any cleanup when linking objects with the
-- dynamic linker. Doing so introduces extra complexity for
-- not much benefit.
-- Code unloading currently disabled due to instability.
-- See #16841.
-- id False, so that the pattern-match checker doesn't complain
| id False -- otherwise
| otherwise
= mapM_ (unloadObj hsc_env) [f | DotO f <- linkableUnlinked lnk]
-- The components of a BCO linkable may contain
-- dot-o files. Which is very confusing.
......@@ -1166,7 +1166,6 @@ unload_wkr hsc_env keep_linkables pls@LoaderState{..} = do
-- But the BCO parts can be unlinked just by
-- letting go of them (plus of course depopulating
-- the symbol table which is done in the main body)
| otherwise = return () -- see #16841
{- **********************************************************************
......
This diff is collapsed.
......@@ -12,6 +12,34 @@
#include "BeginPrivate.h"
void checkUnload (StgClosure *static_objects);
#include "LinkerInternals.h"
// Currently live objects
extern ObjectCode *objects;
// Root set for object collection
extern ObjectCode *loaded_objects;
// Mark bit for live objects
extern uint8_t object_code_mark_bit;
// Number of object code currently marked for unloading. See the definition in
// CheckUnload.c for details.
extern int n_unloaded_objects;
void initUnloadCheck(void);
void exitUnloadCheck(void);
// Call before major GC to prepare section index table for marking
bool prepareUnloadCheck(void);
// Mark object code of a static closure address as 'live'
void markObjectCode(const void *addr);
// Call after major GC to unload unused and unmarked object code
void checkUnload(void);
// Call on loaded object code
void insertOCSectionIndices(ObjectCode *oc);
#include "EndPrivate.h"
......@@ -492,6 +492,27 @@ mapHashTableKeys(HashTable *table, void *data, MapHashFnKeys fn)
}
}
void
iterHashTable(HashTable *table, void *data, IterHashFn fn)
{
/* The last bucket with something in it is table->max + table->split - 1 */
long segment = (table->max + table->split - 1) / HSEGSIZE;
long index = (table->max + table->split - 1) % HSEGSIZE;
while (segment >= 0) {
while (index >= 0) {
for (HashList *hl = table->dir[segment][index]; hl != NULL; hl = hl->next) {
if (!fn(data, hl->key, hl->data)) {
return;
}
}
index--;
}
segment--;
index = HSEGSIZE - 1;
}
}
/* -----------------------------------------------------------------------------
* When we initialize a hash table, we set up the first segment as well,
* initializing all of the first segment's hash buckets to NULL.
......@@ -522,12 +543,6 @@ allocHashTable(void)
return table;
}
void
exitHashTable(void)
{
/* nothing to do */
}
int keyCountHashTable (HashTable *table)
{
return table->kcount;
......
......@@ -19,7 +19,7 @@ typedef struct strhashtable StrHashTable;
* `const` so that calling function can mutate what the pointer points to if it
* needs to.
*/
HashTable * allocHashTable ( void );
HashTable * allocHashTable ( void );
void insertHashTable ( HashTable *table, StgWord key, const void *data );
void * lookupHashTable ( const HashTable *table, StgWord key );
void * removeHashTable ( HashTable *table, StgWord key, const void *data );
......@@ -35,9 +35,12 @@ int keysHashTable(HashTable *table, StgWord keys[], int szKeys);
typedef void (*MapHashFn)(void *data, StgWord key, const void *value);
typedef void (*MapHashFnKeys)(void *data, StgWord *key, const void *value);
// Return true -> continue; false -> stop
typedef bool (*IterHashFn)(void *data, StgWord key, const void *value);
void mapHashTable(HashTable *table, void *data, MapHashFn fn);
void mapHashTableKeys(HashTable *table, void *data, MapHashFnKeys fn);
void iterHashTable(HashTable *table, void *data, IterHashFn);
/* Hash table access where the keys are C strings (the strings are
* assumed to be allocated by the caller, and mustn't be deallocated
......@@ -79,9 +82,33 @@ void * removeHashTable_ ( HashTable *table, StgWord key,
/* Freeing hash tables
*/
void freeHashTable ( HashTable *table, void (*freeDataFun)(void *) );
#define freeStrHashTable(table, f) \
(freeHashTable((HashTable*) table, f))
void exitHashTable ( void );
INLINE_HEADER void freeStrHashTable ( StrHashTable *table, void (*freeDataFun)(void *) )
{
freeHashTable((HashTable*)table, freeDataFun);
}
/*
* Hash set API
*
* A hash set is bascially a hash table where values are NULL.
*/
typedef struct hashtable HashSet;
INLINE_HEADER HashSet *allocHashSet ( void )
{
return (HashSet*)allocHashTable();
}
INLINE_HEADER void freeHashSet ( HashSet *set )
{
freeHashTable((HashTable*)set, NULL);
}
INLINE_HEADER void insertHashSet ( HashSet *set, StgWord key )
{
insertHashTable((HashTable*)set, key, NULL);
}
#include "EndPrivate.h"
......@@ -32,6 +32,7 @@
#include "linker/CacheFlush.h"
#include "linker/SymbolExtras.h"
#include "PathUtils.h"
#include "CheckUnload.h" // createOCSectionIndices
#if !defined(mingw32_HOST_OS)
#include "posix/Signals.h"
......@@ -161,23 +162,9 @@
*/
StrHashTable *symhash;
/* List of currently loaded objects */
ObjectCode *objects = NULL; /* initially empty */
/* List of objects that have been unloaded via unloadObj(), but are waiting
to be actually freed via checkUnload() */
ObjectCode *unloaded_objects = NULL; /* initially empty */
#if defined(THREADED_RTS)
/* This protects all the Linker's global state except unloaded_objects */
/* This protects all the Linker's global state */
Mutex linker_mutex;
/*
* This protects unloaded_objects. We have a separate mutex for this, because
* the GC needs to access unloaded_objects in checkUnload, while the linker only
* needs to access unloaded_objects in unloadObj(), so this allows most linker
* operations proceed concurrently with the GC.
*/
Mutex linker_unloaded_mutex;
#endif
/* Generic wrapper function to try and Resolve and RunInit oc files */
......@@ -441,12 +428,10 @@ initLinker_ (int retain_cafs)
linker_init_done = 1;
}
objects = NULL;
unloaded_objects = NULL;
initUnloadCheck();
#if defined(THREADED_RTS)
initMutex(&linker_mutex);
initMutex(&linker_unloaded_mutex);
#if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
initMutex(&dl_mutex);
#endif
......@@ -532,6 +517,7 @@ exitLinker( void ) {
#endif
if (linker_init_done == 1) {
freeStrHashTable(symhash, free);
exitUnloadCheck();
}
#if defined(THREADED_RTS)
closeMutex(&linker_mutex);
......@@ -858,18 +844,24 @@ HsInt insertSymbol(pathchar* obj_name, SymbolName* key, SymbolAddr* data)
}
/* -----------------------------------------------------------------------------
* lookup a symbol in the hash table
* Lookup a symbol in the hash table
*
* When 'dependent' is not NULL, adds it as a dependent to the owner of the
* symbol.
*/
#if defined(OBJFORMAT_PEi386)
SymbolAddr* lookupSymbol_ (SymbolName* lbl)
SymbolAddr* lookupDependentSymbol (SymbolName* lbl, ObjectCode *dependent)
{
(void)dependent; // TODO
ASSERT_LOCK_HELD(&linker_mutex);
return lookupSymbol_PEi386(lbl);
}
#else
SymbolAddr* lookupSymbol_ (SymbolName* lbl)
SymbolAddr* lookupDependentSymbol (SymbolName* lbl, ObjectCode *dependent)
{
ASSERT_LOCK_HELD(&linker_mutex);
IF_DEBUG(linker, debugBelch("lookupSymbol: looking up '%s'\n", lbl));
ASSERT(symhash != NULL);
......@@ -894,10 +886,18 @@ SymbolAddr* lookupSymbol_ (SymbolName* lbl)
return internal_dlsym(lbl + 1);
# else
ASSERT(2+2 == 5);
ASSERT(false);
return NULL;
# endif
} else {
if (dependent) {
// Add dependent as symbol's owner's dependency
ObjectCode *owner = pinfo->owner;
if (owner) {
// TODO: what does it mean for a symbol to not have an owner?
insertHashSet(dependent->dependencies, (W_)owner);
}
}
return loadSymbol(lbl, pinfo);
}
}
......@@ -958,7 +958,9 @@ printLoadedObjects() {
SymbolAddr* lookupSymbol( SymbolName* lbl )
{
ACQUIRE_LOCK(&linker_mutex);
SymbolAddr* r = lookupSymbol_(lbl);
// NULL for "don't add dependent". When adding a dependency we call
// lookupDependentSymbol directly.
SymbolAddr* r = lookupDependentSymbol(lbl, NULL);
if (!r) {
errorBelch("^^ Could not load '%s', dependency unresolved. "
"See top entry above.\n", lbl);
......@@ -1267,9 +1269,6 @@ void freeObjectCode (ObjectCode *oc)
oc->sections[i].mapped_size);
break;
case SECTION_M32:
IF_DEBUG(zero_on_gc,
memset(oc->sections[i].start,
0x00, oc->sections[i].size));
// Freed by m32_allocator_free
break;
#endif
......@@ -1323,6 +1322,8 @@ void freeObjectCode (ObjectCode *oc)
stgFree(oc->fileName);
stgFree(oc->archiveMemberName);
freeHashSet(oc->dependencies);
stgFree(oc);
}
......@@ -1385,6 +1386,10 @@ mkOc( pathchar *path, char *image, int imageSize,
/* chain it onto the list of objects */
oc->next = NULL;
oc->prev = NULL;
oc->next_loaded_object = NULL;
oc->mark = object_code_mark_bit;
oc->dependencies = allocHashSet();
#if RTS_LINKER_USE_MMAP
oc->rw_m32 = m32_allocator_new(false);
......@@ -1403,9 +1408,9 @@ mkOc( pathchar *path, char *image, int imageSize,
HsInt
isAlreadyLoaded( pathchar *path )
{
ObjectCode *o;
for (o = objects; o; o = o->next) {
if (0 == pathcmp(o->fileName, path)) {
for (ObjectCode *o = objects; o; o = o->next) {
if (0 == pathcmp(o->fileName, path)
&& o->status != OBJECT_UNLOADED) {
return 1; /* already loaded */
}
}
......@@ -1539,21 +1544,16 @@ preloadObjectFile (pathchar *path)
*/
static HsInt loadObj_ (pathchar *path)
{
ObjectCode* oc;
IF_DEBUG(linker, debugBelch("loadObj: %" PATH_FMT "\n", path));
/* debugBelch("loadObj %s\n", path ); */
/* Check that we haven't already loaded this object.
Ignore requests to load multiple times */
// Check that we haven't already loaded this object.
// Ignore requests to load multiple times
if (isAlreadyLoaded(path)) {
IF_DEBUG(linker,
debugBelch("ignoring repeated load of %" PATH_FMT "\n", path));
return 1; /* success */
return 1; // success
}
oc = preloadObjectFile(path);
ObjectCode *oc = preloadObjectFile(path);
if (oc == NULL) return 0;
if (! loadOc(oc)) {
......@@ -1564,8 +1564,10 @@ static HsInt loadObj_ (pathchar *path)
return 0;
}
oc->next = objects;
objects = oc;
insertOCSectionIndices(oc);
oc->next_loaded_object = loaded_objects;
loaded_objects = oc;
return 1;
}
......@@ -1758,13 +1760,10 @@ int ocTryLoad (ObjectCode* oc) {
*/
static HsInt resolveObjs_ (void)
{
ObjectCode *oc;
int r;
IF_DEBUG(linker, debugBelch("resolveObjs: start\n"));
for (oc = objects; oc; oc = oc->next) {
r = ocTryLoad(oc);
for (ObjectCode *oc = objects; oc; oc = oc->next) {
int r = ocTryLoad(oc);
if (!r)
{
errorBelch("Could not load Object Code %" PATH_FMT ".\n", OC_INFORMATIVE_FILENAME(oc));
......@@ -1796,45 +1795,35 @@ HsInt resolveObjs (void)
*/
static HsInt unloadObj_ (pathchar *path, bool just_purge)
{
ObjectCode *oc, *prev, *next;
HsBool unloadedAnyObj = HS_BOOL_FALSE;
ASSERT(symhash != NULL);
ASSERT(objects != NULL);
IF_DEBUG(linker, debugBelch("unloadObj: %" PATH_FMT "\n", path));
prev = NULL;
for (oc = objects; oc; oc = next) {
next = oc->next; // oc might be freed
if (!pathcmp(oc->fileName,path)) {
bool unloadedAnyObj = false;
ObjectCode *prev = NULL;
// NOTE (osa): There may be more than one object with the same file name
// (happens when loading archive files) so we don't stop after unloading one
for (ObjectCode *oc = loaded_objects; oc; oc = oc->next_loaded_object) {
if (pathcmp(oc->fileName,path) == 0) {
oc->status = OBJECT_UNLOADED;
// these are both idempotent, so in just_purge mode we can
// later call unloadObj() to really unload the object.
// These are both idempotent, so in just_purge mode we can later
// call unloadObj() to really unload the object.
removeOcSymbols(oc);
freeOcStablePtrs(oc);
unloadedAnyObj = true;
if (!just_purge) {
n_unloaded_objects += 1;
// Remove object code from root set
if (prev == NULL) {
objects = oc->next;
loaded_objects = oc->next_loaded_object;
} else {
prev->next = oc->next;
prev->next_loaded_object = oc->next_loaded_object;
}
ACQUIRE_LOCK(&linker_unloaded_mutex);
oc->next = unloaded_objects;
unloaded_objects = oc;
oc->status = OBJECT_UNLOADED;
RELEASE_LOCK(&linker_unloaded_mutex);
// We do not own oc any more; it can be released at any time by
// the GC in checkUnload().
} else {
prev = oc;
}
/* This could be a member of an archive so continue
* unloading other members. */
unloadedAnyObj = HS_BOOL_TRUE;
} else {
prev = oc;
}
......@@ -1842,8 +1831,7 @@ static HsInt unloadObj_ (pathchar *path, bool just_purge)
if (unloadedAnyObj) {
return 1;
}
else {
} else {
errorBelch("unloadObj: can't find `%" PATH_FMT "' to unload", path);
return 0;
}
......@@ -1867,13 +1855,7 @@ HsInt purgeObj (pathchar *path)
static OStatus getObjectLoadStatus_ (pathchar *path)
{
ObjectCode *o;
for (o = objects; o; o = o->next) {
if (0 == pathcmp(o->fileName, path)) {
return o->status;
}
}
for (o = unloaded_objects; o; o = o->next) {
for (ObjectCode *o = objects; o; o = o->next) {
if (0 == pathcmp(o->fileName, path)) {
return o->status;
}
......
......@@ -190,9 +190,6 @@ typedef struct _ObjectCode {
/* non-zero if the object file was mmap'd, otherwise malloc'd */
int imageMapped;
/* flag used when deciding whether to unload an object file */
int referenced;
/* record by how much image has been deliberately misaligned
after allocation, so that we can use realloc */
int misalignment;
......@@ -204,8 +201,37 @@ typedef struct _ObjectCode {
int n_segments;
Segment *segments;
/* Allow a chain of these things */
struct _ObjectCode * next;
//
// Garbage collection fields
//
// Next object in `objects` list
struct _ObjectCode *next;
// Previous object in `objects` list
struct _ObjectCode *prev;
// Next object in `loaded_objects` list
struct _ObjectCode *next_loaded_object;
// Mark bit
uint8_t mark;
// Set of dependencies (ObjectCode*) of the object file. Traverse
// dependencies using `iterHashTable`.
//
// New entries are added as we resolve symbols in an object file, in
// `lookupDependentSymbol`. When an object file uses multiple symbols from
// another object file we add the dependent multiple times, so we use a
// `HashTable` here rather than a list/array to avoid copies.
//
// Used when unloading object files. See Note [Object unloading] in
// CheckUnload.c.
HashSet *dependencies;
//
// End of garbage collection fields
//
/* SANITY CHECK ONLY: a list of the only memory regions which may
safely be prodded during relocation. Any attempt to prod
......@@ -249,12 +275,8 @@ typedef struct _ObjectCode {
(OC)->fileName \
)
extern ObjectCode *objects;
extern ObjectCode *unloaded_objects;
#if defined(THREADED_RTS)
extern Mutex linker_mutex;
extern Mutex linker_unloaded_mutex;
#endif
/* Type of the initializer */
......@@ -305,8 +327,9 @@ int ghciInsertSymbolTable(
HsBool weak,
ObjectCode *owner);
/* lock-free version of lookupSymbol */
SymbolAddr* lookupSymbol_ (SymbolName* lbl);
/* Lock-free version of lookupSymbol. When 'dependent' is not NULL, adds it as a
* dependent to the owner of the symbol. */
SymbolAddr* lookupDependentSymbol (SymbolName* lbl, ObjectCode *dependent);
extern StrHashTable *symhash;
......
......@@ -585,9 +585,6 @@ hs_exit_(bool wait_foreign)
/* tear down statistics subsystem */
stat_exit();
/* free hash table storage */
exitHashTable();
// Finally, free all our storage. However, we only free the heap
// memory if we have waited for foreign calls to complete;
// otherwise a foreign call in progress may still be referencing
......
......@@ -1100,7 +1100,7 @@ do_Elf_Rel_relocations ( ObjectCode* oc, char* ehdrC,
if (ELF_ST_BIND(symbol->elf_sym->st_info) == STB_LOCAL || strncmp(symbol->name, "_GLOBAL_OFFSET_TABLE_", 21) == 0) {
S = (Elf_Addr)symbol->addr;
} else {
S_tmp = lookupSymbol_( symbol->name );
S_tmp = lookupDependentSymbol( symbol->name, oc );
S = (Elf_Addr)S_tmp;
}
if (!S) {
......@@ -1520,7 +1520,7 @@ do_Elf_Rela_relocations ( ObjectCode* oc, char* ehdrC,
} else {
/* No, so look up the name in our global table. */
symbol = strtab + sym.st_name;
S_tmp = lookupSymbol_( symbol );
S_tmp = lookupDependentSymbol( symbol, oc );
S = (Elf_Addr)S_tmp;
}
if (!S) {
......
......@@ -5,6 +5,7 @@
#include "sm/OSMem.h"
#include "RtsUtils.h"
#include "LinkerInternals.h"
#include "CheckUnload.h" // loaded_objects, insertOCSectionIndices
#include "linker/M32Alloc.h"
/* Platform specific headers */
......@@ -241,7 +242,6 @@ lookupGNUArchiveIndex(int gnuFileIndexSize, char **fileName_,
static HsInt loadArchive_ (pathchar *path)
{
ObjectCode* oc = NULL;
char *image = NULL;
HsInt retcode = 0;
int memberSize;
......@@ -521,8 +521,8 @@ static HsInt loadArchive_ (pathchar *path)
pathprintf(archiveMemberName, size, WSTR("%" PATH_FMT "(%.*s)"),
path, (int)thisFileNameSize, fileName);
oc = mkOc(path, image, memberSize, false, archiveMemberName
, misalignment);
ObjectCode *oc = mkOc(path, image, memberSize, false, archiveMemberName,
misalignment);
#if defined(OBJFORMAT_MACHO)
ocInit_MachO( oc );
#endif
......@@ -537,8 +537,9 @@ static HsInt loadArchive_ (pathchar *path)
fclose(f);
return 0;
} else {
oc->next = objects;
objects = oc;
insertOCSectionIndices(oc); // also adds the object to `objects` list
oc->next_loaded_object = loaded_objects;
loaded_objects = oc;
}
}
else if (isGnuIndex) {
......
......@@ -242,7 +242,7 @@ resolveImports(
addr = (SymbolAddr*) (symbol->nlist->n_value);
IF_DEBUG(linker, debugBelch("resolveImports: undefined external %s has value %p\n", symbol->name, addr));
} else {
addr = lookupSymbol_(symbol->name);
addr = lookupDependentSymbol(symbol->name, oc);
IF_DEBUG(linker, debugBelch("resolveImports: looking up %s, %p\n", symbol->name, addr));
}
......@@ -564,12 +564,12 @@ relocateSectionAarch64(ObjectCode * oc, Section * section)
uint64_t value = 0;
if(symbol->nlist->n_type & N_EXT) {
/* external symbols should be able to be
* looked up via the lookupSymbol_ function.
* looked up via the lookupDependentSymbol function.
* Either through the global symbol hashmap
* or asking the system, if not found
* in the symbol hashmap
*/
value = (uint64_t)lookupSymbol_((char*)symbol->name);
value = (uint64_t)lookupDependentSymbol((char*)symbol->name, oc);
if(!value)
barf("Could not lookup symbol: %s!", symbol->name);
} else {
......@@ -609,7 +609,7 @@ relocateSectionAarch64(ObjectCode * oc, Section * section)
uint64_t pc = (uint64_t)section->start + ri->r_address;
uint64_t value = 0;
if(symbol->nlist->n_type & N_EXT) {
value = (uint64_t)lookupSymbol_((char*)symbol->name);
value = (uint64_t)lookupDependentSymbol((char*)symbol->name, oc);
if(!value)
barf("Could not lookup symbol: %s!", symbol->name);
} else {
......@@ -792,7 +792,7 @@ relocateSection(ObjectCode* oc, int curSection)
// symtab, or it is undefined, meaning dlsym must be used
// to resolve it.
addr = lookupSymbol_(nm);
addr = lookupDependentSymbol(nm, oc);
IF_DEBUG(linker, debugBelch("relocateSection: looked up %s, "
"external X86_64_RELOC_GOT or X86_64_RELOC_GOT_LOAD\n"
" : addr = %p\n", nm, addr));
......@@ -853,7 +853,7 @@ relocateSection(ObjectCode* oc, int curSection)
nm, (void *)value));
}
else {
addr = lookupSymbol_(nm);
addr = lookupDependentSymbol(nm, oc);
if (addr == NULL)
{
errorBelch("\nlookupSymbol failed in relocateSection (relocate external)\n"
......@@ -1353,7 +1353,7 @@ ocGetNames_MachO(ObjectCode* oc)
if (oc->info->nlist[i].n_type & N_EXT)
{
if ( (oc->info->nlist[i].n_desc & N_WEAK_DEF)
&& lookupSymbol_(nm)) {