Linker.c 218 KB
Newer Older
1 2
/* -----------------------------------------------------------------------------
 *
Gabor Greif's avatar
typo  
Gabor Greif committed
3
 * (c) The GHC Team, 2000-2012
4 5 6 7 8
 *
 * RTS Object Linker
 *
 * ---------------------------------------------------------------------------*/

sof's avatar
sof committed
9
#if 0
10
#include "PosixSource.h"
sof's avatar
sof committed
11
#endif
12

13
/* Linux needs _GNU_SOURCE to get RTLD_DEFAULT from <dlfcn.h> and
14 15
   MREMAP_MAYMOVE from <sys/mman.h>.
 */
16 17
#if defined(__linux__)  || defined(__GLIBC__)
#define _GNU_SOURCE 1
18 19
#endif

20 21
#include "Rts.h"
#include "HsFFI.h"
Simon Marlow's avatar
Simon Marlow committed
22 23

#include "sm/Storage.h"
Simon Marlow's avatar
Simon Marlow committed
24
#include "Stats.h"
25
#include "Hash.h"
26
#include "LinkerInternals.h"
27
#include "RtsUtils.h"
28
#include "Trace.h"
Simon Marlow's avatar
Simon Marlow committed
29
#include "StgPrimFloat.h" // for __int_encodeFloat etc.
30
#include "Proftimer.h"
31
#include "GetEnv.h"
32
#include "Stable.h"
33
#include "RtsSymbols.h"
Simon Marlow's avatar
Simon Marlow committed
34 35 36 37

#if !defined(mingw32_HOST_OS)
#include "posix/Signals.h"
#endif
38

Simon Marlow's avatar
Simon Marlow committed
39 40 41
// get protos for is*()
#include <ctype.h>

42
#ifdef HAVE_SYS_TYPES_H
43
#include <sys/types.h>
44 45
#endif

Ian Lynagh's avatar
Ian Lynagh committed
46
#include <inttypes.h>
47 48
#include <stdlib.h>
#include <string.h>
49 50
#include <stdio.h>
#include <assert.h>
Simon Marlow's avatar
Simon Marlow committed
51
#include <libgen.h>
52

53
#ifdef HAVE_SYS_STAT_H
54
#include <sys/stat.h>
55
#endif
56

57
#if defined(HAVE_DLFCN_H)
58
#include <dlfcn.h>
59
#endif
60

61
#if defined(cygwin32_HOST_OS)
sof's avatar
sof committed
62 63 64 65 66 67 68 69 70 71 72 73 74 75 76
#ifdef HAVE_DIRENT_H
#include <dirent.h>
#endif

#ifdef HAVE_SYS_TIME_H
#include <sys/time.h>
#endif
#include <regex.h>
#include <sys/fcntl.h>
#include <sys/termios.h>
#include <sys/utime.h>
#include <sys/utsname.h>
#include <sys/wait.h>
#endif

77 78 79
#if (defined(powerpc_HOST_ARCH) && defined(linux_HOST_OS)) \
 || (!defined(powerpc_HOST_ARCH) && \
    (   defined(linux_HOST_OS)     || defined(freebsd_HOST_OS) || \
80 81
        defined(dragonfly_HOST_OS) || defined(netbsd_HOST_OS ) || \
        defined(openbsd_HOST_OS  ) || defined(darwin_HOST_OS ) || \
82 83
        defined(kfreebsdgnu_HOST_OS) || defined(gnu_HOST_OS  ) || \
        defined(solaris2_HOST_OS)))
84
/* Don't use mmap on powerpc_HOST_ARCH as mmap doesn't support
85 86 87 88
 * reallocating but we need to allocate jump islands just after each
 * object images. Otherwise relative branches to jump islands can fail
 * due to 24-bits displacement overflow.
 */
89 90 91
#define USE_MMAP
#include <fcntl.h>
#include <sys/mman.h>
dons's avatar
dons committed
92 93 94 95 96

#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif

97 98
#endif

99 100 101 102 103 104 105 106 107 108

/* PowerPC has relative branch instructions with only 24 bit displacements
 * and therefore needs jump islands contiguous with each object code module.
 */
#if (defined(USE_MMAP) && defined(powerpc_HOST_ARCH) && defined(linux_HOST_OS))
#define USE_CONTIGUOUS_MMAP 1
#else
#define USE_CONTIGUOUS_MMAP 0
#endif

pcapriotti's avatar
pcapriotti committed
109
#if defined(linux_HOST_OS) || defined(solaris2_HOST_OS) || defined(freebsd_HOST_OS) || defined(kfreebsdgnu_HOST_OS) || defined(dragonfly_HOST_OS) || defined(netbsd_HOST_OS) || defined(openbsd_HOST_OS) || defined(gnu_HOST_OS)
110
#  define OBJFORMAT_ELF
Ian Lynagh's avatar
Ian Lynagh committed
111 112
#  include <regex.h>    // regex is already used by dlopen() so this is OK
                        // to use here without requiring an additional lib
113
#elif defined(cygwin32_HOST_OS) || defined (mingw32_HOST_OS)
114
#  define OBJFORMAT_PEi386
115
#  include <windows.h>
116
#  include <shfolder.h> /* SHGetFolderPathW */
sof's avatar
sof committed
117
#  include <math.h>
118
#elif defined(darwin_HOST_OS)
119
#  define OBJFORMAT_MACHO
120
#  include <regex.h>
Ian Lynagh's avatar
Ian Lynagh committed
121 122
#  include <mach/machine.h>
#  include <mach-o/fat.h>
123 124 125
#  include <mach-o/loader.h>
#  include <mach-o/nlist.h>
#  include <mach-o/reloc.h>
126
#if !defined(HAVE_DLFCN_H)
127
#  include <mach-o/dyld.h>
128
#endif
129 130 131
#if defined(powerpc_HOST_ARCH)
#  include <mach-o/ppc/reloc.h>
#endif
132 133 134
#if defined(x86_64_HOST_ARCH)
#  include <mach-o/x86_64/reloc.h>
#endif
135 136
#endif

137 138 139 140
#if defined(x86_64_HOST_ARCH) && defined(darwin_HOST_OS)
#define ALWAYS_PIC
#endif

141 142 143 144
#if defined(dragonfly_HOST_OS)
#include <sys/tls.h>
#endif

145 146 147 148 149 150 151
/* One entry of the linker's symbol hash table. */
typedef struct _RtsSymbolInfo {
    void *value;              /* address recorded for the symbol */
    const ObjectCode *owner;  /* object that supplied the symbol; NULL for
                                 entries the linker inserts itself */
    HsBool weak;              /* true while the entry may still be replaced
                                 by a later definition */
} RtsSymbolInfo;

/* Hash table mapping symbol names to RtsSymbolInfo */
152
static /*Str*/HashTable *symhash;
153

154
/* List of currently loaded objects */
Ian Lynagh's avatar
Ian Lynagh committed
155
ObjectCode *objects = NULL;     /* initially empty */
156

157 158 159 160
/* List of objects that have been unloaded via unloadObj(), but are waiting
   to be actually freed via checkUnload() */
ObjectCode *unloaded_objects = NULL; /* initially empty */

161
#ifdef THREADED_RTS
162
/* This protects all the Linker's global state except unloaded_objects */
163
Mutex linker_mutex;
164 165 166 167
/*
 * This protects unloaded_objects.  We have a separate mutex for this, because
 * the GC needs to access unloaded_objects in checkUnload, while the linker only
 * needs to access unloaded_objects in unloadObj(), so this allows most linker
168
 * operations to proceed concurrently with the GC.
169 170
 */
Mutex linker_unloaded_mutex;
171 172
#endif

173 174 175
/* Type of the initializer */
typedef void (*init_t) (int argc, char **argv, char **env);

176
static HsInt isAlreadyLoaded( pathchar *path );
177
static HsInt loadOc( ObjectCode* oc );
178
static ObjectCode* mkOc( pathchar *path, char *image, int imageSize,
179
                         rtsBool mapped, char *archiveMemberName
180 181 182 183 184 185 186
#ifndef USE_MMAP
#ifdef darwin_HOST_OS
                       , int misalignment
#endif
#endif
                       );

187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218
// Use wchar_t for pathnames on Windows (#5697)
//
// The path* macros below pick the wide-character or the narrow-character
// variant of each string/file routine, so that code handling pathchar
// strings can be written once for both cases.  WSTR() does the same for
// string literals.
#if defined(mingw32_HOST_OS)
#define pathcmp wcscmp
#define pathlen wcslen
#define pathopen _wfopen
#define pathstat _wstat
#define struct_stat struct _stat
// NOTE(review): this rewrites every later use of the identifier `open` to
// `wopen`.  The other wide routines above use the underscore-prefixed
// names (_wfopen, _wstat) -- confirm `wopen` (not `_wopen`) is intended.
#define open wopen
#define WSTR(s) L##s
#else
#define pathcmp strcmp
#define pathlen strlen
#define pathopen fopen
#define pathstat stat
#define struct_stat struct stat
#define WSTR(s) s
#endif

/*
 * Duplicate a path string.
 *
 * path: the string to copy (wide or narrow, depending on pathchar).
 *
 * Returns a freshly allocated copy, including the terminator.  The copy is
 * obtained from stgMallocBytes() on every platform, so it can be released
 * with stgFree() like every other allocation made in this file.
 *
 * Previously the Windows branch used wcsdup(), whose result was returned
 * unchecked, so an allocation failure produced a NULL path there while the
 * other branch went through the RTS allocator.
 * NOTE(review): presumably stgMallocBytes() reports out-of-memory itself
 * (no caller in this file checks its result) -- confirm.
 */
static pathchar* pathdup(pathchar *path)
{
    /* size in bytes of the string plus its terminator */
    size_t nbytes = (pathlen(path) + 1) * sizeof(pathchar);
    pathchar *ret = stgMallocBytes(nbytes, "pathdup");

    memcpy(ret, path, nbytes);
    return ret;
}


219
#if defined(OBJFORMAT_ELF)
220 221 222
static int ocVerifyImage_ELF    ( ObjectCode* oc );
static int ocGetNames_ELF       ( ObjectCode* oc );
static int ocResolve_ELF        ( ObjectCode* oc );
223
static int ocRunInit_ELF        ( ObjectCode* oc );
224
#if NEED_SYMBOL_EXTRAS
225
static int ocAllocateSymbolExtras_ELF ( ObjectCode* oc );
226
#endif
227
#elif defined(OBJFORMAT_PEi386)
228 229 230
static int ocVerifyImage_PEi386 ( ObjectCode* oc );
static int ocGetNames_PEi386    ( ObjectCode* oc );
static int ocResolve_PEi386     ( ObjectCode* oc );
231
static int ocRunInit_PEi386     ( ObjectCode* oc );
232
static void *lookupSymbolInDLLs ( unsigned char *lbl );
233 234 235 236
/* See Note [mingw-w64 name decoration scheme] */
#ifndef x86_64_HOST_ARCH
 static void zapTrailingAtSign   ( unsigned char *sym );
#endif
237
static char *allocateImageAndTrampolines (
Austin Seipp's avatar
Austin Seipp committed
238
   pathchar* arch_name, char* member_name,
239
#if defined(x86_64_HOST_ARCH)
Austin Seipp's avatar
Austin Seipp committed
240
   FILE* f,
241 242 243 244 245 246 247 248 249
#endif
   int size );
#if defined(x86_64_HOST_ARCH)
static int ocAllocateSymbolExtras_PEi386 ( ObjectCode* oc );
static size_t makeSymbolExtra_PEi386( ObjectCode* oc, size_t, char* symbol );
#define PEi386_IMAGE_OFFSET 4
#else
#define PEi386_IMAGE_OFFSET 0
#endif
250 251 252 253
#elif defined(OBJFORMAT_MACHO)
static int ocVerifyImage_MachO    ( ObjectCode* oc );
static int ocGetNames_MachO       ( ObjectCode* oc );
static int ocResolve_MachO        ( ObjectCode* oc );
254
static int ocRunInit_MachO        ( ObjectCode* oc );
255

Ian Lynagh's avatar
Ian Lynagh committed
256
#ifndef USE_MMAP
257
static int machoGetMisalignment( FILE * );
Ian Lynagh's avatar
Ian Lynagh committed
258
#endif
259
#if NEED_SYMBOL_EXTRAS
260 261
static int ocAllocateSymbolExtras_MachO ( ObjectCode* oc );
#endif
262
#ifdef powerpc_HOST_ARCH
263
static void machoInitSymbolsWithoutUnderscore( void );
264
#endif
265
#endif
266

267 268
static void freeProddableBlocks (ObjectCode *oc);

269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296
#ifdef USE_MMAP
/**
 * One page that the allocator is currently filling.
 */
struct m32_alloc_t {
   void * base_addr;             /* address of the page */
   unsigned int current_size;    /* bytes of the page already reserved */
};

/* Upper bound on the number of pages tracked by one allocator. */
#define M32_MAX_PAGES 32

/**
 * Allocator
 *
 * For now an allocator is nothing more than the set of pages it is
 * filling; M32_MAX_PAGES bounds how many pages that set can hold.
 */
typedef struct m32_allocator_t {
   struct m32_alloc_t pages[M32_MAX_PAGES];
} * m32_allocator;

// We use a global memory allocator
static struct m32_allocator_t allocator;

struct m32_allocator_t;
static void m32_allocator_init(struct m32_allocator_t *m32);
#endif

297 298 299 300 301 302 303 304 305 306 307 308 309
/* on x86_64 we have a problem with relocating symbol references in
 * code that was compiled without -fPIC.  By default, the small memory
 * model is used, which assumes that symbol references can fit in a
 * 32-bit slot.  The system dynamic linker makes this work for
 * references to shared libraries by either (a) allocating a jump
 * table slot for code references, or (b) moving the symbol at load
 * time (and copying its contents, if necessary) for data references.
 *
 * We unfortunately can't tell whether symbol references are to code
 * or data.  So for now we assume they are code (the vast majority
 * are), and allocate jump-table slots.  Unfortunately this will
 * SILENTLY generate crashing code for data references.  This hack is
 * enabled by X86_64_ELF_NONPIC_HACK.
Ian Lynagh's avatar
Ian Lynagh committed
310
 *
311 312 313 314 315 316 317 318
 * One workaround is to use shared Haskell libraries.  This is
 * coming.  Another workaround is to keep the static libraries but
 * compile them with -fPIC, because that will generate PIC references
 * to data which can be relocated.  The PIC code is still too green to
 * do this systematically, though.
 *
 * See bug #781
 * See thread http://www.haskell.org/pipermail/cvs-ghc/2007-September/038458.html
319 320 321 322 323 324 325 326 327 328 329 330
 *
 * Naming Scheme for Symbol Macros
 *
 * SymI_*: symbol is internal to the RTS. It resides in an object
 *         file/library that is statically linked.
 * SymE_*: symbol is external to the RTS library. It might be linked
 *         dynamically.
 *
 * Sym*_HasProto  : the symbol prototype is imported in an include file
 *                  or defined explicitly
 * Sym*_NeedsProto: the symbol is undefined and we add a dummy
 *                  default proto extern void sym(void);
331 332
 */
#define X86_64_ELF_NONPIC_HACK 1
333

334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359
/* Link objects into the lower 2Gb on x86_64.  GHC assumes the
 * small memory model on this architecture (see gcc docs,
 * -mcmodel=small).
 *
 * MAP_32BIT not available on OpenBSD/amd64
 */
#if defined(x86_64_HOST_ARCH) && defined(MAP_32BIT)
#define TRY_MAP_32BIT MAP_32BIT
#else
#define TRY_MAP_32BIT 0
#endif

/*
 * Due to the small memory model (see above), on x86_64 we have to map
 * all our non-PIC object files into the low 2Gb of the address space
 * (why 2Gb and not 4Gb?  Because all addresses must be reachable
 * using a 32-bit signed PC-relative offset). On Linux we can do this
 * using the MAP_32BIT flag to mmap(), however on other OSs
 * (e.g. *BSD, see #2063, and also on Linux inside Xen, see #2512), we
 * can't do this.  So on these systems, we have to pick a base address
 * in the low 2Gb of the address space and try to allocate memory from
 * there.
 *
 * We pick a default address based on the OS, but also make this
 * configurable via an RTS flag (+RTS -xm)
 */
360
#if !defined(ALWAYS_PIC) && defined(x86_64_HOST_ARCH)
361 362 363 364 365 366 367 368 369

#if defined(MAP_32BIT)
// Try to use MAP_32BIT
#define MMAP_32BIT_BASE_DEFAULT 0
#else
// A guess: 1Gb.
#define MMAP_32BIT_BASE_DEFAULT 0x40000000
#endif

Ian Lynagh's avatar
Ian Lynagh committed
370
static void *mmap_32bit_base = (void *)MMAP_32BIT_BASE_DEFAULT;
371 372 373 374 375 376 377
#endif

/* MAP_ANONYMOUS is MAP_ANON on some systems, e.g. OpenBSD */
#if !defined(MAP_ANONYMOUS) && defined(MAP_ANON)
#define MAP_ANONYMOUS MAP_ANON
#endif

378 379
/* -----------------------------------------------------------------------------
 * Insert symbols into hash tables, checking for duplicates.
380 381
 *
 * Returns: 0 on failure, nonzero on success
382
 */
383

384
static int ghciInsertSymbolTable(
385 386
   pathchar* obj_name,
   HashTable *table,
387
   const char* key,
388 389 390
   void *data,
   HsBool weak,
   ObjectCode *owner)
391
{
392 393 394 395 396 397 398 399
   RtsSymbolInfo *pinfo = lookupStrHashTable(table, key);
   if (!pinfo) /* new entry */
   {
      pinfo = stgMallocBytes(sizeof (*pinfo), "ghciInsertToSymbolTable");
      pinfo->value = data;
      pinfo->owner = owner;
      pinfo->weak = weak;
      insertStrHashTable(table, key, pinfo);
400 401 402 403 404 405 406
      return 1;
   }
   else if ((!pinfo->weak || pinfo->value) && weak)
   {
     return 1; /* duplicate weak symbol, throw it away */
   }
   else if (pinfo->weak) /* weak symbol is in the table */
Simon Marlow's avatar
Simon Marlow committed
407
   {
408 409 410 411
      /* override the weak definition with the non-weak one */
      pinfo->value = data;
      pinfo->owner = owner;
      pinfo->weak = HS_BOOL_FALSE;
412
      return 1;
Simon Marlow's avatar
Simon Marlow committed
413 414
   }
   debugBelch(
415
      "GHC runtime linker: fatal error: I found a duplicate definition for symbol\n"
416 417
      "   %s\n"
      "whilst processing object file\n"
418
      "   %" PATH_FMT "\n"
419 420 421 422
      "This could be caused by:\n"
      "   * Loading two different object files which export the same symbol\n"
      "   * Specifying the same object file twice on the GHCi command line\n"
      "   * An incorrect `package.conf' entry, causing some object to be\n"
423
      "     loaded twice.\n",
424 425
      (char*)key,
      obj_name
Simon Marlow's avatar
Simon Marlow committed
426
   );
427
   return 0;
428
}
429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454

/*
 * Look key up in table.
 *
 * On a hit, *result receives the recorded value, the entry is marked
 * non-weak (a symbol that has been looked up can no longer be overridden)
 * and HS_BOOL_TRUE is returned.  On a miss, *result is set to NULL and
 * HS_BOOL_FALSE is returned.
 */
static HsBool ghciLookupSymbolTable(HashTable *table,
    const char *key, void **result)
{
    RtsSymbolInfo *entry = lookupStrHashTable(table, key);

    if (entry == NULL) {
        *result = NULL;
        return HS_BOOL_FALSE;
    }

    if (entry->weak) {
        IF_DEBUG(linker, debugBelch("lookup: promoting %s\n", key));
    }
    entry->weak = HS_BOOL_FALSE;

    *result = entry->value;
    return HS_BOOL_TRUE;
}

/*
 * Remove the entry for key from table and free it, but only when the
 * entry exists and is owned by owner; otherwise the table is untouched.
 */
static void ghciRemoveSymbolTable(HashTable *table, const char *key,
    ObjectCode *owner)
{
    RtsSymbolInfo *entry = lookupStrHashTable(table, key);

    if (entry != NULL && entry->owner == owner) {
        removeStrHashTable(table, key, NULL);
        stgFree(entry);
    }
}
455 456 457
/* -----------------------------------------------------------------------------
 * initialize the object linker
 */
458 459 460 461


static int linker_init_done = 0 ;

462
#if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
463
static void *dl_prog_handle;
464 465 466 467 468
static regex_t re_invalid;
static regex_t re_realso;
#ifdef THREADED_RTS
static Mutex dl_mutex; // mutex to protect dlopen/dlerror critical section
#endif
469 470
#elif defined(OBJFORMAT_PEi386)
void addDLLHandle(pathchar* dll_name, HINSTANCE instance);
471
#endif
472

473 474 475 476
/*
 * Initialise the linker with CAF retention switched on.
 *
 * Retaining CAFs is the default only for backwards compatibility.  Most
 * users will want initLinker_(0) instead: with retention on, unloadObj()
 * will not be able to unload object files that contain CAFs.
 */
void initLinker (void)
{
    initLinker_(1);
}

481
void
482
initLinker_ (int retain_cafs)
483
{
484
    RtsSymbolVal *sym;
Simon Marlow's avatar
Simon Marlow committed
485
#if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
486
    int compileResult;
Simon Marlow's avatar
Simon Marlow committed
487
#endif
488

489 490
    IF_DEBUG(linker, debugBelch("initLinker: start\n"));

491
    /* Make initLinker idempotent, so we can call it
Gabor Greif's avatar
typo  
Gabor Greif committed
492
       before every relevant operation; that means we
493
       don't need to initialise the linker separately */
Ian Lynagh's avatar
Ian Lynagh committed
494 495 496
    if (linker_init_done == 1) {
        IF_DEBUG(linker, debugBelch("initLinker: idempotent return\n"));
        return;
497 498
    } else {
        linker_init_done = 1;
499 500
    }

501 502 503
    objects = NULL;
    unloaded_objects = NULL;

504 505
#if defined(THREADED_RTS)
    initMutex(&linker_mutex);
506
    initMutex(&linker_unloaded_mutex);
507
#if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
508
    initMutex(&dl_mutex);
509
#endif
510
#endif
511 512 513 514
    symhash = allocStrHashTable();

    /* populate the symbol table with stuff from the RTS */
    for (sym = rtsSyms; sym->lbl != NULL; sym++) {
515 516 517 518
        if (! ghciInsertSymbolTable(WSTR("(GHCi built-in symbols)"),
                                    symhash, sym->lbl, sym->addr, HS_BOOL_FALSE, NULL)) {
            barf("ghciInsertSymbolTable failed");
        }
Ian Lynagh's avatar
Ian Lynagh committed
519
        IF_DEBUG(linker, debugBelch("initLinker: inserting rts symbol %s, %p\n", sym->lbl, sym->addr));
520
    }
521
#   if defined(OBJFORMAT_MACHO) && defined(powerpc_HOST_ARCH)
522 523
    machoInitSymbolsWithoutUnderscore();
#   endif
524 525 526 527 528
    /* GCC defines a special symbol __dso_handle which is resolved to NULL if
       referenced from a statically linked module. We need to mimic this, but
       we cannot use NULL because we use it to mean nonexistent symbols. So we
       use an arbitrary (hopefully unique) address here.
    */
529 530 531 532
    if (! ghciInsertSymbolTable(WSTR("(GHCi special symbols)"),
                                symhash, "__dso_handle", (void *)0x12345687, HS_BOOL_FALSE, NULL)) {
        barf("ghciInsertSymbolTable failed");
    }
533

Gabor Greif's avatar
Gabor Greif committed
534
    // Redirect newCAF to newRetainedCAF if retain_cafs is true.
535 536
    if (! ghciInsertSymbolTable(WSTR("(GHCi built-in symbols)"), symhash,
                                MAYBE_LEADING_UNDERSCORE_STR("newCAF"),
537
                                retain_cafs ? newRetainedCAF : newGCdCAF,
538 539 540
                                HS_BOOL_FALSE, NULL)) {
        barf("ghciInsertSymbolTable failed");
    }
541

542
#   if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
543
#   if defined(RTLD_DEFAULT)
544 545
    dl_prog_handle = RTLD_DEFAULT;
#   else
546
    dl_prog_handle = dlopen(NULL, RTLD_LAZY);
547
#   endif /* RTLD_DEFAULT */
548 549

    compileResult = regcomp(&re_invalid,
550
           "(([^ \t()])+\\.so([^ \t:()])*):([ \t])*(invalid ELF header|file too short)",
551
           REG_EXTENDED);
Ian Lynagh's avatar
Ian Lynagh committed
552 553 554
    if (compileResult != 0) {
        barf("Compiling re_invalid failed");
    }
555
    compileResult = regcomp(&re_realso,
556
           "(GROUP|INPUT) *\\( *([^ )]+)",
557
           REG_EXTENDED);
Ian Lynagh's avatar
Ian Lynagh committed
558 559 560
    if (compileResult != 0) {
        barf("Compiling re_realso failed");
    }
561
#   endif
562

563
#if !defined(ALWAYS_PIC) && defined(x86_64_HOST_ARCH)
564 565 566 567 568
    if (RtsFlags.MiscFlags.linkerMemBase != 0) {
        // User-override for mmap_32bit_base
        mmap_32bit_base = (void*)RtsFlags.MiscFlags.linkerMemBase;
    }
#endif
569 570 571 572 573 574 575

#if defined(mingw32_HOST_OS)
    /*
     * These two libraries cause problems when added to the static link,
     * but are necessary for resolving symbols in GHCi, hence we load
     * them manually here.
     */
576 577
    addDLL(WSTR("msvcrt"));
    addDLL(WSTR("kernel32"));
578
    addDLLHandle(WSTR("*.exe"), GetModuleHandle(NULL));
579
#endif
580

Tamar Christina's avatar
Tamar Christina committed
581
#ifdef USE_MMAP
582
    m32_allocator_init(&allocator);
Tamar Christina's avatar
Tamar Christina committed
583
#endif
584

585 586
    IF_DEBUG(linker, debugBelch("initLinker: done\n"));
    return;
587 588
}

589 590 591 592 593 594 595 596 597 598 599
void
exitLinker( void ) {
#if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
   if (linker_init_done == 1) {
      regfree(&re_invalid);
      regfree(&re_realso);
#ifdef THREADED_RTS
      closeMutex(&dl_mutex);
#endif
   }
#endif
600 601 602
   if (linker_init_done == 1) {
       freeHashTable(symhash, free);
   }
603 604 605
#ifdef THREADED_RTS
   closeMutex(&linker_mutex);
#endif
606 607
}

608
/* -----------------------------------------------------------------------------
609 610 611
 *                  Loading DLL or .so dynamic libraries
 * -----------------------------------------------------------------------------
 *
612 613 614 615
 * Add a DLL from which symbols may be found.  In the ELF case, just
 * do RTLD_GLOBAL-style add, so no further messing around needs to
 * happen in order that symbols in the loaded .so are findable --
 * lookupSymbol() will subsequently see them by dlsym on the program's
616 617
 * dl-handle.  Returns NULL if success, otherwise ptr to an err msg.
 *
618
 * In the PEi386 case, open the DLLs and put handles to them in a
619
 * linked list.  When looking for a symbol, try all handles in the
620 621 622 623 624
 * list.  This means that we need to load even DLLs that are guaranteed
 * to be in the ghc.exe image already, just so we can get a handle
 * to give to loadSymbol, so that we can find the symbols.  For such
 * libraries, the LoadLibrary call should be a no-op except for returning
 * the handle.
625
 *
626
 */
627 628 629 630 631 632

#if defined(OBJFORMAT_PEi386)
/* A record for storing handles into DLLs. */

typedef
   struct _OpenedDLL {
633
      pathchar*          name;
634 635
      struct _OpenedDLL* next;
      HINSTANCE instance;
636
   }
637 638 639 640
   OpenedDLL;

/* A list thereof. */
static OpenedDLL* opened_dlls = NULL;
641 642 643 644 645 646 647 648 649 650 651 652

/* A record for storing indirectly linked functions from DLLs. */
typedef struct _IndirectAddr {
   void*                 addr;   /* the stored address */
   struct _IndirectAddr* next;   /* next record in the list */
} IndirectAddr;

/* A list thereof. */
static IndirectAddr* indirects = NULL;

653 654 655 656 657 658 659 660 661 662
/*
 * Register a DLL instance as a place to search for symbols.
 *
 * A new record is pushed on the front of opened_dlls.  The record keeps
 * its own copy of dll_name, or NULL when no name is given.
 */
void addDLLHandle(pathchar* dll_name, HINSTANCE instance) {
   OpenedDLL* node = stgMallocBytes( sizeof(OpenedDLL), "addDLLHandle" );

   if (dll_name) {
      node->name = pathdup(dll_name);
   } else {
      node->name = NULL;
   }
   node->instance = instance;

   /* link in at the head of the list */
   node->next  = opened_dlls;
   opened_dlls = node;
}

663 664
#endif

665
#  if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
666

667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688
/* Suppose in ghci we load a temporary SO for a module containing
       f = 1
   and then modify the module, recompile, and load another temporary
   SO with
       f = 2
   Then as we don't unload the first SO, dlsym will find the
       f = 1
   symbol whereas we want the
       f = 2
   symbol. We therefore need to keep our own SO handle list, and
   try SOs in the right order. */

typedef struct _OpenedSO {
   struct _OpenedSO* next;     /* next record in the list */
   void*             handle;   /* handle returned by dlopen() */
} OpenedSO;

/* A list thereof. */
static OpenedSO* openedSOs = NULL;

689
static const char *
690 691
internal_dlopen(const char *dll_name)
{
692
   OpenedSO* o_so;
693
   void *hdl;
694 695
   const char *errmsg;
   char *errmsg_copy;
696

697 698
   // omitted: RTLD_NOW
   // see http://www.haskell.org/pipermail/cvs-ghc/2007-September/038570.html
699 700 701 702 703 704 705 706 707 708
   IF_DEBUG(linker,
      debugBelch("internal_dlopen: dll_name = '%s'\n", dll_name));

   //-------------- Begin critical section ------------------
   // This critical section is necessary because dlerror() is not
   // required to be reentrant (see POSIX -- IEEE Std 1003.1-2008)
   // Also, the error message returned must be copied to preserve it
   // (see POSIX also)

   ACQUIRE_LOCK(&dl_mutex);
709
   hdl = dlopen(dll_name, RTLD_LAZY|RTLD_LOCAL); /* see Note [RTLD_LOCAL] */
dons's avatar
dons committed
710

711
   errmsg = NULL;
712 713 714 715
   if (hdl == NULL) {
      /* dlopen failed; return a ptr to the error msg. */
      errmsg = dlerror();
      if (errmsg == NULL) errmsg = "addDLL: unknown error";
716 717 718
      errmsg_copy = stgMallocBytes(strlen(errmsg)+1, "addDLL");
      strcpy(errmsg_copy, errmsg);
      errmsg = errmsg_copy;
719 720 721 722 723
   } else {
      o_so = stgMallocBytes(sizeof(OpenedSO), "addDLL");
      o_so->handle = hdl;
      o_so->next   = openedSOs;
      openedSOs    = o_so;
724
   }
725

726 727 728 729 730
   RELEASE_LOCK(&dl_mutex);
   //--------------- End critical section -------------------

   return errmsg;
}
731

732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749
/*
  Note [RTLD_LOCAL]

  In GHCi we want to be able to override previous .so's with newly
  loaded .so's when we recompile something.  This further implies that
  when we look up a symbol in internal_dlsym() we have to iterate
  through the loaded libraries (in order from most recently loaded to
  oldest) looking up the symbol in each one until we find it.

  However, this can cause problems for some symbols that are copied
  by the linker into the executable image at runtime - see #8935 for a
  lengthy discussion.  To solve that problem we need to look up
  symbols in the main executable *first*, before attempting to look
  them up in the loaded .so's.  But in order to make that work, we
  have to always call dlopen with RTLD_LOCAL, so that the loaded
  libraries don't populate the global symbol table.
*/

750
static void *
751
internal_dlsym(const char *symbol) {
752 753 754 755 756 757
    OpenedSO* o_so;
    void *v;

    // We acquire dl_mutex as concurrent dl* calls may alter dlerror
    ACQUIRE_LOCK(&dl_mutex);
    dlerror();
758 759 760 761 762 763 764
    // look in program first
    v = dlsym(dl_prog_handle, symbol);
    if (dlerror() == NULL) {
        RELEASE_LOCK(&dl_mutex);
        return v;
    }

765 766 767 768 769 770 771 772 773 774
    for (o_so = openedSOs; o_so != NULL; o_so = o_so->next) {
        v = dlsym(o_so->handle, symbol);
        if (dlerror() == NULL) {
            RELEASE_LOCK(&dl_mutex);
            return v;
        }
    }
    RELEASE_LOCK(&dl_mutex);
    return v;
}
775 776 777
#  endif

const char *
778
addDLL( pathchar *dll_name )
779 780 781 782 783 784
{
#  if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
   /* ------------------- ELF DLL loader ------------------- */

#define NMATCH 5
   regmatch_t match[NMATCH];
785
   const char *errmsg;
786 787 788 789 790 791 792 793 794 795
   FILE* fp;
   size_t match_length;
#define MAXLINE 1000
   char line[MAXLINE];
   int result;

   IF_DEBUG(linker, debugBelch("addDLL: dll_name = '%s'\n", dll_name));
   errmsg = internal_dlopen(dll_name);

   if (errmsg == NULL) {
796 797
      return NULL;
   }
798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819

   // GHC Trac ticket #2615
   // On some systems (e.g., Gentoo Linux) dynamic files (e.g. libc.so)
   // contain linker scripts rather than ELF-format object code. This
   // code handles the situation by recognizing the real object code
   // file name given in the linker script.
   //
   // If an "invalid ELF header" error occurs, it is assumed that the
   // .so file contains a linker script instead of ELF object code.
   // In this case, the code looks for the GROUP ( ... ) linker
   // directive. If one is found, the first file name inside the
   // parentheses is treated as the name of a dynamic library and the
   // code attempts to dlopen that file. If this is also unsuccessful,
   // an error message is returned.

   // see if the error message is due to an invalid ELF header
   IF_DEBUG(linker, debugBelch("errmsg = '%s'\n", errmsg));
   result = regexec(&re_invalid, errmsg, (size_t) NMATCH, match, 0);
   IF_DEBUG(linker, debugBelch("result = %i\n", result));
   if (result == 0) {
      // success -- try to read the named file as a linker script
      match_length = (size_t) stg_min((match[1].rm_eo - match[1].rm_so),
Ian Lynagh's avatar
Ian Lynagh committed
820
                                 MAXLINE-1);
821 822 823 824
      strncpy(line, (errmsg+(match[1].rm_so)),match_length);
      line[match_length] = '\0'; // make sure string is null-terminated
      IF_DEBUG(linker, debugBelch ("file name = '%s'\n", line));
      if ((fp = fopen(line, "r")) == NULL) {
Ian Lynagh's avatar
Ian Lynagh committed
825
         return errmsg; // return original error if open fails
826
      }
827
      // try to find a GROUP or INPUT ( ... ) command
828
      while (fgets(line, MAXLINE, fp) != NULL) {
Ian Lynagh's avatar
Ian Lynagh committed
829 830
         IF_DEBUG(linker, debugBelch("input line = %s", line));
         if (regexec(&re_realso, line, (size_t) NMATCH, match, 0) == 0) {
831 832
            // success -- try to dlopen the first named file
            IF_DEBUG(linker, debugBelch("match%s\n",""));
833
            line[match[2].rm_eo] = '\0';
834
            stgFree((void*)errmsg); // Free old message before creating new one
835
            errmsg = internal_dlopen(line+match[2].rm_so);
Ian Lynagh's avatar
Ian Lynagh committed
836 837
            break;
         }
838 839 840
         // if control reaches here, no GROUP or INPUT ( ... ) directive
         // was found and the original error message is returned to the
         // caller
841 842 843 844
      }
      fclose(fp);
   }
   return errmsg;
845

846
#  elif defined(OBJFORMAT_PEi386)
847
   /* ------------------- Win32 DLL loader ------------------- */
848

849
   pathchar*      buf;
850
   OpenedDLL* o_dll;
851
   HINSTANCE  instance;
852

853
   /* debugBelch("\naddDLL; dll_name = `%s'\n", dll_name); */
854 855 856

   /* See if we've already got it, and ignore if so. */
   for (o_dll = opened_dlls; o_dll != NULL; o_dll = o_dll->next) {
857
      if (0 == pathcmp(o_dll->name, dll_name))
858
         return NULL;
859 860
   }

861 862 863 864
   /* The file name has no suffix (yet) so that we can try
      both foo.dll and foo.drv

      The documentation for LoadLibrary says:
Ian Lynagh's avatar
Ian Lynagh committed
865 866 867 868 869
        If no file name extension is specified in the lpFileName
        parameter, the default library extension .dll is
        appended. However, the file name string can include a trailing
        point character (.) to indicate that the module name has no
        extension. */
870

871 872 873
   size_t bufsize = pathlen(dll_name) + 10;
   buf = stgMallocBytes(bufsize * sizeof(wchar_t), "addDLL");
   snwprintf(buf, bufsize, L"%s.DLL", dll_name);
874
   instance = LoadLibraryW(buf);
875
   if (instance == NULL) {
876 877
       if (GetLastError() != ERROR_MOD_NOT_FOUND) goto error;
       // KAA: allow loading of drivers (like winspool.drv)
878
       snwprintf(buf, bufsize, L"%s.DRV", dll_name);
879
       instance = LoadLibraryW(buf);
880 881 882
       if (instance == NULL) {
           if (GetLastError() != ERROR_MOD_NOT_FOUND) goto error;
           // #1883: allow loading of unix-style libfoo.dll DLLs
883
           snwprintf(buf, bufsize, L"lib%s.DLL", dll_name);
884
           instance = LoadLibraryW(buf);
885 886 887 888
           if (instance == NULL) {
               goto error;
           }
       }
889
   }
sof's avatar
sof committed
890
   stgFree(buf);
891

892
   addDLLHandle(dll_name, instance);
893 894

   return NULL;
895 896 897

error:
   stgFree(buf);
898
   sysErrorBelch("%" PATH_FMT, dll_name);
Simon Marlow's avatar
Simon Marlow committed
899

900 901
   /* LoadLibrary failed; return a ptr to the error msg. */
   return "addDLL: could not load DLL";
Simon Marlow's avatar
Simon Marlow committed
902 903 904 905

#  else
   barf("addDLL: not implemented on this platform");
#  endif
906 907
}

908 909
/* -----------------------------------------------------------------------------
 * insert a symbol in the hash table
910 911
 *
 * Returns: 0 on failure, nozero on success
912
 */
913
HsInt insertSymbol(pathchar* obj_name, char* key, void* data)
914
{
915
    return ghciInsertSymbolTable(obj_name, symhash, key, data, HS_BOOL_FALSE, NULL);
916 917
}

918 919
/* -----------------------------------------------------------------------------
 * lookup a symbol in the hash table
920
 */
921
static void* lookupSymbol_ (char *lbl)
922
{
923
    void *val;
924
    IF_DEBUG(linker, debugBelch("lookupSymbol: looking up %s\n", lbl));
925

926
    ASSERT(symhash != NULL);
927

928
    if (!ghciLookupSymbolTable(symhash, lbl, &val)) {
Ian Lynagh's avatar
Ian Lynagh committed
929
        IF_DEBUG(linker, debugBelch("lookupSymbol: symbol not found\n"));
930
#       if defined(OBJFORMAT_ELF)
931
        return internal_dlsym(lbl);
932
#       elif defined(OBJFORMAT_MACHO)
933 934 935 936
#       if HAVE_DLFCN_H
        /* On OS X 10.3 and later, we use dlsym instead of the old legacy
           interface.

937
           HACK: On OS X, all symbols are prefixed with an underscore.
938
                 However, dlsym wants us to omit the leading underscore from the
939
                 symbol name -- the dlsym routine puts it back on before searching
Austin Seipp's avatar
Austin Seipp committed
940
                 for the symbol. For now, we simply strip it off here (and ONLY
941 942
                 here).
        */
943
        IF_DEBUG(linker, debugBelch("lookupSymbol: looking up %s with dlsym\n", lbl));
Austin Seipp's avatar
Austin Seipp committed
944
        ASSERT(lbl[0] == '_');
945
        return internal_dlsym(lbl + 1);
946
#       else
947
        if (NSIsSymbolNameDefined(lbl)) {
Ian Lynagh's avatar
Ian Lynagh committed
948 949 950 951 952
            NSSymbol symbol = NSLookupAndBindSymbol(lbl);
            return NSAddressOfSymbol(symbol);
        } else {
            return NULL;
        }
953
#       endif /* HAVE_DLFCN_H */
954
#       elif defined(OBJFORMAT_PEi386)
955
        void* sym;
956

957 958 959 960
/* See Note [mingw-w64 name decoration scheme] */
#ifndef x86_64_HOST_ARCH
         zapTrailingAtSign ( (unsigned char*)lbl );
#endif
Simon Marlow's avatar
Simon Marlow committed
961
        sym = lookupSymbolInDLLs((unsigned char*)lbl);
962
        return sym; // might be NULL if not found
963

ken's avatar
ken committed
964 965 966
#       else
        ASSERT(2+2 == 5);
        return NULL;
967
#       endif
968
    } else {
Ian Lynagh's avatar
Ian Lynagh committed
969 970
        IF_DEBUG(linker, debugBelch("lookupSymbol: value of %s is %p\n", lbl, val));
        return val;
971 972 973
    }
}

974 975 976 977 978 979 980 981
/* Locked entry point for symbol lookup: runs lookupSymbol_ while holding
 * linker_mutex and returns its result unchanged. */
void* lookupSymbol( char *lbl )
{
    void *addr;

    ACQUIRE_LOCK(&linker_mutex);
    addr = lookupSymbol_(lbl);
    RELEASE_LOCK(&linker_mutex);

    return addr;
}

982
/* -----------------------------------------------------------------------------
   Create a StablePtr for a foreign export.  This is normally called by
   a C function with __attribute__((constructor)), which is generated
   by GHC and linked into the module.

   If the object code is being loaded dynamically, then we remember
   which StablePtrs were allocated by the constructors and free them
   again in unloadObj().
   -------------------------------------------------------------------------- */

static ObjectCode *loading_obj = NULL;

/* Obtain a stable pointer for p and return it.  While an object is being
 * loaded (loading_obj is non-NULL), the new stable pointer is also pushed
 * onto the front of that object's stable_ptrs list. */
StgStablePtr foreignExportStablePtr (StgPtr p)
{
    StgStablePtr *sptr = getStablePtr(p);
    ForeignExportStablePtr *entry;

    /* No object is currently being loaded: nothing to record. */
    if (loading_obj == NULL) {
        return sptr;
    }

    entry = stgMallocBytes(sizeof(ForeignExportStablePtr),
                           "foreignExportStablePtr");
    entry->stable_ptr = sptr;

    /* Prepend to the list owned by the object under construction. */
    entry->next = loading_obj->stable_ptrs;
    loading_obj->stable_ptrs = entry;

    return sptr;
}


1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026
/* -----------------------------------------------------------------------------
 * Debugging aid: look in GHCi's object symbol tables for symbols
 * within DELTA bytes of the specified address, and show their names.
 */
#ifdef DEBUG
void ghci_enquire ( char* addr );

/* Walk every symbol of every object on the `objects` list, look each name up
 * in symhash, and print those whose address lies within DELTA bytes of addr
 * (inclusive on both sides).  Symbols with a NULL name, or for which no
 * address is obtained, are silently skipped. */
void ghci_enquire ( char* addr )
{
   const int DELTA = 64;
   ObjectCode *oc;
   int idx;

   for (oc = objects; oc != NULL; oc = oc->next) {
      for (idx = 0; idx < oc->n_symbols; idx++) {
         char *name  = oc->symbols[idx];
         char *where = NULL;

         if (name == NULL) {
            continue;
         }

         /* `where` stays NULL unless the lookup stores an address in it. */
         ghciLookupSymbolTable(symhash, name, (void **)&where);
         if (where == NULL) {
            continue;
         }

         if (addr-DELTA <= where && where <= addr+DELTA) {
            debugBelch("%p + %3d  ==  `%s'\n", addr, (int)(where - addr), name);
         }
      }
   }
}
#endif

Simon Marlow's avatar
Simon Marlow committed
1047
#ifdef USE_MMAP
1048
/* Round x up / down to a multiple of size.  The mask arithmetic is only
 * correct when size is a power of two.  Every argument is parenthesized so
 * that expression arguments (e.g. `a | b`, `n << 1`) keep their intended
 * grouping.  size is expanded more than once, so it must be free of side
 * effects. */
#define ROUND_UP(x,size) (((x) + (size) - 1) & ~((size) - 1))
#define ROUND_DOWN(x,size) ((x) & ~((size) - 1))

static StgWord getPageSize(void)
{
    static StgWord pagesize = 0;
    if (pagesize == 0) {
Simon Marlow's avatar
Simon Marlow committed
1055
        pagesize = sysconf(_SC_PAGESIZE);
1056
    }