Linker.c 229 KB
Newer Older
1 2
/* -----------------------------------------------------------------------------
 *
 * (c) The GHC Team, 2000-2012
 *
 * RTS Object Linker
 *
 * ---------------------------------------------------------------------------*/

sof's avatar
sof committed
9
#if 0
10
#include "PosixSource.h"
sof's avatar
sof committed
11
#endif
12

13 14
#include "Rts.h"
#include "HsFFI.h"
Simon Marlow's avatar
Simon Marlow committed
15 16

#include "sm/Storage.h"
Simon Marlow's avatar
Simon Marlow committed
17
#include "Stats.h"
18
#include "Hash.h"
19
#include "LinkerInternals.h"
20
#include "RtsUtils.h"
21
#include "Trace.h"
Simon Marlow's avatar
Simon Marlow committed
22
#include "StgPrimFloat.h" // for __int_encodeFloat etc.
23
#include "Proftimer.h"
24
#include "GetEnv.h"
25
#include "Stable.h"
26
#include "RtsSymbols.h"
27
#include "Profiling.h"
Simon Marlow's avatar
Simon Marlow committed
28 29 30 31

#if !defined(mingw32_HOST_OS)
#include "posix/Signals.h"
#endif
32

Simon Marlow's avatar
Simon Marlow committed
33 34 35
// get protos for is*()
#include <ctype.h>

36
#ifdef HAVE_SYS_TYPES_H
37
#include <sys/types.h>
38 39
#endif

Ian Lynagh's avatar
Ian Lynagh committed
40
#include <inttypes.h>
41 42
#include <stdlib.h>
#include <string.h>
43 44
#include <stdio.h>
#include <assert.h>
Simon Marlow's avatar
Simon Marlow committed
45
#include <libgen.h>
46

47
#ifdef HAVE_SYS_STAT_H
48
#include <sys/stat.h>
49
#endif
50

51
#if defined(HAVE_DLFCN_H)
52
#include <dlfcn.h>
53
#endif
54

55 56 57
#if (defined(powerpc_HOST_ARCH) && defined(linux_HOST_OS)) \
 || (!defined(powerpc_HOST_ARCH) && \
    (   defined(linux_HOST_OS)     || defined(freebsd_HOST_OS) || \
58 59
        defined(dragonfly_HOST_OS) || defined(netbsd_HOST_OS ) || \
        defined(openbsd_HOST_OS  ) || defined(darwin_HOST_OS ) || \
60 61
        defined(kfreebsdgnu_HOST_OS) || defined(gnu_HOST_OS  ) || \
        defined(solaris2_HOST_OS)))
62
/* Don't use mmap on powerpc/darwin as the mmap there doesn't support
63 64 65 66
 * reallocating but we need to allocate jump islands just after each
 * object images. Otherwise relative branches to jump islands can fail
 * due to 24-bits displacement overflow.
 */
67
#define USE_MMAP 1
68 69
#include <fcntl.h>
#include <sys/mman.h>
dons's avatar
dons committed
70 71 72 73 74

#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif

75 76 77 78
#else

#define USE_MMAP 0

79 80
#endif

81

82 83 84
/* PowerPC and ARM have relative branch instructions with only 24 bit
 * displacements and therefore need jump islands contiguous with each object
 * code module.
85
 */
86 87 88 89 90 91 92 93
#if defined(powerpc_HOST_ARCH)
#define SHORT_REL_BRANCH 1
#endif
#if defined(arm_HOST_ARCH)
#define SHORT_REL_BRANCH 1
#endif

#if (USE_MMAP && defined(SHORT_REL_BRANCH) && defined(linux_HOST_OS))
94 95 96 97 98
#define USE_CONTIGUOUS_MMAP 1
#else
#define USE_CONTIGUOUS_MMAP 0
#endif

pcapriotti's avatar
pcapriotti committed
99
#if defined(linux_HOST_OS) || defined(solaris2_HOST_OS) || defined(freebsd_HOST_OS) || defined(kfreebsdgnu_HOST_OS) || defined(dragonfly_HOST_OS) || defined(netbsd_HOST_OS) || defined(openbsd_HOST_OS) || defined(gnu_HOST_OS)
100
#  define OBJFORMAT_ELF
Ian Lynagh's avatar
Ian Lynagh committed
101 102
#  include <regex.h>    // regex is already used by dlopen() so this is OK
                        // to use here without requiring an additional lib
103
#elif defined (mingw32_HOST_OS)
104
#  define OBJFORMAT_PEi386
105
#  include <windows.h>
106
#  include <shfolder.h> /* SHGetFolderPathW */
sof's avatar
sof committed
107
#  include <math.h>
108
#  include <wchar.h>
109
#elif defined(darwin_HOST_OS)
110
#  define OBJFORMAT_MACHO
111
#  include <regex.h>
Ian Lynagh's avatar
Ian Lynagh committed
112 113
#  include <mach/machine.h>
#  include <mach-o/fat.h>
114 115 116
#  include <mach-o/loader.h>
#  include <mach-o/nlist.h>
#  include <mach-o/reloc.h>
117 118 119
#if defined(powerpc_HOST_ARCH)
#  include <mach-o/ppc/reloc.h>
#endif
120 121 122
#if defined(x86_64_HOST_ARCH)
#  include <mach-o/x86_64/reloc.h>
#endif
123 124
#endif

125 126 127 128
#if defined(x86_64_HOST_ARCH) && defined(darwin_HOST_OS)
#define ALWAYS_PIC
#endif

129 130 131 132
#if defined(dragonfly_HOST_OS)
#include <sys/tls.h>
#endif

133 134 135 136 137 138 139
/* One entry of the linker's symbol table: what a symbol name resolves to,
 * which object supplied it, and whether the definition may still be
 * replaced by a later one. */
typedef struct _RtsSymbolInfo {
    void *value;             /* address the symbol resolves to */
    const ObjectCode *owner; /* object that defined it; NULL for symbols
                                inserted by the linker itself at init */
    HsBool weak;             /* true while the definition is weak, i.e. may
                                still be overridden; cleared on override
                                and on first lookup */
} RtsSymbolInfo;

/* Hash table mapping symbol names to RtsSymbolInfo */
140
static /*Str*/HashTable *symhash;
141

142
/* List of currently loaded objects */
Ian Lynagh's avatar
Ian Lynagh committed
143
ObjectCode *objects = NULL;     /* initially empty */
144

145 146 147 148
/* List of objects that have been unloaded via unloadObj(), but are waiting
   to be actually freed via checkUnload() */
ObjectCode *unloaded_objects = NULL; /* initially empty */

149
#ifdef THREADED_RTS
150
/* This protects all the Linker's global state except unloaded_objects */
151
Mutex linker_mutex;
152 153 154 155
/*
 * This protects unloaded_objects.  We have a separate mutex for this, because
 * the GC needs to access unloaded_objects in checkUnload, while the linker only
 * needs to access unloaded_objects in unloadObj(), so this allows most linker
 * operations to proceed concurrently with the GC.
 */
Mutex linker_unloaded_mutex;
159 160
#endif

161 162 163
/* Type of the initializer */
typedef void (*init_t) (int argc, char **argv, char **env);

164
static HsInt isAlreadyLoaded( pathchar *path );
165
static HsInt loadOc( ObjectCode* oc );
166
static ObjectCode* mkOc( pathchar *path, char *image, int imageSize,
Erik de Castro Lopo's avatar
Erik de Castro Lopo committed
167 168
                         rtsBool mapped, char *archiveMemberName,
                         int misalignment
169 170
                       );

171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202
// Use wchar_t for pathnames on Windows (#5697)
#if defined(mingw32_HOST_OS)
#define pathcmp wcscmp
#define pathlen wcslen
#define pathopen _wfopen
#define pathstat _wstat
#define struct_stat struct _stat
#define open wopen
#define WSTR(s) L##s
#else
#define pathcmp strcmp
#define pathlen strlen
#define pathopen fopen
#define pathstat stat
#define struct_stat struct stat
#define WSTR(s) s
#endif

/* Return a freshly allocated copy of `path`.
 * On Windows paths are wide strings and the copy comes from wcsdup();
 * elsewhere the copy is obtained from stgMallocBytes(). */
static pathchar* pathdup(pathchar *path)
{
#if defined(mingw32_HOST_OS)
    return wcsdup(path);
#else
    /* strdup() isn't a POSIX function, so copy by hand:
       measure once (including the terminating NUL), allocate, copy. */
    size_t nbytes = strlen(path) + 1;
    pathchar *copy = stgMallocBytes( nbytes, "loadObj" );
    memcpy(copy, path, nbytes);
    return copy;
#endif
}


203
#if defined(OBJFORMAT_ELF)
204 205 206
static int ocVerifyImage_ELF    ( ObjectCode* oc );
static int ocGetNames_ELF       ( ObjectCode* oc );
static int ocResolve_ELF        ( ObjectCode* oc );
207
static int ocRunInit_ELF        ( ObjectCode* oc );
208
#if NEED_SYMBOL_EXTRAS
209
static int ocAllocateSymbolExtras_ELF ( ObjectCode* oc );
210
#endif
211
#elif defined(OBJFORMAT_PEi386)
212 213 214
static int ocVerifyImage_PEi386 ( ObjectCode* oc );
static int ocGetNames_PEi386    ( ObjectCode* oc );
static int ocResolve_PEi386     ( ObjectCode* oc );
215
static int ocRunInit_PEi386     ( ObjectCode* oc );
216
static void *lookupSymbolInDLLs ( unsigned char *lbl );
217 218 219 220
/* See Note [mingw-w64 name decoration scheme] */
#ifndef x86_64_HOST_ARCH
 static void zapTrailingAtSign   ( unsigned char *sym );
#endif
221
static char *allocateImageAndTrampolines (
Austin Seipp's avatar
Austin Seipp committed
222
   pathchar* arch_name, char* member_name,
223
#if defined(x86_64_HOST_ARCH)
Austin Seipp's avatar
Austin Seipp committed
224
   FILE* f,
225 226 227 228 229 230 231 232 233
#endif
   int size );
#if defined(x86_64_HOST_ARCH)
static int ocAllocateSymbolExtras_PEi386 ( ObjectCode* oc );
static size_t makeSymbolExtra_PEi386( ObjectCode* oc, size_t, char* symbol );
#define PEi386_IMAGE_OFFSET 4
#else
#define PEi386_IMAGE_OFFSET 0
#endif
234 235 236 237
#elif defined(OBJFORMAT_MACHO)
static int ocVerifyImage_MachO    ( ObjectCode* oc );
static int ocGetNames_MachO       ( ObjectCode* oc );
static int ocResolve_MachO        ( ObjectCode* oc );
238
static int ocRunInit_MachO        ( ObjectCode* oc );
239

240
#if (USE_MMAP == 0)
241
static int machoGetMisalignment( FILE * );
Ian Lynagh's avatar
Ian Lynagh committed
242
#endif
243
#if NEED_SYMBOL_EXTRAS
244 245
static int ocAllocateSymbolExtras_MachO ( ObjectCode* oc );
#endif
246
#ifdef powerpc_HOST_ARCH
247
static void machoInitSymbolsWithoutUnderscore( void );
248
#endif
249
#endif
250

251 252
#if defined(OBJFORMAT_PEi386)
// MingW-w64 is missing these from the implementation. So we have to look them up
253 254
typedef DLL_DIRECTORY_COOKIE(WINAPI *LPAddDLLDirectory)(PCWSTR NewDirectory);
typedef WINBOOL(WINAPI *LPRemoveDLLDirectory)(DLL_DIRECTORY_COOKIE Cookie);
255 256
#endif

257 258
static void freeProddableBlocks (ObjectCode *oc);

259
#if USE_MMAP
260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286
/**
 * An allocated page being filled by the allocator.
 *
 * One of these describes a single slot of the allocator's page table
 * (see the pages[] array of struct m32_allocator_t).
 */
struct m32_alloc_t {
   void * base_addr;             // Page address
   unsigned int current_size;    // Number of bytes already reserved
};

#define M32_MAX_PAGES 32

/**
 * Allocator
 *
 * Currently an allocator is just a set of pages being filled. The maximum
 * number of pages can be configured with M32_MAX_PAGES.
 *
 * Note that the typedef name `m32_allocator` is a *pointer* to the struct,
 * not the struct itself.
 */
typedef struct m32_allocator_t {
   struct m32_alloc_t pages[M32_MAX_PAGES];  // fixed-size table of page slots
} * m32_allocator;

// We use a global memory allocator
static struct m32_allocator_t allocator;

struct m32_allocator_t;
static void m32_allocator_init(struct m32_allocator_t *m32);
#endif

287 288 289 290 291 292 293 294 295 296 297 298 299
/* on x86_64 we have a problem with relocating symbol references in
 * code that was compiled without -fPIC.  By default, the small memory
 * model is used, which assumes that symbol references can fit in a
 * 32-bit slot.  The system dynamic linker makes this work for
 * references to shared libraries by either (a) allocating a jump
 * table slot for code references, or (b) moving the symbol at load
 * time (and copying its contents, if necessary) for data references.
 *
 * We unfortunately can't tell whether symbol references are to code
 * or data.  So for now we assume they are code (the vast majority
 * are), and allocate jump-table slots.  Unfortunately this will
 * SILENTLY generate crashing code for data references.  This hack is
 * enabled by X86_64_ELF_NONPIC_HACK.
Ian Lynagh's avatar
Ian Lynagh committed
300
 *
301 302 303 304 305 306 307 308
 * One workaround is to use shared Haskell libraries.  This is
 * coming.  Another workaround is to keep the static libraries but
 * compile them with -fPIC, because that will generate PIC references
 * to data which can be relocated.  The PIC code is still too green to
 * do this systematically, though.
 *
 * See bug #781
 * See thread http://www.haskell.org/pipermail/cvs-ghc/2007-September/038458.html
309 310 311 312 313 314 315 316 317 318 319 320
 *
 * Naming Scheme for Symbol Macros
 *
 * SymI_*: symbol is internal to the RTS. It resides in an object
 *         file/library that is statically linked.
 * SymE_*: symbol is external to the RTS library. It might be linked
 *         dynamically.
 *
 * Sym*_HasProto  : the symbol prototype is imported in an include file
 *                  or defined explicitly
 * Sym*_NeedsProto: the symbol is undefined and we add a dummy
 *                  default proto extern void sym(void);
321 322
 */
#define X86_64_ELF_NONPIC_HACK 1
323

324 325 326 327 328 329 330 331 332 333 334 335
/* Link objects into the lower 2Gb on x86_64.  GHC assumes the
 * small memory model on this architecture (see gcc docs,
 * -mcmodel=small).
 *
 * MAP_32BIT not available on OpenBSD/amd64
 */
#if defined(x86_64_HOST_ARCH) && defined(MAP_32BIT)
#define TRY_MAP_32BIT MAP_32BIT
#else
#define TRY_MAP_32BIT 0
#endif

Ben Gamari's avatar
Ben Gamari committed
336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359
/*
  Note [The ARM/Thumb Story]
  ~~~~~~~~~~~~~~~~~~~~~~~~~~

  Support for the ARM architecture is complicated by the fact that ARM has not
  one but several instruction encodings. The two relevant ones here are the original
  ARM encoding and Thumb, a more dense variant of ARM supporting only a subset
  of the instruction set.

  How the CPU decodes a particular instruction is determined by a mode bit. This
  mode bit is set on jump instructions, the value being determined by the low
  bit of the target address: An odd address means the target is a procedure
  encoded in the Thumb encoding whereas an even address means it's a traditional
  ARM procedure (the actual address jumped to is even regardless of the encoding bit).

  Interoperation between Thumb- and ARM-encoded object code (known as "interworking")
  is tricky. If the linker needs to link a call by an ARM object into Thumb code
  (or vice-versa) it will produce a jump island. This, however, is incompatible with
  GHC's tables-next-to-code. For this reason, it is critical that GHC emit
  exclusively ARM or Thumb objects for all Haskell code.

  We still do, however, need to worry about foreign code.
*/

360 361 362 363 364 365 366 367 368 369 370 371 372 373
/*
 * Due to the small memory model (see above), on x86_64 we have to map
 * all our non-PIC object files into the low 2Gb of the address space
 * (why 2Gb and not 4Gb?  Because all addresses must be reachable
 * using a 32-bit signed PC-relative offset). On Linux we can do this
 * using the MAP_32BIT flag to mmap(), however on other OSs
 * (e.g. *BSD, see #2063, and also on Linux inside Xen, see #2512), we
 * can't do this.  So on these systems, we have to pick a base address
 * in the low 2Gb of the address space and try to allocate memory from
 * there.
 *
 * We pick a default address based on the OS, but also make this
 * configurable via an RTS flag (+RTS -xm)
 */
374
#if !defined(ALWAYS_PIC) && defined(x86_64_HOST_ARCH)
375 376 377 378 379 380 381 382 383

#if defined(MAP_32BIT)
// Try to use MAP_32BIT
#define MMAP_32BIT_BASE_DEFAULT 0
#else
// A guess: 1Gb.
#define MMAP_32BIT_BASE_DEFAULT 0x40000000
#endif

Ian Lynagh's avatar
Ian Lynagh committed
384
static void *mmap_32bit_base = (void *)MMAP_32BIT_BASE_DEFAULT;
385 386 387 388 389 390 391
#endif

/* MAP_ANONYMOUS is MAP_ANON on some systems, e.g. OpenBSD */
#if !defined(MAP_ANONYMOUS) && defined(MAP_ANON)
#define MAP_ANONYMOUS MAP_ANON
#endif

392 393
/* -----------------------------------------------------------------------------
 * Insert symbols into hash tables, checking for duplicates.
394 395
 *
 * Returns: 0 on failure, nonzero on success
396
 */
397

398
static int ghciInsertSymbolTable(
399 400
   pathchar* obj_name,
   HashTable *table,
401
   const char* key,
402 403 404
   void *data,
   HsBool weak,
   ObjectCode *owner)
405
{
406 407 408 409 410 411 412 413
   RtsSymbolInfo *pinfo = lookupStrHashTable(table, key);
   if (!pinfo) /* new entry */
   {
      pinfo = stgMallocBytes(sizeof (*pinfo), "ghciInsertToSymbolTable");
      pinfo->value = data;
      pinfo->owner = owner;
      pinfo->weak = weak;
      insertStrHashTable(table, key, pinfo);
414 415 416 417 418 419 420
      return 1;
   }
   else if ((!pinfo->weak || pinfo->value) && weak)
   {
     return 1; /* duplicate weak symbol, throw it away */
   }
   else if (pinfo->weak) /* weak symbol is in the table */
Simon Marlow's avatar
Simon Marlow committed
421
   {
422 423 424 425
      /* override the weak definition with the non-weak one */
      pinfo->value = data;
      pinfo->owner = owner;
      pinfo->weak = HS_BOOL_FALSE;
426
      return 1;
Simon Marlow's avatar
Simon Marlow committed
427 428
   }
   debugBelch(
429
      "GHC runtime linker: fatal error: I found a duplicate definition for symbol\n"
430 431
      "   %s\n"
      "whilst processing object file\n"
432
      "   %" PATH_FMT "\n"
433 434 435 436
      "This could be caused by:\n"
      "   * Loading two different object files which export the same symbol\n"
      "   * Specifying the same object file twice on the GHCi command line\n"
      "   * An incorrect `package.conf' entry, causing some object to be\n"
437
      "     loaded twice.\n",
438 439
      (char*)key,
      obj_name
Simon Marlow's avatar
Simon Marlow committed
440
   );
441
   return 0;
442
}
443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468

/* Look `key` up in `table`.
 *
 * On a hit, stores the symbol's address in *result and returns
 * HS_BOOL_TRUE; the entry is also marked non-weak, so a later insertion
 * can no longer override it.  On a miss, stores NULL in *result and
 * returns HS_BOOL_FALSE. */
static HsBool ghciLookupSymbolTable(HashTable *table,
    const char *key, void **result)
{
    RtsSymbolInfo *entry = lookupStrHashTable(table, key);

    if (entry != NULL) {
        if (entry->weak) {
            IF_DEBUG(linker, debugBelch("lookup: promoting %s\n", key));
        }
        /* Once it's looked up, it can no longer be overridden */
        entry->weak = HS_BOOL_FALSE;
        *result = entry->value;
        return HS_BOOL_TRUE;
    }

    *result = NULL;
    return HS_BOOL_FALSE;
}

/* Remove `key` from `table` and free its entry, but only when the entry
 * exists and was registered by `owner`; otherwise do nothing. */
static void ghciRemoveSymbolTable(HashTable *table, const char *key,
    ObjectCode *owner)
{
    RtsSymbolInfo *entry = lookupStrHashTable(table, key);

    if (entry != NULL && entry->owner == owner) {
        removeStrHashTable(table, key, NULL);
        stgFree(entry);
    }
}
469 470 471
/* -----------------------------------------------------------------------------
 * initialize the object linker
 */
472 473 474 475


static int linker_init_done = 0 ;

476
#if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
477
static void *dl_prog_handle;
478 479 480 481 482
static regex_t re_invalid;
static regex_t re_realso;
#ifdef THREADED_RTS
static Mutex dl_mutex; // mutex to protect dlopen/dlerror critical section
#endif
483 484
#elif defined(OBJFORMAT_PEi386)
void addDLLHandle(pathchar* dll_name, HINSTANCE instance);
485
#endif
486

487 488 489 490
/* Initialise the linker with CAF retention enabled.
 * Equivalent to initLinker_(1). */
void initLinker (void)
{
    // default to retaining CAFs for backwards compatibility.  Most
    // users will want initLinker_(0): otherwise unloadObj() will not
    // be able to unload object files when they contain CAFs.
    initLinker_(1);
}

495
void
496
initLinker_ (int retain_cafs)
497
{
498
    RtsSymbolVal *sym;
Simon Marlow's avatar
Simon Marlow committed
499
#if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
500
    int compileResult;
Simon Marlow's avatar
Simon Marlow committed
501
#endif
502

503 504
    IF_DEBUG(linker, debugBelch("initLinker: start\n"));

505
    /* Make initLinker idempotent, so we can call it
Gabor Greif's avatar
typo  
Gabor Greif committed
506
       before every relevant operation; that means we
507
       don't need to initialise the linker separately */
Ian Lynagh's avatar
Ian Lynagh committed
508 509 510
    if (linker_init_done == 1) {
        IF_DEBUG(linker, debugBelch("initLinker: idempotent return\n"));
        return;
511 512
    } else {
        linker_init_done = 1;
513 514
    }

515 516 517
    objects = NULL;
    unloaded_objects = NULL;

518 519
#if defined(THREADED_RTS)
    initMutex(&linker_mutex);
520
    initMutex(&linker_unloaded_mutex);
521
#if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
522
    initMutex(&dl_mutex);
523
#endif
524
#endif
525 526 527 528
    symhash = allocStrHashTable();

    /* populate the symbol table with stuff from the RTS */
    for (sym = rtsSyms; sym->lbl != NULL; sym++) {
529 530 531 532
        if (! ghciInsertSymbolTable(WSTR("(GHCi built-in symbols)"),
                                    symhash, sym->lbl, sym->addr, HS_BOOL_FALSE, NULL)) {
            barf("ghciInsertSymbolTable failed");
        }
Ian Lynagh's avatar
Ian Lynagh committed
533
        IF_DEBUG(linker, debugBelch("initLinker: inserting rts symbol %s, %p\n", sym->lbl, sym->addr));
534
    }
535
#   if defined(OBJFORMAT_MACHO) && defined(powerpc_HOST_ARCH)
536 537
    machoInitSymbolsWithoutUnderscore();
#   endif
538 539 540 541 542
    /* GCC defines a special symbol __dso_handle which is resolved to NULL if
       referenced from a statically linked module. We need to mimic this, but
       we cannot use NULL because we use it to mean nonexistent symbols. So we
       use an arbitrary (hopefully unique) address here.
    */
543 544 545 546
    if (! ghciInsertSymbolTable(WSTR("(GHCi special symbols)"),
                                symhash, "__dso_handle", (void *)0x12345687, HS_BOOL_FALSE, NULL)) {
        barf("ghciInsertSymbolTable failed");
    }
547

Gabor Greif's avatar
Gabor Greif committed
548
    // Redirect newCAF to newRetainedCAF if retain_cafs is true.
549 550
    if (! ghciInsertSymbolTable(WSTR("(GHCi built-in symbols)"), symhash,
                                MAYBE_LEADING_UNDERSCORE_STR("newCAF"),
551
                                retain_cafs ? newRetainedCAF : newGCdCAF,
552 553 554
                                HS_BOOL_FALSE, NULL)) {
        barf("ghciInsertSymbolTable failed");
    }
555

556
#   if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
557
#   if defined(RTLD_DEFAULT)
558 559
    dl_prog_handle = RTLD_DEFAULT;
#   else
560
    dl_prog_handle = dlopen(NULL, RTLD_LAZY);
561
#   endif /* RTLD_DEFAULT */
562 563

    compileResult = regcomp(&re_invalid,
564
           "(([^ \t()])+\\.so([^ \t:()])*):([ \t])*(invalid ELF header|file too short)",
565
           REG_EXTENDED);
Ian Lynagh's avatar
Ian Lynagh committed
566 567 568
    if (compileResult != 0) {
        barf("Compiling re_invalid failed");
    }
569
    compileResult = regcomp(&re_realso,
570
           "(GROUP|INPUT) *\\( *([^ )]+)",
571
           REG_EXTENDED);
Ian Lynagh's avatar
Ian Lynagh committed
572 573 574
    if (compileResult != 0) {
        barf("Compiling re_realso failed");
    }
575
#   endif
576

577
#if !defined(ALWAYS_PIC) && defined(x86_64_HOST_ARCH)
578 579 580 581 582
    if (RtsFlags.MiscFlags.linkerMemBase != 0) {
        // User-override for mmap_32bit_base
        mmap_32bit_base = (void*)RtsFlags.MiscFlags.linkerMemBase;
    }
#endif
583 584 585 586 587 588 589

#if defined(mingw32_HOST_OS)
    /*
     * These two libraries cause problems when added to the static link,
     * but are necessary for resolving symbols in GHCi, hence we load
     * them manually here.
     */
590 591
    addDLL(WSTR("msvcrt"));
    addDLL(WSTR("kernel32"));
592
    addDLLHandle(WSTR("*.exe"), GetModuleHandle(NULL));
593
#endif
594

595
#if USE_MMAP
596
    m32_allocator_init(&allocator);
Tamar Christina's avatar
Tamar Christina committed
597
#endif
598

599 600
    IF_DEBUG(linker, debugBelch("initLinker: done\n"));
    return;
601 602
}

603 604 605 606 607 608 609 610 611 612 613
void
exitLinker( void ) {
#if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
   if (linker_init_done == 1) {
      regfree(&re_invalid);
      regfree(&re_realso);
#ifdef THREADED_RTS
      closeMutex(&dl_mutex);
#endif
   }
#endif
614 615 616
   if (linker_init_done == 1) {
       freeHashTable(symhash, free);
   }
617 618 619
#ifdef THREADED_RTS
   closeMutex(&linker_mutex);
#endif
620 621
}

622
/* -----------------------------------------------------------------------------
623 624 625
 *                  Loading DLL or .so dynamic libraries
 * -----------------------------------------------------------------------------
 *
626 627 628 629
 * Add a DLL from which symbols may be found.  In the ELF case, just
 * do RTLD_GLOBAL-style add, so no further messing around needs to
 * happen in order that symbols in the loaded .so are findable --
 * lookupSymbol() will subsequently see them by dlsym on the program's
630 631
 * dl-handle.  Returns NULL if success, otherwise ptr to an err msg.
 *
632
 * In the PEi386 case, open the DLLs and put handles to them in a
633
 * linked list.  When looking for a symbol, try all handles in the
634 635 636 637 638
 * list.  This means that we need to load even DLLs that are guaranteed
 * to be in the ghc.exe image already, just so we can get a handle
 * to give to loadSymbol, so that we can find the symbols.  For such
 * libraries, the LoadLibrary call should be a no-op except for returning
 * the handle.
639
 *
640
 */
641 642 643 644 645 646

#if defined(OBJFORMAT_PEi386)
/* A record for storing handles into DLLs. */

typedef
   struct _OpenedDLL {
647
      pathchar*          name;
648 649
      struct _OpenedDLL* next;
      HINSTANCE instance;
650
   }
651 652 653 654
   OpenedDLL;

/* A list thereof. */
static OpenedDLL* opened_dlls = NULL;
655 656 657 658 659 660 661 662 663 664 665 666

/* A record for storing indirectly linked functions from DLLs.
 * Records are chained through `next` into a singly linked list. */
typedef
   struct _IndirectAddr {
      void*                 addr;  /* the stored address */
      struct _IndirectAddr* next;  /* next record in the list */
   }
   IndirectAddr;

/* A list thereof. */
static IndirectAddr* indirects = NULL;

667 668 669 670 671 672 673 674 675 676
/* Register a DLL instance as a place to search for symbols.
 * A new record is pushed onto the front of `opened_dlls`; the name, when
 * given, is duplicated so the caller keeps ownership of `dll_name`. */
void addDLLHandle(pathchar* dll_name, HINSTANCE instance) {
   OpenedDLL* node = stgMallocBytes( sizeof(OpenedDLL), "addDLLHandle" );

   if (dll_name) {
      node->name = pathdup(dll_name);
   } else {
      node->name = NULL;
   }
   node->instance = instance;

   /* push onto the head of the list */
   node->next  = opened_dlls;
   opened_dlls = node;
}

677 678
#endif

679
#  if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
680

681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702
/* Suppose in ghci we load a temporary SO for a module containing
       f = 1
   and then modify the module, recompile, and load another temporary
   SO with
       f = 2
   Then as we don't unload the first SO, dlsym will find the
       f = 1
   symbol whereas we want the
       f = 2
   symbol. We therefore need to keep our own SO handle list, and
   try SOs in the right order. */

/* One successfully dlopen()ed shared object.  New records are pushed onto
 * the front of the list, so traversal visits the most recently opened
 * object first. */
typedef
   struct _OpenedSO {
      struct _OpenedSO* next;   /* next (older) record in the list */
      void *handle;             /* handle returned by dlopen() */
   }
   OpenedSO;

/* A list thereof. */
static OpenedSO* openedSOs = NULL;

703
static const char *
704 705
internal_dlopen(const char *dll_name)
{
706
   OpenedSO* o_so;
707
   void *hdl;
708 709
   const char *errmsg;
   char *errmsg_copy;
710

711 712
   // omitted: RTLD_NOW
   // see http://www.haskell.org/pipermail/cvs-ghc/2007-September/038570.html
713 714 715 716 717 718 719 720 721 722
   IF_DEBUG(linker,
      debugBelch("internal_dlopen: dll_name = '%s'\n", dll_name));

   //-------------- Begin critical section ------------------
   // This critical section is necessary because dlerror() is not
   // required to be reentrant (see POSIX -- IEEE Std 1003.1-2008)
   // Also, the error message returned must be copied to preserve it
   // (see POSIX also)

   ACQUIRE_LOCK(&dl_mutex);
723
   hdl = dlopen(dll_name, RTLD_LAZY|RTLD_LOCAL); /* see Note [RTLD_LOCAL] */
dons's avatar
dons committed
724

725
   errmsg = NULL;
726 727 728 729
   if (hdl == NULL) {
      /* dlopen failed; return a ptr to the error msg. */
      errmsg = dlerror();
      if (errmsg == NULL) errmsg = "addDLL: unknown error";
730 731 732
      errmsg_copy = stgMallocBytes(strlen(errmsg)+1, "addDLL");
      strcpy(errmsg_copy, errmsg);
      errmsg = errmsg_copy;
733 734 735 736 737
   } else {
      o_so = stgMallocBytes(sizeof(OpenedSO), "addDLL");
      o_so->handle = hdl;
      o_so->next   = openedSOs;
      openedSOs    = o_so;
738
   }
739

740 741 742 743 744
   RELEASE_LOCK(&dl_mutex);
   //--------------- End critical section -------------------

   return errmsg;
}
745

746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763
/*
  Note [RTLD_LOCAL]

  In GHCi we want to be able to override previous .so's with newly
  loaded .so's when we recompile something.  This further implies that
  when we look up a symbol in internal_dlsym() we have to iterate
  through the loaded libraries (in order from most recently loaded to
  oldest) looking up the symbol in each one until we find it.

  However, this can cause problems for some symbols that are copied
  by the linker into the executable image at runtime - see #8935 for a
  lengthy discussion.  To solve that problem we need to look up
  symbols in the main executable *first*, before attempting to look
  them up in the loaded .so's.  But in order to make that work, we
  have to always call dlopen with RTLD_LOCAL, so that the loaded
  libraries don't populate the global symbol table.
*/

764
static void *
765
internal_dlsym(const char *symbol) {
766 767 768 769 770 771
    OpenedSO* o_so;
    void *v;

    // We acquire dl_mutex as concurrent dl* calls may alter dlerror
    ACQUIRE_LOCK(&dl_mutex);
    dlerror();
772 773 774 775 776 777 778
    // look in program first
    v = dlsym(dl_prog_handle, symbol);
    if (dlerror() == NULL) {
        RELEASE_LOCK(&dl_mutex);
        return v;
    }

779 780 781 782 783 784 785 786 787 788
    for (o_so = openedSOs; o_so != NULL; o_so = o_so->next) {
        v = dlsym(o_so->handle, symbol);
        if (dlerror() == NULL) {
            RELEASE_LOCK(&dl_mutex);
            return v;
        }
    }
    RELEASE_LOCK(&dl_mutex);
    return v;
}
789 790 791
#  endif

const char *
792
addDLL( pathchar *dll_name )
793 794 795 796 797 798
{
#  if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
   /* ------------------- ELF DLL loader ------------------- */

#define NMATCH 5
   regmatch_t match[NMATCH];
799
   const char *errmsg;
800 801 802 803 804 805 806 807 808 809
   FILE* fp;
   size_t match_length;
#define MAXLINE 1000
   char line[MAXLINE];
   int result;

   IF_DEBUG(linker, debugBelch("addDLL: dll_name = '%s'\n", dll_name));
   errmsg = internal_dlopen(dll_name);

   if (errmsg == NULL) {
810 811
      return NULL;
   }
812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833

   // GHC Trac ticket #2615
   // On some systems (e.g., Gentoo Linux) dynamic files (e.g. libc.so)
   // contain linker scripts rather than ELF-format object code. This
   // code handles the situation by recognizing the real object code
   // file name given in the linker script.
   //
   // If an "invalid ELF header" error occurs, it is assumed that the
   // .so file contains a linker script instead of ELF object code.
   // In this case, the code looks for the GROUP ( ... ) linker
   // directive. If one is found, the first file name inside the
   // parentheses is treated as the name of a dynamic library and the
   // code attempts to dlopen that file. If this is also unsuccessful,
   // an error message is returned.

   // see if the error message is due to an invalid ELF header
   IF_DEBUG(linker, debugBelch("errmsg = '%s'\n", errmsg));
   result = regexec(&re_invalid, errmsg, (size_t) NMATCH, match, 0);
   IF_DEBUG(linker, debugBelch("result = %i\n", result));
   if (result == 0) {
      // success -- try to read the named file as a linker script
      match_length = (size_t) stg_min((match[1].rm_eo - match[1].rm_so),
Ian Lynagh's avatar
Ian Lynagh committed
834
                                 MAXLINE-1);
835 836 837 838
      strncpy(line, (errmsg+(match[1].rm_so)),match_length);
      line[match_length] = '\0'; // make sure string is null-terminated
      IF_DEBUG(linker, debugBelch ("file name = '%s'\n", line));
      if ((fp = fopen(line, "r")) == NULL) {
Ian Lynagh's avatar
Ian Lynagh committed
839
         return errmsg; // return original error if open fails
840
      }
841
      // try to find a GROUP or INPUT ( ... ) command
842
      while (fgets(line, MAXLINE, fp) != NULL) {
Ian Lynagh's avatar
Ian Lynagh committed
843 844
         IF_DEBUG(linker, debugBelch("input line = %s", line));
         if (regexec(&re_realso, line, (size_t) NMATCH, match, 0) == 0) {
845 846
            // success -- try to dlopen the first named file
            IF_DEBUG(linker, debugBelch("match%s\n",""));
847
            line[match[2].rm_eo] = '\0';
848
            stgFree((void*)errmsg); // Free old message before creating new one
849
            errmsg = internal_dlopen(line+match[2].rm_so);
Ian Lynagh's avatar
Ian Lynagh committed
850 851
            break;
         }
852 853 854
         // if control reaches here, no GROUP or INPUT ( ... ) directive
         // was found and the original error message is returned to the
         // caller
855 856 857 858
      }
      fclose(fp);
   }
   return errmsg;
859

860
#  elif defined(OBJFORMAT_PEi386)
861
   /* ------------------- Win32 DLL loader ------------------- */
862

863
   pathchar*      buf;
864
   OpenedDLL* o_dll;
865
   HINSTANCE  instance;
866

867
   IF_DEBUG(linker, debugBelch("\naddDLL; dll_name = `%" PATH_FMT "'\n", dll_name));
868 869 870

   /* See if we've already got it, and ignore if so. */
   for (o_dll = opened_dlls; o_dll != NULL; o_dll = o_dll->next) {
871
      if (0 == pathcmp(o_dll->name, dll_name))
872
         return NULL;
873 874
   }

875 876 877 878
   /* The file name has no suffix (yet) so that we can try
      both foo.dll and foo.drv

      The documentation for LoadLibrary says:
Ian Lynagh's avatar
Ian Lynagh committed
879 880 881 882 883
        If no file name extension is specified in the lpFileName
        parameter, the default library extension .dll is
        appended. However, the file name string can include a trailing
        point character (.) to indicate that the module name has no
        extension. */
884

885 886
   size_t bufsize = pathlen(dll_name) + 10;
   buf = stgMallocBytes(bufsize * sizeof(wchar_t), "addDLL");
887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917

   /* These are ordered by probability of success and order we'd like them */
   const wchar_t *formats[] = { L"%s.DLL", L"%s.DRV", L"lib%s.DLL", L"%s" };
   const DWORD flags[]      = { LOAD_LIBRARY_SEARCH_USER_DIRS | LOAD_LIBRARY_SEARCH_DEFAULT_DIRS, 0 };

   int cFormat;
   int cFlag;
   int flags_start = 1; // Assume we don't support the new API

   /* Detect if newer API are available, if not, skip the first flags entry */
   if (GetProcAddress((HMODULE)LoadLibraryW(L"Kernel32.DLL"), "AddDllDirectory")) {
       flags_start = 0;
   }

   /* Iterate through the possible flags and formats */
   for (cFlag = flags_start; cFlag < 2; cFlag++)
   {
       for (cFormat = 0; cFormat < 4; cFormat++)
       {
           snwprintf(buf, bufsize, formats[cFormat], dll_name);
           instance = LoadLibraryExW(buf, NULL, flags[cFlag]);
           if (instance == NULL)
           {
               if (GetLastError() != ERROR_MOD_NOT_FOUND)
               {
                   goto error;
               }
           }
           else
           {
               break; // We're done. DLL has been loaded.
918 919
           }
       }
920
   }
921 922 923 924 925 926

   // Check if we managed to load the DLL
   if (instance == NULL) {
       goto error;
   }

sof's avatar
sof committed
927
   stgFree(buf);
928

929
   addDLLHandle(dll_name, instance);
930 931

   return NULL;
932 933 934

error:
   stgFree(buf);
935
   sysErrorBelch("addDLL: %" PATH_FMT " (Win32 error %lu)", dll_name, GetLastError());
Simon Marlow's avatar
Simon Marlow committed
936

937 938
   /* LoadLibrary failed; return a ptr to the error msg. */
   return "addDLL: could not load DLL";
Simon Marlow's avatar
Simon Marlow committed
939 940 941 942

#  else
   barf("addDLL: not implemented on this platform");
#  endif
943 944
}

945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979
/* -----------------------------------------------------------------------------
* Searches the system directories to determine if there is a system DLL that
* satisfies the given name. This prevents GHCi from linking against a static
* library if a DLL is available.
*
* Returns: NULL on failure (including allocation failure) or no DLL found,
*          else the full path to the DLL that can be loaded.  The path is
*          allocated with malloc/realloc; presumably the caller releases it
*          with free (NOTE(review): confirm against callers).
*
* Only implemented for PE (Windows) targets; elsewhere always returns NULL.
*/
pathchar* findSystemLibrary(pathchar* dll_name)
{
    IF_DEBUG(linker, debugBelch("\nfindSystemLibrary: dll_name = `%" PATH_FMT "'\n", dll_name));

#if defined(OBJFORMAT_PEi386)
    const unsigned int init_buf_size = 1024;
    DWORD bufsize   = init_buf_size;
    wchar_t* result = malloc(sizeof(wchar_t) * bufsize);
    if (result == NULL) {
        return NULL;
    }

    DWORD wResult = SearchPathW(NULL, dll_name, NULL, bufsize, result, NULL);

    if (wResult > bufsize) {
        // Buffer was too small: wResult is the size SearchPathW asked for.
        // Grow through a temporary so the old block is not lost if realloc
        // fails.
        wchar_t* bigger = realloc(result, sizeof(wchar_t) * wResult);
        if (bigger == NULL) {
            free(result);
            return NULL;
        }
        result  = bigger;
        bufsize = wResult;
        wResult = SearchPathW(NULL, dll_name, NULL, bufsize, result, NULL);
    }

    // 0 means nothing was found (or the call failed); a value still larger
    // than the buffer means the path was not written even after the retry.
    if (wResult == 0 || wResult > bufsize) {
        free(result);
        return NULL;
    }

    return result;
#else
    (void)(dll_name); // Function not implemented for other platforms.
    return NULL;
#endif
}
980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113

/* -----------------------------------------------------------------------------
* Emits a warning that the system is missing a required security update
* (KB2533623) which provides the APIs we would like to use.
*
* The warning is printed only on the first call; a function-local static
* flag suppresses it afterwards.
*/
void warnMissingKBLibraryPaths( void )
{
    static HsBool already_warned = HS_BOOL_FALSE;

    if (already_warned) {
        return;
    }

    debugBelch("Warning: If linking fails, consider installing KB2533623.\n");
    already_warned = HS_BOOL_TRUE;
}

/* -----------------------------------------------------------------------------
* appends a directory to the process DLL Load path so LoadLibrary can find it
*
* The path is first made absolute with GetFullPathNameW.  If Kernel32
* exports AddDllDirectory, that is used and its result is returned.
* Otherwise the directory is prepended to the PATH environment variable and
* a malloc'd copy of the previous PATH value is returned.
*
* Returns: NULL on failure, or pointer to be passed to removeLibrarySearchPath to
*          restore the search path to what it was before this call.
*
* Only implemented for PE (Windows) targets; elsewhere always returns NULL.
*/
HsPtr addLibrarySearchPath(pathchar* dll_path)
{
    IF_DEBUG(linker, debugBelch("\naddLibrarySearchPath: dll_path = `%" PATH_FMT "'\n", dll_path));

#if defined(OBJFORMAT_PEi386)
    HINSTANCE hDLL = LoadLibraryW(L"Kernel32.DLL");
    LPAddDLLDirectory AddDllDirectory = (LPAddDLLDirectory)GetProcAddress((HMODULE)hDLL, "AddDllDirectory");

    const unsigned int init_buf_size = 4096;

    // Make sure the path is an absolute path
    DWORD  abs_cap  = init_buf_size;
    WCHAR* abs_path = malloc(sizeof(WCHAR) * abs_cap);
    if (abs_path == NULL) {
        return NULL;
    }

    DWORD wResult = GetFullPathNameW(dll_path, abs_cap, abs_path, NULL);
    if (wResult > abs_cap) {
        // Buffer too small: wResult is the required size.  Grow the buffer
        // and retry with the *new* capacity.
        WCHAR* bigger = realloc(abs_path, sizeof(WCHAR) * wResult);
        if (bigger == NULL) {
            free(abs_path);
            return NULL;
        }
        abs_path = bigger;
        abs_cap  = wResult;
        wResult  = GetFullPathNameW(dll_path, abs_cap, abs_path, NULL);
    }

    if (wResult == 0 || wResult > abs_cap) {
        // abs_path holds no usable path; do not go on with its contents
        sysErrorBelch("addLibrarySearchPath[GetFullPathNameW]: %" PATH_FMT " (Win32 error %lu)", dll_path, GetLastError());
        free(abs_path);
        return NULL;
    }

    if (!AddDllDirectory)
    {
        // Fallback for systems without AddDllDirectory: prepend to PATH
        warnMissingKBLibraryPaths();

        DWORD  path_cap = init_buf_size;
        WCHAR* str      = malloc(sizeof(WCHAR) * path_cap);
        if (str == NULL) {
            free(abs_path);
            return NULL;
        }

        wResult = GetEnvironmentVariableW(L"PATH", str, path_cap);

        if (wResult > path_cap) {
            // Buffer too small: wResult is the required size
            WCHAR* bigger = realloc(str, sizeof(WCHAR) * wResult);
            if (bigger == NULL) {
                free(str);
                free(abs_path);
                return NULL;
            }
            str      = bigger;
            path_cap = wResult;
            wResult  = GetEnvironmentVariableW(L"PATH", str, path_cap);
            if (wResult == 0 || wResult > path_cap) {
                sysErrorBelch("addLibrarySearchPath[GetEnvironmentVariableW]: %" PATH_FMT " (Win32 error %lu)", dll_path, GetLastError());
                free(str);
                free(abs_path);
                return NULL;
            }
        }
        else if (wResult == 0) {
            // Nothing was read (e.g. PATH is unset or empty): treat the old
            // value as the empty string instead of reading an unfilled buffer
            str[0] = L'\0';
        }

        // abs_path + ';' + old PATH + terminator
        size_t   newLen  = (size_t)wResult + 2 + pathlen(abs_path);
        wchar_t* newPath = malloc(sizeof(wchar_t) * newLen);
        if (newPath == NULL) {
            free(str);
            free(abs_path);
            return NULL;
        }

        wcscpy(newPath, abs_path);
        wcscat(newPath, L";");
        wcscat(newPath, str);
        if (!SetEnvironmentVariableW(L"PATH", (LPCWSTR)newPath)) {
            sysErrorBelch("addLibrarySearchPath[SetEnvironmentVariableW]: %" PATH_FMT " (Win32 error %lu)", abs_path, GetLastError());
        }

        free(newPath);
        free(abs_path);

        // The previous PATH value is the token used to undo this call
        return str;
    }

    HsPtr result = AddDllDirectory(abs_path);
    if (!result) {
        sysErrorBelch("addLibrarySearchPath: %" PATH_FMT " (Win32 error %lu)", abs_path, GetLastError());
        free(abs_path);
        return NULL;
    }

    free(abs_path);
    return result;
#else
    (void)(dll_path); // Function not implemented for other platforms.
    return NULL;
#endif
}

/* -----------------------------------------------------------------------------
* removes a directory from the process DLL Load path
*
* Returns: HS_BOOL_TRUE on success, otherwise HS_BOOL_FALSE
*/