Linker.c 58.1 KB
Newer Older
1
/* -----------------------------------------------------------------------------
2
 * $Id: Linker.c,v 1.55 2001/08/10 08:24:38 simonmar Exp $
3 4 5 6 7 8 9 10 11 12 13 14
 *
 * (c) The GHC Team, 2000
 *
 * RTS Object Linker
 *
 * ---------------------------------------------------------------------------*/

#include "Rts.h"
#include "RtsFlags.h"
#include "HsFFI.h"
#include "Hash.h"
#include "Linker.h"
15
#include "LinkerInternals.h"
16
#include "RtsUtils.h"
17
#include "StoragePriv.h"
18

19
#ifdef HAVE_SYS_TYPES_H
20
#include <sys/types.h>
21 22 23
#endif

#ifdef HAVE_SYS_STAT_H
24
#include <sys/stat.h>
25
#endif
26

27
#ifdef HAVE_DLFCN_H
28
#include <dlfcn.h>
29
#endif
30

31
#if defined(linux_TARGET_OS) || defined(solaris2_TARGET_OS) || defined(freebsd_TARGET_OS)
32
#  define OBJFORMAT_ELF
33
#elif defined(cygwin32_TARGET_OS) || defined (mingw32_TARGET_OS)
34
#  define OBJFORMAT_PEi386
35
#  include <windows.h>
36 37
#endif

38 39 40
/* Hash table mapping symbol names to Symbol */
/*Str*/HashTable *symhash;

41
#if defined(OBJFORMAT_ELF)
42 43 44
static int ocVerifyImage_ELF    ( ObjectCode* oc );
static int ocGetNames_ELF       ( ObjectCode* oc );
static int ocResolve_ELF        ( ObjectCode* oc );
45
#elif defined(OBJFORMAT_PEi386)
46 47 48 49 50 51 52 53 54
static int ocVerifyImage_PEi386 ( ObjectCode* oc );
static int ocGetNames_PEi386    ( ObjectCode* oc );
static int ocResolve_PEi386     ( ObjectCode* oc );
#endif

/* -----------------------------------------------------------------------------
 * Built-in symbols from the RTS
 */

55 56 57 58 59 60
typedef struct _RtsSymbolVal {
    char   *lbl;
    void   *addr;
} RtsSymbolVal;


61 62 63 64 65 66 67 68 69 70 71
#if !defined(PAR)
#define Maybe_ForeignObj        SymX(mkForeignObjzh_fast)

#define Maybe_Stable_Names      SymX(mkWeakzh_fast)			\
      				SymX(makeStableNamezh_fast)		\
      				SymX(finalizzeWeakzh_fast)
#else
/* These are not available in GUM!!! -- HWL */
#define Maybe_ForeignObj
#define Maybe_Stable_Names
#endif
72 73

#if !defined (mingw32_TARGET_OS)
74

75
#define RTS_POSIX_ONLY_SYMBOLS                  \
76 77
      SymX(stg_sig_install)			\
      Sym(nocldstop)
78 79
#define RTS_MINGW_ONLY_SYMBOLS /**/

80
#else
81

82
#define RTS_POSIX_ONLY_SYMBOLS
83 84 85

/* These are statically linked from the mingw libraries into the ghc
   executable, so we have to employ this hack. */
86
#define RTS_MINGW_ONLY_SYMBOLS                  \
87
      SymX(memset)                              \
88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119
      SymX(inet_ntoa)                           \
      SymX(inet_addr)                           \
      SymX(htonl)                               \
      SymX(recvfrom)                            \
      SymX(listen)                              \
      SymX(bind)                                \
      SymX(shutdown)                            \
      SymX(connect)                             \
      SymX(htons)                               \
      SymX(ntohs)                               \
      SymX(getservbyname)                       \
      SymX(getservbyport)                       \
      SymX(getprotobynumber)                    \
      SymX(getprotobyname)                      \
      SymX(gethostbyname)                       \
      SymX(gethostbyaddr)                       \
      SymX(gethostname)                         \
      SymX(strcpy)                              \
      SymX(strncpy)                             \
      SymX(abort)                               \
      Sym(_alloca)                              \
      Sym(isxdigit)                             \
      Sym(isupper)                              \
      Sym(ispunct)                              \
      Sym(islower)                              \
      Sym(isspace)                              \
      Sym(isprint)                              \
      Sym(isdigit)                              \
      Sym(iscntrl)                              \
      Sym(isalpha)                              \
      Sym(isalnum)                              \
      SymX(strcmp)                              \
120 121 122
      SymX(memmove)                             \
      SymX(realloc)                             \
      SymX(malloc)                              \
123 124 125 126 127 128 129 130 131 132 133 134 135 136
      SymX(pow)                                 \
      SymX(tanh)                                \
      SymX(cosh)                                \
      SymX(sinh)                                \
      SymX(atan)                                \
      SymX(acos)                                \
      SymX(asin)                                \
      SymX(tan)                                 \
      SymX(cos)                                 \
      SymX(sin)                                 \
      SymX(exp)                                 \
      SymX(log)                                 \
      SymX(sqrt)                                \
      SymX(memcpy)                              \
137
      Sym(mktime)                               \
138
      Sym(_imp___timezone)                      \
139 140 141 142
      Sym(_imp___tzname)                        \
      Sym(localtime)                            \
      Sym(gmtime)                               \
      SymX(getenv)                              \
143
      SymX(free)                                \
144 145 146 147 148 149
      SymX(rename)                              \
      Sym(opendir)                              \
      Sym(readdir)                              \
      Sym(closedir)                             \
      SymX(GetCurrentProcess)                   \
      SymX(GetProcessTimes)                     \
rrt's avatar
rrt committed
150
      SymX(CloseHandle)                         \
151 152 153
      SymX(GetExitCodeProcess)                  \
      SymX(WaitForSingleObject)                 \
      SymX(CreateProcessA)                      \
sof's avatar
sof committed
154 155 156 157
      Sym(__divdi3)                             \
      Sym(__udivdi3)                            \
      Sym(__moddi3)                             \
      Sym(__umoddi3)                            \
158
      SymX(_errno)
159
#endif
160

161

162 163 164 165 166 167 168 169 170
#define RTS_SYMBOLS				\
      SymX(MainRegTable)			\
      Sym(stg_gc_enter_1)			\
      Sym(stg_gc_noregs)			\
      Sym(stg_gc_seq_1)				\
      Sym(stg_gc_d1)				\
      Sym(stg_gc_f1)				\
      Sym(stg_gc_ut_1_0)			\
      Sym(stg_gc_ut_0_1)			\
171
      Sym(stg_gc_unpt_r1)			\
172 173 174 175 176 177
      Sym(stg_gc_unbx_r1)			\
      Sym(stg_chk_0)				\
      Sym(stg_chk_1)				\
      Sym(stg_gen_chk)				\
      SymX(stg_exit)				\
      SymX(stg_update_PAP)			\
178
      SymX(stg_ap_1_upd_info)			\
179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198
      SymX(stg_ap_2_upd_info)			\
      SymX(stg_ap_3_upd_info)			\
      SymX(stg_ap_4_upd_info)			\
      SymX(stg_ap_5_upd_info)			\
      SymX(stg_ap_6_upd_info)			\
      SymX(stg_ap_7_upd_info)			\
      SymX(stg_ap_8_upd_info)			\
      SymX(stg_sel_0_upd_info)			\
      SymX(stg_sel_1_upd_info)			\
      SymX(stg_sel_2_upd_info)			\
      SymX(stg_sel_3_upd_info)			\
      SymX(stg_sel_4_upd_info)			\
      SymX(stg_sel_5_upd_info)			\
      SymX(stg_sel_6_upd_info)			\
      SymX(stg_sel_7_upd_info)			\
      SymX(stg_sel_8_upd_info)			\
      SymX(stg_sel_9_upd_info)			\
      SymX(stg_sel_10_upd_info)			\
      SymX(stg_sel_11_upd_info)			\
      SymX(stg_sel_12_upd_info)			\
199 200 201
      SymX(stg_sel_13_upd_info)			\
      SymX(stg_sel_14_upd_info)			\
      SymX(stg_sel_15_upd_info)			\
202 203
      SymX(stg_upd_frame_info)			\
      SymX(stg_seq_frame_info)			\
204
      SymX(stg_CAF_BLACKHOLE_info)		\
205 206 207
      SymX(stg_IND_STATIC_info)			\
      SymX(stg_EMPTY_MVAR_info)			\
      SymX(stg_MUT_ARR_PTRS_FROZEN_info)	\
208
      SymX(stg_WEAK_info)                       \
209 210
      SymX(stg_CHARLIKE_closure)		\
      SymX(stg_INTLIKE_closure)			\
211
      SymX(newCAF)				\
212
      SymX(newBCOzh_fast)			\
213
      SymX(mkApUpd0zh_fast)			\
214 215 216 217
      SymX(putMVarzh_fast)			\
      SymX(newMVarzh_fast)			\
      SymX(takeMVarzh_fast)			\
      SymX(tryTakeMVarzh_fast)			\
218
      SymX(tryPutMVarzh_fast)			\
219 220
      SymX(catchzh_fast)			\
      SymX(raisezh_fast)			\
221
      SymX(forkzh_fast)				\
222 223 224 225 226 227 228 229 230
      SymX(delayzh_fast)			\
      SymX(yieldzh_fast)			\
      SymX(killThreadzh_fast)			\
      SymX(waitReadzh_fast)			\
      SymX(waitWritezh_fast)			\
      SymX(suspendThread)			\
      SymX(resumeThread)			\
      SymX(stackOverflow)			\
      SymX(int2Integerzh_fast)			\
231
      SymX(word2Integerzh_fast)			\
232
      Maybe_ForeignObj				\
233 234 235 236 237 238
      SymX(__encodeDouble)			\
      SymX(decodeDoublezh_fast)			\
      SymX(decodeFloatzh_fast)			\
      SymX(gcdIntegerzh_fast)			\
      SymX(newArrayzh_fast)			\
      SymX(unsafeThawArrayzh_fast)		\
239
      SymX(newByteArrayzh_fast)			\
240
      SymX(newPinnedByteArrayzh_fast)		\
241 242 243 244 245 246 247 248 249
      SymX(newMutVarzh_fast)			\
      SymX(quotRemIntegerzh_fast)		\
      SymX(quotIntegerzh_fast)			\
      SymX(remIntegerzh_fast)			\
      SymX(divExactIntegerzh_fast)		\
      SymX(divModIntegerzh_fast)		\
      SymX(timesIntegerzh_fast)			\
      SymX(minusIntegerzh_fast)			\
      SymX(plusIntegerzh_fast)			\
250 251 252 253
      SymX(andIntegerzh_fast)			\
      SymX(orIntegerzh_fast)			\
      SymX(xorIntegerzh_fast)			\
      SymX(complementIntegerzh_fast)		\
254
      Maybe_Stable_Names			\
255 256 257 258 259 260 261 262 263 264 265 266 267 268
      SymX(blockAsyncExceptionszh_fast)		\
      SymX(unblockAsyncExceptionszh_fast)	\
      SymX(isDoubleNaN)				\
      SymX(isDoubleInfinite)			\
      SymX(isDoubleDenormalized)		\
      SymX(isDoubleNegativeZero)		\
      SymX(__encodeFloat)			\
      SymX(isFloatNaN)				\
      SymX(isFloatInfinite)			\
      SymX(isFloatDenormalized)			\
      SymX(isFloatNegativeZero)			\
      SymX(__int_encodeFloat)			\
      SymX(__int_encodeDouble)			\
      SymX(__gmpz_cmp_si)			\
269
      SymX(__gmpz_cmp_ui)			\
270 271
      SymX(__gmpz_cmp)				\
      SymX(__gmpn_gcd_1)			\
272 273
      SymX(__gmpz_get_si)			\
      SymX(__gmpz_get_ui)			\
274 275 276
      SymX(prog_argv)				\
      SymX(prog_argc)				\
      SymX(resetNonBlockingFd)			\
277
      SymX(performGC)				\
278 279 280 281
      SymX(getStablePtr)			\
      SymX(stable_ptr_table)			\
      SymX(shutdownHaskellAndExit)		\
      Sym(stg_enterStackTop)			\
282
      Sym(stg_yield_to_interpreter)		\
283 284
      Sym(StgReturn)				\
      Sym(init_stack)				\
285 286 287 288 289 290 291 292 293 294 295 296
      SymX(cmp_thread)				\
      Sym(__init_PrelGHC)			\
      SymX(freeHaskellFunctionPtr)		\
      SymX(OnExitHook)				\
      SymX(ErrorHdrHook)			\
      SymX(NoRunnableThreadsHook)		\
      SymX(StackOverflowHook)			\
      SymX(OutOfHeapHook)			\
      SymX(MallocFailHook)			\
      SymX(PatErrorHdrHook)			\
      SymX(defaultsHook)			\
      SymX(PreTraceHook)			\
297 298
      SymX(PostTraceHook)			\
      SymX(createAdjustor)			\
299
      SymX(rts_mkChar)				\
300
      SymX(rts_mkInt)				\
301 302 303 304 305 306 307 308 309 310 311 312
      SymX(rts_mkInt8)				\
      SymX(rts_mkInt16)				\
      SymX(rts_mkInt32)				\
      SymX(rts_mkInt64)				\
      SymX(rts_mkWord)				\
      SymX(rts_mkWord8)				\
      SymX(rts_mkWord16)			\
      SymX(rts_mkWord32)			\
      SymX(rts_mkWord64)			\
      SymX(rts_mkPtr)				\
      SymX(rts_mkFloat)				\
      SymX(rts_mkDouble)			\
313
      SymX(rts_mkStablePtr)			\
314 315
      SymX(rts_mkBool)				\
      SymX(rts_mkString)			\
316
      SymX(rts_apply)				\
317 318 319 320 321 322 323 324 325 326 327 328 329 330
      SymX(rts_mkAddr)				\
      SymX(rts_getChar)				\
      SymX(rts_getInt)				\
      SymX(rts_getInt32)			\
      SymX(rts_getWord)				\
      SymX(rts_getWord32)			\
      SymX(rts_getPtr)				\
      SymX(rts_getFloat)			\
      SymX(rts_getDouble)			\
      SymX(rts_getStablePtr)			\
      SymX(rts_getBool)				\
      SymX(rts_getAddr)				\
      SymX(rts_eval)				\
      SymX(rts_eval_)				\
331
      SymX(rts_evalIO)				\
332 333
      SymX(rts_evalLazyIO)			\
      SymX(rts_checkSchedStatus)
334 335 336 337

#ifndef SUPPORT_LONG_LONGS
#define RTS_LONG_LONG_SYMS /* nothing */
#else
338
#define RTS_LONG_LONG_SYMS			\
339 340 341
      SymX(int64ToIntegerzh_fast)		\
      SymX(word64ToIntegerzh_fast)
#endif /* SUPPORT_LONG_LONGS */
342 343 344 345 346

/* entirely bogus claims about types of these symbols */
#define Sym(vvv)  extern void (vvv);
#define SymX(vvv) /**/
RTS_SYMBOLS
347
RTS_LONG_LONG_SYMS
348
RTS_POSIX_ONLY_SYMBOLS
349
RTS_MINGW_ONLY_SYMBOLS
350 351 352 353 354 355 356 357 358 359 360 361 362
#undef Sym
#undef SymX

#ifdef LEADING_UNDERSCORE
#define MAYBE_LEADING_UNDERSCORE_STR(s) ("_" s)
#else
#define MAYBE_LEADING_UNDERSCORE_STR(s) (s)
#endif

#define Sym(vvv) { MAYBE_LEADING_UNDERSCORE_STR(#vvv), \
                    (void*)(&(vvv)) },
#define SymX(vvv) Sym(vvv)

363
static RtsSymbolVal rtsSyms[] = {
364
      RTS_SYMBOLS
365
      RTS_LONG_LONG_SYMS
366
      RTS_POSIX_ONLY_SYMBOLS
367
      RTS_MINGW_ONLY_SYMBOLS
368 369 370 371 372 373
      { 0, 0 } /* sentinel */
};

/* -----------------------------------------------------------------------------
 * initialize the object linker
 */
374
#if defined(OBJFORMAT_ELF)
375
static void *dl_prog_handle;
376
#endif
377 378 379 380

void
initLinker( void )
{
381
    RtsSymbolVal *sym;
382 383 384 385 386

    symhash = allocStrHashTable();

    /* populate the symbol table with stuff from the RTS */
    for (sym = rtsSyms; sym->lbl != NULL; sym++) {
387
	insertStrHashTable(symhash, sym->lbl, sym->addr);
388
    }
389
#   if defined(OBJFORMAT_ELF)
390
    dl_prog_handle = dlopen(NULL, RTLD_LAZY);
391
#   endif
392 393
}

394 395 396 397 398
/* -----------------------------------------------------------------------------
 * Add a DLL from which symbols may be found.  In the ELF case, just
 * do RTLD_GLOBAL-style add, so no further messing around needs to
 * happen in order that symbols in the loaded .so are findable --
 * lookupSymbol() will subsequently see them by dlsym on the program's
399 400 401 402 403
 * dl-handle.  Returns NULL if success, otherwise ptr to an err msg.
 *
 * In the PEi386 case, open the DLLs and put handles to them in a 
 * linked list.  When looking for a symbol, try all handles in the
 * list.
404
 */
405 406 407 408 409 410

#if defined(OBJFORMAT_PEi386)
/* A record for storing handles into DLLs. */

typedef
   struct _OpenedDLL {
411
      char*              name;
412 413 414 415 416 417 418 419 420 421 422
      struct _OpenedDLL* next;
      HINSTANCE instance;
   } 
   OpenedDLL;

/* A list thereof. */
static OpenedDLL* opened_dlls = NULL;
#endif



423
char*
424
addDLL ( char* path, char* dll_name )
425 426 427 428
{
#  if defined(OBJFORMAT_ELF)
   void *hdl;
   char *buf;
429
   char *errmsg;
430

431 432 433 434 435 436 437
   if (path == NULL || strlen(path) == 0) {
      buf = stgMallocBytes(strlen(dll_name) + 10, "addDll");
      sprintf(buf, "lib%s.so", dll_name);
   } else {
      buf = stgMallocBytes(strlen(path) + 1 + strlen(dll_name) + 10, "addDll");
      sprintf(buf, "%s/lib%s.so", path, dll_name);
   }
438
   hdl = dlopen(buf, RTLD_NOW | RTLD_GLOBAL );
439
   free(buf);
440 441 442 443 444 445 446 447
   if (hdl == NULL) {
      /* dlopen failed; return a ptr to the error msg. */
      errmsg = dlerror();
      if (errmsg == NULL) errmsg = "addDLL: unknown error";
      return errmsg;
   } else {
      return NULL;
   }
448 449
   /*NOTREACHED*/

450
#  elif defined(OBJFORMAT_PEi386)
451

452
   /* Add this DLL to the list of DLLs in which to search for symbols.
453
      The path argument is ignored. */
454
   char*      buf;
455
   OpenedDLL* o_dll;
456
   HINSTANCE  instance;
457 458 459 460 461 462 463

   /* fprintf(stderr, "\naddDLL; path=`%s', dll_name = `%s'\n", path, dll_name); */

   /* See if we've already got it, and ignore if so. */
   for (o_dll = opened_dlls; o_dll != NULL; o_dll = o_dll->next) {
      if (0 == strcmp(o_dll->name, dll_name))
         return NULL;
464 465
   }

466
   buf = stgMallocBytes(strlen(dll_name) + 10, "addDLL");
467 468 469 470 471
   sprintf(buf, "%s.DLL", dll_name);
   instance = LoadLibrary(buf);
   free(buf);
   if (instance == NULL) {
     /* LoadLibrary failed; return a ptr to the error msg. */
472
     return "addDLL: unknown error";
473 474 475
   }

   o_dll = stgMallocBytes( sizeof(OpenedDLL), "addDLL" );
476 477
   o_dll->name     = stgMallocBytes(1+strlen(dll_name), "addDLL");
   strcpy(o_dll->name, dll_name);
478
   o_dll->instance = instance;
479 480
   o_dll->next     = opened_dlls;
   opened_dlls     = o_dll;
481 482

   return NULL;
483 484 485 486 487
#  else
   barf("addDLL: not implemented on this platform");
#  endif
}

488 489 490 491 492 493
/* -----------------------------------------------------------------------------
 * lookup a symbol in the hash table
 */  
void *
lookupSymbol( char *lbl )
{
494
    void *val;
495
    ASSERT(symhash != NULL);
496 497 498
    val = lookupStrHashTable(symhash, lbl);

    if (val == NULL) {
499
#       if defined(OBJFORMAT_ELF)
500
	return dlsym(dl_prog_handle, lbl);
501
#       elif defined(OBJFORMAT_PEi386)
502 503 504
        OpenedDLL* o_dll;
        void* sym;
        for (o_dll = opened_dlls; o_dll != NULL; o_dll = o_dll->next) {
505
           /* fprintf(stderr, "look in %s for %s\n", o_dll->name, lbl); */
sof's avatar
sof committed
506 507 508 509 510 511 512 513 514
	   if (lbl[0] == '_') {
	     /* HACK: if the name has an initial underscore, try stripping
		it off & look that up first. I've yet to verify whether there's
		a Rule that governs whether an initial '_' *should always* be
		stripped off when mapping from import lib name to the DLL name.
	     */
	     sym = GetProcAddress(o_dll->instance, (lbl+1));
	     if (sym != NULL) return sym;
	   }
515 516 517
           sym = GetProcAddress(o_dll->instance, lbl);
           if (sym != NULL) return sym;
        }
518
        return NULL;
ken's avatar
ken committed
519 520 521
#       else
        ASSERT(2+2 == 5);
        return NULL;
522
#       endif
523
    } else {
524
	return val;
525 526 527
    }
}

528 529 530 531
static 
void *
lookupLocalSymbol( ObjectCode* oc, char *lbl )
{
532
    void *val;
533 534 535 536 537
    val = lookupStrHashTable(oc->lochash, lbl);

    if (val == NULL) {
        return NULL;
    } else {
538
	return val;
539 540 541 542
    }
}


543 544 545 546 547 548 549 550 551 552 553 554 555
/* -----------------------------------------------------------------------------
 * Load an obj (populate the global symbol table, but don't resolve yet)
 *
 * Returns: 1 if ok, 0 on error.
 */
HsInt
loadObj( char *path )
{
   ObjectCode* oc;
   struct stat st;
   int r, n;
   FILE *f;

556
   /* fprintf(stderr, "loadObj %s\n", path ); */
557
#  ifdef DEBUG
558 559 560 561 562 563
   /* assert that we haven't already loaded this object */
   { 
       ObjectCode *o;
       for (o = objects; o; o = o->next)
	   ASSERT(strcmp(o->fileName, path));
   }
564
#  endif /* DEBUG */   
565 566 567

   oc = stgMallocBytes(sizeof(ObjectCode), "loadObj(oc)");

568
#  if defined(OBJFORMAT_ELF)
569
   oc->formatName = "ELF";
570
#  elif defined(OBJFORMAT_PEi386)
571 572 573 574 575 576 577 578 579
   oc->formatName = "PEi386";
#  else
   free(oc);
   barf("loadObj: not implemented on this platform");
#  endif

   r = stat(path, &st);
   if (r == -1) { return 0; }

580
   /* sigh, strdup() isn't a POSIX function, so do it the long way */
581 582 583
   oc->fileName = stgMallocBytes( strlen(path)+1, "loadObj" );
   strcpy(oc->fileName, path);

584 585 586 587
   oc->fileSize          = st.st_size;
   oc->image             = stgMallocBytes( st.st_size, "loadObj(image)" );
   oc->symbols           = NULL;
   oc->sections          = NULL;
588
   oc->lochash           = allocStrHashTable();
589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605

   /* chain it onto the list of objects */
   oc->next              = objects;
   objects               = oc;

   /* load the image into memory */
   f = fopen(path, "rb");
   if (!f) {
       barf("loadObj: can't read `%s'", path);
   }
   n = fread ( oc->image, 1, oc->fileSize, f );
   if (n != oc->fileSize) {
      fclose(f);
      barf("loadObj: error whilst reading `%s'", path);
   }

   /* verify the in-memory image */
606
#  if defined(OBJFORMAT_ELF)
607
   r = ocVerifyImage_ELF ( oc );
608
#  elif defined(OBJFORMAT_PEi386)
609 610 611 612 613 614 615
   r = ocVerifyImage_PEi386 ( oc );
#  else
   barf("loadObj: no verify method");
#  endif
   if (!r) { return r; }

   /* build the symbol list for this image */
616
#  if defined(OBJFORMAT_ELF)
617
   r = ocGetNames_ELF ( oc );
618
#  elif defined(OBJFORMAT_PEi386)
619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643
   r = ocGetNames_PEi386 ( oc );
#  else
   barf("loadObj: no getNames method");
#  endif
   if (!r) { return r; }

   /* loaded, but not resolved yet */
   oc->status = OBJECT_LOADED;

   return 1;
}

/* -----------------------------------------------------------------------------
 * resolve all the currently unlinked objects in memory
 *
 * Returns: 1 if ok, 0 on error.
 */
HsInt 
resolveObjs( void )
{
    ObjectCode *oc;
    int r;

    for (oc = objects; oc; oc = oc->next) {
	if (oc->status != OBJECT_RESOLVED) {
644
#           if defined(OBJFORMAT_ELF)
645
	    r = ocResolve_ELF ( oc );
646
#           elif defined(OBJFORMAT_PEi386)
647
	    r = ocResolve_PEi386 ( oc );
648
#           else
649
	    barf("resolveObjs: not implemented on this platform");
650
#           endif
651 652 653 654 655 656 657 658 659 660 661 662 663
	    if (!r) { return r; }
	    oc->status = OBJECT_RESOLVED;
	}
    }
    return 1;
}

/* -----------------------------------------------------------------------------
 * delete an object from the pool
 */
HsInt
unloadObj( char *path )
{
664
    ObjectCode *oc, *prev;
665

666 667 668
    ASSERT(symhash != NULL);
    ASSERT(objects != NULL);

669 670
    prev = NULL;
    for (oc = objects; oc; prev = oc, oc = oc->next) {
671 672 673 674 675 676
	if (!strcmp(oc->fileName,path)) {

	    /* Remove all the mappings for the symbols within this
	     * object..
	     */
	    { 
677 678 679 680 681 682 683
                int i;
                for (i = 0; i < oc->n_symbols; i++) {
                   if (oc->symbols[i] != NULL) {
                       removeStrHashTable(symhash, oc->symbols[i], NULL);
                   }
                }
            }
684

685 686 687 688 689 690
	    if (prev == NULL) {
		objects = oc->next;
	    } else {
		prev->next = oc->next;
	    }

691 692 693
	    /* We're going to leave this in place, in case there are
	       any pointers from the heap into it: */
	    /* free(oc->image); */
694
	    free(oc->fileName);
695 696
	    free(oc->symbols);
	    free(oc->sections);
697 698 699
	    /* The local hash table should have been freed at the end
               of the ocResolve_ call on it. */
            ASSERT(oc->lochash == NULL);
700 701 702 703
	    free(oc);
	    return 1;
	}
    }
704

705 706 707 708 709
    belch("unloadObj: can't find `%s' to unload", path);
    return 0;
}

/* --------------------------------------------------------------------------
710
 * PEi386 specifics (Win32 targets)
711 712 713 714 715 716 717 718 719 720
 * ------------------------------------------------------------------------*/

/* The information for this linker comes from 
      Microsoft Portable Executable 
      and Common Object File Format Specification
      revision 5.1 January 1998
   which SimonM says comes from the MS Developer Network CDs.
*/
      

721
#if defined(OBJFORMAT_PEi386)
722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789



typedef unsigned char  UChar;
typedef unsigned short UInt16;
typedef unsigned int   UInt32;
typedef          int   Int32;


typedef 
   struct {
      UInt16 Machine;
      UInt16 NumberOfSections;
      UInt32 TimeDateStamp;
      UInt32 PointerToSymbolTable;
      UInt32 NumberOfSymbols;
      UInt16 SizeOfOptionalHeader;
      UInt16 Characteristics;
   }
   COFF_header;

#define sizeof_COFF_header 20


typedef 
   struct {
      UChar  Name[8];
      UInt32 VirtualSize;
      UInt32 VirtualAddress;
      UInt32 SizeOfRawData;
      UInt32 PointerToRawData;
      UInt32 PointerToRelocations;
      UInt32 PointerToLinenumbers;
      UInt16 NumberOfRelocations;
      UInt16 NumberOfLineNumbers;
      UInt32 Characteristics; 
   }
   COFF_section;

#define sizeof_COFF_section 40


typedef
   struct {
      UChar  Name[8];
      UInt32 Value;
      UInt16 SectionNumber;
      UInt16 Type;
      UChar  StorageClass;
      UChar  NumberOfAuxSymbols;
   }
   COFF_symbol;

#define sizeof_COFF_symbol 18


typedef
   struct {
      UInt32 VirtualAddress;
      UInt32 SymbolTableIndex;
      UInt16 Type;
   }
   COFF_reloc;

#define sizeof_COFF_reloc 10


/* From PE spec doc, section 3.3.2 */
790 791 792 793 794 795 796 797 798 799
/* Note use of MYIMAGE_* since IMAGE_* are already defined in
   windows.h -- for the same purpose, but I want to know what I'm
   getting, here. */
#define MYIMAGE_FILE_RELOCS_STRIPPED     0x0001
#define MYIMAGE_FILE_EXECUTABLE_IMAGE    0x0002
#define MYIMAGE_FILE_DLL                 0x2000
#define MYIMAGE_FILE_SYSTEM              0x1000
#define MYIMAGE_FILE_BYTES_REVERSED_HI   0x8000
#define MYIMAGE_FILE_BYTES_REVERSED_LO   0x0080
#define MYIMAGE_FILE_32BIT_MACHINE       0x0100
800 801

/* From PE spec doc, section 5.4.2 and 5.4.4 */
802 803 804
#define MYIMAGE_SYM_CLASS_EXTERNAL       2
#define MYIMAGE_SYM_CLASS_STATIC         3
#define MYIMAGE_SYM_UNDEFINED            0
805 806

/* From PE spec doc, section 4.1 */
807 808
#define MYIMAGE_SCN_CNT_CODE             0x00000020
#define MYIMAGE_SCN_CNT_INITIALIZED_DATA 0x00000040
809 810

/* From PE spec doc, section 5.2.1 */
811 812
#define MYIMAGE_REL_I386_DIR32           0x0006
#define MYIMAGE_REL_I386_REL32           0x0014
813 814 815 816 817 818 819 820 821 822 823


/* We use myindex to calculate array addresses, rather than
   simply doing the normal subscript thing.  That's because
   some of the above structs have sizes which are not 
   a whole number of words.  GCC rounds their sizes up to a
   whole number of words, which means that the address calcs
   arising from using normal C indexing or pointer arithmetic
   are just plain wrong.  Sigh.
*/
static UChar *
824
myindex ( int scale, void* base, int index )
825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884
{
   return
      ((UChar*)base) + scale * index;
}


static void
printName ( UChar* name, UChar* strtab )
{
   if (name[0]==0 && name[1]==0 && name[2]==0 && name[3]==0) {
      UInt32 strtab_offset = * (UInt32*)(name+4);
      fprintf ( stderr, "%s", strtab + strtab_offset );
   } else {
      int i;
      for (i = 0; i < 8; i++) {
         if (name[i] == 0) break;
         fprintf ( stderr, "%c", name[i] );
      }
   }
}


static void
copyName ( UChar* name, UChar* strtab, UChar* dst, int dstSize )
{
   if (name[0]==0 && name[1]==0 && name[2]==0 && name[3]==0) {
      UInt32 strtab_offset = * (UInt32*)(name+4);
      strncpy ( dst, strtab+strtab_offset, dstSize );
      dst[dstSize-1] = 0;
   } else {
      int i = 0;
      while (1) {
         if (i >= 8) break;
         if (name[i] == 0) break;
         dst[i] = name[i];
         i++;
      }
      dst[i] = 0;
   }
}


static UChar *
cstring_from_COFF_symbol_name ( UChar* name, UChar* strtab )
{
   UChar* newstr;
   /* If the string is longer than 8 bytes, look in the
      string table for it -- this will be correctly zero terminated. 
   */
   if (name[0]==0 && name[1]==0 && name[2]==0 && name[3]==0) {
      UInt32 strtab_offset = * (UInt32*)(name+4);
      return ((UChar*)strtab) + strtab_offset;
   }
   /* Otherwise, if shorter than 8 bytes, return the original,
      which by defn is correctly terminated.
   */
   if (name[7]==0) return name;
   /* The annoying case: 8 bytes.  Copy into a temporary
      (which is never freed ...)
   */
885 886 887 888
   newstr = stgMallocBytes(9, "cstring_from_COFF_symbol_name");
   ASSERT(newstr);
   strncpy(newstr,name,8);
   newstr[8] = 0;
889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909
   return newstr;
}


/* Just compares the short names (first 8 chars) */
static COFF_section *
findPEi386SectionCalled ( ObjectCode* oc,  char* name )
{
   int i;
   COFF_header* hdr 
      = (COFF_header*)(oc->image);
   COFF_section* sectab 
      = (COFF_section*) (
           ((UChar*)(oc->image)) 
           + sizeof_COFF_header + hdr->SizeOfOptionalHeader
        );
   for (i = 0; i < hdr->NumberOfSections; i++) {
      UChar* n1;
      UChar* n2;
      COFF_section* section_i 
         = (COFF_section*)
910
           myindex ( sizeof_COFF_section, sectab, i );
911 912 913 914 915 916 917 918 919 920 921 922 923 924 925
      n1 = (UChar*) &(section_i->Name);
      n2 = name;
      if (n1[0]==n2[0] && n1[1]==n2[1] && n1[2]==n2[2] && 
          n1[3]==n2[3] && n1[4]==n2[4] && n1[5]==n2[5] && 
          n1[6]==n2[6] && n1[7]==n2[7])
         return section_i;
   }

   return NULL;
}


static void
zapTrailingAtSign ( UChar* sym )
{
926
#  define my_isdigit(c) ((c) >= '0' && (c) <= '9')
927 928 929 930 931 932
   int i, j;
   if (sym[0] == 0) return;
   i = 0; 
   while (sym[i] != 0) i++;
   i--;
   j = i;
933
   while (j > 0 && my_isdigit(sym[j])) j--;
934
   if (j > 0 && sym[j] == '@' && j != i) sym[j] = 0;
935
#  undef my_isdigit
936 937 938 939 940 941 942 943 944 945 946
}


static int
ocVerifyImage_PEi386 ( ObjectCode* oc )
{
   int i, j;
   COFF_header*  hdr;
   COFF_section* sectab;
   COFF_symbol*  symtab;
   UChar*        strtab;
947
   /* fprintf(stderr, "\nLOADING %s\n", oc->fileName); */
948 949 950 951 952 953 954 955 956
   hdr = (COFF_header*)(oc->image);
   sectab = (COFF_section*) (
               ((UChar*)(oc->image)) 
               + sizeof_COFF_header + hdr->SizeOfOptionalHeader
            );
   symtab = (COFF_symbol*) (
               ((UChar*)(oc->image))
               + hdr->PointerToSymbolTable 
            );
957
   strtab = ((UChar*)symtab)
958 959 960
            + hdr->NumberOfSymbols * sizeof_COFF_symbol;

   if (hdr->Machine != 0x14c) {
961 962
      belch("Not x86 PEi386");
      return 0;
963 964
   }
   if (hdr->SizeOfOptionalHeader != 0) {
965 966
      belch("PEi386 with nonempty optional header");
      return 0;
967
   }
968 969 970 971
   if ( /* (hdr->Characteristics & MYIMAGE_FILE_RELOCS_STRIPPED) || */
        (hdr->Characteristics & MYIMAGE_FILE_EXECUTABLE_IMAGE) ||
        (hdr->Characteristics & MYIMAGE_FILE_DLL) ||
        (hdr->Characteristics & MYIMAGE_FILE_SYSTEM) ) {
972 973
      belch("Not a PEi386 object file");
      return 0;
974
   }
975 976 977 978 979 980 981 982 983
   if ( (hdr->Characteristics & MYIMAGE_FILE_BYTES_REVERSED_HI)
        /* || !(hdr->Characteristics & MYIMAGE_FILE_32BIT_MACHINE) */ ) {
      belch("Invalid PEi386 word size or endiannness: %d", 
            (int)(hdr->Characteristics));
      return 0;
   }
   /* fprintf(stderr, "strtab size %d\n", * (UInt32*)strtab); */
   if (* (UInt32*)strtab > 510000) {
      belch("PEi386 object has suspiciously large string table; > 64k relocs?");
984
      return 0;
985 986 987
   }

   /* No further verification after this point; only debug printing. */
988 989 990
   i = 0;
   IF_DEBUG(linker, i=1);
   if (i == 0) return 1;
991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014

   fprintf ( stderr, 
             "sectab offset = %d\n", ((UChar*)sectab) - ((UChar*)hdr) );
   fprintf ( stderr, 
             "symtab offset = %d\n", ((UChar*)symtab) - ((UChar*)hdr) );
   fprintf ( stderr, 
             "strtab offset = %d\n", ((UChar*)strtab) - ((UChar*)hdr) );

   fprintf ( stderr, "\n" );
   fprintf ( stderr, 
             "Machine:           0x%x\n", (UInt32)(hdr->Machine) );
   fprintf ( stderr, 
             "# sections:        %d\n",   (UInt32)(hdr->NumberOfSections) );
   fprintf ( stderr,
             "time/date:         0x%x\n", (UInt32)(hdr->TimeDateStamp) );
   fprintf ( stderr,
             "symtab offset:     %d\n",   (UInt32)(hdr->PointerToSymbolTable) );
   fprintf ( stderr, 
             "# symbols:         %d\n",   (UInt32)(hdr->NumberOfSymbols) );
   fprintf ( stderr, 
             "sz of opt hdr:     %d\n",   (UInt32)(hdr->SizeOfOptionalHeader) );
   fprintf ( stderr,
             "characteristics:   0x%x\n", (UInt32)(hdr->Characteristics) );

1015
   /* Print the section table. */
1016 1017 1018 1019 1020
   fprintf ( stderr, "\n" );
   for (i = 0; i < hdr->NumberOfSections; i++) {
      COFF_reloc* reltab;
      COFF_section* sectab_i
         = (COFF_section*)
1021
           myindex ( sizeof_COFF_section, sectab, i );
1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046
      fprintf ( stderr, 
                "\n"
                "section %d\n"
                "     name `",
                i 
              );
      printName ( sectab_i->Name, strtab );
      fprintf ( stderr, 
                "'\n"
                "    vsize %d\n"
                "    vaddr %d\n"
                "  data sz %d\n"
                " data off %d\n"
                "  num rel %d\n"
                "  off rel %d\n",
                sectab_i->VirtualSize,
                sectab_i->VirtualAddress,
                sectab_i->SizeOfRawData,
                sectab_i->PointerToRawData,
                sectab_i->NumberOfRelocations,
                sectab_i->PointerToRelocations
              );
      reltab = (COFF_reloc*) (
                  ((UChar*)(oc->image)) + sectab_i->PointerToRelocations
               );
1047

1048 1049 1050
      for (j = 0; j < sectab_i->NumberOfRelocations; j++) {
         COFF_symbol* sym;
         COFF_reloc* rel = (COFF_reloc*)
1051
                           myindex ( sizeof_COFF_reloc, reltab, j );
1052 1053 1054 1055 1056
         fprintf ( stderr, 
                   "        type 0x%-4x   vaddr 0x%-8x   name `",
                   (UInt32)rel->Type, 
                   rel->VirtualAddress );
         sym = (COFF_symbol*)
1057
               myindex ( sizeof_COFF_symbol, symtab, rel->SymbolTableIndex );
1058
         printName ( sym->Name, strtab -10 );
1059 1060 1061 1062 1063
         fprintf ( stderr, "'\n" );
      }
      fprintf ( stderr, "\n" );
   }

1064 1065 1066 1067 1068 1069 1070 1071 1072
   fprintf ( stderr, "\n" );
   fprintf ( stderr, "string table has size 0x%x\n", * (UInt32*)strtab );
   fprintf ( stderr, "---START of string table---\n");
   for (i = 4; i < *(Int32*)strtab; i++) {
      if (strtab[i] == 0) 
         fprintf ( stderr, "\n"); else 
         fprintf( stderr, "%c", strtab[i] );
   }
   fprintf ( stderr, "--- END  of string table---\n");
1073 1074 1075 1076 1077

   fprintf ( stderr, "\n" );
   i = 0;
   while (1) {
      COFF_symbol* symtab_i;
1078
      if (i >= (Int32)(hdr->NumberOfSymbols)) break;
1079
      symtab_i = (COFF_symbol*)
1080
                 myindex ( sizeof_COFF_symbol, symtab, i );
1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104
      fprintf ( stderr, 
                "symbol %d\n"
                "     name `",
                i 
              );
      printName ( symtab_i->Name, strtab );
      fprintf ( stderr, 
                "'\n"
                "    value 0x%x\n"
                "     sec# %d\n"
                "     type 0x%x\n"
                "   sclass 0x%x\n"
                "     nAux %d\n",
                symtab_i->Value,
                (Int32)(symtab_i->SectionNumber) - 1,
                (UInt32)symtab_i->Type,
                (UInt32)symtab_i->StorageClass,
                (UInt32)symtab_i->NumberOfAuxSymbols 
              );
      i += symtab_i->NumberOfAuxSymbols;
      i++;
   }

   fprintf ( stderr, "\n" );
1105
   return 1;
1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134
}


static int
ocGetNames_PEi386 ( ObjectCode* oc )
{
   COFF_header*  hdr;
   COFF_section* sectab;
   COFF_symbol*  symtab;
   UChar*        strtab;

   UChar* sname;
   void*  addr;
   int    i;
   
   hdr = (COFF_header*)(oc->image);
   sectab = (COFF_section*) (
               ((UChar*)(oc->image)) 
               + sizeof_COFF_header + hdr->SizeOfOptionalHeader
            );
   symtab = (COFF_symbol*) (
               ((UChar*)(oc->image))
               + hdr->PointerToSymbolTable 
            );
   strtab = ((UChar*)(oc->image))
            + hdr->PointerToSymbolTable
            + hdr->NumberOfSymbols * sizeof_COFF_symbol;

   /* Copy exported symbols into the ObjectCode. */
1135 1136 1137 1138 1139 1140 1141 1142

   oc->n_symbols = hdr->NumberOfSymbols;
   oc->symbols   = stgMallocBytes(oc->n_symbols * sizeof(char*),
                                  "ocGetNames_PEi386(oc->symbols)");
   /* Call me paranoid; I don't care. */
   for (i = 0; i < oc->n_symbols; i++) 
      oc->symbols[i] = NULL;

1143 1144 1145
   i = 0;
   while (1) {
      COFF_symbol* symtab_i;
1146
      if (i >= (Int32)(hdr->NumberOfSymbols)) break;
1147
      symtab_i = (COFF_symbol*)
1148
                 myindex ( sizeof_COFF_symbol, symtab, i );
1149

1150 1151
      if (symtab_i->StorageClass == MYIMAGE_SYM_CLASS_EXTERNAL &&
          symtab_i->SectionNumber != MYIMAGE_SYM_UNDEFINED) {
1152 1153 1154 1155

         /* This symbol is global and defined, viz, exported */
         COFF_section* sectabent;

1156 1157
         /* cstring_from_COFF_symbol_name always succeeds. */
         sname = cstring_from_COFF_symbol_name ( symtab_i->Name, strtab );
1158

1159 1160
         /* for MYIMAGE_SYMCLASS_EXTERNAL 
                && !MYIMAGE_SYM_UNDEFINED,
1161 1162 1163 1164 1165
            the address of the symbol is: 
                address of relevant section + offset in section
         */
         sectabent = (COFF_section*)
                     myindex ( sizeof_COFF_section, 
1166 1167
                               sectab,
                               symtab_i->SectionNumber-1 );
1168 1169 1170
         addr = ((UChar*)(oc->image))
                + (sectabent->PointerToRawData
                   + symtab_i->Value);
rrt's avatar
rrt committed
1171
         /* fprintf(stderr,"addSymbol %p `%s'\n", addr,sname); */
1172 1173 1174 1175
         IF_DEBUG(linker, belch("addSymbol %p `%s'\n", addr,sname);)
         ASSERT(i >= 0 && i < oc->n_symbols);
         oc->symbols[i] = sname;
         insertStrHashTable(symhash, sname, addr);
1176 1177 1178 1179 1180 1181
      }
      i += symtab_i->NumberOfAuxSymbols;
      i++;
   }

   /* Copy section information into the ObjectCode. */
1182 1183 1184 1185 1186 1187

   oc->n_sections = hdr->NumberOfSections;
   oc->sections = stgMallocBytes( oc->n_sections * sizeof(Section), 
                                  "ocGetNamesPEi386" );

   for (i = 0; i < oc->n_sections; i++) {
1188 1189 1190 1191 1192 1193 1194
      UChar* start;
      UChar* end;

      SectionKind kind 
         = SECTIONKIND_OTHER;
      COFF_section* sectab_i
         = (COFF_section*)
1195
           myindex ( sizeof_COFF_section, sectab, i );
rrt's avatar
rrt committed
1196
      IF_DEBUG(linker, belch("section name = %s\n", sectab_i->Name ));
1197 1198 1199 1200 1201 1202

#if 0
      /* I'm sure this is the Right Way to do it.  However, the 
         alternative of testing the sectab_i->Name field seems to
         work ok with Cygwin.
      */
1203 1204
      if (sectab_i->Characteristics & MYIMAGE_SCN_CNT_CODE || 
          sectab_i->Characteristics & MYIMAGE_SCN_CNT_INITIALIZED_DATA)
1205 1206 1207
         kind = SECTIONKIND_CODE_OR_RODATA;
#endif