/* -----------------------------------------------------------------------------
 *
 * (c) The GHC Team, 1998-2003
 *
 * STG-to-C glue.
 *
 * To run an STG function from C land, call
 *
 *		rv = StgRun(f,BaseReg);
 *
 * where "f" is the STG function to call, and BaseReg is the address of the
 * RegTable for this run (we might have separate RegTables if we're running
 * multiple threads on an SMP machine).
 *
 * In the end, "f" must JMP to StgReturn (defined below),
 * passing the return-value "rv" in R1,
 * to return to the caller of StgRun returning "rv" in
 * whatever way C returns a value.
 *
 * NOTE: StgRun/StgReturn do *NOT* load or store Hp or any
 * other registers (other than saving the C callee-saves
 * registers).  Instead, the called function "f" must do that
 * in STG land.
 *
 * GCC will have assumed that pushing/popping of C-stack frames is
 * going on when it generated its code, and used stack space
 * accordingly.  However, we actually post-process away all
 * such stack-framery (see ghc/driver/ghc-asm.lprl). Things will
 * be OK however, if we initially make sure there are
 * RESERVED_C_STACK_BYTES on the C-stack to begin with, for local
 * variables.
 *
 * -------------------------------------------------------------------------- */

#include "PosixSource.h"

/*
 * We define the following (unused) global register variables, because for
 * some reason gcc generates sub-optimal code for StgRun() on the Alpha
 * (unnecessarily saving extra registers on the stack) if we don't.
 *
 * Why do it at the top of this file, rather than near StgRun() below?  Because
 * gcc doesn't let us define global register variables after any function
 * definition has been read.  Any point after #include "Stg.h" would be too
 * late.
 *
 * We define alpha_EXTRA_CAREFUL here to save $s6, $f8 and $f9 -- registers
 * that we don't use but which are callee-save registers.  The __divq() routine
 * in libc.a clobbers $s6.
 */
#include "ghcconfig.h"
#ifdef alpha_HOST_ARCH
#define alpha_EXTRA_CAREFUL
/*
 * Placeholder global register variables.  $26 and $29 are the same
 * registers that the Alpha StgRun() below names real_ra and real_gp.
 */
register long   fake_ra __asm__("$26");
register long   fake_gp __asm__("$29");
#ifdef alpha_EXTRA_CAREFUL
/*
 * $15, $f8 and $f9 are the registers the Alpha StgRun() names real_s6,
 * real_f8 and real_f9; they are only handled when alpha_EXTRA_CAREFUL is on.
 */
register long   fake_s6 __asm__("$15");
register double fake_f8 __asm__("$f8");
register double fake_f9 __asm__("$f9");
#endif
#endif

/* include Stg.h first because we want real machine regs in here: we
 * have to get the value of R1 back from Stg land to C land intact.
 */
#include "Stg.h"
#include "Rts.h"
#include "StgRun.h"
#include "RtsFlags.h"
#include "OSThreads.h"
#include "Capability.h"

/* Only the DEBUG-guarded tracing code in this file needs these headers. */
#ifdef DEBUG
#include "RtsUtils.h"
#include "Printer.h"
#endif

#ifdef USE_MINIINTERPRETER

/* -----------------------------------------------------------------------------
   any architecture (using miniinterpreter)
   -------------------------------------------------------------------------- */

/*
 * Portable StgRun: call the current code pointer, take the value it
 * returns as the next code pointer, and repeat until a null pointer comes
 * back.  The result handed to the caller is then read from R1.p.
 *
 *   f       - first code pointer to call
 *   basereg - not used by this implementation (marked STG_UNUSED)
 */
StgRegTable * StgRun(StgFunPtr f, StgRegTable *basereg STG_UNUSED)
{
    while (f) {
	/* optional trace of every jump, under the "interpreter" debug flag */
	IF_DEBUG(interpreter,
	    debugBelch("Jumping to ");
	    printPtr((P_)f); fflush(stdout);
	    debugBelch("\n");
	    );
	f = (StgFunPtr) (f)();
    }
    return (StgRegTable *)R1.p;
}

/*
 * Mini-interpreter StgReturn: returns a null code pointer.  The loop in the
 * mini-interpreter StgRun() stops as soon as the code pointer it gets back
 * is null.
 */
StgFunPtr StgReturn(void)
{
    return 0;
}

#else /* !USE_MINIINTERPRETER */

/*
 * Name of the StgReturn label as spelled inside the inline assembler
 * below; it carries a leading underscore when LEADING_UNDERSCORE is defined.
 */
#ifdef LEADING_UNDERSCORE
#define STG_RETURN "_StgReturn"
#else
#define STG_RETURN "StgReturn"
#endif

/* -----------------------------------------------------------------------------
   x86 architecture
   -------------------------------------------------------------------------- */

#ifdef i386_HOST_ARCH

/* Assembler directive used to export the StgReturn label. */
#ifdef darwin_TARGET_OS
#define STG_GLOBAL ".globl "
#else
#define STG_GLOBAL ".global "
#endif

/*
 * x86 StgRun.
 *
 * Asm operands: %0 = r (bound to %eax), %1 = space, %2 = f, %3 = basereg,
 * %4 = RESERVED_C_STACK_BYTES.
 *
 * Entry: stores %ebx, %esi, %edi and %ebp in four consecutive words at
 * %esp + RESERVED_C_STACK_BYTES, loads basereg into %ebx and jumps to f.
 * Exit (the StgReturn label): copies %esi into %eax as the result and
 * reloads the four registers from the same words.
 *
 * NOTE(review): the save area is addressed relative to %esp rather than
 * through the `space` operand, so this presumably relies on `space` being
 * laid out at the bottom of the frame -- confirm against the generated code.
 * NOTE(review): only "edx" is listed as clobbered; confirm that nothing else
 * needs declaring for the code reached through the jump.
 */
StgRegTable *
StgRun(StgFunPtr f, StgRegTable *basereg) {

    unsigned char space[ RESERVED_C_STACK_BYTES + 4*sizeof(void *) ];
    StgRegTable * r;

    __asm__ volatile (
	/*
	 * save callee-saves registers on behalf of the STG code.
	 */
	"movl %%esp, %%eax\n\t"
	"addl %4, %%eax\n\t"
        "movl %%ebx,0(%%eax)\n\t"
        "movl %%esi,4(%%eax)\n\t"
        "movl %%edi,8(%%eax)\n\t"
        "movl %%ebp,12(%%eax)\n\t"
	/*
	 * Set BaseReg
	 */
	"movl %3,%%ebx\n\t"
	/*
	 * grab the function argument from the stack
	 */
        "movl %2,%%eax\n\t"

	/*
	 * Darwin note:
	 * The stack pointer has to be aligned to a multiple of 16 bytes at
	 * this point. This works out correctly with gcc 4.0.1, but it might
	 * break at any time in the future. TODO: Make this future-proof.
	 */

	/*
	 * jump to it
	 */
        "jmp *%%eax\n\t"

	STG_GLOBAL STG_RETURN "\n"
       	STG_RETURN ":\n\t"

	"movl %%esi, %%eax\n\t"   /* Return value in R1  */

	/*
	 * restore callee-saves registers.  (Don't stomp on %%eax!)
	 */
	"movl %%esp, %%edx\n\t"
	"addl %4, %%edx\n\t"
        "movl 0(%%edx),%%ebx\n\t"	/* restore the registers saved above */
        "movl 4(%%edx),%%esi\n\t"
        "movl 8(%%edx),%%edi\n\t"
        "movl 12(%%edx),%%ebp\n\t"

      : "=&a" (r), "=m" (space)
      : "m" (f), "m" (basereg), "i" (RESERVED_C_STACK_BYTES)
      : "edx" /* stomps on %edx */
    );

    return r;
}

#endif

/* ----------------------------------------------------------------------------
   x86-64 is almost the same as plain x86.

   I've done it using entirely inline assembler, because I couldn't
   get gcc to generate the correct subtraction from %rsp by using
   the local array variable trick.  It didn't seem to reserve
   enough space.  Oh well, it's not much harder this way.

   ------------------------------------------------------------------------- */

#ifdef x86_64_HOST_ARCH

extern StgRegTable * StgRun(StgFunPtr f, StgRegTable *basereg);

/*
 * Container for the assembler text that defines the StgRun and StgReturn
 * labels on x86-64; this C function itself is never meant to be called.
 *
 * Asm operand %0 is the frame size, RESERVED_C_STACK_BYTES+48+8.  The top
 * 48 bytes of the frame hold %rbx, %rbp and %r12-%r15; the extra 8 bytes
 * are explained in the "HACK alert" comment below.
 *
 * StgRun:    drops %rsp by %0, saves the six registers, copies %rsi into
 *            %r13 (BaseReg) and jumps to the address in %rdi.
 * StgReturn: copies %rbx into %rax as the result, reloads the six
 *            registers, pops the frame and returns.
 */
static void GNUC3_ATTRIBUTE(used)
StgRunIsImplementedInAssembler(void)
{
    __asm__ volatile (
	/*
	 * save callee-saves registers on behalf of the STG code.
	 */
	".globl StgRun\n"
	"StgRun:\n\t"
	"subq %0, %%rsp\n\t"
	"movq %%rsp, %%rax\n\t"
	"addq %0-48, %%rax\n\t"
        "movq %%rbx,0(%%rax)\n\t"
        "movq %%rbp,8(%%rax)\n\t"
        "movq %%r12,16(%%rax)\n\t"
        "movq %%r13,24(%%rax)\n\t"
        "movq %%r14,32(%%rax)\n\t"
        "movq %%r15,40(%%rax)\n\t"
	/*
	 * Set BaseReg
	 */
	"movq %%rsi,%%r13\n\t"
	/*
	 * grab the function argument from the stack, and jump to it.
	 */
        "movq %%rdi,%%rax\n\t"
        "jmp *%%rax\n\t"

	".global " STG_RETURN "\n"
       	STG_RETURN ":\n\t"

	"movq %%rbx, %%rax\n\t"   /* Return value in R1  */

	/*
	 * restore callee-saves registers.  (Don't stomp on %%rax!)
	 */
	"movq %%rsp, %%rdx\n\t"
	"addq %0-48, %%rdx\n\t"
        "movq 0(%%rdx),%%rbx\n\t"	/* restore the registers saved above */
        "movq 8(%%rdx),%%rbp\n\t"
        "movq 16(%%rdx),%%r12\n\t"
        "movq 24(%%rdx),%%r13\n\t"
        "movq 32(%%rdx),%%r14\n\t"
        "movq 40(%%rdx),%%r15\n\t"
	"addq %0, %%rsp\n\t"
	"retq"

	: : "i"(RESERVED_C_STACK_BYTES+48+8 /*stack frame size*/));
    /* 
       HACK alert!

       The x86_64 ABI specifies that on a procedure call, %rsp is
       aligned on a 16-byte boundary + 8.  That is, the first
       argument on the stack after the return address will be
       16-byte aligned.  
       
       Which should be fine: RESERVED_C_STACK_BYTES+48 is a multiple
       of 16 bytes.  
       
       BUT... when we do a C-call from STG land, gcc likes to put the
       stack alignment adjustment in the prolog.  eg. if we're calling
       a function with arguments in regs, gcc will insert 'subq $8,%rsp'
       in the prolog, to keep %rsp aligned (the return address is 8
       bytes, remember).  The mangler throws away the prolog, so we
       lose the stack alignment.

       The hack is to add this extra 8 bytes to our %rsp adjustment
       here, so that throughout STG code, %rsp is 16-byte aligned,
       ready for a C-call.  

       A quick way to see if this is wrong is to compile this code:

          main = System.Exit.exitWith ExitSuccess

       And run it with +RTS -sstderr.  The stats code in the RTS, in
       particular statsPrintf(), relies on the stack alignment because
       it saves the %xmm regs on the stack, so it'll fall over if the
       stack isn't aligned, and calling exitWith from Haskell invokes
       shutdownHaskellAndExit using a C call.

       Future gcc releases will almost certainly break this hack...
    */
}

#endif /* x86-64 */

/* -----------------------------------------------------------------------------
   Sparc architecture

   --
   OLD COMMENT from GHC-3.02:

   We want tailjumps to be calls, because `call xxx' is the only Sparc
   branch that allows an arbitrary label as a target.  (Gcc's ``goto
   *target'' construct ends up loading the label into a register and
   then jumping, at the cost of two extra instructions for the 32-bit
   load.)

   When entering the threaded world, we stash our return address in a
   known location so that %i7 is available as an extra
   callee-saves register.  Of course, we have to restore this when
   coming out of the threaded world.

   I hate this god-forsaken architecture.  Since the top of the
   reserved stack space is used for globals and the bottom is reserved
   for outgoing arguments, we have to stick our return address
   somewhere in the middle.  Currently, I'm allowing 100 extra
   outgoing arguments beyond the first 6.  --JSM

   Updated info (GHC 4.06): we don't appear to use %i7 any more, so
   I'm not sure whether we still need to save it.  Incidentally, what
   does the last paragraph above mean when it says "the top of the
   stack is used for globals"?  What globals?  --SDM

   Updated info (GHC 4.08.2): not saving %i7 any more (see below).
   -------------------------------------------------------------------------- */

#ifdef sparc_HOST_ARCH

/*
 * Sparc StgRun.
 *
 * Reserves RESERVED_C_STACK_BYTES of C stack through the local array
 * `space`, calls f, and places the StgReturn label directly after the
 * call.  The value returned to the caller is read from R1.i once control
 * is back at that label.  basereg is not referenced in this version.
 */
StgRegTable *
StgRun(StgFunPtr f, StgRegTable *basereg) {

    unsigned char space[RESERVED_C_STACK_BYTES];
#if 0
    register void *i7 __asm__("%i7");
    ((void **)(space))[100] = i7;
#endif
    f();
    __asm__ volatile (
	    ".align 4\n"
            ".global " STG_RETURN "\n"
       	    STG_RETURN ":"
	    : : : "l0","l1","l2","l3","l4","l5","l6","l7");
    /* we tell the C compiler that l0-l7 are clobbered on return to
     * StgReturn, otherwise it tries to use these to save eg. the
     * address of space[100] across the call.  The correct thing
     * to do would be to save all the callee-saves regs, but we
     * can't be bothered to do that.
     *
     * The code that gcc generates for this little fragment is now
     * terrible.  We could do much better by coding it directly in
     * assembler.
     */
#if 0
    /* updated 4.08.2: we don't save %i7 in the middle of the reserved
     * space any more, since gcc tries to save its address across the
     * call to f(), this gets clobbered in STG land and we end up
     * dereferencing a bogus pointer in StgReturn.
     */
    __asm__ volatile ("ld %1,%0"
		      : "=r" (i7) : "m" (((void **)(space))[100]));
#endif
    return (StgRegTable *)R1.i;
}

#endif

/* -----------------------------------------------------------------------------
   alpha architecture

   "The stack pointer (SP) must at all times denote an address that has octaword
    alignment. (This restriction has the side effect that the in-memory portion
    of the argument list, if any, will start on an octaword boundary.) Note that
    the stack grows toward lower addresses. During a procedure invocation, SP
    can never be set to a value that is higher than the value of SP at entry to
    that procedure invocation.

   "The contents of the stack, located above the portion of the argument list
    (if any) that is passed in memory, belong to the calling procedure. Because
    they are part of the calling procedure, they should not be read or written
    by the called procedure, except as specified by indirect arguments or
    language-controlled up-level references.

   "The SP value might be used by the hardware when raising exceptions and
    asynchronous interrupts. It must be assumed that the contents of the stack
    below the current SP value and within the stack for the current thread are
    continually and unpredictably modified, as specified in the _Alpha
    Architecture Reference Manual_, and as a result of asynchronous software
    actions."

   -- Compaq Computer Corporation, Houston. Tru64 UNIX Calling Standard for
      Alpha Systems, 5.1 edition, August 2000, section 3.2.1.
      http://www.tru64unix.compaq.com/docs/base_doc/DOCUMENTATION/V51_PDF/ARH9MBTE.PDF
   -------------------------------------------------------------------------- */

#ifdef alpha_HOST_ARCH

/*
 * Alpha StgRun.
 *
 * Each real_* variable is pinned to a machine register and paired with a
 * volatile save_* stack slot.  The function copies every register into its
 * slot, puts f in $27, lowers $30 by RESERVED_C_STACK_BYTES and jumps
 * through $27.  At the StgReturn label it raises $30 again, takes the
 * result from $14 (real_s5) and copies every slot back into its register.
 *
 * $15, $f8 and $f9 are only handled when alpha_EXTRA_CAREFUL is defined.
 * basereg is not referenced in this version.
 */
StgRegTable *
StgRun(StgFunPtr f, StgRegTable *basereg)
{
    register long   real_ra __asm__("$26"); volatile long   save_ra;
    register long   real_gp __asm__("$29"); volatile long   save_gp;

    register long   real_s0 __asm__("$9" ); volatile long   save_s0;
    register long   real_s1 __asm__("$10"); volatile long   save_s1;
    register long   real_s2 __asm__("$11"); volatile long   save_s2;
    register long   real_s3 __asm__("$12"); volatile long   save_s3;
    register long   real_s4 __asm__("$13"); volatile long   save_s4;
    register long   real_s5 __asm__("$14"); volatile long   save_s5;
#ifdef alpha_EXTRA_CAREFUL
    register long   real_s6 __asm__("$15"); volatile long   save_s6;
#endif

    register double real_f2 __asm__("$f2"); volatile double save_f2;
    register double real_f3 __asm__("$f3"); volatile double save_f3;
    register double real_f4 __asm__("$f4"); volatile double save_f4;
    register double real_f5 __asm__("$f5"); volatile double save_f5;
    register double real_f6 __asm__("$f6"); volatile double save_f6;
    register double real_f7 __asm__("$f7"); volatile double save_f7;
#ifdef alpha_EXTRA_CAREFUL
    register double real_f8 __asm__("$f8"); volatile double save_f8;
    register double real_f9 __asm__("$f9"); volatile double save_f9;
#endif

    register StgFunPtr real_pv __asm__("$27");

    StgRegTable * ret;

    /* save the registers into their stack slots */
    save_ra = real_ra;
    save_gp = real_gp;

    save_s0 = real_s0;
    save_s1 = real_s1;
    save_s2 = real_s2;
    save_s3 = real_s3;
    save_s4 = real_s4;
    save_s5 = real_s5;
#ifdef alpha_EXTRA_CAREFUL
    save_s6 = real_s6;
#endif

    save_f2 = real_f2;
    save_f3 = real_f3;
    save_f4 = real_f4;
    save_f5 = real_f5;
    save_f6 = real_f6;
    save_f7 = real_f7;
#ifdef alpha_EXTRA_CAREFUL
    save_f8 = real_f8;
    save_f9 = real_f9;
#endif

    /* the jump below goes through $27 */
    real_pv = f;

    __asm__ volatile(	"lda $30,-%0($30)"	"\n"
		"\t"	"jmp ($27)"		"\n"
		"\t"	".align 3"		"\n"
		".globl " STG_RETURN		"\n"
		STG_RETURN ":"			"\n"
		"\t"	"lda $30,%0($30)"	"\n"
		: : "K" (RESERVED_C_STACK_BYTES));

    /* real_s5 is a long; make the integer-to-pointer conversion explicit */
    ret = (StgRegTable *) real_s5;

    /* restore the registers from their stack slots */
    real_s0 = save_s0;
    real_s1 = save_s1;
    real_s2 = save_s2;
    real_s3 = save_s3;
    real_s4 = save_s4;
    real_s5 = save_s5;
#ifdef alpha_EXTRA_CAREFUL
    real_s6 = save_s6;
#endif

    real_f2 = save_f2;
    real_f3 = save_f3;
    real_f4 = save_f4;
    real_f5 = save_f5;
    real_f6 = save_f6;
    real_f7 = save_f7;
#ifdef alpha_EXTRA_CAREFUL
    real_f8 = save_f8;
    real_f9 = save_f9;
#endif

    real_ra = save_ra;
    real_gp = save_gp;

    return ret;
}

#endif /* alpha_HOST_ARCH */

/* -----------------------------------------------------------------------------
   HP-PA architecture
   -------------------------------------------------------------------------- */

#ifdef hppa1_1_HOST_ARCH

/*
 * HP-PA StgRun.
 *
 * The first asm statement points %r19 at an offset from %r30 and stores
 * %r3-%r18 (16 words) followed by %fr12-%fr21 (10 doubles) there.  f is
 * then called.  The second asm statement defines the StgReturn label,
 * copies %r11 into `ret` and reloads the same registers from the same
 * offset.  basereg is not referenced in this version.
 *
 * NOTE(review): `space` is sized with 16*sizeof(long), but both asm
 * statements compute the save-area offset with 116*sizeof(long); confirm
 * that 116 is intended.
 * NOTE(review): `ret` is a plain "=r" output written before the reloads of
 * %r3-%r18; confirm the compiler cannot place it in one of those registers.
 */
StgRegTable *
StgRun(StgFunPtr f, StgRegTable *basereg)
{
    StgChar space[RESERVED_C_STACK_BYTES+16*sizeof(long)+10*sizeof(double)];
    StgRegTable * ret;

    __asm__ volatile ("ldo %0(%%r30),%%r19\n"
		      "\tstw %%r3, 0(0,%%r19)\n"
                      "\tstw %%r4, 4(0,%%r19)\n"
                      "\tstw %%r5, 8(0,%%r19)\n"
                      "\tstw %%r6,12(0,%%r19)\n"
                      "\tstw %%r7,16(0,%%r19)\n"
                      "\tstw %%r8,20(0,%%r19)\n"
                      "\tstw %%r9,24(0,%%r19)\n"
		      "\tstw %%r10,28(0,%%r19)\n"
                      "\tstw %%r11,32(0,%%r19)\n"
                      "\tstw %%r12,36(0,%%r19)\n"
                      "\tstw %%r13,40(0,%%r19)\n"
                      "\tstw %%r14,44(0,%%r19)\n"
                      "\tstw %%r15,48(0,%%r19)\n"
                      "\tstw %%r16,52(0,%%r19)\n"
                      "\tstw %%r17,56(0,%%r19)\n"
                      "\tstw %%r18,60(0,%%r19)\n"
		      "\tldo 80(%%r19),%%r19\n"
		      "\tfstds %%fr12,-16(0,%%r19)\n"
		      "\tfstds %%fr13, -8(0,%%r19)\n"
		      "\tfstds %%fr14,  0(0,%%r19)\n"
		      "\tfstds %%fr15,  8(0,%%r19)\n"
		      "\tldo 32(%%r19),%%r19\n"
		      "\tfstds %%fr16,-16(0,%%r19)\n"
		      "\tfstds %%fr17, -8(0,%%r19)\n"
		      "\tfstds %%fr18,  0(0,%%r19)\n"
		      "\tfstds %%fr19,  8(0,%%r19)\n"
		      "\tldo 32(%%r19),%%r19\n"
		      "\tfstds %%fr20,-16(0,%%r19)\n"
		      "\tfstds %%fr21, -8(0,%%r19)\n" : :
                      "n" (-(116 * sizeof(long) + 10 * sizeof(double))) : "%r19"
		      );

    f();

    __asm__ volatile (".align 4\n"
               	      "\t.EXPORT " STG_RETURN ",CODE\n"
		      "\t.EXPORT " STG_RETURN ",ENTRY,PRIV_LEV=3\n"
                      STG_RETURN "\n"
                      /* "\tldo %0(%%r3),%%r19\n" */
                      "\tldo %1(%%r30),%%r19\n"
                      "\tcopy %%r11, %0\n"  /* save R1 */
		      "\tldw  0(0,%%r19),%%r3\n"
                      "\tldw  4(0,%%r19),%%r4\n"
                      "\tldw  8(0,%%r19),%%r5\n"
                      "\tldw 12(0,%%r19),%%r6\n"
                      "\tldw 16(0,%%r19),%%r7\n"
                      "\tldw 20(0,%%r19),%%r8\n"
                      "\tldw 24(0,%%r19),%%r9\n"
		      "\tldw 28(0,%%r19),%%r10\n"
                      "\tldw 32(0,%%r19),%%r11\n"
                      "\tldw 36(0,%%r19),%%r12\n"
                      "\tldw 40(0,%%r19),%%r13\n"
                      "\tldw 44(0,%%r19),%%r14\n"
                      "\tldw 48(0,%%r19),%%r15\n"
                      "\tldw 52(0,%%r19),%%r16\n"
                      "\tldw 56(0,%%r19),%%r17\n"
                      "\tldw 60(0,%%r19),%%r18\n"
		      "\tldo 80(%%r19),%%r19\n"
		      "\tfldds -16(0,%%r19),%%fr12\n"
		      "\tfldds  -8(0,%%r19),%%fr13\n"
		      "\tfldds   0(0,%%r19),%%fr14\n"
		      "\tfldds   8(0,%%r19),%%fr15\n"
		      "\tldo 32(%%r19),%%r19\n"
		      "\tfldds -16(0,%%r19),%%fr16\n"
		      "\tfldds  -8(0,%%r19),%%fr17\n"
		      "\tfldds   0(0,%%r19),%%fr18\n"
		      "\tfldds   8(0,%%r19),%%fr19\n"
		      "\tldo 32(%%r19),%%r19\n"
		      "\tfldds -16(0,%%r19),%%fr20\n"
		      "\tfldds  -8(0,%%r19),%%fr21\n"
		         : "=r" (ret)
		         : "n" (-(116 * sizeof(long) + 10 * sizeof(double)))
		         : "%r19"
		      );

    return ret;
}

#endif /* hppa1_1_HOST_ARCH */

/* -----------------------------------------------------------------------------
   PowerPC architecture

   Everything is in assembler, so we don't have to deal with GCC...

   -------------------------------------------------------------------------- */

#ifdef powerpc_HOST_ARCH

extern StgRegTable * StgRun(StgFunPtr f, StgRegTable *basereg);

#ifdef darwin_HOST_OS
/*
 * Darwin/PowerPC: container for the assembler text that defines the
 * _StgRun and _StgReturn labels.
 *
 * Asm operand %0 is the frame size, RESERVED_C_STACK_BYTES+224.
 *
 * _StgRun:    branches to saveFP, stores r13-r31 with stmw at -220(r1),
 *             pushes the frame, copies r4 into r27 (BaseReg) and jumps to
 *             the address in r3 through ctr (r12 gets a copy of it).
 * _StgReturn: copies r14 into r3 as the result, pops the frame, reloads
 *             r13-r31 and leaves through restFP.
 *
 * saveFP and restFP are defined outside this file -- presumably the Darwin
 * floating-point save/restore helper routines; confirm.
 */
void StgRunIsImplementedInAssembler(void)
{
#if HAVE_SUBSECTIONS_VIA_SYMBOLS
            // if the toolchain supports deadstripping, we have to
            // prevent it here (it tends to get confused here).
        __asm__ volatile (".no_dead_strip _StgRunIsImplementedInAssembler");
#endif
	__asm__ volatile (
		"\n.globl _StgRun\n"
		"_StgRun:\n"
		"\tmflr r0\n"
		"\tbl saveFP # f14\n"
		"\tstmw r13,-220(r1)\n"
		"\tstwu r1,-%0(r1)\n"
                "\tmr r27,r4\n" // BaseReg == r27
		"\tmtctr r3\n"
		"\tmr r12,r3\n"
		"\tbctr\n"
		".globl _StgReturn\n"
		"_StgReturn:\n"
		"\tmr r3,r14\n"
		"\tla r1,%0(r1)\n"
		"\tlmw r13,-220(r1)\n"
		"\tb restFP # f14\n"
	: : "i"(RESERVED_C_STACK_BYTES+224 /*stack frame size*/));
}
#else

// This version is for PowerPC Linux.

// Differences from the Darwin/Mac OS X version:
// *) Different Assembler Syntax
// *) Doesn't use Register Saving Helper Functions (although they exist somewhere)
// *) We may not access positive stack offsets
//    (no "Red Zone" as in the Darwin ABI)
// *) The Link Register is saved to a different offset in the caller's stack frame
//    (Linux: 4(r1), Darwin 8(r1))

Simon Marlow's avatar
Simon Marlow committed
625 626
static void GNUC3_ATTRIBUTE(used)
StgRunIsImplementedInAssembler(void)
627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654
{
	__asm__ volatile (
		"\t.globl StgRun\n"
		"\t.type StgRun,@function\n"
		"StgRun:\n"
		"\tmflr 0\n"
		"\tstw 0,4(1)\n"
		"\tmr 5,1\n"
		"\tstwu 1,-%0(1)\n"
		"\tstmw 13,-220(5)\n"
		"\tstfd 14,-144(5)\n"
		"\tstfd 15,-136(5)\n"
		"\tstfd 16,-128(5)\n"
		"\tstfd 17,-120(5)\n"
		"\tstfd 18,-112(5)\n"
		"\tstfd 19,-104(5)\n"
		"\tstfd 20,-96(5)\n"
		"\tstfd 21,-88(5)\n"
		"\tstfd 22,-80(5)\n"
		"\tstfd 23,-72(5)\n"
		"\tstfd 24,-64(5)\n"
		"\tstfd 25,-56(5)\n"
		"\tstfd 26,-48(5)\n"
		"\tstfd 27,-40(5)\n"
		"\tstfd 28,-32(5)\n"
		"\tstfd 29,-24(5)\n"
		"\tstfd 30,-16(5)\n"
		"\tstfd 31,-8(5)\n"
655
		"\tmr 27,4\n"  // BaseReg == r27
656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686
		"\tmtctr 3\n"
		"\tmr 12,3\n"
		"\tbctr\n"
		".globl StgReturn\n"
		"\t.type StgReturn,@function\n"
		"StgReturn:\n"
		"\tmr 3,14\n"
		"\tla 5,%0(1)\n"
		"\tlmw 13,-220(5)\n"
		"\tlfd 14,-144(5)\n"
		"\tlfd 15,-136(5)\n"
		"\tlfd 16,-128(5)\n"
		"\tlfd 17,-120(5)\n"
		"\tlfd 18,-112(5)\n"
		"\tlfd 19,-104(5)\n"
		"\tlfd 20,-96(5)\n"
		"\tlfd 21,-88(5)\n"
		"\tlfd 22,-80(5)\n"
		"\tlfd 23,-72(5)\n"
		"\tlfd 24,-64(5)\n"
		"\tlfd 25,-56(5)\n"
		"\tlfd 26,-48(5)\n"
		"\tlfd 27,-40(5)\n"
		"\tlfd 28,-32(5)\n"
		"\tlfd 29,-24(5)\n"
		"\tlfd 30,-16(5)\n"
		"\tlfd 31,-8(5)\n"
		"\tmr 1,5\n"
		"\tlwz 0,4(1)\n"
		"\tmtlr 0\n"
		"\tblr\n"
687
	: : "i"(RESERVED_C_STACK_BYTES+224 /*stack frame size*/));
688 689
}
#endif
690 691 692

#endif

/* -----------------------------------------------------------------------------
   PowerPC 64 architecture

   Everything is in assembler, so we don't have to deal with GCC...

   -------------------------------------------------------------------------- */

#ifdef powerpc64_HOST_ARCH

#ifdef linux_HOST_OS
extern StgRegTable * StgRun(StgFunPtr f, StgRegTable *basereg);

/*
 * PowerPC64 Linux: container for the assembler text that defines StgRun
 * and StgReturn.  Each of the two names gets a 24-byte entry in the ".opd"
 * section pointing at the code label .StgRun / .StgReturn.
 *
 * Asm operand %0 is the frame size, RESERVED_C_STACK_BYTES+304.
 *
 * .StgRun:    stores LR at 16(r1), keeps the old r1 in r5, pushes the
 *             frame, stores r2, r14-r31 and f14-f31 at negative offsets
 *             from r5, copies r4 into r27 (BaseReg), loads r2 and the
 *             target address from the two doublewords r3 points at, and
 *             jumps through ctr.
 * .StgReturn: copies r14 into r3 as the result, recomputes the old r1 in
 *             r5, reloads the same registers, restores r1 and LR, and
 *             returns.
 */
static void GNUC3_ATTRIBUTE(used)
StgRunIsImplementedInAssembler(void)
{
        // r0 volatile
	// r1 stack pointer
	// r2 toc - needs to be saved
	// r3-r10 argument passing, volatile
	// r11, r12 very volatile (not saved across cross-module calls)
	// r13 thread local state (never modified, don't need to save)
	// r14-r31 callee-save
	__asm__ volatile (
		".section \".opd\",\"aw\"\n"
		".align 3\n"
		".globl StgRun\n"
		"StgRun:\n"
			"\t.quad\t.StgRun,.TOC.@tocbase,0\n"
			"\t.size StgRun,24\n"
		".globl StgReturn\n"
		"StgReturn:\n"
			"\t.quad\t.StgReturn,.TOC.@tocbase,0\n"
			"\t.size StgReturn,24\n"
		".previous\n"
		".globl .StgRun\n"
		".type .StgRun,@function\n"
		".StgRun:\n"
			"\tmflr 0\n"
			"\tmr 5, 1\n"
			"\tstd 0, 16(1)\n"
			"\tstdu 1, -%0(1)\n"
			"\tstd 2, -296(5)\n"
			"\tstd 14, -288(5)\n"
			"\tstd 15, -280(5)\n"
			"\tstd 16, -272(5)\n"
			"\tstd 17, -264(5)\n"
			"\tstd 18, -256(5)\n"
			"\tstd 19, -248(5)\n"
			"\tstd 20, -240(5)\n"
			"\tstd 21, -232(5)\n"
			"\tstd 22, -224(5)\n"
			"\tstd 23, -216(5)\n"
			"\tstd 24, -208(5)\n"
			"\tstd 25, -200(5)\n"
			"\tstd 26, -192(5)\n"
			"\tstd 27, -184(5)\n"
			"\tstd 28, -176(5)\n"
			"\tstd 29, -168(5)\n"
			"\tstd 30, -160(5)\n"
			"\tstd 31, -152(5)\n"
			"\tstfd 14, -144(5)\n"
			"\tstfd 15, -136(5)\n"
			"\tstfd 16, -128(5)\n"
			"\tstfd 17, -120(5)\n"
			"\tstfd 18, -112(5)\n"
			"\tstfd 19, -104(5)\n"
			"\tstfd 20, -96(5)\n"
			"\tstfd 21, -88(5)\n"
			"\tstfd 22, -80(5)\n"
			"\tstfd 23, -72(5)\n"
			"\tstfd 24, -64(5)\n"
			"\tstfd 25, -56(5)\n"
			"\tstfd 26, -48(5)\n"
			"\tstfd 27, -40(5)\n"
			"\tstfd 28, -32(5)\n"
			"\tstfd 29, -24(5)\n"
			"\tstfd 30, -16(5)\n"
			"\tstfd 31, -8(5)\n"
			"\tmr 27, 4\n"  // BaseReg == r27
			"\tld 2, 8(3)\n"
			"\tld 3, 0(3)\n"
			"\tmtctr 3\n"
			"\tbctr\n"
		".globl .StgReturn\n"
		".type .StgReturn,@function\n"
		".StgReturn:\n"
			"\tmr 3,14\n"
			"\tla 5, %0(1)\n" // load address == addi r5, r1, %0
			"\tld 2, -296(5)\n"
			"\tld 14, -288(5)\n"
			"\tld 15, -280(5)\n"
			"\tld 16, -272(5)\n"
			"\tld 17, -264(5)\n"
			"\tld 18, -256(5)\n"
			"\tld 19, -248(5)\n"
			"\tld 20, -240(5)\n"
			"\tld 21, -232(5)\n"
			"\tld 22, -224(5)\n"
			"\tld 23, -216(5)\n"
			"\tld 24, -208(5)\n"
			"\tld 25, -200(5)\n"
			"\tld 26, -192(5)\n"
			"\tld 27, -184(5)\n"
			"\tld 28, -176(5)\n"
			"\tld 29, -168(5)\n"
			"\tld 30, -160(5)\n"
			"\tld 31, -152(5)\n"
			"\tlfd 14, -144(5)\n"
			"\tlfd 15, -136(5)\n"
			"\tlfd 16, -128(5)\n"
			"\tlfd 17, -120(5)\n"
			"\tlfd 18, -112(5)\n"
			"\tlfd 19, -104(5)\n"
			"\tlfd 20, -96(5)\n"
			"\tlfd 21, -88(5)\n"
			"\tlfd 22, -80(5)\n"
			"\tlfd 23, -72(5)\n"
			"\tlfd 24, -64(5)\n"
			"\tlfd 25, -56(5)\n"
			"\tlfd 26, -48(5)\n"
			"\tlfd 27, -40(5)\n"
			"\tlfd 28, -32(5)\n"
			"\tlfd 29, -24(5)\n"
			"\tlfd 30, -16(5)\n"
			"\tlfd 31, -8(5)\n"
			"\tmr 1, 5\n"
			"\tld 0, 16(1)\n"
			"\tmtlr 0\n"
			"\tblr\n"
	: : "i"(RESERVED_C_STACK_BYTES+304 /*stack frame size*/));
}
#else // linux_HOST_OS
#error Only linux support for power64 right now.
#endif

#endif

/* -----------------------------------------------------------------------------
   IA64 architecture

   Again, in assembler - so we can fiddle with the register stack, and because
   gcc doesn't handle asm-clobbered callee-saves correctly.

   loc0  - loc15: preserved locals
   loc16 - loc28: STG registers
           loc29: saved ar.pfs
           loc30: saved b0
           loc31: saved gp (gcc 3.3 uses this slot)
   -------------------------------------------------------------------------- */

#ifdef ia64_HOST_ARCH

/* the memory stack is rarely used, so 16K is excessive */
#undef RESERVED_C_STACK_BYTES
#define RESERVED_C_STACK_BYTES 1024

/* Number of local registers requested by the alloc instruction below. */
#if ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 3)) || (__GNUC__ > 3)
/* gcc 3.3+: leave an extra slot for gp saves */
#define LOCALS 32
#else
#define LOCALS 31
#endif

/*
 * IA64: container for the assembler text that defines the StgRun and
 * StgReturn labels.
 *
 * Asm operands: %0 = RESERVED_C_STACK_BYTES + 6*16 (memory-stack frame
 * size; the top 6*16 bytes hold the spilled f16-f21), %1 = LOCALS.
 *
 * StgRun:    allocates the register frame (ar.pfs kept in loc29), loads
 *            the target address and gp from the two words r32 points at,
 *            lowers sp by %0, spills f16-f21, saves b0 in loc30 and
 *            branches to the target.
 * StgReturn: puts loc16 in r8 as the result, refills f16-f21, restores
 *            ar.pfs and b0, raises sp by %0 and returns.
 */
static void GNUC3_ATTRIBUTE(used)
StgRunIsImplementedInAssembler(void)
{
    __asm__ volatile(
		".global StgRun\n"
		"StgRun:\n"
		"\talloc loc29 = ar.pfs, 0, %1, 8, 0\n"	/* setup register frame */
		"\tld8 r18 = [r32],8\n"			/* get procedure address */
		"\tadds sp = -%0, sp ;;\n"		/* setup stack */
		"\tld8 gp = [r32]\n"			/* get procedure GP */
		"\tadds r16 = %0-(6*16), sp\n"
		"\tadds r17 = %0-(5*16), sp ;;\n"
		"\tstf.spill [r16] = f16,32\n"		/* spill callee-saved fp regs */
		"\tstf.spill [r17] = f17,32\n"
		"\tmov b6 = r18 ;;\n"			/* set target address */
		"\tstf.spill [r16] = f18,32\n"
		"\tstf.spill [r17] = f19,32\n"
		"\tmov loc30 = b0 ;;\n"			/* save return address */
		"\tstf.spill [r16] = f20,32\n"
		"\tstf.spill [r17] = f21,32\n"
		"\tbr.few b6 ;;\n"			/* branch to function */
		".global StgReturn\n"
		"StgReturn:\n"
		"\tmov r8 = loc16\n"		/* return value in r8 */
		"\tadds r16 = %0-(6*16), sp\n"
	    	"\tadds r17 = %0-(5*16), sp ;;\n"
		"\tldf.fill f16 = [r16],32\n"	/* start restoring fp regs */
		"\tldf.fill f17 = [r17],32\n"
		"\tmov ar.pfs = loc29 ;;\n"	/* restore register frame */
		"\tldf.fill f18 = [r16],32\n"
		"\tldf.fill f19 = [r17],32\n"
		"\tmov b0 = loc30 ;;\n"		/* restore return address */
		"\tldf.fill f20 = [r16],32\n"
		"\tldf.fill f21 = [r17],32\n"
		"\tadds sp = %0, sp\n"		/* restore stack */
		"\tbr.ret.sptk.many b0 ;;\n"	/* return */
	: : "i"(RESERVED_C_STACK_BYTES + 6*16), "i"(LOCALS));
}

#endif

#endif /* !USE_MINIINTERPRETER */