/* -----------------------------------------------------------------------------
 *
 * (c) The GHC Team, 1998-2003
 *
 * STG-to-C glue.
 *
 * To run an STG function from C land, call
 *
 *		rv = StgRun(f,BaseReg);
 *
 * where "f" is the STG function to call, and BaseReg is the address of the
 * RegTable for this run (we might have separate RegTables if we're running
 * multiple threads on an SMP machine).
 *
 * In the end, "f" must JMP to StgReturn (defined below),
 * passing the return-value "rv" in R1,
 * to return to the caller of StgRun returning "rv" in
 * whatever way C returns a value.
 *
 * NOTE: StgRun/StgReturn do *NOT* load or store Hp or any
 * other registers (other than saving the C callee-saves
 * registers).  Instead, the called function "f" must do that
 * in STG land.
 *
 * GCC will have assumed that pushing/popping of C-stack frames is
 * going on when it generated its code, and used stack space
 * accordingly.  However, we actually {\em post-process away} all
 * such stack-framery (see \tr{ghc/driver/ghc-asm.lprl}). Things will
 * be OK however, if we initially make sure there are
 * @RESERVED_C_STACK_BYTES@ on the C-stack to begin with, for local
 * variables.
 *
 * -------------------------------------------------------------------------- */

35
36
#include "PosixSource.h"

ken's avatar
ken committed
37
38
39
40
41
42
43
44
45
46
47

/*
 * We define the following (unused) global register variables, because for
 * some reason gcc generates sub-optimal code for StgRun() on the Alpha
 * (unnecessarily saving extra registers on the stack) if we don't.
 *
 * Why do it at the top of this file, rather than near StgRun() below?  Because
 * gcc doesn't let us define global register variables after any function
 * definition has been read.  Any point after #include "Stg.h" would be too
 * late.
 *
 * We define alpha_EXTRA_CAREFUL here to save $s6, $f8 and $f9 -- registers
 * that we don't use but which are callee-save registers.  The __divq() routine
 * in libc.a clobbers $s6.
 */
52
#include "ghcconfig.h"
53
#ifdef alpha_HOST_ARCH
/* Enables the extra $15/$f8/$f9 handling here and in the alpha StgRun(). */
#define alpha_EXTRA_CAREFUL
/* Global register variables pinned to $26 and $29; the alpha StgRun()
 * binds the same two registers under the names real_ra and real_gp. */
register long   fake_ra __asm__("$26");
register long   fake_gp __asm__("$29");
#ifdef alpha_EXTRA_CAREFUL
/* Additional pinned registers, declared only in the EXTRA_CAREFUL build. */
register long   fake_s6 __asm__("$15");
register double fake_f8 __asm__("$f8");
register double fake_f9 __asm__("$f9");
#endif
#endif

64
65
66
67
68
69
/* include Stg.h first because we want real machine regs in here: we
 * have to get the value of R1 back from Stg land to C land intact.
 */
#include "Stg.h"
#include "Rts.h"
#include "StgRun.h"
70
#include "RtsFlags.h"
71
#include "OSThreads.h"
72
#include "Capability.h"
73
74
75
76
77
78
79
80
81
82
83

#ifdef DEBUG
#include "RtsUtils.h"
#include "Printer.h"
#endif

#ifdef USE_MINIINTERPRETER

/* -----------------------------------------------------------------------------
   any architecture (using miniinterpreter)
   -------------------------------------------------------------------------- */
ken's avatar
ken committed
84

85
/*
 * Mini-interpreter version of StgRun.
 *
 * Repeatedly calls the current code pointer; each call returns the next
 * code pointer to run.  The loop ends when a call returns a null pointer
 * (StgReturn in this configuration returns 0), after which the result is
 * read from R1.
 *
 * f        first STG function to call
 * basereg  unused in this configuration
 *
 * Returns R1.p reinterpreted as a StgRegTable pointer.
 */
StgRegTable * StgRun(StgFunPtr f, StgRegTable *basereg STG_UNUSED)
{
    while (f) {
        /* With interpreter debugging enabled, trace every jump target. */
        IF_DEBUG(interpreter,
            debugBelch("Jumping to ");
            printPtr((P_)f); fflush(stdout);
            debugBelch("\n");
            );
        /* Run one step; its return value is the next thing to call. */
        f = (StgFunPtr) (f)();
    }
    return (StgRegTable *)R1.p;
}

98
/*
 * Mini-interpreter version of StgReturn.
 *
 * Returns a null code pointer, which is the value that terminates the
 * `while (f)` loop in the mini-interpreter StgRun().
 */
StgFunPtr StgReturn(void)
{
    return 0;
}

#else /* !USE_MINIINTERPRETER */

#ifdef LEADING_UNDERSCORE
#define STG_RETURN "_StgReturn"
#else
#define STG_RETURN "StgReturn"
#endif

111
112
113
/* -----------------------------------------------------------------------------
   x86 architecture
   -------------------------------------------------------------------------- */
ken's avatar
ken committed
114

115
#ifdef i386_HOST_ARCH
116

117
118
119
120
121
122
#ifdef darwin_TARGET_OS
#define STG_GLOBAL ".globl "
#else
#define STG_GLOBAL ".global "
#endif

123
StgRegTable *
124
125
StgRun(StgFunPtr f, StgRegTable *basereg) {

126
    unsigned char space[ RESERVED_C_STACK_BYTES + 4*sizeof(void *) ];
127
    StgRegTable * r;
128
129

    __asm__ volatile (
ken's avatar
ken committed
130
	/*
131
132
133
	 * save callee-saves registers on behalf of the STG code.
	 */
	"movl %%esp, %%eax\n\t"
134
	"addl %4, %%eax\n\t"
135
136
137
138
139
140
141
        "movl %%ebx,0(%%eax)\n\t"
        "movl %%esi,4(%%eax)\n\t"
        "movl %%edi,8(%%eax)\n\t"
        "movl %%ebp,12(%%eax)\n\t"
	/*
	 * Set BaseReg
	 */
142
	"movl %3,%%ebx\n\t"
143
	/*
144
	 * grab the function argument from the stack
145
	 */
146
        "movl %2,%%eax\n\t"
147
148
        
	/*
149
150
151
152
	 * Darwin note:
	 * The stack pointer has to be aligned to a multiple of 16 bytes at
	 * this point. This works out correctly with gcc 4.0.1, but it might
	 * break at any time in the future. TODO: Make this future-proof.
153
154
155
156
157
	 */

	/*
	 * jump to it
	 */
158
159
        "jmp *%%eax\n\t"

160
	STG_GLOBAL STG_RETURN "\n"
161
       	STG_RETURN ":\n\t"
162
163
164

	"movl %%esi, %%eax\n\t"   /* Return value in R1  */

165
166
167
168
	/*
	 * restore callee-saves registers.  (Don't stomp on %%eax!)
	 */
	"movl %%esp, %%edx\n\t"
169
	"addl %4, %%edx\n\t"
170
171
172
173
174
        "movl 0(%%edx),%%ebx\n\t"	/* restore the registers saved above */
        "movl 4(%%edx),%%esi\n\t"
        "movl 8(%%edx),%%edi\n\t"
        "movl 12(%%edx),%%ebp\n\t"

175
176
      : "=&a" (r), "=m" (space)
      : "m" (f), "m" (basereg), "i" (RESERVED_C_STACK_BYTES)
177
178
179
180
181
182
183
184
      : "edx" /* stomps on %edx */
    );

    return r;
}

#endif

185
186
187
188
189
190
191
192
193
194
/* ----------------------------------------------------------------------------
   x86-64 is almost the same as plain x86.

   I've done it using entirely inline assembler, because I couldn't
   get gcc to generate the correct subtraction from %rsp by using
   the local array variable trick.  It didn't seem to reserve
   enough space.  Oh well, it's not much harder this way.

   ------------------------------------------------------------------------- */

195
#ifdef x86_64_HOST_ARCH
196

197
extern StgRegTable * StgRun(StgFunPtr f, StgRegTable *basereg);
198

Simon Marlow's avatar
Simon Marlow committed
199
200
/*
 * Carrier for the x86-64 assembler text that defines the global symbols
 * StgRun and STG_RETURN.  The C function itself only exists to hold the
 * asm statement; GNUC3_ATTRIBUTE(used) is applied to this static function
 * (presumably so the compiler does not discard it -- confirm against the
 * macro's definition).
 *
 * StgRun:     lowers %rsp by the frame size (operand %0), stores %rbx,
 *             %rbp and %r12-%r15 in the top 48 bytes of that frame, copies
 *             %rsi into %r13 (BaseReg), and jumps to the address in %rdi.
 * STG_RETURN: copies %rbx into %rax as the result, reloads the six saved
 *             registers, raises %rsp by the frame size and executes retq.
 */
static void GNUC3_ATTRIBUTE(used)
StgRunIsImplementedInAssembler(void)
{
    __asm__ volatile (
        /*
         * save callee-saves registers on behalf of the STG code.
         */
        ".globl StgRun\n"
        "StgRun:\n\t"
        "subq %0, %%rsp\n\t"
        "movq %%rsp, %%rax\n\t"
        "addq %0-48, %%rax\n\t"
        "movq %%rbx,0(%%rax)\n\t"
        "movq %%rbp,8(%%rax)\n\t"
        "movq %%r12,16(%%rax)\n\t"
        "movq %%r13,24(%%rax)\n\t"
        "movq %%r14,32(%%rax)\n\t"
        "movq %%r15,40(%%rax)\n\t"
        /*
         * Set BaseReg
         */
        "movq %%rsi,%%r13\n\t"
        /*
         * the function to run arrives in %rdi; jump to it.
         */
        "movq %%rdi,%%rax\n\t"
        "jmp *%%rax\n\t"

        ".global " STG_RETURN "\n"
        STG_RETURN ":\n\t"

        "movq %%rbx, %%rax\n\t"   /* Return value in R1  */

        /*
         * restore callee-saves registers.  (Don't stomp on %%rax!)
         */
        "movq %%rsp, %%rdx\n\t"
        "addq %0-48, %%rdx\n\t"
        "movq 0(%%rdx),%%rbx\n\t"	/* restore the registers saved above */
        "movq 8(%%rdx),%%rbp\n\t"
        "movq 16(%%rdx),%%r12\n\t"
        "movq 24(%%rdx),%%r13\n\t"
        "movq 32(%%rdx),%%r14\n\t"
        "movq 40(%%rdx),%%r15\n\t"
        "addq %0, %%rsp\n\t"
        "retq"

        : : "i"(RESERVED_C_STACK_BYTES+48+8 /*stack frame size*/));
    /* 
       HACK alert!

       The x86_64 ABI specifies that on a procedure call, %rsp is
       aligned on a 16-byte boundary + 8.  That is, the first
       argument on the stack after the return address will be
       16-byte aligned.  
       
       Which should be fine: RESERVED_C_STACK_BYTES+48 is a multiple
       of 16 bytes.  
       
       BUT... when we do a C-call from STG land, gcc likes to put the
       stack alignment adjustment in the prolog.  eg. if we're calling
       a function with arguments in regs, gcc will insert 'subq $8,%rsp'
       in the prolog, to keep %rsp aligned (the return address is 8
       bytes, remember).  The mangler throws away the prolog, so we
       lose the stack alignment.

       The hack is to add this extra 8 bytes to our %rsp adjustment
       here, so that throughout STG code, %rsp is 16-byte aligned,
       ready for a C-call.  

       A quick way to see if this is wrong is to compile this code:

          main = System.Exit.exitWith ExitSuccess

       And run it with +RTS -sstderr.  The stats code in the RTS, in
       particular statsPrintf(), relies on the stack alignment because
       it saves the %xmm regs on the stack, so it'll fall over if the
       stack isn't aligned, and calling exitWith from Haskell invokes
       shutdownHaskellAndExit using a C call.

       Future gcc releases will almost certainly break this hack...
    */
}

#endif /* x86-64 */

285
/* -----------------------------------------------------------------------------
   Sparc architecture

   --
   OLD COMMENT from GHC-3.02:

   We want tailjumps to be calls, because `call xxx' is the only Sparc
   branch that allows an arbitrary label as a target.  (Gcc's ``goto
   *target'' construct ends up loading the label into a register and
   then jumping, at the cost of two extra instructions for the 32-bit
   load.)

   When entering the threaded world, we stash our return address in a
   known location so that \tr{%i7} is available as an extra
   callee-saves register.  Of course, we have to restore this when
   coming out of the threaded world.

   I hate this god-forsaken architecture.  Since the top of the
   reserved stack space is used for globals and the bottom is reserved
   for outgoing arguments, we have to stick our return address
   somewhere in the middle.  Currently, I'm allowing 100 extra
   outgoing arguments beyond the first 6.  --JSM

   Updated info (GHC 4.06): we don't appear to use %i7 any more, so
   I'm not sure whether we still need to save it.  Incidentally, what
   does the last paragraph above mean when it says "the top of the
   stack is used for globals"?  What globals?  --SDM

   Updated info (GHC 4.08.2): not saving %i7 any more (see below).
   -------------------------------------------------------------------------- */
ken's avatar
ken committed
315

316
#ifdef sparc_HOST_ARCH
317

318
StgRegTable *
319
StgRun(StgFunPtr f, StgRegTable *basereg) {
320

321
322
323
324
325
    unsigned char space[RESERVED_C_STACK_BYTES];
#if 0
    register void *i7 __asm__("%i7");
    ((void **)(space))[100] = i7;
#endif
326
    f();
327
    __asm__ volatile (
ken's avatar
ken committed
328
	    ".align 4\n"
329
            ".global " STG_RETURN "\n"
ken's avatar
ken committed
330
       	    STG_RETURN ":"
331
332
333
334
335
336
337
338
339
340
341
	    : : : "l0","l1","l2","l3","l4","l5","l6","l7");
    /* we tell the C compiler that l0-l7 are clobbered on return to
     * StgReturn, otherwise it tries to use these to save eg. the
     * address of space[100] across the call.  The correct thing
     * to do would be to save all the callee-saves regs, but we
     * can't be bothered to do that.
     *
     * The code that gcc generates for this little fragment is now
     * terrible.  We could do much better by coding it directly in
     * assembler.
     */
342
#if 0
343
344
345
346
347
    /* updated 4.08.2: we don't save %i7 in the middle of the reserved
     * space any more, since gcc tries to save its address across the
     * call to f(), this gets clobbered in STG land and we end up
     * dereferencing a bogus pointer in StgReturn.
     */
ken's avatar
ken committed
348
    __asm__ volatile ("ld %1,%0"
349
		      : "=r" (i7) : "m" (((void **)(space))[100]));
350
#endif
351
    return (StgRegTable *)R1.i;
352
353
354
355
356
357
}

#endif

/* -----------------------------------------------------------------------------
   alpha architecture

   "The stack pointer (SP) must at all times denote an address that has octaword
    alignment. (This restriction has the side effect that the in-memory portion
    of the argument list, if any, will start on an octaword boundary.) Note that
    the stack grows toward lower addresses. During a procedure invocation, SP
    can never be set to a value that is higher than the value of SP at entry to
    that procedure invocation.

   "The contents of the stack, located above the portion of the argument list
    (if any) that is passed in memory, belong to the calling procedure. Because
    they are part of the calling procedure, they should not be read or written
    by the called procedure, except as specified by indirect arguments or
    language-controlled up-level references.

   "The SP value might be used by the hardware when raising exceptions and
    asynchronous interrupts. It must be assumed that the contents of the stack
    below the current SP value and within the stack for the current thread are
    continually and unpredictably modified, as specified in the _Alpha
    Architecture Reference Manual_, and as a result of asynchronous software
    actions."

   -- Compaq Computer Corporation, Houston. Tru64 UNIX Calling Standard for
      Alpha Systems, 5.1 edition, August 2000, section 3.2.1.  http://www.
      tru64unix.compaq.com/docs/base_doc/DOCUMENTATION/V51_PDF/ARH9MBTE.PDF
   -------------------------------------------------------------------------- */

384
#ifdef alpha_HOST_ARCH
385

386
StgRegTable *
ken's avatar
ken committed
387
StgRun(StgFunPtr f, StgRegTable *basereg)
388
{
ken's avatar
ken committed
389
    register long   real_ra __asm__("$26"); volatile long   save_ra;
ken's avatar
ken committed
390
    register long   real_gp __asm__("$29"); volatile long   save_gp;
ken's avatar
ken committed
391
392
393
394
395
396
397
398
399
400

    register long   real_s0 __asm__("$9" ); volatile long   save_s0;
    register long   real_s1 __asm__("$10"); volatile long   save_s1;
    register long   real_s2 __asm__("$11"); volatile long   save_s2;
    register long   real_s3 __asm__("$12"); volatile long   save_s3;
    register long   real_s4 __asm__("$13"); volatile long   save_s4;
    register long   real_s5 __asm__("$14"); volatile long   save_s5;
#ifdef alpha_EXTRA_CAREFUL
    register long   real_s6 __asm__("$15"); volatile long   save_s6;
#endif
ken's avatar
ken committed
401

ken's avatar
ken committed
402
403
404
405
406
407
408
409
410
411
412
413
414
    register double real_f2 __asm__("$f2"); volatile double save_f2;
    register double real_f3 __asm__("$f3"); volatile double save_f3;
    register double real_f4 __asm__("$f4"); volatile double save_f4;
    register double real_f5 __asm__("$f5"); volatile double save_f5;
    register double real_f6 __asm__("$f6"); volatile double save_f6;
    register double real_f7 __asm__("$f7"); volatile double save_f7;
#ifdef alpha_EXTRA_CAREFUL
    register double real_f8 __asm__("$f8"); volatile double save_f8;
    register double real_f9 __asm__("$f9"); volatile double save_f9;
#endif

    register StgFunPtr real_pv __asm__("$27");

415
    StgRegTable * ret;
416

ken's avatar
ken committed
417
    save_ra = real_ra;
ken's avatar
ken committed
418
    save_gp = real_gp;
419

ken's avatar
ken committed
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
    save_s0 = real_s0;
    save_s1 = real_s1;
    save_s2 = real_s2;
    save_s3 = real_s3;
    save_s4 = real_s4;
    save_s5 = real_s5;
#ifdef alpha_EXTRA_CAREFUL
    save_s6 = real_s6;
#endif

    save_f2 = real_f2;
    save_f3 = real_f3;
    save_f4 = real_f4;
    save_f5 = real_f5;
    save_f6 = real_f6;
    save_f7 = real_f7;
#ifdef alpha_EXTRA_CAREFUL
    save_f8 = real_f8;
    save_f9 = real_f9;
#endif

    real_pv = f;

    __asm__ volatile(	"lda $30,-%0($30)"	"\n"
		"\t"	"jmp ($27)"		"\n"
		"\t"	".align 3"		"\n"
		".globl " STG_RETURN		"\n"
		STG_RETURN ":"			"\n"
		"\t"	"lda $30,%0($30)"	"\n"
		: : "K" (RESERVED_C_STACK_BYTES));

    ret = real_s5;

    real_s0 = save_s0;
    real_s1 = save_s1;
    real_s2 = save_s2;
    real_s3 = save_s3;
    real_s4 = save_s4;
    real_s5 = save_s5;
#ifdef alpha_EXTRA_CAREFUL
    real_s6 = save_s6;
#endif

    real_f2 = save_f2;
    real_f3 = save_f3;
    real_f4 = save_f4;
    real_f5 = save_f5;
    real_f6 = save_f6;
    real_f7 = save_f7;
#ifdef alpha_EXTRA_CAREFUL
    real_f8 = save_f8;
    real_f9 = save_f9;
#endif
473

ken's avatar
ken committed
474
    real_ra = save_ra;
ken's avatar
ken committed
475
    real_gp = save_gp;
476

477
    return ret;
478
479
}

480
#endif /* alpha_HOST_ARCH */
481

482
483
484
485
/* -----------------------------------------------------------------------------
   HP-PA architecture
   -------------------------------------------------------------------------- */

486
#ifdef hppa1_1_HOST_ARCH
487

488
StgRegTable *
ken's avatar
ken committed
489
StgRun(StgFunPtr f, StgRegTable *basereg)
490
491
{
    StgChar space[RESERVED_C_STACK_BYTES+16*sizeof(long)+10*sizeof(double)];
492
    StgRegTable * ret;
493

494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
    __asm__ volatile ("ldo %0(%%r30),%%r19\n"
		      "\tstw %%r3, 0(0,%%r19)\n"
                      "\tstw %%r4, 4(0,%%r19)\n"
                      "\tstw %%r5, 8(0,%%r19)\n"
                      "\tstw %%r6,12(0,%%r19)\n"
                      "\tstw %%r7,16(0,%%r19)\n"
                      "\tstw %%r8,20(0,%%r19)\n"
                      "\tstw %%r9,24(0,%%r19)\n"
		      "\tstw %%r10,28(0,%%r19)\n"
                      "\tstw %%r11,32(0,%%r19)\n"
                      "\tstw %%r12,36(0,%%r19)\n"
                      "\tstw %%r13,40(0,%%r19)\n"
                      "\tstw %%r14,44(0,%%r19)\n"
                      "\tstw %%r15,48(0,%%r19)\n"
                      "\tstw %%r16,52(0,%%r19)\n"
                      "\tstw %%r17,56(0,%%r19)\n"
                      "\tstw %%r18,60(0,%%r19)\n"
		      "\tldo 80(%%r19),%%r19\n"
		      "\tfstds %%fr12,-16(0,%%r19)\n"
		      "\tfstds %%fr13, -8(0,%%r19)\n"
		      "\tfstds %%fr14,  0(0,%%r19)\n"
		      "\tfstds %%fr15,  8(0,%%r19)\n"
		      "\tldo 32(%%r19),%%r19\n"
		      "\tfstds %%fr16,-16(0,%%r19)\n"
		      "\tfstds %%fr17, -8(0,%%r19)\n"
		      "\tfstds %%fr18,  0(0,%%r19)\n"
		      "\tfstds %%fr19,  8(0,%%r19)\n"
		      "\tldo 32(%%r19),%%r19\n"
		      "\tfstds %%fr20,-16(0,%%r19)\n"
		      "\tfstds %%fr21, -8(0,%%r19)\n" : :
                      "n" (-(116 * sizeof(long) + 10 * sizeof(double))) : "%r19"
		      );

    f();

    __asm__ volatile (".align 4\n"
               	      "\t.EXPORT " STG_RETURN ",CODE\n"
		      "\t.EXPORT " STG_RETURN ",ENTRY,PRIV_LEV=3\n"
                      STG_RETURN "\n"
                      /* "\tldo %0(%%r3),%%r19\n" */
534
535
                      "\tldo %1(%%r30),%%r19\n"
                      "\tcopy %%r11, %0\n"  /* save R1 */
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
		      "\tldw  0(0,%%r19),%%r3\n"
                      "\tldw  4(0,%%r19),%%r4\n"
                      "\tldw  8(0,%%r19),%%r5\n"
                      "\tldw 12(0,%%r19),%%r6\n"
                      "\tldw 16(0,%%r19),%%r7\n"
                      "\tldw 20(0,%%r19),%%r8\n"
                      "\tldw 24(0,%%r19),%%r9\n"
		      "\tldw 28(0,%%r19),%%r10\n"
                      "\tldw 32(0,%%r19),%%r11\n"
                      "\tldw 36(0,%%r19),%%r12\n"
                      "\tldw 40(0,%%r19),%%r13\n"
                      "\tldw 44(0,%%r19),%%r14\n"
                      "\tldw 48(0,%%r19),%%r15\n"
                      "\tldw 52(0,%%r19),%%r16\n"
                      "\tldw 56(0,%%r19),%%r17\n"
                      "\tldw 60(0,%%r19),%%r18\n"
		      "\tldo 80(%%r19),%%r19\n"
		      "\tfldds -16(0,%%r19),%%fr12\n"
		      "\tfldds  -8(0,%%r19),%%fr13\n"
		      "\tfldds   0(0,%%r19),%%fr14\n"
		      "\tfldds   8(0,%%r19),%%fr15\n"
		      "\tldo 32(%%r19),%%r19\n"
		      "\tfldds -16(0,%%r19),%%fr16\n"
		      "\tfldds  -8(0,%%r19),%%fr17\n"
		      "\tfldds   0(0,%%r19),%%fr18\n"
		      "\tfldds   8(0,%%r19),%%fr19\n"
		      "\tldo 32(%%r19),%%r19\n"
		      "\tfldds -16(0,%%r19),%%fr20\n"
ken's avatar
ken committed
564
		      "\tfldds  -8(0,%%r19),%%fr21\n"
565
566
567
		         : "=r" (ret)
		         : "n" (-(116 * sizeof(long) + 10 * sizeof(double)))
		         : "%r19"
568
569
		      );

570
    return ret;
571
572
}

573
#endif /* hppa1_1_HOST_ARCH */
574

575
576
577
/* -----------------------------------------------------------------------------
   PowerPC architecture

   Everything is in assembler, so we don't have to deal with GCC...

   -------------------------------------------------------------------------- */

582
#ifdef powerpc_HOST_ARCH
583

584
extern StgRegTable * StgRun(StgFunPtr f, StgRegTable *basereg);
585

586
#ifdef darwin_HOST_OS
587
void StgRunIsImplementedInAssembler(void)
588
{
589
590
591
592
593
#if HAVE_SUBSECTIONS_VIA_SYMBOLS
            // if the toolchain supports deadstripping, we have to
            // prevent it here (it tends to get confused here).
        __asm__ volatile (".no_dead_strip _StgRunIsImplementedInAssembler");
#endif
594
595
596
597
598
	__asm__ volatile (
		"\n.globl _StgRun\n"
		"_StgRun:\n"
		"\tmflr r0\n"
		"\tbl saveFP # f14\n"
599
600
		"\tstmw r13,-220(r1)\n"
		"\tstwu r1,-%0(r1)\n"
601
                "\tmr r27,r4\n" // BaseReg == r27
602
603
604
605
606
607
		"\tmtctr r3\n"
		"\tmr r12,r3\n"
		"\tbctr\n"
		".globl _StgReturn\n"
		"_StgReturn:\n"
		"\tmr r3,r14\n"
608
609
		"\tla r1,%0(r1)\n"
		"\tlmw r13,-220(r1)\n"
610
		"\tb restFP # f14\n"
611
	: : "i"(RESERVED_C_STACK_BYTES+224 /*stack frame size*/));
612
}
613
614
615
616
617
618
619
620
621
622
623
624
#else

// This version is for PowerPC Linux.

// Differences from the Darwin/Mac OS X version:
// *) Different Assembler Syntax
// *) Doesn't use Register Saving Helper Functions (although they exist somewhere)
// *) We may not access positive stack offsets
//    (no "Red Zone" as in the Darwin ABI)
// *) The Link Register is saved to a different offset in the caller's stack frame
//    (Linux: 4(r1), Darwin 8(r1))

Simon Marlow's avatar
Simon Marlow committed
625
626
/*
 * Carrier for the PowerPC/Linux assembler text that defines the global
 * symbols StgRun and StgReturn.
 *
 * StgRun:    stores LR at 4(r1), keeps the incoming stack pointer in r5,
 *            pushes a frame of the size given by operand %0, stores r13
 *            and up (stmw) and f14-f31 at negative offsets from r5,
 *            copies r4 into r27 (BaseReg), and branches via CTR to the
 *            address in r3 (also copied to r12).
 * StgReturn: copies r14 into r3 as the result, recomputes the old stack
 *            pointer in r5, reloads the same registers, restores r1 and
 *            LR, and returns with blr.
 */
static void GNUC3_ATTRIBUTE(used)
StgRunIsImplementedInAssembler(void)
{
        __asm__ volatile (
                "\t.globl StgRun\n"
                "\t.type StgRun,@function\n"
                "StgRun:\n"
                "\tmflr 0\n"
                "\tstw 0,4(1)\n"
                "\tmr 5,1\n"
                "\tstwu 1,-%0(1)\n"
                "\tstmw 13,-220(5)\n"
                "\tstfd 14,-144(5)\n"
                "\tstfd 15,-136(5)\n"
                "\tstfd 16,-128(5)\n"
                "\tstfd 17,-120(5)\n"
                "\tstfd 18,-112(5)\n"
                "\tstfd 19,-104(5)\n"
                "\tstfd 20,-96(5)\n"
                "\tstfd 21,-88(5)\n"
                "\tstfd 22,-80(5)\n"
                "\tstfd 23,-72(5)\n"
                "\tstfd 24,-64(5)\n"
                "\tstfd 25,-56(5)\n"
                "\tstfd 26,-48(5)\n"
                "\tstfd 27,-40(5)\n"
                "\tstfd 28,-32(5)\n"
                "\tstfd 29,-24(5)\n"
                "\tstfd 30,-16(5)\n"
                "\tstfd 31,-8(5)\n"
                "\tmr 27,4\n"  // BaseReg == r27
                "\tmtctr 3\n"
                "\tmr 12,3\n"
                "\tbctr\n"
                ".globl StgReturn\n"
                "\t.type StgReturn,@function\n"
                "StgReturn:\n"
                "\tmr 3,14\n"
                "\tla 5,%0(1)\n"
                "\tlmw 13,-220(5)\n"
                "\tlfd 14,-144(5)\n"
                "\tlfd 15,-136(5)\n"
                "\tlfd 16,-128(5)\n"
                "\tlfd 17,-120(5)\n"
                "\tlfd 18,-112(5)\n"
                "\tlfd 19,-104(5)\n"
                "\tlfd 20,-96(5)\n"
                "\tlfd 21,-88(5)\n"
                "\tlfd 22,-80(5)\n"
                "\tlfd 23,-72(5)\n"
                "\tlfd 24,-64(5)\n"
                "\tlfd 25,-56(5)\n"
                "\tlfd 26,-48(5)\n"
                "\tlfd 27,-40(5)\n"
                "\tlfd 28,-32(5)\n"
                "\tlfd 29,-24(5)\n"
                "\tlfd 30,-16(5)\n"
                "\tlfd 31,-8(5)\n"
                "\tmr 1,5\n"
                "\tlwz 0,4(1)\n"
                "\tmtlr 0\n"
                "\tblr\n"
        : : "i"(RESERVED_C_STACK_BYTES+224 /*stack frame size*/));
}
#endif
690
691
692

#endif

693
694
695
696
697
698
699
/* -----------------------------------------------------------------------------
   PowerPC 64 architecture

   Everything is in assembler, so we don't have to deal with GCC...
   
   -------------------------------------------------------------------------- */

700
#ifdef powerpc64_HOST_ARCH
701

702
#ifdef linux_HOST_OS
703
extern StgRegTable * StgRun(StgFunPtr f, StgRegTable *basereg);
704

Simon Marlow's avatar
Simon Marlow committed
705
706
/*
 * Carrier for the PowerPC64/Linux assembler text.
 *
 * The ".opd" section receives two three-quadword entries named StgRun and
 * StgReturn, each naming the corresponding dot-prefixed code symbol and
 * .TOC.@tocbase.  The code itself is emitted under .StgRun / .StgReturn.
 *
 * .StgRun:    stores LR at 16(r1), keeps the incoming stack pointer in
 *             r5, pushes a frame of the size given by operand %0, stores
 *             r2, r14-r31 and f14-f31 at negative offsets from r5, copies
 *             r4 into r27 (BaseReg), loads r2 and the branch target from
 *             the two doublewords r3 points at, and branches via CTR.
 * .StgReturn: copies r14 into r3 as the result, recomputes the old stack
 *             pointer in r5, reloads the same registers, restores r1 and
 *             LR, and returns with blr.
 */
static void GNUC3_ATTRIBUTE(used)
StgRunIsImplementedInAssembler(void)
{
        // r0 volatile
        // r1 stack pointer
        // r2 toc - needs to be saved
        // r3-r10 argument passing, volatile
        // r11, r12 very volatile (not saved across cross-module calls)
        // r13 thread local state (never modified, don't need to save)
        // r14-r31 callee-save
        __asm__ volatile (
                ".section \".opd\",\"aw\"\n"
                ".align 3\n"
                ".globl StgRun\n"
                "StgRun:\n"
                        "\t.quad\t.StgRun,.TOC.@tocbase,0\n"
                        "\t.size StgRun,24\n"
                ".globl StgReturn\n"
                "StgReturn:\n"
                        "\t.quad\t.StgReturn,.TOC.@tocbase,0\n"
                        "\t.size StgReturn,24\n"
                ".previous\n"
                ".globl .StgRun\n"
                ".type .StgRun,@function\n"
                ".StgRun:\n"
                        "\tmflr 0\n"
                        "\tmr 5, 1\n"
                        "\tstd 0, 16(1)\n"
                        "\tstdu 1, -%0(1)\n"
                        "\tstd 2, -296(5)\n"
                        "\tstd 14, -288(5)\n"
                        "\tstd 15, -280(5)\n"
                        "\tstd 16, -272(5)\n"
                        "\tstd 17, -264(5)\n"
                        "\tstd 18, -256(5)\n"
                        "\tstd 19, -248(5)\n"
                        "\tstd 20, -240(5)\n"
                        "\tstd 21, -232(5)\n"
                        "\tstd 22, -224(5)\n"
                        "\tstd 23, -216(5)\n"
                        "\tstd 24, -208(5)\n"
                        "\tstd 25, -200(5)\n"
                        "\tstd 26, -192(5)\n"
                        "\tstd 27, -184(5)\n"
                        "\tstd 28, -176(5)\n"
                        "\tstd 29, -168(5)\n"
                        "\tstd 30, -160(5)\n"
                        "\tstd 31, -152(5)\n"
                        "\tstfd 14, -144(5)\n"
                        "\tstfd 15, -136(5)\n"
                        "\tstfd 16, -128(5)\n"
                        "\tstfd 17, -120(5)\n"
                        "\tstfd 18, -112(5)\n"
                        "\tstfd 19, -104(5)\n"
                        "\tstfd 20, -96(5)\n"
                        "\tstfd 21, -88(5)\n"
                        "\tstfd 22, -80(5)\n"
                        "\tstfd 23, -72(5)\n"
                        "\tstfd 24, -64(5)\n"
                        "\tstfd 25, -56(5)\n"
                        "\tstfd 26, -48(5)\n"
                        "\tstfd 27, -40(5)\n"
                        "\tstfd 28, -32(5)\n"
                        "\tstfd 29, -24(5)\n"
                        "\tstfd 30, -16(5)\n"
                        "\tstfd 31, -8(5)\n"
                        "\tmr 27, 4\n"  // BaseReg == r27
                        "\tld 2, 8(3)\n"
                        "\tld 3, 0(3)\n"
                        "\tmtctr 3\n"
                        "\tbctr\n"
                ".globl .StgReturn\n"
                ".type .StgReturn,@function\n"
                ".StgReturn:\n"
                        "\tmr 3,14\n"
                        "\tla 5, %0(1)\n" // load address == addi r5, r1, %0
                        "\tld 2, -296(5)\n"
                        "\tld 14, -288(5)\n"
                        "\tld 15, -280(5)\n"
                        "\tld 16, -272(5)\n"
                        "\tld 17, -264(5)\n"
                        "\tld 18, -256(5)\n"
                        "\tld 19, -248(5)\n"
                        "\tld 20, -240(5)\n"
                        "\tld 21, -232(5)\n"
                        "\tld 22, -224(5)\n"
                        "\tld 23, -216(5)\n"
                        "\tld 24, -208(5)\n"
                        "\tld 25, -200(5)\n"
                        "\tld 26, -192(5)\n"
                        "\tld 27, -184(5)\n"
                        "\tld 28, -176(5)\n"
                        "\tld 29, -168(5)\n"
                        "\tld 30, -160(5)\n"
                        "\tld 31, -152(5)\n"
                        "\tlfd 14, -144(5)\n"
                        "\tlfd 15, -136(5)\n"
                        "\tlfd 16, -128(5)\n"
                        "\tlfd 17, -120(5)\n"
                        "\tlfd 18, -112(5)\n"
                        "\tlfd 19, -104(5)\n"
                        "\tlfd 20, -96(5)\n"
                        "\tlfd 21, -88(5)\n"
                        "\tlfd 22, -80(5)\n"
                        "\tlfd 23, -72(5)\n"
                        "\tlfd 24, -64(5)\n"
                        "\tlfd 25, -56(5)\n"
                        "\tlfd 26, -48(5)\n"
                        "\tlfd 27, -40(5)\n"
                        "\tlfd 28, -32(5)\n"
                        "\tlfd 29, -24(5)\n"
                        "\tlfd 30, -16(5)\n"
                        "\tlfd 31, -8(5)\n"
                        "\tmr 1, 5\n"
                        "\tld 0, 16(1)\n"
                        "\tmtlr 0\n"
                        "\tblr\n"
        : : "i"(RESERVED_C_STACK_BYTES+304 /*stack frame size*/));
}
824
#else // linux_HOST_OS
825
826
827
828
829
#error Only linux support for power64 right now.
#endif

#endif

830
831
832
/* -----------------------------------------------------------------------------
   IA64 architecture

   Again, in assembler - so we can fiddle with the register stack, and because
   gcc doesn't handle asm-clobbered callee-saves correctly.

   loc0  - loc15: preserved locals
   loc16 - loc28: STG registers
           loc29: saved ar.pfs
           loc30: saved b0
           loc31: saved gp (gcc 3.3 uses this slot)
   -------------------------------------------------------------------------- */

843
#ifdef ia64_HOST_ARCH
844
845
846
847
848

/* the memory stack is rarely used, so 16K is excessive */
#undef RESERVED_C_STACK_BYTES
#define RESERVED_C_STACK_BYTES 1024

/* LOCALS is the number of local registers requested by the `alloc`
 * instruction in the IA64 StgRun assembler (its operand %1). */
#if ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 3)) || (__GNUC__ > 3)
/* gcc 3.3+: leave an extra slot for gp saves */
#define LOCALS 32
#else
#define LOCALS 31
#endif

Simon Marlow's avatar
Simon Marlow committed
856
857
/*
 * Carrier for the IA64 assembler text that defines the global symbols
 * StgRun and StgReturn.
 *
 * StgRun:    allocates a register frame with LOCALS locals (saving ar.pfs
 *            in loc29), loads the procedure address and gp from the two
 *            words r32 points at, lowers sp by operand %0, spills f16-f21
 *            into the top 6*16 bytes of that area, saves b0 in loc30 and
 *            branches to the procedure through b6.
 * StgReturn: copies loc16 into r8 as the result, refills f16-f21,
 *            restores ar.pfs and b0, raises sp by operand %0 and returns.
 */
static void GNUC3_ATTRIBUTE(used)
StgRunIsImplementedInAssembler(void)
{
    __asm__ volatile(
                ".global StgRun\n"
                "StgRun:\n"
                "\talloc loc29 = ar.pfs, 0, %1, 8, 0\n"	/* setup register frame */
                "\tld8 r18 = [r32],8\n"			/* get procedure address */
                "\tadds sp = -%0, sp ;;\n"		/* setup stack */
                "\tld8 gp = [r32]\n"			/* get procedure GP */
                "\tadds r16 = %0-(6*16), sp\n"
                "\tadds r17 = %0-(5*16), sp ;;\n"
                "\tstf.spill [r16] = f16,32\n"		/* spill callee-saved fp regs */
                "\tstf.spill [r17] = f17,32\n"
                "\tmov b6 = r18 ;;\n"			/* set target address */
                "\tstf.spill [r16] = f18,32\n"
                "\tstf.spill [r17] = f19,32\n"
                "\tmov loc30 = b0 ;;\n"			/* save return address */
                "\tstf.spill [r16] = f20,32\n"
                "\tstf.spill [r17] = f21,32\n"
                "\tbr.few b6 ;;\n"			/* branch to function */
                ".global StgReturn\n"
                "StgReturn:\n"
                "\tmov r8 = loc16\n"		/* return value in r8 */
                "\tadds r16 = %0-(6*16), sp\n"
                "\tadds r17 = %0-(5*16), sp ;;\n"
                "\tldf.fill f16 = [r16],32\n"	/* start restoring fp regs */
                "\tldf.fill f17 = [r17],32\n"
                "\tmov ar.pfs = loc29 ;;\n"	/* restore register frame */
                "\tldf.fill f18 = [r16],32\n"
                "\tldf.fill f19 = [r17],32\n"
                "\tmov b0 = loc30 ;;\n"		/* restore return address */
                "\tldf.fill f20 = [r16],32\n"
                "\tldf.fill f21 = [r17],32\n"
                "\tadds sp = %0, sp\n"		/* restore stack */
                "\tbr.ret.sptk.many b0 ;;\n"	/* return */
        : : "i"(RESERVED_C_STACK_BYTES + 6*16), "i"(LOCALS));
}

#endif

897
#endif /* !USE_MINIINTERPRETER */