StgCRun.c 27.1 KB
Newer Older
1
/* -----------------------------------------------------------------------------
2
 *
3
 * (c) The GHC Team, 1998-2011
4
 *
5
6
7
8
 * STG-to-C glue.
 *
 * To run an STG function from C land, call
 *
dterei's avatar
dterei committed
9
 *              rv = StgRun(f,BaseReg);
10
11
12
13
14
 *
 * where "f" is the STG function to call, and BaseReg is the address of the
 * RegTable for this run (we might have separate RegTables if we're running
 * multiple threads on an SMP machine).
 *
dterei's avatar
dterei committed
15
16
 * In the end, "f" must JMP to StgReturn (defined below), passing the
 * return-value "rv" in R1, to return to the caller of StgRun returning "rv" in
17
18
 * whatever way C returns a value.
 *
dterei's avatar
dterei committed
19
20
21
 * NOTE: StgRun/StgReturn do *NOT* load or store Hp or any other registers
 * (other than saving the C callee-saves registers). Instead, the called
 * function "f" must do that in STG land.
ken's avatar
ken committed
22
 *
23
24
25
 * We also initially make sure that there are @RESERVED_C_STACK_BYTES@ on the
 * C-stack. This is done to reserve some space for the allocation of
 * temporaries in STG code.
26
27
28
 *
 * -------------------------------------------------------------------------- */

29
#include "PosixSource.h"
30
#include "ghcconfig.h"
ken's avatar
ken committed
31

32
#if defined(sparc_HOST_ARCH) || defined(USE_MINIINTERPRETER)
33
34
35
/* include Stg.h first because we want real machine regs in here: we
 * have to get the value of R1 back from Stg land to C land intact.
 */
36
37
38
39
40
41
42
43

/* We include windows.h very early, as on Win64 the CONTEXT type has
   fields "R8", "R9" and "R10", which goes bad if we've already
   #define'd those names for our own purposes (in stg/Regs.h) */
#if defined(HAVE_WINDOWS_H)
#include <windows.h>
#endif

44
#define IN_STGCRUN 1
45
46
#include "Stg.h"
#include "Rts.h"
47
48
49
50
51
52
53
54
55
56
57
#else
/* The other architectures do not require the actual register macro definitions
 * here because they use hand written assembly to implement the StgRun
 * function. Including Stg.h first will define the R1 values using GCC specific
 * techniques, which we don't want for LLVM based C compilers. Since we don't
 * actually need the real machine register definitions here, we include the
 * headers in the opposite order to allow LLVM-based C compilers to work.
 */
#include "Rts.h"
#include "Stg.h"
#endif
Simon Marlow's avatar
Simon Marlow committed
58

59
#include "StgRun.h"
60
#include "Capability.h"
61

Ben Gamari's avatar
Ben Gamari committed
62
#if defined(DEBUG)
63
64
65
66
#include "RtsUtils.h"
#include "Printer.h"
#endif

Ben Gamari's avatar
Ben Gamari committed
67
#if defined(USE_MINIINTERPRETER)
68
69
70
71

/* -----------------------------------------------------------------------------
   any architecture (using miniinterpreter)
   -------------------------------------------------------------------------- */
ken's avatar
ken committed
72

73
/*
 * Mini-interpreter driver.
 *
 * Starting from f, repeatedly calls the current code pointer and treats the
 * value it returns as the next code pointer to call.  The loop ends when a
 * call returns a null pointer (as StgReturn does).  The result is R1.p cast
 * to StgRegTable *.  basereg is not used in this configuration.
 */
StgRegTable * StgRun(StgFunPtr f, StgRegTable *basereg STG_UNUSED)
{
    StgFunPtr next;

    for (next = f; next; next = (StgFunPtr) (next)()) {
        /* Trace each hop before it is taken (interpreter debug flag only). */
        IF_DEBUG(interpreter,
                 debugBelch("Jumping to ");
                 printPtr((P_)next); fflush(stdout);
                 debugBelch("\n");
                 );
    }

    return (StgRegTable *)R1.p;
}

86
/*
 * Mini-interpreter terminator: hands back a null code pointer, which makes
 * the loop in StgRun stop.
 */
StgFunPtr
StgReturn(void)
{
    StgFunPtr next = 0;   /* nothing further to run */

    return next;
}

#else /* !USE_MINIINTERPRETER */

Ben Gamari's avatar
Ben Gamari committed
93
#if defined(LEADING_UNDERSCORE)
94
#define STG_RUN "_StgRun"
95
96
#define STG_RETURN "_StgReturn"
#else
dterei's avatar
dterei committed
97
#define STG_RUN "StgRun"
98
99
100
#define STG_RETURN "StgReturn"
#endif

Ian Lynagh's avatar
Ian Lynagh committed
101
#if defined(mingw32_HOST_OS)
102
103
104
105
106
107
108
109
110
111
112
/*
 * Note [Windows Stack allocations]
 *
 * On windows the stack has to be allocated 4k at a time, otherwise
 * we get a segfault.  The C compiler knows how to do this (it calls
 * _alloca()), so we make sure that we can allocate as much stack as
 * we need.  However since we are doing a local stack allocation and the value
 * isn't valid outside the frame, compilers are free to optimize this allocation
 * and the corresponding stack check away. So to prevent that we request that
 * this function never be optimized (See #14669).  */
/* Declares a local buffer of RESERVED_C_STACK_BYTES + 16 + 12 bytes and
 * returns its address.  The buffer is a local of this function, so the
 * returned pointer is only useful as proof that the allocation happened;
 * see Note [Windows Stack allocations] for why the function is marked
 * STG_NO_OPTIMIZE. */
STG_NO_OPTIMIZE StgWord8 *win32AllocStack(void)
Ian Lynagh's avatar
Ian Lynagh committed
113
114
115
116
117
118
{
    StgWord8 stack[RESERVED_C_STACK_BYTES + 16 + 12];
    return stack;
}
#endif

119
120
121
/* -----------------------------------------------------------------------------
   x86 architecture
   -------------------------------------------------------------------------- */
ken's avatar
ken committed
122

Ben Gamari's avatar
Ben Gamari committed
123
#if defined(i386_HOST_ARCH)
124

125
#if defined(darwin_HOST_OS) || defined(ios_HOST_OS)
126
#define STG_GLOBAL ".globl "
127
#define STG_HIDDEN ".private_extern "
128
129
#else
#define STG_GLOBAL ".global "
130
#define STG_HIDDEN ".hidden "
131
132
#endif

133
134
135
136
137
138
139
140
141
/*
 * Note [Stack Alignment on X86]
 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 *
 * On X86 (both 32bit and 64bit) we keep the stack aligned on function calls at
 * a 16-byte boundary. This is done because on a number of architectures the
 * ABI requires this (x64, Mac OSX 32bit/64bit) as well as interfacing with
 * other libraries through the FFI.
 *
ian@well-typed.com's avatar
ian@well-typed.com committed
142
 * As part of this arrangement we must maintain the stack at a 16-byte boundary
143
144
145
146
147
148
149
 * - word_size-bytes (so 16n - 4 for i386 and 16n - 8 for x64) on entry to a
 * procedure since both GCC and LLVM expect this. This is because the stack
 * should have been 16-byte boundary aligned and then a call made which pushes
 * a return address onto the stack (so word_size more space used). In STG code
 * we only jump to other STG procedures, so we maintain the 16n - word_size
 * alignment for these jumps.
 *
Gabor Greif's avatar
Gabor Greif committed
150
 * This gives us binary compatibility with LLVM and GCC as well as dealing
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
 * with the FFI. Previously we just maintained a 16n byte alignment for
 * procedure entry and calls, which led to bugs (see #4211 and #5250).
 *
 * To change this convention you need to change the code here, and in
 * compiler/nativeGen/X86/CodeGen.hs::GenCCall, and maybe the adjustor
 * code for thunks in rts/AdjustorAsm.s, rts/Adjustor.c.
 *
 * A quick way to see if this is wrong is to compile this code:
 *
 *    main = System.Exit.exitWith ExitSuccess
 *
 * And run it with +RTS -sstderr.  The stats code in the RTS, in
 * particular statsPrintf(), relies on the stack alignment because
 * it saves the %xmm regs on the stack, so it'll fall over if the
 * stack isn't aligned, and calling exitWith from Haskell invokes
 * shutdownHaskellAndExit using a C call.
 *
168
169
 * If you edit the sequence below be sure to update the unwinding information
 * for stg_stop_thread in StgStartup.cmm.
170
171
 */

172
173
174
175
176
/*
 * Container for the i386 StgRun and StgReturn entry points.  Both symbols
 * are defined as labels inside the single inline-assembly statement below;
 * this C function exists only to hold that statement.
 *
 * StgRun:    lowers %esp by RESERVED_C_STACK_BYTES + 16, stores %ebx, %esi,
 *            %edi and %ebp in the top 16 bytes of that area, loads %ebx
 *            (BaseReg) from 24 bytes and %eax (the function to run) from
 *            20 bytes above the start of the save area, then jumps
 *            through %eax.
 * StgReturn: copies %esi to %eax as the return value, reloads the four
 *            saved registers, raises %esp by the same amount and executes
 *            ret.
 */
static void GNUC3_ATTRIBUTE(used)
StgRunIsImplementedInAssembler(void)
{
    __asm__ volatile (
        STG_GLOBAL STG_RUN "\n"
Ian Lynagh's avatar
Ian Lynagh committed
177
#if !defined(mingw32_HOST_OS)
178
        STG_HIDDEN STG_RUN "\n"
Ian Lynagh's avatar
Ian Lynagh committed
179
#endif
dterei's avatar
dterei committed
180
        STG_RUN ":\n\t"
181

dterei's avatar
dterei committed
182
        /*
183
184
185
186
187
188
189
         * move %esp down to reserve an area for temporary storage
         * during the execution of STG code.
         *
         * The stack pointer has to be aligned to a multiple of 16
         * bytes from here - this is a requirement of the C ABI, so
         * that C code can assign SSE2 registers directly to/from
         * stack locations.
dterei's avatar
dterei committed
190
         */
191
        "subl %0, %%esp\n\t"
192

dterei's avatar
dterei committed
193
194
195
        /*
         * save callee-saves registers on behalf of the STG code.
         */
196
197
        "movl %%esp, %%eax\n\t"
        "addl %0-16, %%eax\n\t"
198
199
200
201
        "movl %%ebx,0(%%eax)\n\t"
        "movl %%esi,4(%%eax)\n\t"
        "movl %%edi,8(%%eax)\n\t"
        "movl %%ebp,12(%%eax)\n\t"
dterei's avatar
dterei committed
202
203
204
        /*
         * Set BaseReg
         */
205
        "movl 24(%%eax),%%ebx\n\t"
dterei's avatar
dterei committed
206
207
208
        /*
         * grab the function argument from the stack
         */
209
210
        "movl 20(%%eax),%%eax\n\t"
        /*
dterei's avatar
dterei committed
211
212
         * jump to it
         */
213
214
        "jmp *%%eax\n\t"

215
        STG_GLOBAL STG_RETURN "\n"
dterei's avatar
dterei committed
216
        STG_RETURN ":\n\t"
217

dterei's avatar
dterei committed
218
        "movl %%esi, %%eax\n\t"   /* Return value in R1  */
219

dterei's avatar
dterei committed
220
221
222
223
        /*
         * restore callee-saves registers.  (Don't stomp on %%eax!)
         */
        "movl %%esp, %%edx\n\t"
224
        "addl %0-16, %%edx\n\t"
dterei's avatar
dterei committed
225
        "movl 0(%%edx),%%ebx\n\t"       /* restore the registers saved above */
226
227
228
229
        "movl 4(%%edx),%%esi\n\t"
        "movl 8(%%edx),%%edi\n\t"
        "movl 12(%%edx),%%ebp\n\t"

230
231
        "addl %0, %%esp\n\t"
        "ret"
232

233
      : : "i" (RESERVED_C_STACK_BYTES + 16)
234
        // + 16 to make room for the 4 registers we have to save
235
        // See Note [Stack Alignment on X86]
236
    );
237
238
239
240
}

#endif

241
242
243
244
245
246
247
248
249
/* ----------------------------------------------------------------------------
   x86-64 is almost the same as plain x86.

   I've done it using entirely inline assembler, because I couldn't
   get gcc to generate the correct subtraction from %rsp by using
   the local array variable trick.  It didn't seem to reserve
   enough space.  Oh well, it's not much harder this way.
   ------------------------------------------------------------------------- */

Ben Gamari's avatar
Ben Gamari committed
250
#if defined(x86_64_HOST_ARCH)
251

252
253
#define STG_GLOBAL ".globl "

254
#if defined(darwin_HOST_OS) || defined(ios_HOST_OS)
255
256
257
258
#define STG_HIDDEN ".private_extern "
#else
#define STG_HIDDEN ".hidden "
#endif
259

Simon Marlow's avatar
Simon Marlow committed
260
261
/*
 * Container for the x86-64 StgRun and StgReturn entry points.  Both symbols
 * are defined as labels inside the single inline-assembly statement below;
 * this C function exists only to hold that statement.
 *
 * StgRun:    lowers %rsp by STG_RUN_STACK_FRAME_SIZE, keeps that address in
 *            %rax, lowers %rsp by a further RESERVED_C_STACK_BYTES, and
 *            stores %rbx, %rbp and %r12-%r15 (plus %rdi, %rsi and %xmm6 on
 *            mingw32) at offsets from %rax.  It then copies %rsi (%rdx on
 *            mingw32) into %r13 as BaseReg and jumps to the address held in
 *            %rdi (%rcx on mingw32).  Both values are taken from registers
 *            here, even though an inline comment below mentions the stack.
 * StgReturn: copies %rbx to %rax as the return value, raises %rsp by
 *            RESERVED_C_STACK_BYTES, reloads the saved registers, raises
 *            %rsp by STG_RUN_STACK_FRAME_SIZE and executes retq.
 *
 * NOTE(review): on mingw32 only %xmm6 is saved, and with movq; confirm this
 * matches the set of registers the Windows x64 ABI expects a callee to
 * preserve.
 */
static void GNUC3_ATTRIBUTE(used)
StgRunIsImplementedInAssembler(void)
262
263
{
    __asm__ volatile (
dterei's avatar
dterei committed
264
265
266
        /*
         * save callee-saves registers on behalf of the STG code.
         */
267
        STG_GLOBAL STG_RUN "\n"
Ian Lynagh's avatar
Ian Lynagh committed
268
#if !defined(mingw32_HOST_OS)
269
        STG_HIDDEN STG_RUN "\n"
Ian Lynagh's avatar
Ian Lynagh committed
270
#endif
dterei's avatar
dterei committed
271
        STG_RUN ":\n\t"
272
        "subq %1, %%rsp\n\t"
dterei's avatar
dterei committed
273
        "movq %%rsp, %%rax\n\t"
274
        "subq %0, %%rsp\n\t"
275
276
277
278
279
280
        "movq %%rbx,0(%%rax)\n\t"
        "movq %%rbp,8(%%rax)\n\t"
        "movq %%r12,16(%%rax)\n\t"
        "movq %%r13,24(%%rax)\n\t"
        "movq %%r14,32(%%rax)\n\t"
        "movq %%r15,40(%%rax)\n\t"
281
282
283
284
285
#if defined(mingw32_HOST_OS)
        "movq %%rdi,48(%%rax)\n\t"
        "movq %%rsi,56(%%rax)\n\t"
        "movq %%xmm6,64(%%rax)\n\t"
#endif
dterei's avatar
dterei committed
286
287
288
        /*
         * Set BaseReg
         */
289
290
291
#if defined(mingw32_HOST_OS)
        "movq %%rdx,%%r13\n\t"
#else
dterei's avatar
dterei committed
292
        "movq %%rsi,%%r13\n\t"
293
#endif
dterei's avatar
dterei committed
294
295
296
        /*
         * grab the function argument from the stack, and jump to it.
         */
297
298
299
#if defined(mingw32_HOST_OS)
        "movq %%rcx,%%rax\n\t"
#else
300
        "movq %%rdi,%%rax\n\t"
301
#endif
302
303
        "jmp *%%rax\n\t"

dterei's avatar
dterei committed
304
305
        ".globl " STG_RETURN "\n"
         STG_RETURN ":\n\t"
306

dterei's avatar
dterei committed
307
        "movq %%rbx, %%rax\n\t"   /* Return value in R1  */
308

dterei's avatar
dterei committed
309
310
311
        /*
         * restore callee-saves registers.  (Don't stomp on %%rax!)
         */
312
        "addq %0, %%rsp\n\t"
313
314
315
316
317
318
        "movq 0(%%rsp),%%rbx\n\t"       /* restore the registers saved above */
        "movq 8(%%rsp),%%rbp\n\t"
        "movq 16(%%rsp),%%r12\n\t"
        "movq 24(%%rsp),%%r13\n\t"
        "movq 32(%%rsp),%%r14\n\t"
        "movq 40(%%rsp),%%r15\n\t"
319
#if defined(mingw32_HOST_OS)
320
321
322
        "movq 48(%%rsp),%%rdi\n\t"
        "movq 56(%%rsp),%%rsi\n\t"
        "movq 64(%%rsp),%%xmm6\n\t"
323
#endif
324
        "addq %1, %%rsp\n\t"
dterei's avatar
dterei committed
325
        "retq"
326

327
328
        :
        : "i"(RESERVED_C_STACK_BYTES),
329
          "i"(STG_RUN_STACK_FRAME_SIZE /* stack frame size */)
330
        );
331
332
333
        /*
         * See Note [Stack Alignment on X86]
         */
334
335
336
337
}

#endif /* x86-64 */

338
/* -----------------------------------------------------------------------------
339
340
   Sparc architecture

ken's avatar
ken committed
341
   --
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
   OLD COMMENT from GHC-3.02:

   We want tailjumps to be calls, because `call xxx' is the only Sparc
   branch that allows an arbitrary label as a target.  (Gcc's ``goto
   *target'' construct ends up loading the label into a register and
   then jumping, at the cost of two extra instructions for the 32-bit
   load.)

   When entering the threaded world, we stash our return address in a
   known location so that \tr{%i7} is available as an extra
   callee-saves register.  Of course, we have to restore this when
   coming out of the threaded world.

   I hate this god-forsaken architecture.  Since the top of the
   reserved stack space is used for globals and the bottom is reserved
   for outgoing arguments, we have to stick our return address
   somewhere in the middle.  Currently, I'm allowing 100 extra
   outgoing arguments beyond the first 6.  --JSM

   Updated info (GHC 4.06): we don't appear to use %i7 any more, so
   I'm not sure whether we still need to save it.  Incidentally, what
   does the last paragraph above mean when it says "the top of the
   stack is used for globals"?  What globals?  --SDM

366
   Updated info (GHC 4.08.2): not saving %i7 any more (see below).
367
   -------------------------------------------------------------------------- */
ken's avatar
ken committed
368

Ben Gamari's avatar
Ben Gamari committed
369
#if defined(sparc_HOST_ARCH)
370

371
/*
 * SPARC StgRun.
 *
 * Declares RESERVED_C_STACK_BYTES of local storage, calls f directly, and
 * defines the StgReturn label in inline assembly immediately after that
 * call.  The value returned is R1.i cast to StgRegTable *.  basereg is not
 * referenced in the body.  The two "#if 0" regions are disabled code that
 * saved and reloaded %i7.
 */
StgRegTable *
372
StgRun(StgFunPtr f, StgRegTable *basereg) {
373

374
375
376
377
378
    unsigned char space[RESERVED_C_STACK_BYTES];
#if 0
    register void *i7 __asm__("%i7");
    ((void **)(space))[100] = i7;
#endif
379
    f();
380
    __asm__ volatile (
dterei's avatar
dterei committed
381
382
383
384
                 ".align 4\n"
                 ".global " STG_RETURN "\n"
                 STG_RETURN ":"
                 : : "p" (space) : "l0","l1","l2","l3","l4","l5","l6","l7");
385
386
387
388
389
390
    /* we tell the C compiler that l0-l7 are clobbered on return to
     * StgReturn, otherwise it tries to use these to save eg. the
     * address of space[100] across the call.  The correct thing
     * to do would be to save all the callee-saves regs, but we
     * can't be bothered to do that.
     *
391
392
393
     * We also explicitly mark space as used since gcc eliminates it
     * otherwise.
     *
394
395
396
397
     * The code that gcc generates for this little fragment is now
     * terrible.  We could do much better by coding it directly in
     * assembler.
     */
398
#if 0
399
400
401
402
403
    /* updated 4.08.2: we don't save %i7 in the middle of the reserved
     * space any more, since gcc tries to save its address across the
     * call to f(), this gets clobbered in STG land and we end up
     * dereferencing a bogus pointer in StgReturn.
     */
ken's avatar
ken committed
404
    __asm__ volatile ("ld %1,%0"
dterei's avatar
dterei committed
405
                                : "=r" (i7) : "m" (((void **)(space))[100]));
406
#endif
407
    return (StgRegTable *)R1.i;
408
409
410
411
}

#endif

412
413
414
/* -----------------------------------------------------------------------------
   PowerPC architecture

415
   Everything is in assembler, so we don't have to deal with GCC...
416
417
   -------------------------------------------------------------------------- */

Ben Gamari's avatar
Ben Gamari committed
418
#if defined(powerpc_HOST_ARCH)
419

420
421
#define STG_GLOBAL ".globl "

422
#if defined(darwin_HOST_OS)
423
424
425
426
#define STG_HIDDEN ".private_extern "
#else
#define STG_HIDDEN ".hidden "
#endif
427

428
429
430
431
432
#if defined(aix_HOST_OS)

// implementation is in StgCRunAsm.S

#elif defined(darwin_HOST_OS)
433
/*
 * Container for the PowerPC Darwin StgRun and _StgReturn entry points, which
 * are defined as labels inside the inline assembly below.
 *
 * StgRun:     copies LR to r0, calls saveFP, stores r13 onwards with stmw at
 *             -220(r1), allocates a frame of RESERVED_C_STACK_BYTES+224
 *             bytes with stwu, copies r4 to r27 (BaseReg), copies r3 to CTR
 *             and r12, and branches via bctr.
 * _StgReturn: copies r14 to r3, pops the frame, reloads the registers stored
 *             by stmw and branches to restFP.
 *
 * NOTE(review): saveFP/restFP are external helpers not shown here;
 * presumably they save/restore the floating-point registers from f14 up and
 * restFP performs the final return -- confirm.
 */
void StgRunIsImplementedInAssembler(void)
434
{
435
436
437
#if HAVE_SUBSECTIONS_VIA_SYMBOLS
            // if the toolchain supports deadstripping, we have to
            // prevent it here (it tends to get confused here).
438
        __asm__ volatile (".no_dead_strip _StgRunIsImplementedInAssembler\n");
439
#endif
dterei's avatar
dterei committed
440
        __asm__ volatile (
441
442
443
                STG_GLOBAL STG_RUN "\n"
                STG_HIDDEN STG_RUN "\n"
                STG_RUN ":\n"
dterei's avatar
dterei committed
444
445
446
447
                "\tmflr r0\n"
                "\tbl saveFP # f14\n"
                "\tstmw r13,-220(r1)\n"
                "\tstwu r1,-%0(r1)\n"
448
                "\tmr r27,r4\n" // BaseReg == r27
dterei's avatar
dterei committed
449
450
451
452
453
454
455
456
457
458
                "\tmtctr r3\n"
                "\tmr r12,r3\n"
                "\tbctr\n"
                ".globl _StgReturn\n"
                "_StgReturn:\n"
                "\tmr r3,r14\n"
                "\tla r1,%0(r1)\n"
                "\tlmw r13,-220(r1)\n"
                "\tb restFP # f14\n"
        : : "i"(RESERVED_C_STACK_BYTES+224 /*stack frame size*/));
459
}
460
461
462
463
464
465
466
467
468
469
470
471
#else

// This version is for PowerPC Linux.

// Differences from the Darwin/Mac OS X version:
// *) Different Assembler Syntax
// *) Doesn't use Register Saving Helper Functions (although they exist somewhere)
// *) We may not access positive stack offsets
//    (no "Red Zone" as in the Darwin ABI)
// *) The Link Register is saved to a different offset in the caller's stack frame
//    (Linux: 4(r1), Darwin 8(r1))

Simon Marlow's avatar
Simon Marlow committed
472
473
/*
 * Container for the PowerPC Linux StgRun and StgReturn entry points, which
 * are defined as labels inside the inline assembly below.
 *
 * StgRun:    stores LR at 4(r1), keeps the incoming stack pointer in r5,
 *            allocates a frame of RESERVED_C_STACK_BYTES+224 bytes with
 *            stwu, then stores r13 onwards (stmw) and f14-f31 (stfd) at
 *            negative offsets from r5.  It copies r4 to r27 (BaseReg),
 *            copies r3 to CTR and r12, and branches via bctr.
 * StgReturn: copies r14 to r3, recomputes the pre-frame stack pointer in r5,
 *            reloads the registers saved above, restores r1 and LR, and
 *            returns with blr.
 */
static void GNUC3_ATTRIBUTE(used)
StgRunIsImplementedInAssembler(void)
474
{
dterei's avatar
dterei committed
475
476
        __asm__ volatile (
                "\t.globl StgRun\n"
477
                "\t.hidden StgRun\n"
dterei's avatar
dterei committed
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
                "\t.type StgRun,@function\n"
                "StgRun:\n"
                "\tmflr 0\n"
                "\tstw 0,4(1)\n"
                "\tmr 5,1\n"
                "\tstwu 1,-%0(1)\n"
                "\tstmw 13,-220(5)\n"
                "\tstfd 14,-144(5)\n"
                "\tstfd 15,-136(5)\n"
                "\tstfd 16,-128(5)\n"
                "\tstfd 17,-120(5)\n"
                "\tstfd 18,-112(5)\n"
                "\tstfd 19,-104(5)\n"
                "\tstfd 20,-96(5)\n"
                "\tstfd 21,-88(5)\n"
                "\tstfd 22,-80(5)\n"
                "\tstfd 23,-72(5)\n"
                "\tstfd 24,-64(5)\n"
                "\tstfd 25,-56(5)\n"
                "\tstfd 26,-48(5)\n"
                "\tstfd 27,-40(5)\n"
                "\tstfd 28,-32(5)\n"
                "\tstfd 29,-24(5)\n"
                "\tstfd 30,-16(5)\n"
                "\tstfd 31,-8(5)\n"
                "\tmr 27,4\n"  // BaseReg == r27
                "\tmtctr 3\n"
                "\tmr 12,3\n"
                "\tbctr\n"
                ".globl StgReturn\n"
                "\t.type StgReturn,@function\n"
                "StgReturn:\n"
                "\tmr 3,14\n"
                "\tla 5,%0(1)\n"
                "\tlmw 13,-220(5)\n"
                "\tlfd 14,-144(5)\n"
                "\tlfd 15,-136(5)\n"
                "\tlfd 16,-128(5)\n"
                "\tlfd 17,-120(5)\n"
                "\tlfd 18,-112(5)\n"
                "\tlfd 19,-104(5)\n"
                "\tlfd 20,-96(5)\n"
                "\tlfd 21,-88(5)\n"
                "\tlfd 22,-80(5)\n"
                "\tlfd 23,-72(5)\n"
                "\tlfd 24,-64(5)\n"
                "\tlfd 25,-56(5)\n"
                "\tlfd 26,-48(5)\n"
                "\tlfd 27,-40(5)\n"
                "\tlfd 28,-32(5)\n"
                "\tlfd 29,-24(5)\n"
                "\tlfd 30,-16(5)\n"
                "\tlfd 31,-8(5)\n"
                "\tmr 1,5\n"
                "\tlwz 0,4(1)\n"
                "\tmtlr 0\n"
                "\tblr\n"
        : : "i"(RESERVED_C_STACK_BYTES+224 /*stack frame size*/));
536
537
}
#endif
538
539
540

#endif

541
542
543
544
545
546
/* -----------------------------------------------------------------------------
   PowerPC 64 architecture

   Everything is in assembler, so we don't have to deal with GCC...
   -------------------------------------------------------------------------- */

Ben Gamari's avatar
Ben Gamari committed
547
#if defined(powerpc64_HOST_ARCH)
548

Ben Gamari's avatar
Ben Gamari committed
549
#if defined(linux_HOST_OS)
Simon Marlow's avatar
Simon Marlow committed
550
551
/*
 * Container for the PowerPC64 Linux StgRun and StgReturn entry points,
 * emitted from the inline assembly below.
 *
 * The assembly first places two 24-byte entries named StgRun and StgReturn
 * in the ".opd" section; each holds the address of the dot-prefixed code
 * symbol (.StgRun / .StgReturn), .TOC.@tocbase and 0.
 *
 * .StgRun:    stores LR at 16(r1), keeps the incoming stack pointer in r5,
 *             allocates a frame of RESERVED_C_STACK_BYTES+304 bytes with
 *             stdu, and stores r2, r14-r31 and f14-f31 at negative offsets
 *             from r5.  It copies r4 to r27 (BaseReg), loads r2 from 8(r3)
 *             and the branch target from 0(r3), and branches via bctr.
 * .StgReturn: copies r14 to r3, recomputes the pre-frame stack pointer in
 *             r5, reloads everything saved above, restores r1 and LR, and
 *             returns with blr.
 */
static void GNUC3_ATTRIBUTE(used)
StgRunIsImplementedInAssembler(void)
552
553
{
        // r0 volatile
dterei's avatar
dterei committed
554
555
556
557
558
559
560
561
562
563
        // r1 stack pointer
        // r2 toc - needs to be saved
        // r3-r10 argument passing, volatile
        // r11, r12 very volatile (not saved across cross-module calls)
        // r13 thread local state (never modified, don't need to save)
        // r14-r31 callee-save
        __asm__ volatile (
                ".section \".opd\",\"aw\"\n"
                ".align 3\n"
                ".globl StgRun\n"
564
                ".hidden StgRun\n"
dterei's avatar
dterei committed
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
                "StgRun:\n"
                "\t.quad\t.StgRun,.TOC.@tocbase,0\n"
                "\t.size StgRun,24\n"
                ".globl StgReturn\n"
                "StgReturn:\n"
                "\t.quad\t.StgReturn,.TOC.@tocbase,0\n"
                "\t.size StgReturn,24\n"
                ".previous\n"
                ".globl .StgRun\n"
                ".type .StgRun,@function\n"
                ".StgRun:\n"
                "\tmflr 0\n"
                "\tmr 5, 1\n"
                "\tstd 0, 16(1)\n"
                "\tstdu 1, -%0(1)\n"
                "\tstd 2, -296(5)\n"
                "\tstd 14, -288(5)\n"
                "\tstd 15, -280(5)\n"
                "\tstd 16, -272(5)\n"
                "\tstd 17, -264(5)\n"
                "\tstd 18, -256(5)\n"
                "\tstd 19, -248(5)\n"
                "\tstd 20, -240(5)\n"
                "\tstd 21, -232(5)\n"
                "\tstd 22, -224(5)\n"
                "\tstd 23, -216(5)\n"
                "\tstd 24, -208(5)\n"
                "\tstd 25, -200(5)\n"
                "\tstd 26, -192(5)\n"
                "\tstd 27, -184(5)\n"
                "\tstd 28, -176(5)\n"
                "\tstd 29, -168(5)\n"
                "\tstd 30, -160(5)\n"
                "\tstd 31, -152(5)\n"
                "\tstfd 14, -144(5)\n"
                "\tstfd 15, -136(5)\n"
                "\tstfd 16, -128(5)\n"
                "\tstfd 17, -120(5)\n"
                "\tstfd 18, -112(5)\n"
                "\tstfd 19, -104(5)\n"
                "\tstfd 20, -96(5)\n"
                "\tstfd 21, -88(5)\n"
                "\tstfd 22, -80(5)\n"
                "\tstfd 23, -72(5)\n"
                "\tstfd 24, -64(5)\n"
                "\tstfd 25, -56(5)\n"
                "\tstfd 26, -48(5)\n"
                "\tstfd 27, -40(5)\n"
                "\tstfd 28, -32(5)\n"
                "\tstfd 29, -24(5)\n"
                "\tstfd 30, -16(5)\n"
                "\tstfd 31, -8(5)\n"
                "\tmr 27, 4\n"  // BaseReg == r27
                "\tld 2, 8(3)\n"
                "\tld 3, 0(3)\n"
                "\tmtctr 3\n"
                "\tbctr\n"
                ".globl .StgReturn\n"
                ".type .StgReturn,@function\n"
                ".StgReturn:\n"
                "\tmr 3,14\n"
                "\tla 5, %0(1)\n" // load address == addi r5, r1, %0
                "\tld 2, -296(5)\n"
                "\tld 14, -288(5)\n"
                "\tld 15, -280(5)\n"
                "\tld 16, -272(5)\n"
                "\tld 17, -264(5)\n"
                "\tld 18, -256(5)\n"
                "\tld 19, -248(5)\n"
                "\tld 20, -240(5)\n"
                "\tld 21, -232(5)\n"
                "\tld 22, -224(5)\n"
                "\tld 23, -216(5)\n"
                "\tld 24, -208(5)\n"
                "\tld 25, -200(5)\n"
                "\tld 26, -192(5)\n"
                "\tld 27, -184(5)\n"
                "\tld 28, -176(5)\n"
                "\tld 29, -168(5)\n"
                "\tld 30, -160(5)\n"
                "\tld 31, -152(5)\n"
                "\tlfd 14, -144(5)\n"
                "\tlfd 15, -136(5)\n"
                "\tlfd 16, -128(5)\n"
                "\tlfd 17, -120(5)\n"
                "\tlfd 18, -112(5)\n"
                "\tlfd 19, -104(5)\n"
                "\tlfd 20, -96(5)\n"
                "\tlfd 21, -88(5)\n"
                "\tlfd 22, -80(5)\n"
                "\tlfd 23, -72(5)\n"
                "\tlfd 24, -64(5)\n"
                "\tlfd 25, -56(5)\n"
                "\tlfd 26, -48(5)\n"
                "\tlfd 27, -40(5)\n"
                "\tlfd 28, -32(5)\n"
                "\tlfd 29, -24(5)\n"
                "\tlfd 30, -16(5)\n"
                "\tlfd 31, -8(5)\n"
                "\tmr 1, 5\n"
                "\tld 0, 16(1)\n"
                "\tmtlr 0\n"
                "\tblr\n"
        : : "i"(RESERVED_C_STACK_BYTES+304 /*stack frame size*/));
669
}
670

671
#else // linux_HOST_OS
672
#error Only Linux support for power64 right now.
673
674
675
676
#endif

#endif

Ben Gamari's avatar
Ben Gamari committed
677
#if defined(powerpc64le_HOST_ARCH)
678
679
680
681
682
683
684
/* -----------------------------------------------------------------------------
   PowerPC 64 little endian architecture

   Really everything is in assembler, so we don't have to deal with GCC...
   -------------------------------------------------------------------------- */
#endif

685
686
687
688
/* -----------------------------------------------------------------------------
   ARM architecture
   -------------------------------------------------------------------------- */

Ben Gamari's avatar
Ben Gamari committed
689
#if defined(arm_HOST_ARCH)
690
691
692
693
694
695
696

#if defined(__thumb__)
#define THUMB_FUNC ".thumb\n\t.thumb_func\n\t"
#else
#define THUMB_FUNC
#endif

697
698
699
700
/*
 * ARM StgRun.
 *
 * Pushes r4-r11, ip and lr (and d8-d11 unless building for pre-ARMv6),
 * lowers sp by RESERVED_C_STACK_BYTES, copies basereg into r4 and branches
 * to f.  The StgReturn label is defined in the middle of the same assembly
 * statement: code that jumps there has the reserved space released, r7
 * copied into the result, and the saved registers popped, after which the
 * C function returns that result.
 *
 * f        - STG code to jump to.
 * basereg  - register table address, placed in r4 before the jump.
 * returns  - the value found in r7 when control reaches StgReturn.
 */
StgRegTable *
StgRun(StgFunPtr f, StgRegTable *basereg) {
    StgRegTable * r;
    __asm__ volatile (
        /*
         * save callee-saves registers on behalf of the STG code.
         */
        "stmfd sp!, {r4-r11, ip, lr}\n\t"
#if !defined(arm_HOST_ARCH_PRE_ARMv6)
        "vstmdb sp!, {d8-d11}\n\t"
#endif
        /*
         * allocate some space for Stg machine's temporary storage.
         * Note: RESERVED_C_STACK_BYTES has to be a round number here or
         * the assembler can't assemble it.
         */
        "sub sp, sp, %3\n\t"
        /*
         * Set BaseReg
         */
        "mov r4, %2\n\t"
        /*
         * Jump to function argument.
         */
        "bx %1\n\t"

        ".globl " STG_RETURN "\n\t"
        THUMB_FUNC
#if !defined(ios_HOST_OS)
        ".type " STG_RETURN ", %%function\n"
#endif
        STG_RETURN ":\n\t"
        /*
         * Free the space we allocated
         */
        "add sp, sp, %3\n\t"
        /*
         * Return the new register table, taking it from Stg's R1 (ARM's R7).
         */
        "mov %0, r7\n\t"
        /*
         * restore callee-saves registers.
         */
#if !defined(arm_HOST_ARCH_PRE_ARMv6)
        "vldmia sp!, {d8-d11}\n\t"
#endif
        "ldmfd sp!, {r4-r11, ip, lr}\n\t"
      : "=r" (r)
      : "r" (f), "r" (basereg), "i" (RESERVED_C_STACK_BYTES)
#if !defined(__thumb__)
        /* In ARM mode, r11/fp is frame-pointer and so we cannot mark
           it as clobbered. If we do so, GCC complains with error. */
      : "%r4", "%r5", "%r6", "%r7", "%r8", "%r9", "%r10", "%ip", "%lr"
#else
        /* In Thumb mode r7 is frame-pointer and so we cannot mark it
           as clobbered. On the other hand we mark as clobbered also
           those regs not used in Thumb mode. Hard to judge if this is
           needed, but certainly Haskell code is using them for
           placing GHC's virtual registers there. See
           includes/stg/MachRegs.h Please note that Haskell code is
           compiled by GHC/LLVM into ARM code (not Thumb!), at least
           as of February 2012 */
        /* r11 is spelled "%r11" like every other entry in these lists
           (it was previously written as the bare number "%11"). */
      : "%r4", "%r5", "%r6", "%r8", "%r9", "%r10", "%r11", "%ip", "%lr"
#endif
    );
    return r;
}
#endif

Ben Gamari's avatar
Ben Gamari committed
766
#if defined(aarch64_HOST_ARCH)
Colin Watson's avatar
Colin Watson committed
767
768
769
770
771
772

/*
 * AArch64 StgRun.
 *
 * Pushes x29/x30, sets x29 to sp, pushes x16/x17, x19-x28 and d8-d15, lowers
 * sp by RESERVED_C_STACK_BYTES, copies basereg into x19 and branches to f.
 * The StgReturn label is defined in the middle of the same assembly
 * statement: code that jumps there has the reserved space released, x22
 * copied into the result, and the saved registers popped in reverse order,
 * after which the C function returns that result.
 *
 * f        - STG code to jump to.
 * basereg  - register table address, placed in x19 before the jump.
 * returns  - the value found in x22 when control reaches StgReturn.
 */
StgRegTable *
StgRun(StgFunPtr f, StgRegTable *basereg) {
    StgRegTable * r;
    __asm__ volatile (
        /*
773
774
         * Save callee-saves registers on behalf of the STG code.
         * Floating point registers only need the bottom 64 bits preserved.
775
         * We need to use the names x16, x17, x29 and x30 instead of ip0
776
777
         * ip1, fp and lr because one of either clang or gcc doesn't understand
         * the latter names.
Colin Watson's avatar
Colin Watson committed
778
         */
779
780
        "stp x29,  x30,  [sp, #-16]!\n\t"
        "mov x29, sp\n\t"
781
        "stp x16, x17, [sp, #-16]!\n\t"
Colin Watson's avatar
Colin Watson committed
782
783
784
785
        "stp x19, x20, [sp, #-16]!\n\t"
        "stp x21, x22, [sp, #-16]!\n\t"
        "stp x23, x24, [sp, #-16]!\n\t"
        "stp x25, x26, [sp, #-16]!\n\t"
786
        "stp x27, x28, [sp, #-16]!\n\t"
787
788
789
790
        "stp d8,  d9,  [sp, #-16]!\n\t"
        "stp d10, d11, [sp, #-16]!\n\t"
        "stp d12, d13, [sp, #-16]!\n\t"
        "stp d14, d15, [sp, #-16]!\n\t"
Colin Watson's avatar
Colin Watson committed
791
792
793

        /*
         * allocate some space for Stg machine's temporary storage.
Erik de Castro Lopo's avatar
Erik de Castro Lopo committed
794
         * Note: RESERVED_C_STACK_BYTES has to be a round number here or
Colin Watson's avatar
Colin Watson committed
795
796
         * the assembler can't assemble it.
         */
797
        "sub sp, sp, %3\n\t"
Colin Watson's avatar
Colin Watson committed
798
799
800
801
802
803
804
        /*
         * Set BaseReg
         */
        "mov x19, %2\n\t"
        /*
         * Jump to function argument.
         */
805
        "br %1\n\t"
Colin Watson's avatar
Colin Watson committed
806
807

        ".globl " STG_RETURN "\n\t"
808
#if !defined(ios_HOST_OS)
Colin Watson's avatar
Colin Watson committed
809
        ".type " STG_RETURN ", %%function\n"
810
#endif
Colin Watson's avatar
Colin Watson committed
811
812
813
814
        STG_RETURN ":\n\t"
        /*
         * Free the space we allocated
         */
815
        "add sp, sp, %3\n\t"
Colin Watson's avatar
Colin Watson committed
816
817
818
819
820
821
822
        /*
         * Return the new register table, taking it from Stg's R1 (ARM64's R22).
         */
        "mov %0, x22\n\t"
        /*
         * restore callee-saves registers.
         */
823
824
825
826
827

        "ldp d14, d15, [sp], #16\n\t"
        "ldp d12, d13, [sp], #16\n\t"
        "ldp d10, d11, [sp], #16\n\t"
        "ldp d8,  d9,  [sp], #16\n\t"
Colin Watson's avatar
Colin Watson committed
828
829
830
831
832
        "ldp x27, x28, [sp], #16\n\t"
        "ldp x25, x26, [sp], #16\n\t"
        "ldp x23, x24, [sp], #16\n\t"
        "ldp x21, x22, [sp], #16\n\t"
        "ldp x19, x20, [sp], #16\n\t"
833
        "ldp x16, x17, [sp], #16\n\t"
834
        "ldp x29,  x30,  [sp], #16\n\t"
Colin Watson's avatar
Colin Watson committed
835
836
837
838

      : "=r" (r)
      : "r" (f), "r" (basereg), "i" (RESERVED_C_STACK_BYTES)
        : "%x19", "%x20", "%x21", "%x22", "%x23", "%x24", "%x25", "%x26", "%x27", "%x28",
839
          "%x16", "%x17", "%x30"
Colin Watson's avatar
Colin Watson committed
840
841
842
843
844
845
    );
    return r;
}

#endif

846
#endif /* !USE_MINIINTERPRETER */