Commit cb5ccf0a authored by matthewc's avatar matthewc
Browse files

[project @ 2002-06-03 13:08:37 by matthewc]

Initial mangling and tailcalls support for IA64.

Function prologues and epilogues are deleted and we use a single register
stack frame throughout (with a little register renaming in the mangler...)
Dropthrough from fast to slow entry point is also implemented.
Tailcalls are marked and converted into jumps at mangle time.
parent 852baf4d
-----------------------------------------------------------------------------
-- $Id: DriverFlags.hs,v 1.94 2002/05/15 08:59:59 chak Exp $
-- $Id: DriverFlags.hs,v 1.95 2002/06/03 13:08:37 matthewc Exp $
--
-- Driver flags
--
......@@ -565,6 +565,9 @@ machdepCCOpts
"-DSTOLEN_X86_REGS="++show n_regs ]
)
| prefixMatch "ia64" cTARGETPLATFORM
= return ( [], ["-fomit-frame-pointer", "-G0"] )
| prefixMatch "mips" cTARGETPLATFORM
= return ( ["-static"], [] )
......
......@@ -212,6 +212,34 @@ sub init_TARGET_STUFF {
$T_HDR_direct = "\.text\n\t\.align 4\n";
$T_create_word = "\t.word";
#--------------------------------------------------------#
} elsif ( $TargetPlatform =~ /^ia64-.*-linux$/ ) {
$T_STABBY = 0; # 1 iff .stab things (usually if a.out format)
$T_US = ''; # _ if symbols have an underscore on the front
$T_PRE_APP = '#';
$T_CONST_LBL = '^\.LC(\d+):$'; # regexp for what such a lbl looks like
$T_POST_LBL = ':';
$T_MOVE_DIRVS = '^(\s*\.(global|proc|pred\.safe_across_calls|text|data|section|align|size|type|ident)\s+.*\n)';
$T_COPY_DIRVS = '\.(global|proc)';
$T_hsc_cc_PAT = '\.string.*\)(hsc|cc) (.*)\\\\t(.*)"';
$T_DOT_WORD = '\.(long|value|byte|zero)';
$T_DOT_GLOBAL = '\.global';
$T_HDR_literal = "\.section\t\.rodata\n";
$T_HDR_misc = "\.text\n\t\.align 8\n";
$T_HDR_data = "\.data\n\t\.align 8\n";
$T_HDR_consist = "\.text\n";
$T_HDR_closure = "\.data\n\t\.align 8\n";
$T_HDR_srt = "\.text\n\t\.align 8\n";
$T_HDR_info = "\.text\n\t\.align 8\n";
$T_HDR_entry = "\.text\n\t\.align 16\n";
$T_HDR_fast = "\.text\n\t\.align 16\n";
$T_HDR_vector = "\.text\n\t\.align 8\n";
$T_HDR_direct = "\.text\n\t\.align 8\n";
$T_create_word = "\t.word";
#--------------------------------------------------------#
} elsif ( $TargetPlatform =~ /^m68k-.*-sunos4/ ) {
......@@ -437,7 +465,7 @@ sub mangle_asm {
next if /${T_PRE_APP}(NO_)?APP/o;
next if /^;/ && $TargetPlatform =~ /^hppa/;
next if /(^$|^\t\.file\t|^ # )/ && $TargetPlatform =~ /^(mips)-/;
next if /(^$|^\t\.file\t|^ # )/ && $TargetPlatform =~ /^(mips|ia64)-/;
if ( $TargetPlatform =~ /^mips-/
&& /^\t\.(globl\S+\.text|comm\t)/ ) {
......@@ -657,7 +685,10 @@ sub mangle_asm {
# toss all prologue stuff; HPPA is pretty weird
# (see elsewhere)
$c = &mash_hppa_prologue($c) if $TargetPlatform =~ /^hppa/;
$c = &hppa_mash_prologue($c) if $TargetPlatform =~ /^hppa-/;
# do some register renaming before dropping the prologue
$c = &ia64_rename_outputs($c) if $TargetPlatform =~ /^ia64-/;
# be slightly paranoid to make sure there's
# nothing surprising in there
......@@ -684,6 +715,14 @@ sub mangle_asm {
$r = $& . $r;
}
} elsif ($TargetPlatform =~ /^ia64-/) {
$p =~ s/^\t\.prologue .*\n//;
$p =~ s/^\t\.save ar\.pfs, r\d+\n\talloc r\d+ = ar\.pfs, .*\n//;
$p =~ s/^\t\.fframe \d+\n\tadds r12 = -\d+, r12\n//;
$p =~ s/^\t\.save rp, r\d+\n\tmov r\d+ = b0\n//;
$p =~ s/^\t\.(mii|mmi)\n//; # bundling is no longer sensible
$p =~ s/^\t;;\n//g; # discard stops
$p =~ s/^\t\/\/.*\n//g; # gcc inserts timings in // comments
} elsif ($TargetPlatform =~ /^m68k-/) {
$p =~ s/^\tlink a6,#-?\d.*\n//;
$p =~ s/^\tpea a6@\n\tmovel sp,a6\n//;
......@@ -760,6 +799,14 @@ sub mangle_asm {
$e =~ s/^\tpopl\s+\%ecx\n//;
$e =~ s/^\taddl\s+\$\d+,\s*\%esp\n//;
$e =~ s/^\tsubl\s+\$-\d+,\s*\%esp\n//;
} elsif ($TargetPlatform =~ /^ia64-/) {
$e =~ s/^\tmov ar\.pfs = r\d+\n//;
$e =~ s/^\tmov b0 = r\d+\n//;
$e =~ s/^\t\.restore sp\n\tadds r12 = \d+, r12\n//;
$e =~ s/^\tbr\.ret\.sptk\.many b0\n//;
$e =~ s/^\t\.(mii|mmi|mib)\n//g; # bundling is no longer sensible
$e =~ s/^\t;;\n//g; # discard stops - stop at end of body is sufficient
$e =~ s/^\t\/\/.*\n//g; # gcc inserts timings in // comments
} elsif ($TargetPlatform =~ /^m68k-/) {
$e =~ s/^\tunlk a6\n//;
$e =~ s/^\trts\n//;
......@@ -801,6 +848,10 @@ sub mangle_asm {
$c =~ s/^\t(call|jbsr|jal)\s+${T_US}__DISCARD__\n//go;
$c =~ s/^\tbl\s+L___DISCARD__\$stub\n//go if $TargetPlatform =~ /^powerpc-apple-.*/;
# IA64: mangle tailcalls into jumps here
$c =~ s/^\tbr\.call\.sptk\.many b0 = (.*)\n\t;;\n(\tmov r1 = r\d+\n)?\t;;\n\t--- TAILCALL ---\n/\tbr\.few $1\n/g
if $TargetPlatform =~ /^ia64-/;
# MIPS: that may leave some gratuitous asm macros around
# (no harm done; but we get rid of them to be tidier)
$c =~ s/^\t\.set\tnoreorder\n\t\.set\tnomacro\n\taddu\t(\S+)\n\t\.set\tmacro\n\t\.set\treorder\n/\taddu\t$1\n/
......@@ -1049,6 +1100,9 @@ sub mangle_asm {
# The next two only apply if we're not stealing %esi or %edi.
$c =~ s/^\tmovl\s+\$${T_US}${symb}_fast\d*,\s*\%esi\n\tjmp\s+\*\%esi\n// if ($StolenX86Regs < 3);
$c =~ s/^\tmovl\s+\$${T_US}${symb}_fast\d*,\s*\%edi\n\tjmp\s+\*\%edi\n// if ($StolenX86Regs < 4);
} elsif ( $TargetPlatform =~ /^ia64-/ ) {
#$c =~ s/^\tbr\.few ${symb}_fast\d*#\n\t;;\n(\t;;\n\t\.endp ${symb}_entry#\n)/$1/;
$c =~ s/^\tbr\.few ${symb}_fast\d*#\n(\t;;\n\t\.endp ${symb}_entry#\n)/$1/;
} elsif ( $TargetPlatform =~ /^mips-/ ) {
$c =~ s/^\tjmp \$31,\(\$27\),0\n\t\.align 4\n\t\.end/\t.align 4\n\t.end/;
} elsif ( $TargetPlatform =~ /^m68k-/ ) {
......@@ -1174,7 +1228,7 @@ sub mangle_asm {
\end{code}
\begin{code}
sub mash_hppa_prologue { # OK, epilogue, too
sub hppa_mash_prologue { # OK, epilogue, too
local($_) = @_;
# toss all prologue stuff
......@@ -1198,6 +1252,45 @@ sub mash_hppa_prologue { # OK, epilogue, too
}
\end{code}
On IA64 we use a single register frame throughout STG execution, and delete
the frame management instructions from the prologue and epilogue, just as we
do for the memory stack frame. Unfortunately, gcc always uses absolute register
names instead of logical names like out0. This means that outputs (i.e. inputs
to other functions) will end up in the wrong registers relative to our "frame".
Hence this evil register renaming...
\begin{code}
sub ia64_rename_outputs {
    # Rename the output registers used by one chunk of gcc-generated IA64
    # assembly so that they line up with the single, fixed register frame
    # used throughout STG execution.
    #
    # Argument: the assembly text of the chunk (may span many lines).
    # Returns:  the same text with every output register rN replaced by the
    #           register holding that output slot in the standard frame.
    #           Text without a recognisable `alloc' line is returned unchanged.
    # Dies:     if the frame requested by the `alloc' does not fit in the
    #           standard frame (any inputs or rotating registers, more than
    #           $LOCALS locals, or more than 8 outputs).
    local($_) = @_;

    # The alloc line sits somewhere in the middle of the chunk, so ^ and $
    # must match at line boundaries; /m makes that explicit instead of
    # depending on a global multi-line setting made elsewhere.
    return ($_) if (!/^\talloc r\d+ = ar\.pfs, (\d+), (\d+), (\d+), (\d+)$/m);
    local($inputs,$locals,$outputs,$rotating) = ($1,$2,$3,$4);
    local($oldbase,$newbase,$old,$new,$i);
    local($LOCALS) = 24; # must correspond to value in StgCRun.c

    # Check everything fits in our standard frame. Only 8 outputs should
    # ever go in registers.
    die "No inputs allowed: $inputs" if ($inputs > 0);
    die "Too many locals: $locals" if ($locals > $LOCALS);
    die "Too many outputs: $outputs" if ($outputs > 8);
    die "No rotating registers allowed: $rotating" if ($rotating > 0);

    # Stacked registers start at r32; the outputs follow the inputs and locals.
    $oldbase = 32 + $inputs + $locals;   # first output register as gcc numbered it
    $newbase = 32 + 0 + $LOCALS;         # first output register of the standard frame

    # Always do renaming from the top to avoid collisions: $newbase is never
    # below $oldbase (checked above), so a freshly written name can only
    # coincide with an old name that has already been processed.
    for ($i = $outputs-1; $i >= 0; $i--) {
        $old = $oldbase + $i;
        $new = $newbase + $i;
        s/\br$old\b/r$new/g;
    }

    return ($_);
}
\end{code}
\begin{code}
sub print_doctored {
local($_, $need_fallthru_patch) = @_;
......@@ -1347,6 +1440,8 @@ right after the table itself. (The code pasting is done elsewhere.)
sub rev_tbl {
local($symb, $tbl, $discard1) = @_;
return ($tbl) if ($TargetPlatform =~ /^ia64-/);
local($before) = '';
local($label) = '';
local(@imports) = (); # hppa only
......
/* -----------------------------------------------------------------------------
* $Id: Stg.h,v 1.45 2002/02/13 07:48:19 sof Exp $
* $Id: Stg.h,v 1.46 2002/06/03 13:08:41 matthewc Exp $
*
* (c) The GHC Team, 1998-1999
*
......@@ -81,7 +81,7 @@
*
* UNDEFINING THIS WON'T WORK ON ITS OWN. You have been warned.
*/
#ifndef USE_MINIINTERPRETER
#if !defined(USE_MINIINTERPRETER) && !defined(ia64_TARGET_ARCH)
#define TABLES_NEXT_TO_CODE
#endif
......
/* -----------------------------------------------------------------------------
* $Id: TailCalls.h,v 1.9 2002/05/28 09:22:08 wolfgang Exp $
* $Id: TailCalls.h,v 1.10 2002/06/03 13:08:41 matthewc Exp $
*
* (c) The GHC Team, 1998-1999
*
......@@ -185,6 +185,21 @@ but uses $$dyncall if necessary to cope, just in case you aren't.
#endif /* powerpc_TARGET_ARCH */
/* -----------------------------------------------------------------------------
Tail calling on IA64
-------------------------------------------------------------------------- */
#ifdef ia64_TARGET_ARCH
/* The compiler can more intelligently decide how to do this. We therefore
 * implement it as a call and optimise to a jump at mangle time: the call is
 * immediately followed by a "--- TAILCALL ---" marker in the assembly output,
 * which the mangler matches when it rewrites the br.call into a br.few.
 *
 * The call and the marker are wrapped in one block so that JMP_() expands to
 * a single statement; unwrapped, `if (c) JMP_(x);' would guard only the call
 * and emit the marker unconditionally. A plain block (rather than
 * do { } while (0)) keeps uses without a trailing semicolon compiling. */
#define JMP_(cont) { ((F_) (cont))(); __asm__ volatile ("--- TAILCALL ---"); }
/* Don't emit calls to __DISCARD__ as this causes hassles */
#define __DISCARD__()
#endif
/* -----------------------------------------------------------------------------
FUNBEGIN and FUNEND.
......
/* -----------------------------------------------------------------------------
* $Id: StgCRun.c,v 1.33 2002/05/21 14:58:49 wolfgang Exp $
* $Id: StgCRun.c,v 1.34 2002/06/03 13:08:41 matthewc Exp $
*
* (c) The GHC Team, 1998-2000
*
......@@ -465,7 +465,7 @@ StgRun(StgFunPtr f, StgRegTable *basereg)
extern StgThreadReturnCode StgRun(StgFunPtr f, StgRegTable *basereg);
void StgRunIsImplementedInAssembler()
void StgRunIsImplementedInAssembler(void)
{
__asm__ volatile (
"\n.globl _StgRun\n"
......@@ -488,5 +488,41 @@ void StgRunIsImplementedInAssembler()
#endif
/* -----------------------------------------------------------------------------
IA64 architecture
Again, in assembler - so we can fiddle with the register stack.
-------------------------------------------------------------------------- */
#ifdef ia64_TARGET_ARCH
/* the memory stack is rarely used, so 16K is excessive */
#undef RESERVED_C_STACK_BYTES
#define RESERVED_C_STACK_BYTES 1024
/*
 * Container for the hand-written IA64 entry points StgRun and StgReturn.
 * The C function only exists to hold the __asm__ block that defines the
 * two global labels.
 *
 * StgRun sets up the register frame and the memory stack reservation, then
 * branches to f through its function descriptor.  StgReturn undoes both and
 * returns to StgRun's caller with 0 in r8.
 *
 * %0 is RESERVED_C_STACK_BYTES, passed as an immediate operand.
 */
void StgRunIsImplementedInAssembler(void)
{
__asm__ volatile(
".global StgRun\n"
"StgRun:\n"
/* Frame: 0 inputs, 24 locals, 8 outputs, 0 rotating.  The local count must
 * stay in step with $LOCALS in the mangler's ia64_rename_outputs.  The old
 * ar.pfs is kept in r55 and the return address in r54 (the two highest
 * locals of r32..r55). */
"\talloc r55 = ar.pfs, 0, 24, 8, 0\n" /* setup register frame */
"\tmov r54 = b0\n" /* save return address */
"\tadds sp = -%0, sp\n" /* setup stack: reserve RESERVED_C_STACK_BYTES */
/* NOTE(review): r32 presumably holds the first argument f, a pointer to a
 * function descriptor (code address, then gp) - confirm against the ABI. */
"\tld8 r16=[r32],8 ;;\n" /* branch to f using descriptor: load first word, advance r32 by 8 */
"\tld8 r1=[r32]\n" /* second descriptor word into r1 */
"\tmov b6=r16\n" /* branch target must be in a branch register */
"\tbr.few b6 ;;\n" /* jump (not call) to the loaded address */
".global StgReturn\n"
"StgReturn:\n"
"\tmov r8 = 0\n" /* return value in r8 */
"\tmov ar.pfs = r55\n" /* restore register frame */
"\tmov b0 = r54\n" /* restore return address */
"\tadds sp = %0, sp\n" /* restore stack: release the bytes reserved in StgRun */
"\tbr.ret.sptk.many b0 ;;\n" /* return */
: : "i"(RESERVED_C_STACK_BYTES));
}
#endif
#endif /* !USE_MINIINTERPRETER */
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment