ghc-asm.lprl 52.2 KB
Newer Older
1
2
%************************************************************************
%*									*
3
\section[Driver-asm-fiddling]{Fiddling with assembler files}
4
5
6
7
8
9
10
11
12
13
14
15
%*									*
%************************************************************************

Tasks:
\begin{itemize}
\item
Utterly stomp out C functions' prologues and epilogues; i.e., the
stuff to do with the C stack.
\item
Any other required tidying up.
\end{itemize}

16
17
18
19
20
21
22
23
24
25
26
27
General note [chak]: Many regexps are very fragile because they rely on white
space being in the right place.  This caused trouble with gcc 2.95 (at least
on Linux), where the use of white space in .s files generated by gcc suddenly 
changed.  To guarantee compatibility across different versions of gcc, make
sure (at least on i386-.*-linux) that regexps tolerate varying amounts of white
space between an assembler statement and its arguments as well as after a the
comma separating multiple arguments.  

\emph{For the time being, I have corrected the regexps for i386-.*-linux.  I
didn't touch all the regexps for other i386 platforms, as I don't have
a box to test these changes.}

28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
HPPA specific notes:
\begin{itemize}
\item
The HP linker is very picky about symbols being in the appropriate
space (code vs. data).  When we mangle the threaded code to put the
info tables just prior to the code, they wind up in code space
rather than data space.  This means that references to *_info from
un-mangled parts of the RTS (e.g. unthreaded GC code) get
unresolved symbols.  Solution:  mini-mangler for .c files on HP.  I
think this should really be triggered in the driver by a new -rts
option, so that user code doesn't get mangled inappropriately.
\item
With reversed tables, jumps are to the _info label rather than to
the _entry label.  The _info label is just an address in code
space, rather than an entry point with the descriptive blob we
talked about yesterday.  As a result, you can't use the call-style
JMP_ macro.  However, some JMP_ macros take _info labels as targets
and some take code entry points within the RTS.  The latter won't
work with the goto-style JMP_ macro.  Sigh.  Solution: Use the goto
style JMP_ macro, and mangle some more assembly, changing all
"RP'literal" and "LP'literal" references to "R'literal" and
"L'literal," so that you get the real address of the code, rather
than the descriptive blob.  Also change all ".word P%literal"
entries in info tables and vector tables to just ".word literal,"
for the same reason.  Advantage: No more ridiculous call sequences.
\end{itemize}

55
56
57
58
59
60
61
62
63
64
65
66
67
68
%************************************************************************
%*									*
\subsection{Top-level code}
%*									*
%************************************************************************

\begin{code}
$TargetPlatform = $TARGETPLATFORM;

($Pgm = $0) =~ s|.*/||;
$ifile = $ARGV[0];
$ofile = $ARGV[1];

if ( $TargetPlatform =~ /^i386-/ ) {
69
70
71
72
73
    if ($ARGV[2] eq '') {
	$StolenX86Regs = 4;
    } else {
        $StolenX86Regs = $ARGV[2];
    }
74
75
76
77
78
79
80
}

&mangle_asm($ifile,$ofile);

exit(0);
\end{code}

81
82
83
84
85
86
%************************************************************************
%*									*
\subsection{Constants for various architectures}
%*									*
%************************************************************************

87
88
89
\begin{code}
sub init_TARGET_STUFF {

90
91
92
93
94
95
    #--------------------------------------------------------#
    if ( $TargetPlatform =~ /^alpha-.*-.*/ ) {

    $T_STABBY	    = 0; # 1 iff .stab things (usually if a.out format)
    $T_US	    = ''; # _ if symbols have an underscore on the front
    $T_PRE_APP	    = 'DONT THINK THIS APPLIES'; # regexp that says what comes before APP/NO_APP
ken's avatar
ken committed
96
    $T_CONST_LBL    = '^\$L?C(\d+):$'; # regexp for what such a lbl looks like
97
98
    $T_POST_LBL	    = ':';

ken's avatar
ken committed
99
100
    $T_MOVE_DIRVS   = '^(\s*(\$.*\.\.ng:|\.align\s+\d+|\.(globl|ent)\s+\S+|\#.*|\.(file|loc)\s+\S+\s+\S+|\.text|\.r?data)\n)';
    $T_COPY_DIRVS   = '^\s*(\$.*\.\.ng:|\#|\.(file|globl|ent|loc))';
101
102

    $T_hsc_cc_PAT   = '\.ascii.*\)(hsc|cc) (.*)\\\\11"\n\t\.ascii\s+"(.*)\\\\0"';
ken's avatar
ken committed
103
    $T_DOT_WORD	    = '\.(long|quad|byte|word)';
rrt's avatar
rrt committed
104
    $T_DOT_GLOBAL   = '^\t\.globl';
105
106
107
108
109
    $T_HDR_literal  = "\.rdata\n\t\.align 3\n";
    $T_HDR_misc	    = "\.text\n\t\.align 3\n";
    $T_HDR_data	    = "\.data\n\t\.align 3\n";
    $T_HDR_consist  = "\.text\n";
    $T_HDR_closure  = "\.data\n\t\.align 3\n";
110
    $T_HDR_srt      = "\.text\n\t\.align 3\n";
111
112
113
114
    $T_HDR_info	    = "\.text\n\t\.align 3\n";
    $T_HDR_entry    = "\.text\n\t\.align 3\n";
    $T_HDR_vector   = "\.text\n\t\.align 3\n";
    $T_HDR_direct   = "\.text\n\t\.align 3\n";
115
    $T_create_word  = "\t.quad";
116
117
118
119
120
121
122
123
124
125
126
127
128

    #--------------------------------------------------------#
    } elsif ( $TargetPlatform =~ /^hppa/ ) {

    $T_STABBY	    = 0; # 1 iff .stab things (usually if a.out format)
    $T_US	    = ''; # _ if symbols have an underscore on the front
    $T_PRE_APP	    = 'DONT THINK THIS APPLIES'; # regexp that says what comes before APP/NO_APP
    $T_CONST_LBL    = '^L\$C(\d+)$'; # regexp for what such a lbl looks like
    $T_POST_LBL	    = '';

    $T_MOVE_DIRVS   = '^((\s+\.(IMPORT|EXPORT|PARAM).*|\s+\.align\s+\d+|\s+\.(SPACE|SUBSPA)\s+\S+|\s*)\n)';
    $T_COPY_DIRVS   = '^\s+\.(IMPORT|EXPORT)';

sof's avatar
sof committed
129
    $T_hsc_cc_PAT   = '\.STRING.*\)(hsc|cc) (.*)\\\\x09(.*)\\\\x00"';
130
    $T_DOT_WORD	    = '\.(blockz|word|half|byte)';
rrt's avatar
rrt committed
131
    $T_DOT_GLOBAL   = '^\s+\.EXPORT';
132
133
134
135
136
    $T_HDR_literal  = "\t.SPACE \$TEXT\$\n\t.SUBSPA \$LIT\$\n";
    $T_HDR_misc	    = "\t.SPACE \$TEXT\$\n\t.SUBSPA \$CODE\$\n\t\.align 4\n";
    $T_HDR_data	    = "\t.SPACE \$PRIVATE\$\n\t.SUBSPA \$DATA\$\n\t\.align 4\n";
    $T_HDR_consist  = "\t.SPACE \$TEXT\$\n\t.SUBSPA \$LIT\$\n";
    $T_HDR_closure  = "\t.SPACE \$PRIVATE\$\n\t.SUBSPA \$DATA\$\n\t\.align 4\n";
137
    $T_HDR_srt      = "\t.SPACE \$PRIVATE\$\n\t.SUBSPA \$DATA\$\n\t\.align 4\n";
138
139
140
141
    $T_HDR_info	    = "\t.SPACE \$TEXT\$\n\t.SUBSPA \$CODE\$\n\t\.align 4\n";
    $T_HDR_entry    = "\t.SPACE \$TEXT\$\n\t.SUBSPA \$CODE\$\n\t\.align 4\n";
    $T_HDR_vector   = "\t.SPACE \$TEXT\$\n\t.SUBSPA \$CODE\$\n\t\.align 4\n";
    $T_HDR_direct   = "\t.SPACE \$TEXT\$\n\t.SUBSPA \$CODE\$\n\t\.align 4\n";
142
    $T_create_word  = "\t.word";
143
144

    #--------------------------------------------------------#
145
    } elsif ( $TargetPlatform =~ /^i386-.*-(linuxaout|freebsd2|openbsd|nextstep3|cygwin32|mingw32)$/ ) {
sof's avatar
sof committed
146
				# NeXT added but not tested. CaS
147
148
149
150
151
152

    $T_STABBY	    = 1; # 1 iff .stab things (usually if a.out format)
    $T_US	    = '_'; # _ if symbols have an underscore on the front
    $T_PRE_APP	    = '^#'; # regexp that says what comes before APP/NO_APP
    $T_CONST_LBL    = '^LC(\d+):$';
    $T_POST_LBL	    = ':';
153
154
    $T_X86_PRE_LLBL_PAT = 'L';
    $T_X86_PRE_LLBL	    = 'L';
155
156
    $T_X86_BADJMP   = '^\tjmp [^L\*]';

sof's avatar
sof committed
157
    $T_MOVE_DIRVS   = '^(\s*(\.(p2)?align\s+\d+(,0x90)?|\.globl\s+\S+|\.text|\.data|\.stab[^n].*|\.type\s+.*|\.size\s+.*)\n)';
158
159
    $T_COPY_DIRVS   = '\.(globl|stab)';
    $T_hsc_cc_PAT   = '\.ascii.*\)(hsc|cc) (.*)\\\\11"\n\t\.ascii\s+"(.*)\\\\0"';
160
    $T_DOT_WORD	    = '\.(long|word|value|byte|space)';
161
    $T_DOT_GLOBAL   = '\.globl';
rrt's avatar
rrt committed
162
    $T_HDR_literal  = "\.text\n\t\.align 2\n";
163
    $T_HDR_misc	    = "\.text\n\t\.align 2,0x90\n";
rrt's avatar
rrt committed
164
    $T_HDR_data	    = "\.data\n\t\.align 2\n";
165
    $T_HDR_consist  = "\.text\n";
rrt's avatar
rrt committed
166
    $T_HDR_closure  = "\.data\n\t\.align 2\n";
167
    $T_HDR_srt      = "\.text\n\t\.align 2\n";
168
    $T_HDR_info	    = "\.text\n\t\.align 2\n"; # NB: requires padding
169
    $T_HDR_entry    = "\.text\n"; # no .align so we're right next to _info (arguably wrong...?)
170
171
    $T_HDR_vector   = "\.text\n\t\.align 2\n"; # NB: requires padding
    $T_HDR_direct   = "\.text\n\t\.align 2,0x90\n";
172
    $T_create_word  = "\t.word";
173

174
    #--------------------------------------------------------#
175
    } elsif ( $TargetPlatform =~ /^i386-.*-(solaris2|linux|freebsd|netbsd)$/ ) {
176
177
178

    $T_STABBY	    = 0; # 1 iff .stab things (usually if a.out format)
    $T_US	    = ''; # _ if symbols have an underscore on the front
rrt's avatar
rrt committed
179
    $T_PRE_APP	    = # regexp that says what comes before APP/NO_APP
180
		      ($TargetPlatform =~ /-(linux|freebsd|netbsd)$/) ? '#' : '/' ;
181
    $T_CONST_LBL    = '^\.LC(\d+):$'; # regexp for what such a lbl looks like
182
    $T_POST_LBL	    = ':';
183
184
    $T_X86_PRE_LLBL_PAT = '\.L';
    $T_X86_PRE_LLBL	    = '.L';
185
    $T_X86_BADJMP   = '^\tjmp\s+[^\.\*]';
186

187
188
    $T_MOVE_DIRVS   = '^(\s*(\.(p2)?align\s+\d+(,\s*0x90)?|\.globl\s+\S+|\.text|\.data|\.section\s+.*|\.type\s+.*|\.size\s+\S+,\d+|\.ident.*)\n)';
    $T_COPY_DIRVS   = '^\s*\.(globl|type|size)';
189

190
    if ( $TargetPlatform =~ /freebsd|netbsd/ ) {
191
192
193
194
195
        $T_hsc_cc_PAT   = '\.ascii.*\)(hsc|cc) (.*)\\\\11"\n\t\.ascii\s+"(.*)\\\\0"';
    } else {
        $T_hsc_cc_PAT   = '\.string.*\)(hsc|cc) (.*)\\\\t(.*)"';
    }

196
    $T_DOT_WORD	    = '\.(long|value|word|byte|zero)';
197
198
    $T_DOT_GLOBAL   = '\.globl';
    $T_HDR_literal  = "\.section\t\.rodata\n"; # or just use .text??? (WDP 95/11)
199
    $T_HDR_misc	    = "\.text\n\t\.align 4\n";
200
201
202
    $T_HDR_data	    = "\.data\n\t\.align 4\n"; # ToDo: change align??
    $T_HDR_consist  = "\.text\n";
    $T_HDR_closure  = "\.data\n\t\.align 4\n"; # ToDo: change align?
203
204
    $T_HDR_srt      = "\.text\n\t\.align 4\n"; # ToDo: change align?
    $T_HDR_info	    = "\.text\n\t\.align 4\n"; # NB: requires padding
205
    $T_HDR_entry    = "\.text\n"; # no .align so we're right next to _info (arguably wrong...?)
206
207
    $T_HDR_vector   = "\.text\n\t\.align 4\n"; # NB: requires padding
    $T_HDR_direct   = "\.text\n\t\.align 4\n";
208
    $T_create_word  = "\t.word";
209

210
211
212
213
214
215
216
217
218
    #--------------------------------------------------------#
    } elsif ( $TargetPlatform =~ /^ia64-.*-linux$/ ) {

    $T_STABBY       = 0; # 1 iff .stab things (usually if a.out format)
    $T_US           = ''; # _ if symbols have an underscore on the front
    $T_PRE_APP      = '#';
    $T_CONST_LBL    = '^\.LC(\d+):$'; # regexp for what such a lbl looks like
    $T_POST_LBL     = ':';

219
    $T_MOVE_DIRVS   = '^(\s*\.(global|proc|pred\.safe_across_calls|text|data|section|subsection|align|size|type|ident)\s+.*\n)';
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
    $T_COPY_DIRVS   = '\.(global|proc)';

    $T_hsc_cc_PAT   = '\.string.*\)(hsc|cc) (.*)\\\\t(.*)"';
    $T_DOT_WORD     = '\.(long|value|byte|zero)';
    $T_DOT_GLOBAL   = '\.global';
    $T_HDR_literal  = "\.section\t\.rodata\n";
    $T_HDR_misc     = "\.text\n\t\.align 8\n";
    $T_HDR_data     = "\.data\n\t\.align 8\n";
    $T_HDR_consist  = "\.text\n";
    $T_HDR_closure  = "\.data\n\t\.align 8\n";
    $T_HDR_srt      = "\.text\n\t\.align 8\n";
    $T_HDR_info     = "\.text\n\t\.align 8\n";
    $T_HDR_entry    = "\.text\n\t\.align 16\n";
    $T_HDR_vector   = "\.text\n\t\.align 8\n";
    $T_HDR_direct   = "\.text\n\t\.align 8\n";
    $T_create_word  = "\t.word";

237
238
239
240
241
242
243
244
245
    #--------------------------------------------------------#
    } elsif ( $TargetPlatform =~ /^m68k-.*-sunos4/ ) {

    $T_STABBY	    = 1; # 1 iff .stab things (usually if a.out format)
    $T_US	    = '_'; # _ if symbols have an underscore on the front
    $T_PRE_APP	    = '^# MAY NOT APPLY'; # regexp that says what comes before APP/NO_APP
    $T_CONST_LBL    = '^LC(\d+):$';
    $T_POST_LBL	    = ':';

rrt's avatar
rrt committed
246
    $T_MOVE_DIRVS   = '^(\s*(\.align\s+\d+|\.proc\s+\d+|\.const|\.cstring|\.globl\s+\S+|\.text|\.data|\.even|\.stab[^n].*)\n)';
247
248
249
250
251
252
253
254
255
256
    $T_COPY_DIRVS   = '\.(globl|proc|stab)';
    $T_hsc_cc_PAT   = '\.ascii.*\)(hsc|cc) (.*)\\\\11"\n\t\.ascii\s+"(.*)\\\\0"';

    $T_DOT_WORD	    = '\.long';
    $T_DOT_GLOBAL   = '\.globl';
    $T_HDR_literal  = "\.text\n\t\.even\n";
    $T_HDR_misc	    = "\.text\n\t\.even\n";
    $T_HDR_data	    = "\.data\n\t\.even\n";
    $T_HDR_consist  = "\.text\n";
    $T_HDR_closure  = "\.data\n\t\.even\n";
257
    $T_HDR_srt      = "\.text\n\t\.even\n";
258
259
260
261
    $T_HDR_info	    = "\.text\n\t\.even\n";
    $T_HDR_entry    = "\.text\n\t\.even\n";
    $T_HDR_vector   = "\.text\n\t\.even\n";
    $T_HDR_direct   = "\.text\n\t\.even\n";
262
    $T_create_word  = "\t.long";
263
264
265
266
267
268
269
270
271
272

    #--------------------------------------------------------#
    } elsif ( $TargetPlatform =~ /^mips-.*/ ) {

    $T_STABBY	    = 0; # 1 iff .stab things (usually if a.out format)
    $T_US	    = ''; # _ if symbols have an underscore on the front
    $T_PRE_APP	    = '^\s*#'; # regexp that says what comes before APP/NO_APP
    $T_CONST_LBL    = '^\$LC(\d+):$'; # regexp for what such a lbl looks like
    $T_POST_LBL	    = ':';

rrt's avatar
rrt committed
273
    $T_MOVE_DIRVS   = '^(\s*(\.align\s+\d+|\.(globl|ent)\s+\S+|\.text|\.r?data)\n)';
274
275
276
277
    $T_COPY_DIRVS   = '\.(globl|ent)';

    $T_hsc_cc_PAT   = 'I WAS TOO LAZY TO DO THIS BIT (WDP 95/05)';
    $T_DOT_WORD	    = '\.word';
rrt's avatar
rrt committed
278
    $T_DOT_GLOBAL   = '^\t\.globl';
279
280
281
282
283
    $T_HDR_literal  = "\t\.rdata\n\t\.align 2\n";
    $T_HDR_misc	    = "\t\.text\n\t\.align 2\n";
    $T_HDR_data	    = "\t\.data\n\t\.align 2\n";
    $T_HDR_consist  = 'TOO LAZY TO DO THIS TOO';
    $T_HDR_closure  = "\t\.data\n\t\.align 2\n";
284
    $T_HDR_srt      = "\t\.text\n\t\.align 2\n";
285
286
287
288
    $T_HDR_info	    = "\t\.text\n\t\.align 2\n";
    $T_HDR_entry    = "\t\.text\n\t\.align 2\n";
    $T_HDR_vector   = "\t\.text\n\t\.align 2\n";
    $T_HDR_direct   = "\t\.text\n\t\.align 2\n";
289
    $T_create_word  = "\t.word";
290
291

    #--------------------------------------------------------#
292
293
    } elsif ( $TargetPlatform =~ /^powerpc-apple-.*/ ) {
				# Apple PowerPC Darwin/MacOS X.
294
    $T_STABBY	    = 0; # 1 iff .stab things (usually if a.out format)
295
296
297
    $T_US	    = '_'; # _ if symbols have an underscore on the front
    $T_PRE_APP	    = 'WHAT IS THIS'; # regexp that says what comes before APP/NO_APP
    $T_CONST_LBL    = '^\LC\d+:'; # regexp for what such a lbl looks like
298
299
    $T_POST_LBL	    = ':';

300
    $T_MOVE_DIRVS   = '^(\s*(\.align \d+|\.text|\.data|\.const_data|\.cstring|\.non_lazy_symbol_pointer|\.const|\.static_const|\.literal4|\.literal8|\.static_data|\.globl \S+)\n)';
301
    $T_COPY_DIRVS   = '\.(globl)';
302

sof's avatar
sof committed
303
    $T_hsc_cc_PAT   = '\.byte.*\)(hsc|cc) (.*)"\n\t\.byte \d+\n\t\.byte "(.*)"\n\t\.byte \d+';
304
    $T_DOT_WORD	    = '\.(long|short|byte|fill|space)';
305
    $T_DOT_GLOBAL   = '\.globl';
sof's avatar
sof committed
306
    $T_HDR_toc      = "\.toc\n";
307
308
309
310
311
312
313
314
315
316
    $T_HDR_literal  = "\t\.const_data\n\t\.align 2\n";
    $T_HDR_misc	    = "\t\.text\n\t\.align 2\n";
    $T_HDR_data	    = "\t\.data\n\t\.align 2\n";
    $T_HDR_consist  = "\t\.text\n\t\.align 2\n";
    $T_HDR_closure  = "\t\.const_data\n\t\.align 2\n";
    $T_HDR_srt      = "\t\.text\n\t\.align 2\n";
    $T_HDR_info	    = "\t\.text\n\t\.align 2\n";
    $T_HDR_entry    = "\t\.text\n\t\.align 2\n";
    $T_HDR_vector   = "\t\.text\n\t\.align 2\n";
    $T_HDR_direct   = "\t\.text\n\t\.align 2\n";
317
    $T_create_word  = "\t.long";
318
319
320
321
322
323
324
325
326
327
328
329
330
331

    #--------------------------------------------------------#
    } elsif ( $TargetPlatform =~ /^sparc-.*-solaris2/ ) {

    $T_STABBY	    = 0; # 1 iff .stab things (usually if a.out format)
    $T_US	    = ''; # _ if symbols have an underscore on the front
    $T_PRE_APP	    = 'DOES NOT SEEM TO APPLY'; # regexp that says what comes before APP/NO_APP
    $T_CONST_LBL    = '^\.LLC(\d+):$'; # regexp for what such a lbl looks like
    $T_POST_LBL	    = ':';

    $T_MOVE_DIRVS   = '^((\s+\.align\s+\d+|\s+\.proc\s+\d+|\s+\.global\s+\S+|\.text|\.data|\.stab.*|\.section.*|\s+\.type.*|\s+\.size.*)\n)';
    $T_COPY_DIRVS   = '\.(global|proc|stab)';

    $T_hsc_cc_PAT   = '\.asciz.*\)(hsc|cc) (.*)\\\\t(.*)"';
332
    $T_DOT_WORD	    = '\.(long|word|byte|half|skip|uahalf|uaword)';
rrt's avatar
rrt committed
333
    $T_DOT_GLOBAL   = '^\t\.global';
334
335
336
337
338
    $T_HDR_literal  = "\.text\n\t\.align 8\n";
    $T_HDR_misc	    = "\.text\n\t\.align 4\n";
    $T_HDR_data	    = "\.data\n\t\.align 8\n";
    $T_HDR_consist  = "\.text\n";
    $T_HDR_closure  = "\.data\n\t\.align 4\n";
339
    $T_HDR_srt      = "\.data\n\t\.align 4\n";
340
341
342
343
    $T_HDR_info	    = "\.text\n\t\.align 4\n";
    $T_HDR_entry    = "\.text\n\t\.align 4\n";
    $T_HDR_vector   = "\.text\n\t\.align 4\n";
    $T_HDR_direct   = "\.text\n\t\.align 4\n";
344
    $T_create_word  = "\t.word";
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365

    #--------------------------------------------------------#
    } elsif ( $TargetPlatform =~ /^sparc-.*-sunos4/ ) {

    $T_STABBY	    = 1; # 1 iff .stab things (usually if a.out format)
    $T_US	    = '_'; # _ if symbols have an underscore on the front
    $T_PRE_APP	    = '^# DOES NOT SEEM TO APPLY'; # regexp that says what comes before APP/NO_APP
    $T_CONST_LBL    = '^LC(\d+):$';
    $T_POST_LBL	    = ':';

    $T_MOVE_DIRVS   = '^((\s+\.align\s+\d+|\s+\.proc\s+\d+|\s+\.global\s+\S+|\.text|\.data|\.stab.*)\n)';
    $T_COPY_DIRVS   = '\.(global|proc|stab)';
    $T_hsc_cc_PAT   = '\.ascii.*\)(hsc|cc) (.*)\\\\11"\n\t\.ascii\s+"(.*)\\\\0"';

    $T_DOT_WORD	    = '\.word';
    $T_DOT_GLOBAL   = '^\t\.global';
    $T_HDR_literal  = "\.text\n\t\.align 8\n";
    $T_HDR_misc	    = "\.text\n\t\.align 4\n";
    $T_HDR_data	    = "\.data\n\t\.align 8\n";
    $T_HDR_consist  = "\.text\n";
    $T_HDR_closure  = "\.data\n\t\.align 4\n";
366
    $T_HDR_srt      = "\.data\n\t\.align 4\n";
367
368
369
370
    $T_HDR_info	    = "\.text\n\t\.align 4\n";
    $T_HDR_entry    = "\.text\n\t\.align 4\n";
    $T_HDR_vector   = "\.text\n\t\.align 4\n";
    $T_HDR_direct   = "\.text\n\t\.align 4\n";
371
    $T_create_word  = "\t.word";
372
373
374
375
376

    #--------------------------------------------------------#
    } else {
	print STDERR "$Pgm: don't know how to mangle assembly language for: $TargetPlatform\n";
	exit 1;
377
378
379
380
381
382
383
384
    }

if ( 0 ) {
print STDERR "T_STABBY: $T_STABBY\n";
print STDERR "T_US: $T_US\n";
print STDERR "T_PRE_APP: $T_PRE_APP\n";
print STDERR "T_CONST_LBL: $T_CONST_LBL\n";
print STDERR "T_POST_LBL: $T_POST_LBL\n";
385
386
387
388
389
if ( $TargetPlatform =~ /^i386-/ ) {
    print STDERR "T_X86_PRE_LLBL_PAT: $T_X86_PRE_LLBL_PAT\n";
    print STDERR "T_X86_PRE_LLBL: $T_X86_PRE_LLBL\n";
    print STDERR "T_X86_BADJMP: $T_X86_BADJMP\n";
}
390
391
392
393
print STDERR "T_MOVE_DIRVS: $T_MOVE_DIRVS\n";
print STDERR "T_COPY_DIRVS: $T_COPY_DIRVS\n";
print STDERR "T_hsc_cc_PAT: $T_hsc_cc_PAT\n";
print STDERR "T_DOT_WORD: $T_DOT_WORD\n";
394
print STDERR "T_HDR_literal: $T_HDR_literal\n";
395
396
397
398
399
400
401
402
403
404
405
406
407
print STDERR "T_HDR_misc: $T_HDR_misc\n";
print STDERR "T_HDR_data: $T_HDR_data\n";
print STDERR "T_HDR_consist: $T_HDR_consist\n";
print STDERR "T_HDR_closure: $T_HDR_closure\n";
print STDERR "T_HDR_info: $T_HDR_info\n";
print STDERR "T_HDR_entry: $T_HDR_entry\n";
print STDERR "T_HDR_vector: $T_HDR_vector\n";
print STDERR "T_HDR_direct: $T_HDR_direct\n";
}

}
\end{code}

408
409
410
411
412
413
%************************************************************************
%*									*
\subsection{Mangle away}
%*									*
%************************************************************************

414
415
416
417
418
419
420
\begin{code}
sub mangle_asm {
    local($in_asmf, $out_asmf) = @_;

    # multi-line regexp matching:
    local($*) = 1;
    local($i, $c);
sof's avatar
sof committed
421
422


423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
    &init_TARGET_STUFF();
    &init_FUNNY_THINGS();

    open(INASM, "< $in_asmf")
	|| &tidy_up_and_die(1,"$Pgm: failed to open `$in_asmf' (to read)\n");
    open(OUTASM,"> $out_asmf")
	|| &tidy_up_and_die(1,"$Pgm: failed to open `$out_asmf' (to write)\n");

    # read whole file, divide into "chunks":
    #	record some info about what we've found...

    @chk = ();		# contents of the chunk
    $numchks = 0;	# number of them
    @chkcat = ();	# what category of thing in each chunk
    @chksymb = ();	# what symbol(base) is defined in this chunk
438
    %entrychk = ();	# ditto, its entry code
439
    %closurechk = ();	# ditto, the (static) closure
440
    %srtchk = ();	# ditto, its SRT (for top-level things)
441
442
    %infochk = (); 	# given a symbol base, say what chunk its info tbl is in
    %vectorchk = ();    # ditto, return vector table
443
    $EXTERN_DECLS = '';	# .globl <foo> .text (MIPS only)
444

445
    $i = 0; $chkcat[0] = 'misc'; $chk[0] = '';
446
447

    while (<INASM>) {
rrt's avatar
rrt committed
448
	tr/\r//d if $TargetPlatform =~ /-mingw32$/; # In case Perl doesn't convert line endings
449
	next if $T_STABBY && /^\.stab.*${T_US}__stg_split_marker/o;
450
	next if $T_STABBY && /^\.stab.*ghc.*c_ID/;
451
	next if /^\t\.def.*endef$/;
452
	next if /${T_PRE_APP}(NO_)?APP/o; 
453
454
    	next if /^;/ && $TargetPlatform =~ /^hppa/;

455
	next if /(^$|^\t\.file\t|^ # )/ && $TargetPlatform =~ /^(mips|ia64)-/;
456
457

	if ( $TargetPlatform =~ /^mips-/ 
ken's avatar
ken committed
458
	  && /^\t\.(globl\S+\.text|comm\t)/ ) {
459
460
	    $EXTERN_DECLS .= $_ unless /(__DISCARD__|\b(PK_|ASSIGN_)(FLT|DBL)\b)/;
  
sof's avatar
sof committed
461
462
463
464
465
466
467
468
469
	# As a temporary solution for compiling "foreign export" declarations,
	# we use global variables to pass arguments from C to STG land.
	# These declarations live in the .hc file and not in the generated C
	# stub file, so we let them pass through here.
	} elsif ( /^\t\.comm\t__fexp_.*$/ ) {
	    $chk[++$i]   = $_;
	    $chkcat[$i]  = 'data';
	    $chksymb[$i] = '';

470
	} elsif ( /^\s+/ ) { # most common case first -- a simple line!
471
472
473
474
	    # duplicated from the bottom

	    $chk[$i] .= $_;

475
476
477
478
479
480
481
482
483
484
	} elsif ( /\.\.ng:$/ && $TargetPlatform =~ /^alpha-/ ) {
	    # Alphas: Local labels not to be confused with new chunks
	    $chk[$i] .= $_;
  
	# NB: all the rest start with a non-space

	} elsif ( $TargetPlatform =~ /^mips-/
	       && /^\d+:/ ) { # a funny-looking very-local label
	    $chk[$i] .= $_;

485
	} elsif ( /$T_CONST_LBL/o ) {
486
487
	    $chk[++$i]   = $_;
	    $chkcat[$i]  = 'literal';
488
489
	    $chksymb[$i] = $1;

490
	} elsif ( /^${T_US}__stg_split_marker(\d*)${T_POST_LBL}$/o ) {
491
492
	    $chk[++$i]   = $_;
	    $chkcat[$i]  = 'splitmarker';
493
494
	    $chksymb[$i] = $1;

495
	} elsif ( /^${T_US}([A-Za-z0-9_]+)_info${T_POST_LBL}$/o ) {
496
	    $symb = $1;
497
498
	    $chk[++$i]   = $_;
	    $chkcat[$i]  = 'infotbl';
499
500
501
502
503
504
	    $chksymb[$i] = $symb;

	    die "Info table already? $symb; $i\n" if defined($infochk{$symb});

	    $infochk{$symb} = $i;

505
	} elsif ( /^${T_US}([A-Za-z0-9_]+)_(entry|ret)${T_POST_LBL}$/o ) {
506
	    $chk[++$i]   = $_;
507
	    $chkcat[$i]  = 'entry';
508
509
	    $chksymb[$i] = $1;

510
	    $entrychk{$1} = $i;
511

512
	} elsif ( /^${T_US}([A-Za-z0-9_]+)_closure${T_POST_LBL}$/o ) {
513
514
	    $chk[++$i]   = $_;
	    $chkcat[$i]  = 'closure';
515
516
517
518
	    $chksymb[$i] = $1;

	    $closurechk{$1} = $i;

519
	} elsif ( /^${T_US}([A-Za-z0-9_]+)_srt${T_POST_LBL}$/o ) {
520
521
522
523
524
525
	    $chk[++$i]   = $_;
	    $chkcat[$i]  = 'srt';
	    $chksymb[$i] = $1;

	    $srtchk{$1} = $i;

526
	} elsif ( /^${T_US}([A-Za-z0-9_]+)_ct${T_POST_LBL}$/o ) {
527
528
529
530
	    $chk[++$i]   = $_;
	    $chkcat[$i]  = 'data';
	    $chksymb[$i] = '';

531
	} elsif ( /^(${T_US}__gnu_compiled_c|gcc2_compiled\.)${T_POST_LBL}/o ) {
532
533
	    ; # toss it

534
535
536
537
	} elsif ( /^${T_US}[A-Za-z0-9_]+\.\d+${T_POST_LBL}$/o
	       || /^${T_US}.*_CAT${T_POST_LBL}$/o 		# PROF: _entryname_CAT
	       || /^${T_US}.*_done${T_POST_LBL}$/o		# PROF: _module_done
	       || /^${T_US}_module_registered${T_POST_LBL}$/o	# PROF: _module_registered
538
	       ) {
539
540
	    $chk[++$i]   = $_;
	    $chkcat[$i]  = 'data';
541
542
	    $chksymb[$i] = '';

543
544
545
	} elsif ( /^([A-Za-z0-9_]+)\s+\.comm/ && $TargetPlatform =~ /^hppa/ ) {
	    $chk[++$i]   = $_;
	    $chkcat[$i]  = 'bss';
sof's avatar
sof committed
546
547
  	    $chksymb[$i] = '';

548
	} elsif ( /^${T_US}([A-Za-z0-9_]+)_cc(s)?${T_POST_LBL}$/o ) {
sof's avatar
sof committed
549
550
551
552
553
            # all CC_ symbols go in the data section...
	    $chk[++$i]   = $_;
	    $chkcat[$i]  = 'data';
	    $chksymb[$i] = '';

554
	} elsif ( /^${T_US}([A-Za-z0-9_]+)_(alt|dflt)${T_POST_LBL}$/o ) {
555
556
	    $chk[++$i]   = $_;
	    $chkcat[$i]  = 'misc';
557
	    $chksymb[$i] = '';
558
	} elsif ( /^${T_US}([A-Za-z0-9_]+)_vtbl${T_POST_LBL}$/o ) {
559
560
	    $chk[++$i]   = $_;
	    $chkcat[$i]  = 'vector';
561
562
563
564
	    $chksymb[$i] = $1;

	    $vectorchk{$1} = $i;

sof's avatar
sof committed
565
566
567
568
569
570
571
572
573
	# As a temporary solution for compiling "foreign export" declarations,
	# we use global variables to pass arguments from C to STG land.
	# These declarations live in the .hc file and not in the generated C
	# stub file, so we let them pass through here.
	} elsif ( /^[\t ]+\.comm[\t ]+__fexp_.*$/ ) {
	    $chk[++$i]   = $_;
	    $chkcat[$i]  = 'data';
	    $chksymb[$i] = '';

574
	} elsif ( $TargetPlatform =~ /^i386-.*-solaris2/
chak's avatar
chak committed
575
576
577
578
579
580
581
582
	     &&   /^[A-Za-z0-9][A-Za-z0-9_]*:/ ) {
            # Some Solaris system headers contain function definitions (as
	    # opposed to mere prototypes), which end up in the .hc file when
	    # a Haskell module foreign imports the corresponding system 
	    # functions (most notably stat()).  We put them into the text 
            # segment.  Note that this currently does not extend to function
	    # names starting with an underscore. 
	    # - chak 7/2001
583
	    $chk[++$i]   = $_;
chak's avatar
chak committed
584
	    $chkcat[$i]  = 'misc';
585
586
	    $chksymb[$i] = $1;

587
	} elsif ( /^${T_US}[A-Za-z0-9_]/o
588
		&& ( $TargetPlatform !~ /^hppa/ # need to avoid local labels in this case
589
		   || ! /^L\$\d+$/ ) ) {
590
591
	    local($thing);
	    chop($thing = $_);
592
593
	    $thing =~ s/:$//;
	    print STDERR "Warning: retaining unknown function \`$thing' in output from C compiler\n"
sof's avatar
sof committed
594
		unless # $KNOWN_FUNNY_THING{$thing}
595
		       /^${T_US}stg_.*${T_POST_LBL}$/o          # RTS internals
sof's avatar
sof committed
596
		    || /^${T_US}__stg_.*${T_POST_LBL}$/o        # more RTS internals
597
		    || /^${T_US}__fexp_.*${T_POST_LBL}$/o       # foreign export
598
		    || /^${T_US}.*_slow${T_POST_LBL}$/o         # slow entry
ken's avatar
ken committed
599
		    || /^${T_US}__stginit.*${T_POST_LBL}$/o     # __stginit<module>
600
		    || /^${T_US}.*_btm${T_POST_LBL}$/o          # large bitmaps
601
		    || /^${T_US}.*_fast${T_POST_LBL}$/o         # primops
chak's avatar
chak committed
602
603
		    || /^${T_US}.*_closure_tbl${T_POST_LBL}$/o  # closure tables
                    || /^_uname:/o;			        # x86/Solaris2
604
605
	    $chk[++$i]   = $_;
	    $chkcat[$i]  = 'misc';
606
	    $chksymb[$i] = '';
607

608
	} elsif ( $TargetPlatform =~ /^powerpc-apple-.*/ && /^\.picsymbol_stub/ )
609
610
611
612
613
	{
	    $chk[++$i]   = $_;
	    $chkcat[$i]  = 'dyld';
	    $chksymb[$i] = '';
	} elsif ( $TargetPlatform =~ /^powerpc-apple-.*/ && /^\.symbol_stub/ )
614
615
616
617
618
619
620
621
622
623
624
625
626
627
	{
	    $chk[++$i]   = $_;
	    $chkcat[$i]  = 'dyld';
	    $chksymb[$i] = '';
	} elsif ( $TargetPlatform =~ /^powerpc-apple-.*/ && /^\.lazy_symbol_pointer/ )
	{
	    $chk[++$i]   = $_;
	    $chkcat[$i]  = 'dyld';
	    $chksymb[$i] = '';
	} elsif ( $TargetPlatform =~ /^powerpc-apple-.*/ && /^\.non_lazy_symbol_pointer/ )
	{
	    $chk[++$i]   = $_;
	    $chkcat[$i]  = 'dyld';
	    $chksymb[$i] = '';
628
	} elsif ( $TargetPlatform =~ /^powerpc-apple-.*/ && /^\.data/ && $chkcat[$i] eq 'dyld')
629
630
631
632
	{	# non_lazy_symbol_ptrs that point to local symbols
	    $chk[++$i]   = $_;
	    $chkcat[$i]  = 'dyld';
	    $chksymb[$i] = '';
633
634
635
636
637
638
639
	} else { # simple line (duplicated at the top)

	    $chk[$i] .= $_;
	}
    }
    $numchks = $#chk + 1;

640
641
642
    # open CHUNKS, ">/tmp/chunks1" or die "Cannot open /tmp/chunks1: $!\n";
    # for (my $i = 0; $i < @chk; ++$i) { print CHUNKS "======= $i =======\n", $chk[$i] }
    # close CHUNKS;
ken's avatar
ken committed
643

644
645
646
647
648
649
650
651
    # the division into chunks is imperfect;
    # we throw some things over the fence into the next
    # chunk.
    #
    # also, there are things we would like to know
    # about the whole module before we start spitting
    # output.

sof's avatar
sof committed
652
    local($FIRST_MANGLABLE) = ($TargetPlatform =~ /^(alpha-|hppa|mips-)/) ? 1 : 0;
ken's avatar
ken committed
653
    local($FIRST_TOSSABLE ) = ($TargetPlatform =~ /^(hppa|mips-)/) ? 1 : 0;
654
655
656
657
658

#   print STDERR "first chunk to mangle: $FIRST_MANGLABLE\n";

    # Alphas: NB: we start meddling at chunk 1, not chunk 0
    # The first ".rdata" is quite magical; as of GCC 2.7.x, it
ken's avatar
ken committed
659
    # spits a ".quad 0" in after the very first ".rdata"; we
660
661
662
663
664
    # detect this special case (tossing the ".quad 0")!
    local($magic_rdata_seen) = 0;
  
    # HPPAs, MIPSen: also start medding at chunk 1

ken's avatar
ken committed
665
    for ($i = $FIRST_TOSSABLE; $i < $numchks; $i++) {
666
667
668
669
	$c = $chk[$i]; # convenience copy

#	print STDERR "\nCHK $i (BEFORE) (",$chkcat[$i],"):\n", $c;

670
671
	# toss all prologue stuff; HPPA is pretty weird
	# (see elsewhere)
672
673
	$c = &hppa_mash_prologue($c) if $TargetPlatform =~ /^hppa-/;

674
675
676
677
	# be slightly paranoid to make sure there's
	# nothing surprising in there
	if ( $c =~ /--- BEGIN ---/ ) {
	    if (($p, $r) = split(/--- BEGIN ---/, $c)) {
678

679
680
681
682
		# remove junk whitespace around the split point
		$p =~ s/\t+$//;
		$r =~ s/^\s*\n//;

683
		if ($TargetPlatform =~ /^i386-/) {
684
685
686
		    $p =~ s/^\tpushl\s+\%edi\n//;
		    $p =~ s/^\tpushl\s+\%esi\n//;
		    $p =~ s/^\tpushl\s+\%ebx\n//;
687
688
		    $p =~ s/^\tmovl\s+\%esi,\s*\d*\(\%esp\)\n//;
		    $p =~ s/^\tmovl\s+\%edi,\s*\d*\(\%esp\)\n//;
689
690
		    $p =~ s/^\tsubl\s+\$\d+,\s*\%esp\n//;
                    $p =~ s/^\tmovl\s+\$\d+,\s*\%eax\n\tcall\s+__alloca\n// if ($TargetPlatform =~ /^.*-cygwin32/);
691
692
693
694

		    # GCC 3.1 is in the habit of adding spurious writes to the
		    # stack in the prologue.  Just to be on the safe side,
		    # chuck these over the fence into the main code.
695
		    while ($p =~ /^\tmovl\s+\$\d+,\s*\d*\(\%esp\)\n/) {
696
697
698
699
700
			  # print "Spurious instruction: $&";
			  $p = $` . $';
			  $r = $& . $r;
		    }

701
702
		} elsif ($TargetPlatform =~ /^ia64-/) {
		    $p =~ s/^\t\.prologue .*\n//;
703
		    $p =~ s/^\t\.save ar\.pfs, r\d+\n\talloc r\d+ = ar\.pfs, 0, 31, \d+, 0\n//;
704
705
706
707
708
		    $p =~ s/^\t\.fframe \d+\n\tadds r12 = -\d+, r12\n//;
		    $p =~ s/^\t\.save rp, r\d+\n\tmov r\d+ = b0\n//;
		    $p =~ s/^\t\.(mii|mmi)\n//;	# bundling is no longer sensible
		    $p =~ s/^\t;;\n//g;		# discard stops
		    $p =~ s/^\t\/\/.*\n//g;	# gcc inserts timings in // comments
709
710
		} elsif ($TargetPlatform =~ /^m68k-/) {
		    $p =~ s/^\tlink a6,#-?\d.*\n//;
sof's avatar
sof committed
711
712
713
714
715
 		    $p =~ s/^\tpea a6@\n\tmovel sp,a6\n//;    
 				# The above showed up in the asm code,
 				# so I added it here.
 				# I hope it's correct.
 				# CaS
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
		    $p =~ s/^\tmovel d2,sp\@-\n//;
		    $p =~ s/^\tmovel d5,sp\@-\n//; # SMmark.* only?
		    $p =~ s/^\tmoveml \#0x[0-9a-f]+,sp\@-\n//; # SMmark.* only?
		} elsif ($TargetPlatform =~ /^mips-/) {
		    # the .frame/.mask/.fmask that we use is the same
		    # as that produced by GCC for miniInterpret; this
		    # gives GDB some chance of figuring out what happened
		    $FRAME = "\t.frame\t\$sp,2168,\$31\n\t.mask\t0x90000000,-4\n\t.fmask\t0x00000000,0\n";
		    $p =~ s/^\t\.(frame).*\n/__FRAME__/g;
		    $p =~ s/^\t\.(mask|fmask).*\n//g;
		    $p =~ s/^\t\.cprestore.*\n/\t\.cprestore 416\n/; # 16 + 100 4-byte args
		    $p =~ s/^\tsubu\t\$sp,\$sp,\d+\n//;
		    $p =~ s/^\tsw\t\$31,\d+\(\$sp\)\n//;
		    $p =~ s/^\tsw\t\$fp,\d+\(\$sp\)\n//;
		    $p =~ s/^\tsw\t\$28,\d+\(\$sp\)\n//;
		    $p =~ s/__FRAME__/$FRAME/;
732
		} elsif ($TargetPlatform =~ /^powerpc-apple-.*/) {
733
734
735
736
		    $pcrel_label = $p;
		    $pcrel_label =~ s/(.|\n)*^(L\d+\$pb):\n(.|\n)*/$2/ or $pcrel_label = "";

		    $p =~ s/^\tmflr r0\n//;
737
 		    $p =~ s/^\tbl saveFP # f\d+\n//;
738
739
 		    $p =~ s/^\tbl saveFP ; save f\d+-f\d+\n//;
		    $p =~ s/^L\d+\$pb:\n//;
740
741
742
743
744
 		    $p =~ s/^\tstmw r\d+,-\d+\(r1\)\n//;
   		    $p =~ s/^\tstfd f\d+,-\d+\(r1\)\n//g;
 		    $p =~ s/^\tstw r0,\d+\(r1\)\n//g;
  		    $p =~ s/^\tstwu r1,-\d+\(r1\)\n//; 
  		    $p =~ s/^\tstw r\d+,-\d+\(r1\)\n//g; 
745
746
747
  		    $p =~ s/^\tbcl 20,31,L\d+\$pb\n//;
  		    $p =~ s/^L\d+\$pb:\n//;
  		    $p =~ s/^\tmflr r31\n//;
748
749
750
751
752
753

		    # This is bad: GCC 3 seems to zero-fill some local variables in the prologue
		    # under some circumstances, only when generating position dependent code.
		    # I have no idea why, and I don't think it is necessary, so let's toss it.
		    $p =~ s/^\tli r\d+,0\n//g;
		    $p =~ s/^\tstw r\d+,\d+\(r1\)\n//g;
754
755
756
		} else {
		    print STDERR "$Pgm: unknown prologue mangling? $TargetPlatform\n";
		}
757
758
759
		
		# HWL HACK: dont die, just print a warning
		#print stderr  "HWL: this should die! Prologue junk?: $p\n" if $p =~ /^\t[^\.]/;
760
		die "Prologue junk?: $p\n" if $p =~ /^\s+[^\s\.]/;
761
762
763
764
765
766
		
		if ($TargetPlatform =~ /^powerpc-apple-.*/ && $pcrel_label ne "") {
		    # on PowerPC, we have to keep a part of the prologue
		    # (which loads the current instruction pointer into register r31)
		    $p .= "bcl 20,31,$pcrel_label\n";
		    $p .= "$pcrel_label:\n";
767
		    $p .= "\tmflr r31\n";
768
		}
769
		
770
771
772
773
774
		# glue together what's left
		$c = $p . $r;
	    }
	}

rrt's avatar
rrt committed
775
776
777
778
779
	if ( $TargetPlatform =~ /^mips-/ ) {
	    # MIPS: first, this basic sequence may occur "--- END ---" or not
	    $c =~ s/^\tlw\t\$31,\d+\(\$sp\)\n\taddu\t\$sp,\$sp,\d+\n\tj\t\$31\n\t\.end/\t\.end/;
	}

780
781
782
	# toss all epilogue stuff; again, paranoidly
	if ( $c =~ /--- END ---/ ) {
	    if (($r, $e) = split(/--- END ---/, $c)) {
783
784
		if ($TargetPlatform =~ /^i386-/) {
		    $e =~ s/^\tret\n//;
785
786
787
788
789
790
		    $e =~ s/^\tpopl\s+\%edi\n//;
		    $e =~ s/^\tpopl\s+\%esi\n//;
		    $e =~ s/^\tpopl\s+\%edx\n//;
		    $e =~ s/^\tpopl\s+\%ecx\n//;
		    $e =~ s/^\taddl\s+\$\d+,\s*\%esp\n//;
		    $e =~ s/^\tsubl\s+\$-\d+,\s*\%esp\n//;
791
792
793
794
795
796
797
798
		} elsif ($TargetPlatform =~ /^ia64-/) {
		    $e =~ s/^\tmov ar\.pfs = r\d+\n//;
		    $e =~ s/^\tmov b0 = r\d+\n//;
		    $e =~ s/^\t\.restore sp\n\tadds r12 = \d+, r12\n//;
		    $e =~ s/^\tbr\.ret\.sptk\.many b0\n//;
		    $e =~ s/^\t\.(mii|mmi|mib)\n//g;	# bundling is no longer sensible
		    $e =~ s/^\t;;\n//g;			# discard stops - stop at end of body is sufficient
		    $e =~ s/^\t\/\/.*\n//g;		# gcc inserts timings in // comments
799
800
801
802
803
804
805
806
		} elsif ($TargetPlatform =~ /^m68k-/) {
		    $e =~ s/^\tunlk a6\n//;
		    $e =~ s/^\trts\n//;
		} elsif ($TargetPlatform =~ /^mips-/) {
		    $e =~ s/^\tlw\t\$31,\d+\(\$sp\)\n//;
		    $e =~ s/^\tlw\t\$fp,\d+\(\$sp\)\n//;
		    $e =~ s/^\taddu\t\$sp,\$sp,\d+\n//;
		    $e =~ s/^\tj\t\$31\n//;
807
808
809
810
811
812
 		} elsif ($TargetPlatform =~ /^powerpc-apple-.*/) {
  		    $e =~ s/^\taddi r1,r1,\d+\n//;
  		    $e =~ s/^\tcal r1,\d+\(r1\)\n//;
  		    $e =~ s/^\tlw?z? r\d+,\d+\(r1\)\n//; 
 		    $e =~ s/^\tmtlr r0\n//;
  		    $e =~ s/^\tblr\n//;
813
814
815
		} else {
		    print STDERR "$Pgm: unknown epilogue mangling? $TargetPlatform\n";
		}
816

817
		print STDERR "WARNING: Epilogue junk?: $e\n" if $e =~ /^\t\s*[^\.\s\n]/;
818
819
820

		# glue together what's left
		$c = $r . $e;
821
		$c =~ s/\n\t\n/\n/; # junk blank line
822
823
824
	    }
	}

825
826
827
828
	# On SPARCs, we don't do --- BEGIN/END ---, we just
	# toss the register-windowing save/restore/ret* instructions
	# directly:
	if ( $TargetPlatform =~ /^sparc-/ ) {
829
	    $c =~ s/^\t(save.*|restore.*|ret|retl)\n//g;
830
831
832
833
834
835
	    # throw away PROLOGUE comments
	    $c =~ s/^\t!#PROLOGUE# 0\n\t!#PROLOGUE# 1\n//;
	}

	# On Alphas, the prologue mangling is done a little later (below)

836
    	# toss all calls to __DISCARD__
837
	$c =~ s/^\t(call|jbsr|jal)\s+${T_US}__DISCARD__\n//go;
ken's avatar
ken committed
838
	$c =~ s/^\tjsr\s+\$26\s*,\s*${T_US}__DISCARD__\n//go if $TargetPlatform =~ /^alpha-/;
839
	$c =~ s/^\tbl\s+L___DISCARD__\$stub\n//go if $TargetPlatform =~ /^powerpc-apple-.*/;
840

841
842
843
844
	# IA64: mangle tailcalls into jumps here
	$c =~ s/^\tbr\.call\.sptk\.many b0 = (.*)\n\t;;\n(\tmov r1 = r\d+\n)?\t;;\n\t--- TAILCALL ---\n/\tbr\.few $1\n/g
		if $TargetPlatform =~ /^ia64-/;

845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
	# MIPS: that may leave some gratuitous asm macros around
	# (no harm done; but we get rid of them to be tidier)
	$c =~ s/^\t\.set\tnoreorder\n\t\.set\tnomacro\n\taddu\t(\S+)\n\t\.set\tmacro\n\t\.set\treorder\n/\taddu\t$1\n/
	    if $TargetPlatform =~ /^mips-/;

    	# toss stack adjustment after DoSparks
    	$c =~ s/^(\tjbsr _DoSparks\n)\taddqw #8,sp/$1/g
		if $TargetPlatform =~ /^m68k-/; # this looks old...

	if ( $TargetPlatform =~ /^alpha-/ &&
	   ! $magic_rdata_seen &&
	   $c =~ /^\s*\.rdata\n\t\.quad 0\n\t\.align \d\n/ ) {
	    $c =~ s/^\s*\.rdata\n\t\.quad 0\n\t\.align (\d)\n/\.rdata\n\t\.align $1\n/;
	    $magic_rdata_seen = 1;
	}

	# pick some end-things and move them to the next chunk
862
863
864
865

	# pin a funny end-thing on (for easier matching):
	$c .= 'FUNNY#END#THING';

866
	while ( $c =~ /${T_MOVE_DIRVS}FUNNY#END#THING/o ) {
sof's avatar
sof committed
867

rrt's avatar
rrt committed
868
	    $to_move = $1;
869
870
871
872
873
874
875
876
877
878
879

	    # on x86 we try not to copy any directives into a literal
	    # chunk, rather we keep looking for the next real chunk.  This
	    # is because we get things like
	    #
	    #    .globl blah_closure
	    #    .LC32
	    #    	.string "..."
	    #    blah_closure:
	    #		...
            #
880
	    if ( $TargetPlatform =~ /^(i386|sparc|powerpc)/ && $to_move =~ /${T_COPY_DIRVS}/ ) {
881
882
883
884
885
886
887
888
889
890
		$j = $i + 1;
		while ( $j < $numchks  && $chk[$j] =~ /$T_CONST_LBL/) {
			$j++;
		}
		if ( $j < $numchks ) {
			$chk[$j] = $to_move . $chk[$j];
	        }
	    }

	    elsif ( $i < ($numchks - 1)
891
	      && ( $to_move =~ /${T_COPY_DIRVS}/
892
	        || ($TargetPlatform =~ /^hppa/ && $to_move =~ /align/ && $chkcat[$i+1] eq 'literal') )) {
893
894
895
896
		$chk[$i + 1] = $to_move . $chk[$i + 1];
		# otherwise they're tossed
	    }

897
	    $c =~ s/${T_MOVE_DIRVS}FUNNY#END#THING/FUNNY#END#THING/o;
898
899
	}

900
901
902
    	if ( $TargetPlatform =~ /^alpha-/ && $c =~ /^\t\.ent\s+(\S+)/ ) {
    	    $ent = $1;
	    # toss all prologue stuff, except for loading gp, and the ..ng address
ken's avatar
ken committed
903
904
905
906
907
908
909
910
911
912
913
914
	    unless ($c =~ /\.ent.*\n\$.*\.\.ng:/) {
		if (($p, $r) = split(/^\t\.prologue/, $c)) {
		    if (($keep, $junk) = split(/\.\.ng:/, $p)) {
			$keep =~ s/^\t\.frame.*\n/\t.frame \$30,0,\$26,0\n/;
			$keep =~ s/^\t\.(mask|fmask).*\n//g;
			$c = $keep . "..ng:\n";
		    } else {
			print STDERR "malformed code block ($ent)?\n"
		    }
		}
		$c .= "\t.prologue" . $r;
	    }
915
916
    	}
  
917
918
919
920
921
922
923
	$c =~ s/FUNNY#END#THING//;

#	print STDERR "\nCHK $i (AFTER) (",$chkcat[$i],"):\n", $c;

	$chk[$i] = $c; # update w/ convenience copy
    }

ken's avatar
ken committed
924
925
926
927
    # open CHUNKS, ">/tmp/chunks2" or die "Cannot open /tmp/chunks2: $!\n";
    # for (my $i = 0; $i < @chk; ++$i) { print CHUNKS "======= $i =======\n", $chk[$i] }
    # close CHUNKS;

928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
    if ( $TargetPlatform =~ /^alpha-/ ) {
	# print out the header stuff first
	$chk[0] =~ s/^(\t\.file.*)"(ghc\d+\.c)"/$1"$ifile_root.hc"/;
	print OUTASM $chk[0];

    } elsif ( $TargetPlatform =~ /^hppa/ ) {
	print OUTASM $chk[0];

    } elsif ( $TargetPlatform =~ /^mips-/ ) {
	$chk[0] = "\t\.file\t1 \"$ifile_root.hc\"\n" . $chk[0];

	# get rid of horrible "<dollar>Revision: .*$" strings
	local(@lines0) = split(/\n/, $chk[0]);
	local($z) = 0;
	while ( $z <= $#lines0 ) {
	    if ( $lines0[$z] =~ /^\t\.byte\t0x24,0x52,0x65,0x76,0x69,0x73,0x69,0x6f$/ ) {
		undef($lines0[$z]);
		$z++;
		while ( $z <= $#lines0 ) {
		    undef($lines0[$z]);
		    last if $lines0[$z] =~ /[,\t]0x0$/;
		    $z++;
		}
	    }
	    $z++;
	}
	$chk[0] = join("\n", @lines0);
	$chk[0] =~ s/\n\n+/\n/;
	print OUTASM $chk[0];
    }

    # print out all the literal strings next
960
    for ($i = 0; $i < $numchks; $i++) {
961
962
963
964
	if ( $chkcat[$i] eq 'literal' ) {
	    print OUTASM $T_HDR_literal, $chk[$i];
	    print OUTASM "; end literal\n" if $TargetPlatform =~ /^hppa/; # for the splitter

965
966
967
968
	    $chkcat[$i] = 'DONE ALREADY';
	}
    }

969
970
971
972
973
974
975
976
977
978
979
980
981
    # on the HPPA, print out all the bss next
    if ( $TargetPlatform =~ /^hppa/ ) {
	for ($i = 1; $i < $numchks; $i++) {
	    if ( $chkcat[$i] eq 'bss' ) {
		print OUTASM "\t.SPACE \$PRIVATE\$\n\t.SUBSPA \$BSS\$\n\t.align 4\n";
		print OUTASM $chk[$i];

		$chkcat[$i] = 'DONE ALREADY';
	    }
	}
    }

    for ($i = $FIRST_MANGLABLE; $i < $numchks; $i++) {
982
983
984
985
986
#	print STDERR "$i: cat $chkcat[$i], symb $chksymb[$i]\n";

	next if $chkcat[$i] eq 'DONE ALREADY';

	if ( $chkcat[$i] eq 'misc' ) {
987
988
	    if ($chk[$i] ne '') {
		print OUTASM $T_HDR_misc;
sof's avatar
sof committed
989
 		&print_doctored($chk[$i], 0);
990
	    }
991
992
993
994
995

	} elsif ( $chkcat[$i] eq 'toss' ) {
	    print STDERR "*** NB: TOSSING code for $chksymb[$i] !!! ***\n";

	} elsif ( $chkcat[$i] eq 'data' ) {
996
997
998
999
	    if ($chk[$i] ne '') {
		print OUTASM $T_HDR_data;
		print OUTASM $chk[$i];
	    }
1000