ghc-asm.lprl 77.5 KB
Newer Older
1
2
%************************************************************************
%*									*
3
\section[Driver-asm-fiddling]{Fiddling with assembler files}
4
5
6
7
8
9
10
11
12
13
14
15
%*									*
%************************************************************************

Tasks:
\begin{itemize}
\item
Utterly stomp out C functions' prologues and epilogues; i.e., the
stuff to do with the C stack.
\item
Any other required tidying up.
\end{itemize}

16
17
18
19
20
21
22
23
24
25
26
27
General note [chak]: Many regexps are very fragile because they rely on white
space being in the right place.  This caused trouble with gcc 2.95 (at least
on Linux), where the use of white space in .s files generated by gcc suddenly 
changed.  To guarantee compatibility across different versions of gcc, make
sure (at least on i386-.*-linux) that regexps tolerate varying amounts of white
space between an assembler statement and its arguments as well as after a the
comma separating multiple arguments.  

\emph{For the time being, I have corrected the regexps for i386-.*-linux.  I
didn't touch all the regexps for other i386 platforms, as I don't have
a box to test these changes.}

28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
HPPA specific notes:
\begin{itemize}
\item
The HP linker is very picky about symbols being in the appropriate
space (code vs. data).  When we mangle the threaded code to put the
info tables just prior to the code, they wind up in code space
rather than data space.  This means that references to *_info from
un-mangled parts of the RTS (e.g. unthreaded GC code) get
unresolved symbols.  Solution:  mini-mangler for .c files on HP.  I
think this should really be triggered in the driver by a new -rts
option, so that user code doesn't get mangled inappropriately.
\item
With reversed tables, jumps are to the _info label rather than to
the _entry label.  The _info label is just an address in code
space, rather than an entry point with the descriptive blob we
talked about yesterday.  As a result, you can't use the call-style
JMP_ macro.  However, some JMP_ macros take _info labels as targets
and some take code entry points within the RTS.  The latter won't
work with the goto-style JMP_ macro.  Sigh.  Solution: Use the goto
style JMP_ macro, and mangle some more assembly, changing all
"RP'literal" and "LP'literal" references to "R'literal" and
"L'literal," so that you get the real address of the code, rather
than the descriptive blob.  Also change all ".word P%literal"
entries in info tables and vector tables to just ".word literal,"
for the same reason.  Advantage: No more ridiculous call sequences.
\end{itemize}

55
56
57
58
59
60
61
%************************************************************************
%*									*
\subsection{Top-level code}
%*									*
%************************************************************************

\begin{code}
62
63
64
65
66
67
68
69
70
############################################################################
# Make all regexp matching multi-line aware.  This replaces the line below
# originally found in "sub mangle_asm":
#
#  local($*) = 1;
#
# This used to work, but Perl 5.10 removes support for $*, so we uses an
# equivalent construct that works in Perl 5.6 and later.
#
71
72
73
BEGIN { if ($] >= 5.010) {
    require overload; overload::constant( qr => sub { "(?m:$_[1])" } );
} }
74
75
############################################################################

76
77
78
79
80
81
82
$TargetPlatform = $TARGETPLATFORM;

($Pgm = $0) =~ s|.*/||;
$ifile = $ARGV[0];
$ofile = $ARGV[1];

if ( $TargetPlatform =~ /^i386-/ ) {
83
84
85
86
87
    if ($ARGV[2] eq '') {
	$StolenX86Regs = 4;
    } else {
        $StolenX86Regs = $ARGV[2];
    }
88
89
90
91
92
93
94
}

&mangle_asm($ifile,$ofile);

exit(0);
\end{code}

95
96
97
98
99
100
%************************************************************************
%*									*
\subsection{Constants for various architectures}
%*									*
%************************************************************************

101
102
103
\begin{code}
sub init_TARGET_STUFF {

104
105
106
107
108
109
    #--------------------------------------------------------#
    if ( $TargetPlatform =~ /^alpha-.*-.*/ ) {

    $T_STABBY	    = 0; # 1 iff .stab things (usually if a.out format)
    $T_US	    = ''; # _ if symbols have an underscore on the front
    $T_PRE_APP	    = 'DONT THINK THIS APPLIES'; # regexp that says what comes before APP/NO_APP
ken's avatar
ken committed
110
    $T_CONST_LBL    = '^\$L?C(\d+):$'; # regexp for what such a lbl looks like
111
112
    $T_POST_LBL	    = ':';

ken's avatar
ken committed
113
114
    $T_MOVE_DIRVS   = '^(\s*(\$.*\.\.ng:|\.align\s+\d+|\.(globl|ent)\s+\S+|\#.*|\.(file|loc)\s+\S+\s+\S+|\.text|\.r?data)\n)';
    $T_COPY_DIRVS   = '^\s*(\$.*\.\.ng:|\#|\.(file|globl|ent|loc))';
115

ken's avatar
ken committed
116
    $T_DOT_WORD	    = '\.(long|quad|byte|word)';
rrt's avatar
rrt committed
117
    $T_DOT_GLOBAL   = '^\t\.globl';
118
119
120
    $T_HDR_literal  = "\.rdata\n\t\.align 3\n";
    $T_HDR_misc	    = "\.text\n\t\.align 3\n";
    $T_HDR_data	    = "\.data\n\t\.align 3\n";
121
    $T_HDR_rodata   = "\.rdata\n\t\.align 3\n";
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
    $T_HDR_closure  = "\.data\n\t\.align 3\n";
    $T_HDR_info	    = "\.text\n\t\.align 3\n";
    $T_HDR_entry    = "\.text\n\t\.align 3\n";
    $T_HDR_vector   = "\.text\n\t\.align 3\n";

    #--------------------------------------------------------#
    } elsif ( $TargetPlatform =~ /^hppa/ ) {

    $T_STABBY	    = 0; # 1 iff .stab things (usually if a.out format)
    $T_US	    = ''; # _ if symbols have an underscore on the front
    $T_PRE_APP	    = 'DONT THINK THIS APPLIES'; # regexp that says what comes before APP/NO_APP
    $T_CONST_LBL    = '^L\$C(\d+)$'; # regexp for what such a lbl looks like
    $T_POST_LBL	    = '';

    $T_MOVE_DIRVS   = '^((\s+\.(IMPORT|EXPORT|PARAM).*|\s+\.align\s+\d+|\s+\.(SPACE|SUBSPA)\s+\S+|\s*)\n)';
    $T_COPY_DIRVS   = '^\s+\.(IMPORT|EXPORT)';

139
    $T_DOT_WORD	    = '\.(blockz|word|half|byte)';
rrt's avatar
rrt committed
140
    $T_DOT_GLOBAL   = '^\s+\.EXPORT';
141
142
143
    $T_HDR_literal  = "\t.SPACE \$TEXT\$\n\t.SUBSPA \$LIT\$\n";
    $T_HDR_misc	    = "\t.SPACE \$TEXT\$\n\t.SUBSPA \$CODE\$\n\t\.align 4\n";
    $T_HDR_data	    = "\t.SPACE \$PRIVATE\$\n\t.SUBSPA \$DATA\$\n\t\.align 4\n";
144
    $T_HDR_rodata   = "\t.SPACE \$PRIVATE\$\n\t.SUBSPA \$DATA\$\n\t\.align 4\n";
145
146
147
148
149
150
    $T_HDR_closure  = "\t.SPACE \$PRIVATE\$\n\t.SUBSPA \$DATA\$\n\t\.align 4\n";
    $T_HDR_info	    = "\t.SPACE \$TEXT\$\n\t.SUBSPA \$CODE\$\n\t\.align 4\n";
    $T_HDR_entry    = "\t.SPACE \$TEXT\$\n\t.SUBSPA \$CODE\$\n\t\.align 4\n";
    $T_HDR_vector   = "\t.SPACE \$TEXT\$\n\t.SUBSPA \$CODE\$\n\t\.align 4\n";

    #--------------------------------------------------------#
dons's avatar
dons committed
151
    } elsif ( $TargetPlatform =~ /^i386-.*-(linuxaout|freebsd2|nextstep3|cygwin32|mingw32)$/ ) {
sof's avatar
sof committed
152
				# NeXT added but not tested. CaS
153
154
155
156
157
158

    $T_STABBY	    = 1; # 1 iff .stab things (usually if a.out format)
    $T_US	    = '_'; # _ if symbols have an underscore on the front
    $T_PRE_APP	    = '^#'; # regexp that says what comes before APP/NO_APP
    $T_CONST_LBL    = '^LC(\d+):$';
    $T_POST_LBL	    = ':';
159
160
    $T_X86_PRE_LLBL_PAT = 'L';
    $T_X86_PRE_LLBL	    = 'L';
161
162
    $T_X86_BADJMP   = '^\tjmp [^L\*]';

163
    $T_MOVE_DIRVS   = '^(\s*(\.(p2)?align\s.*|\.globl\s+\S+|\.text|\.data|\.stab[^n].*|\.type\s+.*|\.size\s+.*|\.lcomm.*)\n)';
164
    $T_COPY_DIRVS   = '\.(globl|stab|lcomm)';
165
    $T_DOT_WORD	    = '\.(long|word|value|byte|space)';
166
    $T_DOT_GLOBAL   = '\.globl';
rrt's avatar
rrt committed
167
    $T_HDR_literal  = "\.text\n\t\.align 2\n";
168
    $T_HDR_misc	    = "\.text\n\t\.align 2,0x90\n";
rrt's avatar
rrt committed
169
    $T_HDR_data	    = "\.data\n\t\.align 2\n";
170
    $T_HDR_rodata   = "\.text\n\t\.align 2\n";
rrt's avatar
rrt committed
171
    $T_HDR_closure  = "\.data\n\t\.align 2\n";
172
    $T_HDR_info	    = "\.text\n\t\.align 2\n"; # NB: requires padding
173
    $T_HDR_entry    = "\.text\n"; # no .align so we're right next to _info (arguably wrong...?)
174
    $T_HDR_vector   = "\.text\n\t\.align 2\n"; # NB: requires padding
175

176
    #--------------------------------------------------------#
177
    } elsif ( $TargetPlatform =~ /^i386-.*-(solaris2|linux|gnu|freebsd|netbsd|openbsd|kfreebsdgnu)$/ ) {
178
179
180

    $T_STABBY	    = 0; # 1 iff .stab things (usually if a.out format)
    $T_US	    = ''; # _ if symbols have an underscore on the front
rrt's avatar
rrt committed
181
    $T_PRE_APP	    = # regexp that says what comes before APP/NO_APP
182
		      ($TargetPlatform =~ /-(linux|gnu|freebsd|netbsd|openbsd)$/) ? '#' : '/' ;
183
    $T_CONST_LBL    = '^\.LC(\d+):$'; # regexp for what such a lbl looks like
184
    $T_POST_LBL	    = ':';
185
186
    $T_X86_PRE_LLBL_PAT = '\.L';
    $T_X86_PRE_LLBL	    = '.L';
187
    $T_X86_BADJMP   = '^\tjmp\s+[^\.\*]';
188

189
    $T_MOVE_DIRVS   = '^(\s*(\.(p2)?align\s.*|\.globl\s+\S+|\.text|\.data|\.section\s+.*|\.type\s+.*|\.size\s+\S+\s*,\s*\d+|\.ident.*|\.local.*)\n)';
190
191
192
193
194
195
196
    if ( $TargetPlatform =~ /solaris2/ ) {
            # newer Solaris linkers are picky about .size information, so
            # omit it (see #1421)
            $T_COPY_DIRVS   = '^\s*\.(globl|local)';
    } else {
            $T_COPY_DIRVS   = '^\s*\.(globl|type|size|local)';
    }
197

198
    $T_DOT_WORD	    = '\.(long|value|word|byte|zero)';
199
200
    $T_DOT_GLOBAL   = '\.globl';
    $T_HDR_literal  = "\.section\t\.rodata\n"; # or just use .text??? (WDP 95/11)
201
    $T_HDR_misc	    = "\.text\n\t\.align 4\n";
202
203
204
205
    $T_HDR_data	    = "\.data\n\t\.align 4\n";
    $T_HDR_rodata   = "\.section\t\.rodata\n\t\.align 4\n";
    $T_HDR_closure  = "\.data\n\t\.align 4\n";
    $T_HDR_info	    = "\.text\n\t\.align 4\n";
206
    $T_HDR_entry    = "\.text\n"; # no .align so we're right next to _info (arguably wrong...?)
207
    $T_HDR_vector   = "\.text\n\t\.align 4\n"; # NB: requires padding
208

209
210
211
212
213
214
215
216
217
    #--------------------------------------------------------#
    } elsif ( $TargetPlatform =~ /^ia64-.*-linux$/ ) {

    $T_STABBY       = 0; # 1 iff .stab things (usually if a.out format)
    $T_US           = ''; # _ if symbols have an underscore on the front
    $T_PRE_APP      = '#';
    $T_CONST_LBL    = '^\.LC(\d+):$'; # regexp for what such a lbl looks like
    $T_POST_LBL     = ':';

218
    $T_MOVE_DIRVS   = '^(\s*\.(global|proc|pred\.safe_across_calls|text|data|section|subsection|align|size|type|ident)\s+.*\n)';
219
220
221
222
223
    $T_COPY_DIRVS   = '\.(global|proc)';

    $T_DOT_WORD     = '\.(long|value|byte|zero)';
    $T_DOT_GLOBAL   = '\.global';
    $T_HDR_literal  = "\.section\t\.rodata\n";
Ian Lynagh's avatar
Ian Lynagh committed
224
    $T_HDR_misc     = "\.text\n\t\.align 16\n"; # May contain code; align like 'entry'
225
    $T_HDR_data     = "\.data\n\t\.align 8\n";
226
    $T_HDR_rodata   = "\.section\t\.rodata\n\t\.align 8\n";
227
228
229
230
231
    $T_HDR_closure  = "\.data\n\t\.align 8\n";
    $T_HDR_info     = "\.text\n\t\.align 8\n";
    $T_HDR_entry    = "\.text\n\t\.align 16\n";
    $T_HDR_vector   = "\.text\n\t\.align 8\n";

232
    #--------------------------------------------------------#
dons's avatar
dons committed
233
    } elsif ( $TargetPlatform =~ /^x86_64-.*-(linux|openbsd)$/ ) {
234
235
236
237
238
239
240

    $T_STABBY       = 0; # 1 iff .stab things (usually if a.out format)
    $T_US           = ''; # _ if symbols have an underscore on the front
    $T_PRE_APP      = '#';
    $T_CONST_LBL    = '^\.LC(\d+):$'; # regexp for what such a lbl looks like
    $T_POST_LBL     = ':';

241
    $T_MOVE_DIRVS   = '^(\s*\.(globl|text|data|section|align|size|type|ident|local)([ \t].*)?\n)';
242
    $T_COPY_DIRVS   = '\.(globl|type|size|local)';
243
244
245

    $T_DOT_WORD     = '\.(quad|long|value|byte|zero)';
    $T_DOT_GLOBAL   = '\.global';
246
247

    $T_HDR_literal16 = "\.section\t\.rodata.cst16\n\t.align 16\n";
248
    $T_HDR_literal  = "\.section\t\.rodata\n";
249

250
251
    $T_HDR_misc     = "\.text\n\t\.align 8\n";
    $T_HDR_data     = "\.data\n\t\.align 8\n";
252
    $T_HDR_rodata   = "\.section\t\.rodata\n\t\.align 8\n";
253
254
255
256
257
258

	# the assembler on x86_64/Linux refuses to generate code for
	#   .quad  x - y
	# where x is in the text section and y in the rodata section.
	# It works if y is in the text section, though.  This is probably
	# going to cause difficulties for PIC, I imagine.
259
260
        #       
        # See Note [x86-64-relative] in includes/InfoTables.h
261
262
    $T_HDR_relrodata= "\.text\n\t\.align 8\n";

263
264
265
266
267
    $T_HDR_closure  = "\.data\n\t\.align 8\n";
    $T_HDR_info     = "\.text\n\t\.align 8\n";
    $T_HDR_entry    = "\.text\n\t\.align 8\n";
    $T_HDR_vector   = "\.text\n\t\.align 8\n";

268
269
270
271
272
273
274
275
276
    #--------------------------------------------------------#
    } elsif ( $TargetPlatform =~ /^m68k-.*-sunos4/ ) {

    $T_STABBY	    = 1; # 1 iff .stab things (usually if a.out format)
    $T_US	    = '_'; # _ if symbols have an underscore on the front
    $T_PRE_APP	    = '^# MAY NOT APPLY'; # regexp that says what comes before APP/NO_APP
    $T_CONST_LBL    = '^LC(\d+):$';
    $T_POST_LBL	    = ':';

rrt's avatar
rrt committed
277
    $T_MOVE_DIRVS   = '^(\s*(\.align\s+\d+|\.proc\s+\d+|\.const|\.cstring|\.globl\s+\S+|\.text|\.data|\.even|\.stab[^n].*)\n)';
278
279
280
281
282
283
284
    $T_COPY_DIRVS   = '\.(globl|proc|stab)';

    $T_DOT_WORD	    = '\.long';
    $T_DOT_GLOBAL   = '\.globl';
    $T_HDR_literal  = "\.text\n\t\.even\n";
    $T_HDR_misc	    = "\.text\n\t\.even\n";
    $T_HDR_data	    = "\.data\n\t\.even\n";
285
    $T_HDR_rodata   = "\.text\n\t\.even\n";
286
287
288
289
290
291
292
293
294
295
296
297
298
299
    $T_HDR_closure  = "\.data\n\t\.even\n";
    $T_HDR_info	    = "\.text\n\t\.even\n";
    $T_HDR_entry    = "\.text\n\t\.even\n";
    $T_HDR_vector   = "\.text\n\t\.even\n";

    #--------------------------------------------------------#
    } elsif ( $TargetPlatform =~ /^mips-.*/ ) {

    $T_STABBY	    = 0; # 1 iff .stab things (usually if a.out format)
    $T_US	    = ''; # _ if symbols have an underscore on the front
    $T_PRE_APP	    = '^\s*#'; # regexp that says what comes before APP/NO_APP
    $T_CONST_LBL    = '^\$LC(\d+):$'; # regexp for what such a lbl looks like
    $T_POST_LBL	    = ':';

rrt's avatar
rrt committed
300
    $T_MOVE_DIRVS   = '^(\s*(\.align\s+\d+|\.(globl|ent)\s+\S+|\.text|\.r?data)\n)';
301
302
303
    $T_COPY_DIRVS   = '\.(globl|ent)';

    $T_DOT_WORD	    = '\.word';
rrt's avatar
rrt committed
304
    $T_DOT_GLOBAL   = '^\t\.globl';
305
306
307
    $T_HDR_literal  = "\t\.rdata\n\t\.align 2\n";
    $T_HDR_misc	    = "\t\.text\n\t\.align 2\n";
    $T_HDR_data	    = "\t\.data\n\t\.align 2\n";
308
    $T_HDR_rodata   = "\t\.rdata\n\t\.align 2\n";
309
310
311
312
313
314
    $T_HDR_closure  = "\t\.data\n\t\.align 2\n";
    $T_HDR_info	    = "\t\.text\n\t\.align 2\n";
    $T_HDR_entry    = "\t\.text\n\t\.align 2\n";
    $T_HDR_vector   = "\t\.text\n\t\.align 2\n";

    #--------------------------------------------------------#
315
    } elsif ( $TargetPlatform =~ /^powerpc-apple-darwin.*/ ) {
316
				# Apple PowerPC Darwin/MacOS X.
317
    $T_STABBY	    = 0; # 1 iff .stab things (usually if a.out format)
318
    $T_US	    = '_'; # _ if symbols have an underscore on the front
319
    $T_PRE_APP	    = 'DOESNT APPLY'; # regexp that says what comes before APP/NO_APP
320
    $T_CONST_LBL    = '^\LC\d+:'; # regexp for what such a lbl looks like
321
322
    $T_POST_LBL	    = ':';

323
    $T_MOVE_DIRVS   = '^(\s*(\.(p2)?align\s.*|\.text|\.data|\.const_data|\.cstring|\.non_lazy_symbol_pointer|\.const|\.static_const|\.literal4|\.literal8|\.static_data|\.globl \S+|\.section .*|\.lcomm.*)\n)';
324
    $T_COPY_DIRVS   = '\.(globl|lcomm)';
325

326
    $T_DOT_WORD	    = '\.(long|short|byte|fill|space)';
327
    $T_DOT_GLOBAL   = '\.globl';
sof's avatar
sof committed
328
    $T_HDR_toc      = "\.toc\n";
329
    $T_HDR_literal  = "\t\.const\n\t\.align 2\n";
330
331
    $T_HDR_misc	    = "\t\.text\n\t\.align 2\n";
    $T_HDR_data	    = "\t\.data\n\t\.align 2\n";
332
333
334
    $T_HDR_rodata   = "\t\.const\n\t\.align 2\n";
    $T_HDR_relrodata= "\t\.const_data\n\t\.align 2\n";
    $T_HDR_closure  = "\t\.data\n\t\.align 2\n";
335
336
337
    $T_HDR_info	    = "\t\.text\n\t\.align 2\n";
    $T_HDR_entry    = "\t\.text\n\t\.align 2\n";
    $T_HDR_vector   = "\t\.text\n\t\.align 2\n";
338

339
340
341
342
343
344
345
346
347
348
349
350
    #--------------------------------------------------------#
    } elsif ( $TargetPlatform =~ /^i386-apple-darwin.*/ ) {
				# Apple PowerPC Darwin/MacOS X.
    $T_STABBY	    = 0; # 1 iff .stab things (usually if a.out format)
    $T_US	    = '_'; # _ if symbols have an underscore on the front
    $T_PRE_APP	    = 'DOESNT APPLY'; # regexp that says what comes before APP/NO_APP
    $T_CONST_LBL    = '^\LC\d+:'; # regexp for what such a lbl looks like
    $T_POST_LBL	    = ':';
    $T_X86_PRE_LLBL_PAT = 'L';
    $T_X86_PRE_LLBL	    = 'L';
    $T_X86_BADJMP   = '^\tjmp [^L\*]';

351
    $T_MOVE_DIRVS   = '^(\s*(\.(p2)?align\s.*|\.text|\.data|\.const_data|\.cstring|\.non_lazy_symbol_pointer|\.const|\.static_const|\.literal4|\.literal8|\.static_data|\.globl \S+|\.section .*|\.lcomm.*)\n)';
352
353
354
355
356
    $T_COPY_DIRVS   = '\.(globl|lcomm)';

    $T_DOT_WORD	    = '\.(long|short|byte|fill|space)';
    $T_DOT_GLOBAL   = '\.globl';
    $T_HDR_toc      = "\.toc\n";
357
358
    $T_HDR_literal16= "\t\.literal8\n\t\.align 4\n";
    $T_HDR_literal  = "\t\.const\n\t\.align 4\n";
359
360
361
362
363
364
365
366
367
    $T_HDR_misc	    = "\t\.text\n\t\.align 2\n";
    $T_HDR_data	    = "\t\.data\n\t\.align 2\n";
    $T_HDR_rodata   = "\t\.const\n\t\.align 2\n";
    $T_HDR_relrodata= "\t\.const_data\n\t\.align 2\n";
    $T_HDR_closure  = "\t\.data\n\t\.align 2\n";
    $T_HDR_info	    = "\t\.text\n\t\.align 2\n";
    $T_HDR_entry    = "\t\.text\n\t\.align 2\n";
    $T_HDR_vector   = "\t\.text\n\t\.align 2\n";

368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
    #--------------------------------------------------------#
    } elsif ( $TargetPlatform =~ /^x86_64-apple-darwin.*/ ) {
				# Apple PowerPC Darwin/MacOS X.
    $T_STABBY	    = 0; # 1 iff .stab things (usually if a.out format)
    $T_US	    = '_'; # _ if symbols have an underscore on the front
    $T_PRE_APP	    = 'DOESNT APPLY'; # regexp that says what comes before APP/NO_APP
    $T_CONST_LBL    = '^\LC\d+:'; # regexp for what such a lbl looks like
    $T_POST_LBL	    = ':';

    $T_MOVE_DIRVS   = '^(\s*(\.align \d+|\.text|\.data|\.const_data|\.cstring|\.non_lazy_symbol_pointer|\.const|\.static_const|\.literal4|\.literal8|\.static_data|\.globl \S+|\.section .*|\.lcomm.*)\n)';
    $T_COPY_DIRVS   = '\.(globl|lcomm)';

    $T_DOT_WORD	    = '\.(quad|long|short|byte|fill|space)';
    $T_DOT_GLOBAL   = '\.globl';
    $T_HDR_toc      = "\.toc\n";
    $T_HDR_literal16= "\t\.literal8\n\t\.align 4\n";
    $T_HDR_literal  = "\t\.const\n\t\.align 4\n";
    $T_HDR_misc	    = "\t\.text\n\t\.align 2\n";
    $T_HDR_data	    = "\t\.data\n\t\.align 2\n";
    $T_HDR_rodata   = "\t\.const\n\t\.align 2\n";
    $T_HDR_relrodata= "\t\.const_data\n\t\.align 2\n";
    $T_HDR_closure  = "\t\.data\n\t\.align 2\n";
    $T_HDR_info	    = "\t\.text\n\t\.align 2\n";
    $T_HDR_entry    = "\t\.text\n\t\.align 2\n";
    $T_HDR_vector   = "\t\.text\n\t\.align 2\n";

394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
    #--------------------------------------------------------#
    } elsif ( $TargetPlatform =~ /^powerpc-.*-linux/ ) {
				# PowerPC Linux
    $T_STABBY	    = 0; # 1 iff .stab things (usually if a.out format)
    $T_US	    = ''; # _ if symbols have an underscore on the front
    $T_PRE_APP	    = '^#'; # regexp that says what comes before APP/NO_APP
    $T_CONST_LBL    = '^\.LC\d+:'; # regexp for what such a lbl looks like
    $T_POST_LBL	    = ':';

    $T_MOVE_DIRVS   = '^(\s*(\.(p2)?align\s+\d+(,\s*0x90)?|\.globl\s+\S+|\.text|\.data|\.section\s+.*|\.type\s+.*|\.size\s+\S+\s*,\s*\d+|\.ident.*|\.local.*)\n)';
    $T_COPY_DIRVS   = '^\s*\.(globl|type|size|local)';

    $T_DOT_WORD	    = '\.(long|short|byte|fill|space)';
    $T_DOT_GLOBAL   = '\.globl';
    $T_HDR_toc      = "\.toc\n";
    $T_HDR_literal  = "\t\.section\t.rodata\n\t\.align 2\n";
    $T_HDR_misc	    = "\t\.text\n\t\.align 2\n";
    $T_HDR_data	    = "\t\.data\n\t\.align 2\n";
412
    $T_HDR_rodata   = "\t\.section\t.rodata\n\t\.align 2\n";
413
414
415
416
417
    $T_HDR_closure  = "\t\.data\n\t\.align 2\n";
    $T_HDR_info	    = "\t\.text\n\t\.align 2\n";
    $T_HDR_entry    = "\t\.text\n\t\.align 2\n";
    $T_HDR_vector   = "\t\.text\n\t\.align 2\n";

418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
    #--------------------------------------------------------#
    } elsif ( $TargetPlatform =~ /^powerpc64-.*-linux/ ) {
				# PowerPC 64 Linux
    $T_STABBY	    = 0; # 1 iff .stab things (usually if a.out format)
    $T_US	    = '\.'; # _ if symbols have an underscore on the front
    $T_PRE_APP	    = '^#'; # regexp that says what comes before APP/NO_APP
    $T_CONST_LBL    = '^\.LC\d+:'; # regexp for what such a lbl looks like
    $T_POST_LBL	    = ':';

    $T_MOVE_DIRVS   = '^(\s*(\.(p2)?align\s+\d+(,\s*0x90)?|\.globl\s+\S+|\.text|\.data|\.section\s+.*|\.type\s+.*|\.size\s+\S+\s*,\s*\d+|\.ident.*|\.local.*)\n)';
    $T_COPY_DIRVS   = '^\s*\.(globl|type|size|local)';

    $T_DOT_WORD	    = '\.(long|short|byte|fill|space)';
    $T_DOT_GLOBAL   = '\.globl';
    $T_HDR_toc      = "\.toc\n";
    $T_HDR_literal  = "\t\.section\t\".toc\",\"aw\"\n";
    $T_HDR_misc	    = "\t\.text\n\t\.align 2\n";
    $T_HDR_data	    = "\t\.data\n\t\.align 2\n";
    $T_HDR_rodata   = "\t\.section\t.rodata\n\t\.align 2\n";
    $T_HDR_closure  = "\t\.data\n\t\.align 2\n";
    $T_HDR_info	    = "\t\.text\n\t\.align 2\n";
    $T_HDR_entry    = "\t\.text\n\t\.align 2\n";
    $T_HDR_vector   = "\t\.text\n\t\.align 2\n";

442
    #--------------------------------------------------------#
dons's avatar
dons committed
443
    } elsif ( $TargetPlatform =~ /^sparc-.*-(solaris2|openbsd)/ ) {
444
445
446
447
448
449
450

    $T_STABBY	    = 0; # 1 iff .stab things (usually if a.out format)
    $T_US	    = ''; # _ if symbols have an underscore on the front
    $T_PRE_APP	    = 'DOES NOT SEEM TO APPLY'; # regexp that says what comes before APP/NO_APP
    $T_CONST_LBL    = '^\.LLC(\d+):$'; # regexp for what such a lbl looks like
    $T_POST_LBL	    = ':';

451
452
    $T_MOVE_DIRVS   =  '^((\s+\.align\s+\d+|\s+\.proc\s+\d+|\s+\.global\s+\S+|\s+\.local\s+\S+|\.text|\.data|\.stab.*|\s*\.section.*|\s+\.type.*|\s+\.size.*)\n)';
    $T_COPY_DIRVS   = '\.(global|local|proc|stab)';
453

454
    $T_DOT_WORD	    = '\.(long|word|byte|half|skip|uahalf|uaword)';
rrt's avatar
rrt committed
455
    $T_DOT_GLOBAL   = '^\t\.global';
456
457
458
    $T_HDR_literal  = "\.text\n\t\.align 8\n";
    $T_HDR_misc	    = "\.text\n\t\.align 4\n";
    $T_HDR_data	    = "\.data\n\t\.align 8\n";
459
    $T_HDR_rodata   = "\.text\n\t\.align 4\n";
460
    $T_HDR_closure  = "\.data\n\t\.align 4\n";
461
    $T_HDR_info     = "\.text\n\t\.align 4\n";
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
    $T_HDR_entry    = "\.text\n\t\.align 4\n";
    $T_HDR_vector   = "\.text\n\t\.align 4\n";

    #--------------------------------------------------------#
    } elsif ( $TargetPlatform =~ /^sparc-.*-sunos4/ ) {

    $T_STABBY	    = 1; # 1 iff .stab things (usually if a.out format)
    $T_US	    = '_'; # _ if symbols have an underscore on the front
    $T_PRE_APP	    = '^# DOES NOT SEEM TO APPLY'; # regexp that says what comes before APP/NO_APP
    $T_CONST_LBL    = '^LC(\d+):$';
    $T_POST_LBL	    = ':';

    $T_MOVE_DIRVS   = '^((\s+\.align\s+\d+|\s+\.proc\s+\d+|\s+\.global\s+\S+|\.text|\.data|\.stab.*)\n)';
    $T_COPY_DIRVS   = '\.(global|proc|stab)';

    $T_DOT_WORD	    = '\.word';
    $T_DOT_GLOBAL   = '^\t\.global';
    $T_HDR_literal  = "\.text\n\t\.align 8\n";
    $T_HDR_misc	    = "\.text\n\t\.align 4\n";
    $T_HDR_data	    = "\.data\n\t\.align 8\n";
482
    $T_HDR_rodata   = "\.text\n\t\.align 4\n";
483
484
485
486
487
    $T_HDR_closure  = "\.data\n\t\.align 4\n";
    $T_HDR_info	    = "\.text\n\t\.align 4\n";
    $T_HDR_entry    = "\.text\n\t\.align 4\n";
    $T_HDR_vector   = "\.text\n\t\.align 4\n";

488
489
490
491
492
493
494
495
496
    #--------------------------------------------------------#
    } elsif ( $TargetPlatform =~ /^sparc-.*-linux/ ) {
    $T_STABBY       = 0; # 1 iff .stab things (usually if a.out format)
    $T_US           = ''; # _ if symbols have an underscore on the front
    $T_PRE_APP      = '#'; # regexp that says what comes before APP/NO_APP
                           # Probably doesn't apply anyway
    $T_CONST_LBL    = '^\.LLC(\d+):$'; # regexp for what such a lbl looks like
    $T_POST_LBL     = ':';

497
498
    $T_MOVE_DIRVS   = '^((\s+\.align\s+\d+|\s+\.proc\s+\d+|\s+\.global\s+\S+|\s+\.local\s+\S+|\.text|\.data|\.seg|\.stab.*|\s+?\.section.*|\s+\.type.*|\s+\.size.*)\n)';
    $T_COPY_DIRVS   = '\.(global|local|globl|proc|stab)';
499
500
501
502
503
504

    $T_DOT_WORD     = '\.(long|word|nword|xword|byte|half|short|skip|uahalf|uaword)';
    $T_DOT_GLOBAL   = '^\t\.global';
    $T_HDR_literal  = "\.text\n\t\.align 8\n";
    $T_HDR_misc     = "\.text\n\t\.align 4\n";
    $T_HDR_data     = "\.data\n\t\.align 8\n";
505
    $T_HDR_rodata   = "\.text\n\t\.align 4\n";
506
507
508
509
510
    $T_HDR_closure  = "\.data\n\t\.align 4\n";
    $T_HDR_info     = "\.text\n\t\.align 4\n";
    $T_HDR_entry    = "\.text\n\t\.align 4\n";
    $T_HDR_vector   = "\.text\n\t\.align 4\n";

511
512
513
514
    #--------------------------------------------------------#
    } else {
	print STDERR "$Pgm: don't know how to mangle assembly language for: $TargetPlatform\n";
	exit 1;
515
516
    }

517
518
519
520
521
522
    if($T_HDR_relrodata eq "") {
            # default values:
            # relrodata defaults to rodata.
        $T_HDR_relrodata = $T_HDR_rodata;
    }

523
524
525
526
527
528
if ( 0 ) {
print STDERR "T_STABBY: $T_STABBY\n";
print STDERR "T_US: $T_US\n";
print STDERR "T_PRE_APP: $T_PRE_APP\n";
print STDERR "T_CONST_LBL: $T_CONST_LBL\n";
print STDERR "T_POST_LBL: $T_POST_LBL\n";
529
530
531
532
533
if ( $TargetPlatform =~ /^i386-/ ) {
    print STDERR "T_X86_PRE_LLBL_PAT: $T_X86_PRE_LLBL_PAT\n";
    print STDERR "T_X86_PRE_LLBL: $T_X86_PRE_LLBL\n";
    print STDERR "T_X86_BADJMP: $T_X86_BADJMP\n";
}
534
535
536
print STDERR "T_MOVE_DIRVS: $T_MOVE_DIRVS\n";
print STDERR "T_COPY_DIRVS: $T_COPY_DIRVS\n";
print STDERR "T_DOT_WORD: $T_DOT_WORD\n";
537
print STDERR "T_HDR_literal: $T_HDR_literal\n";
538
539
print STDERR "T_HDR_misc: $T_HDR_misc\n";
print STDERR "T_HDR_data: $T_HDR_data\n";
540
print STDERR "T_HDR_rodata: $T_HDR_rodata\n";
541
542
543
544
545
546
547
548
549
print STDERR "T_HDR_closure: $T_HDR_closure\n";
print STDERR "T_HDR_info: $T_HDR_info\n";
print STDERR "T_HDR_entry: $T_HDR_entry\n";
print STDERR "T_HDR_vector: $T_HDR_vector\n";
}

}
\end{code}

550
551
552
553
554
555
%************************************************************************
%*									*
\subsection{Mangle away}
%*									*
%************************************************************************

556
557
558
\begin{code}
sub mangle_asm {
    local($in_asmf, $out_asmf) = @_;
559
    local($i, $c);
560
    local($*) = 1;
561

562
563
564
    # ia64-specific information for code chunks
    my $ia64_locnum;
    my $ia64_outnum;
sof's avatar
sof committed
565

566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
    &init_TARGET_STUFF();
    &init_FUNNY_THINGS();

    open(INASM, "< $in_asmf")
	|| &tidy_up_and_die(1,"$Pgm: failed to open `$in_asmf' (to read)\n");
    open(OUTASM,"> $out_asmf")
	|| &tidy_up_and_die(1,"$Pgm: failed to open `$out_asmf' (to write)\n");

    # read whole file, divide into "chunks":
    #	record some info about what we've found...

    @chk = ();		# contents of the chunk
    $numchks = 0;	# number of them
    @chkcat = ();	# what category of thing in each chunk
    @chksymb = ();	# what symbol(base) is defined in this chunk
581
    %entrychk = ();	# ditto, its entry code
582
    %closurechk = ();	# ditto, the (static) closure
583
    %srtchk = ();	# ditto, its SRT (for top-level things)
584
585
    %infochk = (); 	# given a symbol base, say what chunk its info tbl is in
    %vectorchk = ();    # ditto, return vector table
586
    $EXTERN_DECLS = '';	# .globl <foo> .text (MIPS only)
587

588
    $i = 0; $chkcat[0] = 'misc'; $chk[0] = '';
589
590

    while (<INASM>) {
rrt's avatar
rrt committed
591
	tr/\r//d if $TargetPlatform =~ /-mingw32$/; # In case Perl doesn't convert line endings
592
	next if $T_STABBY && /^\.stab.*${T_US}__stg_split_marker/o;
593
	next if $T_STABBY && /^\.stab.*ghc.*c_ID/;
594
	next if /^\t\.def.*endef$/;
595
	next if /${T_PRE_APP}(NO_)?APP/o; 
596
597
    	next if /^;/ && $TargetPlatform =~ /^hppa/;

598
	next if /(^$|^\t\.file\t|^ # )/ && $TargetPlatform =~ /^(mips|ia64)-/;
599
600

	if ( $TargetPlatform =~ /^mips-/ 
ken's avatar
ken committed
601
	  && /^\t\.(globl\S+\.text|comm\t)/ ) {
602
	    $EXTERN_DECLS .= $_ unless /(__DISCARD__|\b(PK_|ASSIGN_)(FLT|DBL)\b)/;
603
604
605
606
607
608
609
610
611
612
613
	# Treat .comm variables as data.  These show up in two (known) places:
	#
	#    - the module_registered variable used in the __stginit fragment.
	#      even though these are declared static and initialised, gcc 3.3
	#      likes to make them .comm, presumably to save space in the
	#      object file.
	#
	#    - global variables used to pass arguments from C to STG in
	#      a foreign export.  (is this still true? --SDM)
	# 
	} elsif ( /^\t\.comm.*$/ ) {
sof's avatar
sof committed
614
615
616
617
	    $chk[++$i]   = $_;
	    $chkcat[$i]  = 'data';
	    $chksymb[$i] = '';

618
619
	# Labels ending "_str": these are literal strings.
	} elsif ( /^${T_US}([A-Za-z0-9_]+)_str${T_POST_LBL}$/ ) {
620
	    $chk[++$i]   = $_;
621
	    $chkcat[$i]  = 'relrodata';
622
	    $chksymb[$i] = '';
623
624
        } elsif ( $TargetPlatform =~ /-darwin/
                && (/^\s*\.subsections_via_symbols/
625
                  ||/^\s*\.no_dead_strip.*/)) {
626
627
628
629
630
            # Don't allow Apple's linker to do any dead-stripping of symbols
            # in this file, because it will mess up info-tables in mangled
            # code.
            # The .no_dead_strip directives are actually put there by
            # the gcc3 "used" attribute on entry points.
631
        
632
        } elsif ( $TargetPlatform =~ /^.*-apple-darwin.*/ && ( 
633
		   /^\s*\.picsymbol_stub/
634
635
		|| /^\s*\.section __TEXT,__picsymbol_stub\d,.*/
		|| /^\s*\.section __TEXT,__picsymbolstub\d,.*/
636
		|| /^\s*\.symbol_stub/
637
638
		|| /^\s*\.section __TEXT,__symbol_stub\d,.*/
		|| /^\s*\.section __TEXT,__symbolstub\d,.*/
639
		|| /^\s*\.lazy_symbol_pointer/
640
641
		|| /^\s*\.non_lazy_symbol_pointer/
		|| /^\s*\.section __IMPORT.*/))
642
643
644
645
	{
	    $chk[++$i]   = $_;
	    $chkcat[$i]  = 'dyld';
	    $chksymb[$i] = '';
646
	    $dyld_section = $_;
647

648
	} elsif ( $TargetPlatform =~ /^.*-apple-darwin.*/ && $chkcat[$i] eq 'dyld' && /^\s*\.data/)
649
650
651
652
	{	# non_lazy_symbol_ptrs that point to local symbols
	    $chk[++$i]   = $_;
	    $chkcat[$i]  = 'dyld';
	    $chksymb[$i] = '';
653
	    $dyld_section = $_;
654
655
656
	} elsif ( $TargetPlatform =~ /^.*-apple-darwin.*/ && $chkcat[$i] eq 'dyld' && /^\s*\.align/)
	{	# non_lazy_symbol_ptrs that point to local symbols
	    $dyld_section .= $_;
657
658
659
660
661
	} elsif ( $TargetPlatform =~ /^.*-apple-darwin.*/ && $chkcat[$i] eq 'dyld' && /^L_.*:$/)
	{	# non_lazy_symbol_ptrs that point to local symbols
	    $chk[++$i]   = $dyld_section . $_;
	    $chkcat[$i]  = 'dyld';
	    $chksymb[$i] = '';
662

663
	} elsif ( /^\s+/ ) { # most common case first -- a simple line!
664
665
666
667
	    # duplicated from the bottom

	    $chk[$i] .= $_;

668
669
670
671
672
673
674
675
676
	} elsif ( /\.\.ng:$/ && $TargetPlatform =~ /^alpha-/ ) {
	    # Alphas: Local labels not to be confused with new chunks
	    $chk[$i] .= $_;
	# NB: all the rest start with a non-space

	} elsif ( $TargetPlatform =~ /^mips-/
	       && /^\d+:/ ) { # a funny-looking very-local label
	    $chk[$i] .= $_;

677
	} elsif ( /$T_CONST_LBL/o ) {
678
679
	    $chk[++$i]   = $_;
	    $chkcat[$i]  = 'literal';
680
681
	    $chksymb[$i] = $1;

682
	} elsif ( /^${T_US}__stg_split_marker(\d*)${T_POST_LBL}$/o ) {
683
684
	    $chk[++$i]   = $_;
	    $chkcat[$i]  = 'splitmarker';
685
686
	    $chksymb[$i] = $1;

687
	} elsif ( /^${T_US}([A-Za-z0-9_]+)_info${T_POST_LBL}$/o ) {
688
	    $symb = $1;
689
690
	    $chk[++$i]   = $_;
	    $chkcat[$i]  = 'infotbl';
691
692
693
694
695
696
	    $chksymb[$i] = $symb;

	    die "Info table already? $symb; $i\n" if defined($infochk{$symb});

	    $infochk{$symb} = $i;

697
	} elsif ( /^${T_US}([A-Za-z0-9_]+)_(entry|ret)${T_POST_LBL}$/o ) {
698
	    $chk[++$i]   = $_;
699
	    $chkcat[$i]  = 'entry';
700
701
	    $chksymb[$i] = $1;

702
	    $entrychk{$1} = $i;
703

704
	} elsif ( /^${T_US}([A-Za-z0-9_]+)_closure${T_POST_LBL}$/o ) {
705
706
	    $chk[++$i]   = $_;
	    $chkcat[$i]  = 'closure';
707
708
709
710
	    $chksymb[$i] = $1;

	    $closurechk{$1} = $i;

711
	} elsif ( /^${T_US}([A-Za-z0-9_]+)_srt${T_POST_LBL}$/o ) {
712
713
714
715
716
717
	    $chk[++$i]   = $_;
	    $chkcat[$i]  = 'srt';
	    $chksymb[$i] = $1;

	    $srtchk{$1} = $i;

718
	} elsif ( /^${T_US}([A-Za-z0-9_]+)_ct${T_POST_LBL}$/o ) {
719
720
721
722
	    $chk[++$i]   = $_;
	    $chkcat[$i]  = 'data';
	    $chksymb[$i] = '';

723
724
725
726
727
	} elsif ( /^${T_US}(stg_ap_stack_entries|stg_stack_save_entries|stg_arg_bitmaps)${T_POST_LBL}$/o ) {
	    $chk[++$i]   = $_;
	    $chkcat[$i]  = 'data';
	    $chksymb[$i] = '';

728
	} elsif ( /^(${T_US}__gnu_compiled_c|gcc2_compiled\.)${T_POST_LBL}/o ) {
729
730
	    ; # toss it

731
732
733
734
	} elsif ( /^${T_US}[A-Za-z0-9_]+\.\d+${T_POST_LBL}$/o
	       || /^${T_US}.*_CAT${T_POST_LBL}$/o 		# PROF: _entryname_CAT
	       || /^${T_US}.*_done${T_POST_LBL}$/o		# PROF: _module_done
	       || /^${T_US}_module_registered${T_POST_LBL}$/o	# PROF: _module_registered
735
	       ) {
736
737
	    $chk[++$i]   = $_;
	    $chkcat[$i]  = 'data';
738
739
	    $chksymb[$i] = '';

740
741
742
	} elsif ( /^([A-Za-z0-9_]+)\s+\.comm/ && $TargetPlatform =~ /^hppa/ ) {
	    $chk[++$i]   = $_;
	    $chkcat[$i]  = 'bss';
sof's avatar
sof committed
743
744
  	    $chksymb[$i] = '';

745
	} elsif ( /^${T_US}([A-Za-z0-9_]+)_cc(s)?${T_POST_LBL}$/o ) {
sof's avatar
sof committed
746
747
748
749
750
            # all CC_ symbols go in the data section...
	    $chk[++$i]   = $_;
	    $chkcat[$i]  = 'data';
	    $chksymb[$i] = '';

751
        } elsif ( /^${T_US}([A-Za-z0-9_]+)_hpc${T_POST_LBL}$/o ) {
andy@galois.com's avatar
andy@galois.com committed
752
753
754
755
756
           # hpc shares tick boxes across modules
           $chk[++$i]   = $_;
           $chkcat[$i]  = 'data';
           $chksymb[$i] = '';

757
	} elsif ( /^${T_US}([A-Za-z0-9_]+)_(alt|dflt)${T_POST_LBL}$/o ) {
758
759
	    $chk[++$i]   = $_;
	    $chkcat[$i]  = 'misc';
760
	    $chksymb[$i] = '';
761
	} elsif ( /^${T_US}([A-Za-z0-9_]+)_vtbl${T_POST_LBL}$/o ) {
762
763
	    $chk[++$i]   = $_;
	    $chkcat[$i]  = 'vector';
764
765
766
767
768
	    $chksymb[$i] = $1;

	    $vectorchk{$1} = $i;

	} elsif ( $TargetPlatform =~ /^i386-.*-solaris2/
chak's avatar
chak committed
769
770
771
772
773
774
775
776
	     &&   /^[A-Za-z0-9][A-Za-z0-9_]*:/ ) {
            # Some Solaris system headers contain function definitions (as
	    # opposed to mere prototypes), which end up in the .hc file when
	    # a Haskell module foreign imports the corresponding system 
	    # functions (most notably stat()).  We put them into the text 
            # segment.  Note that this currently does not extend to function
	    # names starting with an underscore. 
	    # - chak 7/2001
777
	    $chk[++$i]   = $_;
chak's avatar
chak committed
778
	    $chkcat[$i]  = 'misc';
779
780
	    $chksymb[$i] = $1;

781
782
783
784
785
786
787
788
789
        } elsif ( $TargetPlatform =~ /^i386-apple-darwin/ && /^(___i686\.get_pc_thunk\.[abcd]x):/o) {
                # To handle PIC on Darwin/x86, we need to appropriately pass through
                # the get_pc_thunk functions. The need to be put into a special section
                # marked as coalesced (otherwise the .weak_definition doesn't work
                # on Darwin).
            $chk[++$i]   = $_;
            $chkcat[$i]  = 'get_pc_thunk';
            $chksymb[$i] = $1;

790
	} elsif ( /^${T_US}[A-Za-z0-9_]/o
791
		&& ( $TargetPlatform !~ /^hppa/ # need to avoid local labels in this case
792
793
794
		   || ! /^L\$\d+$/ ) 
		&& ( $TargetPlatform !~ /^powerpc64/ # we need to avoid local labels in this case
		   || ! /^\.L\d+:$/ ) ) {
795
796
	    local($thing);
	    chop($thing = $_);
797
	    $thing =~ s/:$//;
798
799
800
	    $chk[++$i]   = $_;
	    $chksymb[$i] = '';
	    if (
801
		       /^${T_US}stg_.*${T_POST_LBL}$/o          # RTS internals
sof's avatar
sof committed
802
		    || /^${T_US}__stg_.*${T_POST_LBL}$/o        # more RTS internals
803
		    || /^${T_US}__fexp_.*${T_POST_LBL}$/o       # foreign export
804
		    || /^${T_US}.*_slow${T_POST_LBL}$/o         # slow entry
ken's avatar
ken committed
805
		    || /^${T_US}__stginit.*${T_POST_LBL}$/o     # __stginit<module>
806
		    || /^${T_US}.*_btm${T_POST_LBL}$/o          # large bitmaps
807
		    || /^${T_US}.*_fast${T_POST_LBL}$/o         # primops
808
                    || /^_uname:/o			        # x86/Solaris2
809
810
811
812
813
814
815
816
		)
            {
	    	$chkcat[$i]  = 'misc';
            } elsif (
		       /^${T_US}.*_srtd${T_POST_LBL}$/o          # large bitmaps
		    || /^${T_US}.*_closure_tbl${T_POST_LBL}$/o  # closure tables
                )
            {
817
                $chkcat[$i] = 'relrodata';
818
819
820
821
822
            } else
            {
	        print STDERR "Warning: retaining unknown function \`$thing' in output from C compiler\n";
	    	$chkcat[$i]  = 'unknown';
	    }
823

824
825
826
827
828
829
830
831
832
833
	} elsif ( $TargetPlatform =~ /^powerpc-.*-linux/ && /^\.LCTOC1 = /o ) {
		# PowerPC Linux's large-model PIC (-fPIC) generates a gobal offset
		# table "by hand". Be sure to copy it over.
		# Note that this label and all entries in the table should actually
		# go into the .got2 section, but it isn't easy to distinguish them
		# from other constant literals (.LC\d+), so we just put everything
		# in .rodata.
	    $chk[++$i]   = $_;
	    $chkcat[$i]  = 'literal';
	    $chksymb[$i] = 'LCTOC1';
834
835
836
837
838
839
	} else { # simple line (duplicated at the top)

	    $chk[$i] .= $_;
	}
    }
    $numchks = $#chk + 1;
840
841
    $chk[$numchks] = ''; # We might push .note.GNU-stack into this
    $chkcat[$numchks] = 'verbatim'; # If we do, write it straight back out
842

843
844
845
    # open CHUNKS, ">/tmp/chunks1" or die "Cannot open /tmp/chunks1: $!\n";
    # for (my $i = 0; $i < @chk; ++$i) { print CHUNKS "======= $i =======\n", $chk[$i] }
    # close CHUNKS;
ken's avatar
ken committed
846

847
848
849
850
851
852
853
854
    # the division into chunks is imperfect;
    # we throw some things over the fence into the next
    # chunk.
    #
    # also, there are things we would like to know
    # about the whole module before we start spitting
    # output.

sof's avatar
sof committed
855
    local($FIRST_MANGLABLE) = ($TargetPlatform =~ /^(alpha-|hppa|mips-)/) ? 1 : 0;
ken's avatar
ken committed
856
    local($FIRST_TOSSABLE ) = ($TargetPlatform =~ /^(hppa|mips-)/) ? 1 : 0;
857
858
859
860
861

#   print STDERR "first chunk to mangle: $FIRST_MANGLABLE\n";

    # Alphas: NB: we start meddling at chunk 1, not chunk 0
    # The first ".rdata" is quite magical; as of GCC 2.7.x, it
ken's avatar
ken committed
862
    # spits a ".quad 0" in after the very first ".rdata"; we
863
864
865
866
867
    # detect this special case (tossing the ".quad 0")!
    local($magic_rdata_seen) = 0;
  
    # HPPAs, MIPSen: also start medding at chunk 1

ken's avatar
ken committed
868
    for ($i = $FIRST_TOSSABLE; $i < $numchks; $i++) {
869
870
871
872
	$c = $chk[$i]; # convenience copy

#	print STDERR "\nCHK $i (BEFORE) (",$chkcat[$i],"):\n", $c;

873
874
	# toss all prologue stuff; HPPA is pretty weird
	# (see elsewhere)
875
876
	$c = &hppa_mash_prologue($c) if $TargetPlatform =~ /^hppa-/;

877
878
879
	undef $ia64_locnum;
	undef $ia64_outnum;

880
881
882
883
	# be slightly paranoid to make sure there's
	# nothing surprising in there
	if ( $c =~ /--- BEGIN ---/ ) {
	    if (($p, $r) = split(/--- BEGIN ---/, $c)) {
884

885
886
887
888
		# remove junk whitespace around the split point
		$p =~ s/\t+$//;
		$r =~ s/^\s*\n//;

889
		if ($TargetPlatform =~ /^i386-/) {
890
891
892
893
894
895
		    if ($p =~ /^\tsubl\s+\$(\d+),\s*\%esp\n/) {
			if ($1 >= 8192) {
			    die "Error: reserved stack space exceeded!\n  Possible workarounds: compile with -fasm, or try another version of gcc.\n"
			}
		    }

896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
		# gcc 3.4.3 puts this kind of stuff in the prologue, eg.
		# when compiling PrimOps.cmm with -optc-O2:
		#        xorl    %ecx, %ecx
		#        xorl    %edx, %edx
		#        movl    %ecx, 16(%esp)
		#        movl    %edx, 20(%esp)
		# but then the code of the function doesn't assume
		# anything about the contnets of these stack locations.
		# I think it's to do with the use of inline functions for
		# PK_Word64() and friends, where gcc is initialising the
		# contents of the struct to zero, and failing to optimise
		# away the initialisation.  Let's live dangerously and
		# discard these initalisations.

		    $p =~ s/^\tpushl\s+\%e(di|si|bx)\n//g;
		    $p =~ s/^\txorl\s+\%e(ax|cx|dx),\s*\%e(ax|cx|dx)\n//g;
		    $p =~ s/^\tmovl\s+\%e(ax|cx|dx|si|di),\s*\d*\(\%esp\)\n//g;
		    $p =~ s/^\tmovl\s+\$\d+,\s*\d*\(\%esp\)\n//g;
914
		    $p =~ s/^\tsubl\s+\$\d+,\s*\%esp\n//;
915
                    $p =~ s/^\tmovl\s+\$\d+,\s*\%eax\n\tcall\s+__alloca\n// if ($TargetPlatform =~ /^.*-(cygwin32|mingw32)/);
916

917
918
919
920
921
922
923
924
925
926
927
928
929
930
                    if ($TargetPlatform =~ /^i386-apple-darwin/) {
                        $pcrel_label = $p;
                        $pcrel_label =~ s/(.|\n)*^(\"?L\d+\$pb\"?):\n(.|\n)*/$2/ or $pcrel_label = "";
                        $pcrel_reg = $p;
                        $pcrel_reg =~ s/(.|\n)*.*___i686\.get_pc_thunk\.([abcd]x)\n(.|\n)*/$2/ or $pcrel_reg = "";
                        $p =~ s/^\s+call\s+___i686\.get_pc_thunk\..x//;
                        $p =~ s/^\"?L\d+\$pb\"?:\n//;

                        if ($pcrel_reg eq "bx") {
                            # Bad gcc. Goes and uses %ebx, our BaseReg, for PIC. Bad gcc.
                            die "Darwin/x86: -fPIC -via-C doesn't work yet, use -fasm. Aborting."
                        }
                    }

931
932
933
934
935
		} elsif ($TargetPlatform =~ /^x86_64-/) {
		    $p =~ s/^\tpushq\s+\%r(bx|bp|12|13|14)\n//g;
		    $p =~ s/^\tmovq\s+\%r(bx|bp|12|13|14),\s*\d*\(\%rsp\)\n//g;
		    $p =~ s/^\tsubq\s+\$\d+,\s*\%rsp\n//;

936
937
		} elsif ($TargetPlatform =~ /^ia64-/) {
		    $p =~ s/^\t\.prologue .*\n//;
938
939
940
941
942
943

		    # Record the number of local and out registers for register relocation later
		    $p =~ s/^\t\.save ar\.pfs, r\d+\n\talloc r\d+ = ar\.pfs, 0, (\d+), (\d+), 0\n//;
		    $ia64_locnum = $1;
		    $ia64_outnum = $2;

944
945
		    $p =~ s/^\t\.fframe \d+\n\tadds r12 = -\d+, r12\n//;
		    $p =~ s/^\t\.save rp, r\d+\n\tmov r\d+ = b0\n//;
Ian Lynagh's avatar
Ian Lynagh committed
946

947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
		    # Ignore save/restore of these registers; they're taken
		    # care of in StgRun()
		    $p =~ s/^\t\.save ar\.lc, r\d+\n//;
		    $p =~ s/^\t\.save pr, r\d+\n//;
		    $p =~ s/^\tmov r\d+ = ar\.lc\n//;
		    $p =~ s/^\tmov r\d+ = pr\n//;

		    # Remove .proc and .body directives
		    $p =~ s/^\t\.proc [a-zA-Z0-9_.]+#\n//;
            	    $p =~ s/^\t\.body\n//;

            	    # If there's a label, move it to the body
            	    if ($p =~ /^[a-zA-Z0-9.]+:\n/) {
            	        $p = $` . $';
            	        $r = $& . $r;
            	      }

            	    # Remove floating-point spill instructions.
965
966
            	    # Only fp registers 2-5 and 16-23 are saved by the runtime.
            	    if ($p =~ s/^\tstf\.spill \[r1[4-9]\] = f([2-5]|1[6-9]|2[0-3])(, [0-9]+)?\n//g) {
967
968
969
970
971
            	        # Being paranoid, only try to remove these if we saw a
			# spill operation.
                        $p =~ s/^\tmov r1[4-9] = r12\n//;
                        $p =~ s/^\tadds r1[4-9] = -[0-9]+, r12\n//g;
                        $p =~ s/^\t\.save\.f 0x[0-9a-fA-F]\n//g;
972
                        $p =~ s/^\t\.save\.gf 0x0, 0x[0-9a-fA-F]+\n//g;
973
974
975
976
            	    }

            	    $p =~ s/^\tnop(?:\.[mifb])?\s+\d+\n//g; # remove nop instructions
            	    $p =~ s/^\t\.(mii|mmi|mfi)\n//g;    # bundling is no longer sensible
977
978
		    $p =~ s/^\t;;\n//g;		# discard stops
		    $p =~ s/^\t\/\/.*\n//g;	# gcc inserts timings in // comments
979

980
981
982
983
984
985
               	    # GCC 3.3 saves r1 in the prologue, move this to the body
		    # (Does this register get restored anywhere?)
               	    if ($p =~ /^\tmov r\d+ = r1\n/) {
               	      $p = $` . $';
               	      $r = $& . $r;
               	    }
986
987
		} elsif ($TargetPlatform =~ /^m68k-/) {
		    $p =~ s/^\tlink a6,#-?\d.*\n//;
sof's avatar
sof committed
988
989
990
991
992
 		    $p =~ s/^\tpea a6@\n\tmovel sp,a6\n//;    
 				# The above showed up in the asm code,
 				# so I added it here.
 				# I hope it's correct.
 				# CaS
993
994
995
996
997
998
999
1000
		    $p =~ s/^\tmovel d2,sp\@-\n//;
		    $p =~ s/^\tmovel d5,sp\@-\n//; # SMmark.* only?
		    $p =~ s/^\tmoveml \#0x[0-9a-f]+,sp\@-\n//; # SMmark.* only?
		} elsif ($TargetPlatform =~ /^mips-/) {
		    # the .frame/.mask/.fmask that we use is the same
		    # as that produced by GCC for miniInterpret; this
		    # gives GDB some chance of figuring out what happened
		    $FRAME = "\t.frame\t\$sp,2168,\$31\n\t.mask\t0x90000000,-4\n\t.fmask\t0x00000000,0\n";
For faster browsing, not all history is shown. View entire blame