-----------------------------------------------------------------------------
--
-- Generating machine code (instruction selection)
--
-- (c) The University of Glasgow 1996-2004
--
-----------------------------------------------------------------------------

-- This is a big module, but, if you pay attention to
-- (a) the sectioning, (b) the type signatures, and
-- (c) the #if blah_TARGET_ARCH things, the
-- structure should not be too overwhelming.

module X86.CodeGen (
        cmmTopCodeGen,
        generateJumpTableForInstr,
        InstrBlock
)

where

#include "HsVersions.h"
#include "nativeGen/NCG.h"
#include "../includes/MachDeps.h"

-- NCG stuff:
import X86.Instr
import X86.Cond
import X86.Regs
import X86.RegInfo
import Instruction
import PIC
import NCGMonad
import Size
import Reg
import Platform

-- Our intermediate code:
import BasicTypes
import BlockId
import PprCmm           ()
import OldCmm
import OldPprCmm        ()
import CLabel

-- The rest:
import StaticFlags      ( opt_PIC )
import ForeignCall      ( CCallConv(..) )
import OrdList
import Outputable
import Unique
import FastString
import FastBool         ( isFastTrue )
import Constants        ( wORD_SIZE )
import DynFlags

import Control.Monad
import Data.Bits
import Data.Int
import Data.Maybe
import Data.Word

sse2Enabled :: NatM Bool
sse2Enabled = do
  dflags <- getDynFlagsNat
  case platformArch (targetPlatform dflags) of
      ArchX86_64 -> -- SSE2 is fixed on for x86_64.  It would be
                    -- possible to make it optional, but we'd need to
                    -- fix at least the foreign call code where the
                    -- calling convention specifies the use of xmm regs,
                    -- and possibly other places.
                    return True
      ArchX86    -> return (dopt Opt_SSE2 dflags)
      _          -> panic "sse2Enabled: Not an X86* arch"

if_sse2 :: NatM a -> NatM a -> NatM a
if_sse2 sse2 x87 = do
  b <- sse2Enabled
  if b then sse2 else x87

cmmTopCodeGen
        :: RawCmmTop
        -> NatM [NatCmmTop (Alignment, CmmStatics) Instr]

cmmTopCodeGen (CmmProc info lab (ListGraph blocks)) = do
  (nat_blocks,statics) <- mapAndUnzipM basicBlockCodeGen blocks
  picBaseMb <- getPicBaseMaybeNat
  dflags <- getDynFlagsNat
  let proc = CmmProc info lab (ListGraph $ concat nat_blocks)
      tops = proc : concat statics
      os   = platformOS $ targetPlatform dflags

  case picBaseMb of
      Just picBase -> initializePicBase_x86 ArchX86 os picBase tops
      Nothing -> return tops

cmmTopCodeGen (CmmData sec dat) = do
  return [CmmData sec (1, dat)]  -- no translation, we just use CmmStatic


basicBlockCodeGen
        :: CmmBasicBlock
        -> NatM ( [NatBasicBlock Instr]
                , [NatCmmTop (Alignment, CmmStatics) Instr])

basicBlockCodeGen (BasicBlock id stmts) = do
  instrs <- stmtsToInstrs stmts
  -- code generation may introduce new basic block boundaries, which
  -- are indicated by the NEWBLOCK instruction.  We must split up the
  -- instruction stream into basic blocks again.  Also, we extract
  -- LDATAs here too.
  let
        (top,other_blocks,statics) = foldrOL mkBlocks ([],[],[]) instrs

        mkBlocks (NEWBLOCK id) (instrs,blocks,statics)
          = ([], BasicBlock id instrs : blocks, statics)
        mkBlocks (LDATA sec dat) (instrs,blocks,statics)
          = (instrs, blocks, CmmData sec dat:statics)
        mkBlocks instr (instrs,blocks,statics)
          = (instr:instrs, blocks, statics)
  -- in
  return (BasicBlock id top : other_blocks, statics)


stmtsToInstrs :: [CmmStmt] -> NatM InstrBlock
stmtsToInstrs stmts
   = do instrss <- mapM stmtToInstrs stmts
        return (concatOL instrss)


stmtToInstrs :: CmmStmt -> NatM InstrBlock
stmtToInstrs stmt = do
  dflags <- getDynFlagsNat
  let is32Bit = target32Bit (targetPlatform dflags)
  case stmt of
    CmmNop         -> return nilOL
    CmmComment s   -> return (unitOL (COMMENT s))

    CmmAssign reg src
      | isFloatType ty         -> assignReg_FltCode size reg src
      | is32Bit && isWord64 ty -> assignReg_I64Code      reg src
      | otherwise              -> assignReg_IntCode size reg src
        where ty = cmmRegType reg
              size = cmmTypeSize ty

    CmmStore addr src
      | isFloatType ty         -> assignMem_FltCode size addr src
      | is32Bit && isWord64 ty -> assignMem_I64Code      addr src
      | otherwise              -> assignMem_IntCode size addr src
        where ty = cmmExprType src
              size = cmmTypeSize ty

    CmmCall target result_regs args _ _
       -> genCCall target result_regs args

    CmmBranch id          -> genBranch id
    CmmCondBranch arg id  -> genCondJump id arg
    CmmSwitch arg ids     -> genSwitch arg ids
    CmmJump arg _         -> genJump arg
    CmmReturn _           ->
      panic "stmtToInstrs: return statement should have been cps'd away"


--------------------------------------------------------------------------------
-- | 'InstrBlock's are the insn sequences generated by the insn selectors.
--      They are really trees of insns to facilitate fast appending, where a
--      left-to-right traversal yields the insns in the correct order.
--
type InstrBlock
        = OrdList Instr
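
--      A small sketch (not part of this module) of how such a block is
--      typically assembled with the OrdList combinators used below;
--      'moveWithNote' is a hypothetical helper, for illustration only:
--
--      > moveWithNote :: Reg -> Reg -> InstrBlock
--      > moveWithNote src dst =
--      >     unitOL (COMMENT (fsLit "reg-to-reg move"))
--      >     `snocOL` MOV II32 (OpReg src) (OpReg dst)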


-- | Condition codes passed up the tree.
--
data CondCode
        = CondCode Bool Cond InstrBlock


-- | a.k.a "Register64"
--      Reg is the lower 32-bit temporary which contains the result.
--      Use getHiVRegFromLo to find the other VRegUnique.
--
--      Rules of this simplified insn selection game are therefore that
--      the returned Reg may be modified
--
data ChildCode64
   = ChildCode64
        InstrBlock
        Reg


-- | Registers passed up the tree.  If the stix code forces the register
--      to live in a pre-decided machine register, it comes out as @Fixed@;
--      otherwise, it comes out as @Any@, and the parent can decide which
--      register to put it in.
--
data Register
        = Fixed Size Reg InstrBlock
        | Any   Size (Reg -> InstrBlock)
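--
--      For example, the constant-zero case of getRegister' below returns
--      an @Any@ whose code function just emits @XOR dst,dst@ into
--      whichever destination register the caller picks.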


swizzleRegisterRep :: Register -> Size -> Register
swizzleRegisterRep (Fixed _ reg code) size = Fixed size reg code
swizzleRegisterRep (Any _ codefn)     size = Any   size codefn


-- | Grab the Reg for a CmmReg
getRegisterReg :: Bool -> CmmReg -> Reg

getRegisterReg use_sse2 (CmmLocal (LocalReg u pk))
  = let sz = cmmTypeSize pk in
    if isFloatSize sz && not use_sse2
       then RegVirtual (mkVirtualReg u FF80)
       else RegVirtual (mkVirtualReg u sz)

getRegisterReg _ (CmmGlobal mid)
  = case globalRegMaybe mid of
        Just reg -> RegReal $ reg
        Nothing  -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid)
        -- By this stage, the only MagicIds remaining should be the
        -- ones which map to a real machine register on this
        -- platform.  Hence ...


-- | Memory addressing modes passed up the tree.
data Amode
        = Amode AddrMode InstrBlock

{-
Now, given a tree (the argument to an CmmLoad) that references memory,
produce a suitable addressing mode.

A Rule of the Game (tm) for Amodes: use of the addr bit must
immediately follow use of the code part, since the code part puts
values in registers which the addr then refers to.  So you can't put
anything in between, lest it overwrite some of those registers.  If
you need to do some other computation between the code part and use of
the addr bit, first store the effective address from the amode in a
temporary, then do the other computation, and then use the temporary:

    code
    LEA amode, tmp
    ... other computation ...
    ... (tmp) ...
-}


-- | Check whether an integer will fit in 32 bits.
--      A CmmInt is intended to be truncated to the appropriate
--      number of bits, so here we truncate it to Int64.  This is
--      important because e.g. -1 as a CmmInt might be either
--      -1 or 18446744073709551615.
--
is32BitInteger :: Integer -> Bool
is32BitInteger i = i64 <= 0x7fffffff && i64 >= -0x80000000
  where i64 = fromIntegral i :: Int64
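
-- For instance, (-1) at width W64 may reach us as 18446744073709551615;
-- truncating to Int64 maps it back to (-1), which does fit in 32 bits:
--
-- > is32BitInteger (-1)                   == True
-- > is32BitInteger 18446744073709551615   == True   -- truncates to -1
-- > is32BitInteger 0x80000000             == False  -- just past 0x7fffffff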


-- | Convert a BlockId to some CmmStatic data
jumpTableEntry :: Maybe BlockId -> CmmStatic
jumpTableEntry Nothing = CmmStaticLit (CmmInt 0 wordWidth)
jumpTableEntry (Just blockid) = CmmStaticLit (CmmLabel blockLabel)
    where blockLabel = mkAsmTempLabel (getUnique blockid)


-- -----------------------------------------------------------------------------
-- General things for putting together code sequences

-- Expand CmmRegOff.  ToDo: should we do it this way around, or convert
-- CmmExprs into CmmRegOff?
mangleIndexTree :: CmmReg -> Int -> CmmExpr
mangleIndexTree reg off
  = CmmMachOp (MO_Add width) [CmmReg reg, CmmLit (CmmInt (fromIntegral off) width)]
  where width = typeWidth (cmmRegType reg)
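
-- For example, @CmmRegOff r 4@ becomes
-- @CmmMachOp (MO_Add w) [CmmReg r, CmmLit (CmmInt 4 w)]@, where @w@ is
-- the width of @r@.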

-- | The dual to getAnyReg: compute an expression into a register, but
--      we don't mind which one it is.
getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
getSomeReg expr = do
  r <- getRegister expr
  case r of
    Any rep code -> do
        tmp <- getNewRegNat rep
        return (tmp, code tmp)
    Fixed _ reg code ->
        return (reg, code)


assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock
assignMem_I64Code addrTree valueTree = do
  Amode addr addr_code <- getAmode addrTree
  ChildCode64 vcode rlo <- iselExpr64 valueTree
  let
        rhi = getHiVRegFromLo rlo

        -- Little-endian store
        mov_lo = MOV II32 (OpReg rlo) (OpAddr addr)
        mov_hi = MOV II32 (OpReg rhi) (OpAddr (fromJust (addrOffset addr 4)))
  -- in
  return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)


assignReg_I64Code :: CmmReg  -> CmmExpr -> NatM InstrBlock
assignReg_I64Code (CmmLocal (LocalReg u_dst _)) valueTree = do
   ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
   let
         r_dst_lo = RegVirtual $ mkVirtualReg u_dst II32
         r_dst_hi = getHiVRegFromLo r_dst_lo
         r_src_hi = getHiVRegFromLo r_src_lo
         mov_lo = MOV II32 (OpReg r_src_lo) (OpReg r_dst_lo)
         mov_hi = MOV II32 (OpReg r_src_hi) (OpReg r_dst_hi)
   -- in
   return (
        vcode `snocOL` mov_lo `snocOL` mov_hi
     )

assignReg_I64Code _ _
   = panic "assignReg_I64Code(i386): invalid lvalue"


iselExpr64        :: CmmExpr -> NatM ChildCode64
iselExpr64 (CmmLit (CmmInt i _)) = do
  (rlo,rhi) <- getNewRegPairNat II32
  let
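        -- split the 64-bit literal into 32-bit halves: r is the low
        -- word, q the high word, each loaded into its register with MOV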
        r = fromIntegral (fromIntegral i :: Word32)
        q = fromIntegral (fromIntegral (i `shiftR` 32) :: Word32)
        code = toOL [
                MOV II32 (OpImm (ImmInteger r)) (OpReg rlo),
                MOV II32 (OpImm (ImmInteger q)) (OpReg rhi)
                ]
  -- in
  return (ChildCode64 code rlo)

iselExpr64 (CmmLoad addrTree ty) | isWord64 ty = do
   Amode addr addr_code <- getAmode addrTree
   (rlo,rhi) <- getNewRegPairNat II32
   let
        mov_lo = MOV II32 (OpAddr addr) (OpReg rlo)
        mov_hi = MOV II32 (OpAddr (fromJust (addrOffset addr 4))) (OpReg rhi)
   -- in
   return (
            ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
                        rlo
     )

iselExpr64 (CmmReg (CmmLocal (LocalReg vu ty))) | isWord64 ty
   = return (ChildCode64 nilOL (RegVirtual $ mkVirtualReg vu II32))

-- we handle addition, but rather badly
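-- (the scheme below is the usual double-word one: ADD the low halves,
-- then ADC the high halves so the carry propagates)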
iselExpr64 (CmmMachOp (MO_Add _) [e1, CmmLit (CmmInt i _)]) = do
   ChildCode64 code1 r1lo <- iselExpr64 e1
   (rlo,rhi) <- getNewRegPairNat II32
   let
        r = fromIntegral (fromIntegral i :: Word32)
        q = fromIntegral (fromIntegral (i `shiftR` 32) :: Word32)
        r1hi = getHiVRegFromLo r1lo
        code =  code1 `appOL`
                toOL [ MOV II32 (OpReg r1lo) (OpReg rlo),
                       ADD II32 (OpImm (ImmInteger r)) (OpReg rlo),
                       MOV II32 (OpReg r1hi) (OpReg rhi),
                       ADC II32 (OpImm (ImmInteger q)) (OpReg rhi) ]
   -- in
   return (ChildCode64 code rlo)

iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
   ChildCode64 code1 r1lo <- iselExpr64 e1
   ChildCode64 code2 r2lo <- iselExpr64 e2
   (rlo,rhi) <- getNewRegPairNat II32
   let
        r1hi = getHiVRegFromLo r1lo
        r2hi = getHiVRegFromLo r2lo
        code =  code1 `appOL`
                code2 `appOL`
                toOL [ MOV II32 (OpReg r1lo) (OpReg rlo),
                       ADD II32 (OpReg r2lo) (OpReg rlo),
                       MOV II32 (OpReg r1hi) (OpReg rhi),
                       ADC II32 (OpReg r2hi) (OpReg rhi) ]
   -- in
   return (ChildCode64 code rlo)

iselExpr64 (CmmMachOp (MO_UU_Conv _ W64) [expr]) = do
     fn <- getAnyReg expr
     r_dst_lo <-  getNewRegNat II32
     let r_dst_hi = getHiVRegFromLo r_dst_lo
         code = fn r_dst_lo
     return (
             ChildCode64 (code `snocOL`
                          MOV II32 (OpImm (ImmInt 0)) (OpReg r_dst_hi))
                          r_dst_lo
            )

iselExpr64 expr
   = pprPanic "iselExpr64(i386)" (ppr expr)


--------------------------------------------------------------------------------
getRegister :: CmmExpr -> NatM Register
getRegister e = do dflags <- getDynFlagsNat
                   getRegister' (target32Bit (targetPlatform dflags)) e

getRegister' :: Bool -> CmmExpr -> NatM Register

getRegister' is32Bit (CmmReg reg)
  = case reg of
        CmmGlobal PicBaseReg
         | is32Bit ->
            -- on x86_64, we have %rip for PicBaseReg, but it's not
            -- a full-featured register, it can only be used for
            -- rip-relative addressing.
            do reg' <- getPicBaseNat archWordSize
               return (Fixed archWordSize reg' nilOL)
        _ ->
            do use_sse2 <- sse2Enabled
               let
                 sz = cmmTypeSize (cmmRegType reg)
                 size | not use_sse2 && isFloatSize sz = FF80
                      | otherwise                      = sz
               --
               return (Fixed size (getRegisterReg use_sse2 reg) nilOL)


getRegister' is32Bit (CmmRegOff r n)
  = getRegister' is32Bit $ mangleIndexTree r n

-- for 32-bit architectures, support some 64 -> 32 bit conversions:
-- TO_W_(x), TO_W_(x >> 32)

getRegister' is32Bit (CmmMachOp (MO_UU_Conv W64 W32)
                     [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]])
 | is32Bit = do
  ChildCode64 code rlo <- iselExpr64 x
  return $ Fixed II32 (getHiVRegFromLo rlo) code

getRegister' is32Bit (CmmMachOp (MO_SS_Conv W64 W32)
                     [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]])
 | is32Bit = do
  ChildCode64 code rlo <- iselExpr64 x
  return $ Fixed II32 (getHiVRegFromLo rlo) code

getRegister' is32Bit (CmmMachOp (MO_UU_Conv W64 W32) [x])
 | is32Bit = do
  ChildCode64 code rlo <- iselExpr64 x
  return $ Fixed II32 rlo code

getRegister' is32Bit (CmmMachOp (MO_SS_Conv W64 W32) [x])
 | is32Bit = do
  ChildCode64 code rlo <- iselExpr64 x
  return $ Fixed II32 rlo code

getRegister' _ (CmmLit lit@(CmmFloat f w)) =
  if_sse2 float_const_sse2 float_const_x87
 where
  float_const_sse2
    | f == 0.0 = do
      let
          size = floatSize w
          code dst = unitOL  (XOR size (OpReg dst) (OpReg dst))
        -- I don't know why there are xorpd, xorps, and pxor instructions.
        -- They all appear to do the same thing --SDM
      return (Any size code)

    | otherwise = do
      Amode addr code <- memConstant (widthInBytes w) lit
      loadFloatAmode True w addr code

  float_const_x87 = case w of
    W64
      | f == 0.0 ->
        let code dst = unitOL (GLDZ dst)
        in  return (Any FF80 code)

      | f == 1.0 ->
        let code dst = unitOL (GLD1 dst)
        in  return (Any FF80 code)

    _otherwise -> do
      Amode addr code <- memConstant (widthInBytes w) lit
      loadFloatAmode False w addr code

-- catch simple cases of zero- or sign-extended load
getRegister' _ (CmmMachOp (MO_UU_Conv W8 W32) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVZxL II8) addr
  return (Any II32 code)

getRegister' _ (CmmMachOp (MO_SS_Conv W8 W32) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVSxL II8) addr
  return (Any II32 code)

getRegister' _ (CmmMachOp (MO_UU_Conv W16 W32) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVZxL II16) addr
  return (Any II32 code)

getRegister' _ (CmmMachOp (MO_SS_Conv W16 W32) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVSxL II16) addr
  return (Any II32 code)

-- catch simple cases of zero- or sign-extended load
getRegister' is32Bit (CmmMachOp (MO_UU_Conv W8 W64) [CmmLoad addr _])
 | not is32Bit = do
  code <- intLoadCode (MOVZxL II8) addr
  return (Any II64 code)

getRegister' is32Bit (CmmMachOp (MO_SS_Conv W8 W64) [CmmLoad addr _])
 | not is32Bit = do
  code <- intLoadCode (MOVSxL II8) addr
  return (Any II64 code)

getRegister' is32Bit (CmmMachOp (MO_UU_Conv W16 W64) [CmmLoad addr _])
 | not is32Bit = do
  code <- intLoadCode (MOVZxL II16) addr
  return (Any II64 code)

getRegister' is32Bit (CmmMachOp (MO_SS_Conv W16 W64) [CmmLoad addr _])
 | not is32Bit = do
  code <- intLoadCode (MOVSxL II16) addr
  return (Any II64 code)

getRegister' is32Bit (CmmMachOp (MO_UU_Conv W32 W64) [CmmLoad addr _])
 | not is32Bit = do
  code <- intLoadCode (MOV II32) addr -- 32-bit loads zero-extend
  return (Any II64 code)

getRegister' is32Bit (CmmMachOp (MO_SS_Conv W32 W64) [CmmLoad addr _])
 | not is32Bit = do
  code <- intLoadCode (MOVSxL II32) addr
  return (Any II64 code)

getRegister' is32Bit (CmmMachOp (MO_Add W64) [CmmReg (CmmGlobal PicBaseReg),
                                     CmmLit displacement])
 | not is32Bit = do
      return $ Any II64 (\dst -> unitOL $
        LEA II64 (OpAddr (ripRel (litToImm displacement))) (OpReg dst))

getRegister' is32Bit (CmmMachOp mop [x]) = do -- unary MachOps
    sse2 <- sse2Enabled
    case mop of
      MO_F_Neg w
         | sse2      -> sse2NegCode w x
         | otherwise -> trivialUFCode FF80 (GNEG FF80) x

      MO_S_Neg w -> triv_ucode NEGI (intSize w)
      MO_Not w   -> triv_ucode NOT  (intSize w)

      -- Nop conversions
      MO_UU_Conv W32 W8  -> toI8Reg  W32 x
      MO_SS_Conv W32 W8  -> toI8Reg  W32 x
      MO_UU_Conv W16 W8  -> toI8Reg  W16 x
      MO_SS_Conv W16 W8  -> toI8Reg  W16 x
      MO_UU_Conv W32 W16 -> toI16Reg W32 x
      MO_SS_Conv W32 W16 -> toI16Reg W32 x

      MO_UU_Conv W64 W32 | not is32Bit -> conversionNop II64 x
      MO_SS_Conv W64 W32 | not is32Bit -> conversionNop II64 x
      MO_UU_Conv W64 W16 | not is32Bit -> toI16Reg W64 x
      MO_SS_Conv W64 W16 | not is32Bit -> toI16Reg W64 x
      MO_UU_Conv W64 W8  | not is32Bit -> toI8Reg  W64 x
      MO_SS_Conv W64 W8  | not is32Bit -> toI8Reg  W64 x

      MO_UU_Conv rep1 rep2 | rep1 == rep2 -> conversionNop (intSize rep1) x
      MO_SS_Conv rep1 rep2 | rep1 == rep2 -> conversionNop (intSize rep1) x

      -- widenings
      MO_UU_Conv W8  W32 -> integerExtend W8  W32 MOVZxL x
      MO_UU_Conv W16 W32 -> integerExtend W16 W32 MOVZxL x
      MO_UU_Conv W8  W16 -> integerExtend W8  W16 MOVZxL x

      MO_SS_Conv W8  W32 -> integerExtend W8  W32 MOVSxL x
      MO_SS_Conv W16 W32 -> integerExtend W16 W32 MOVSxL x
      MO_SS_Conv W8  W16 -> integerExtend W8  W16 MOVSxL x

      MO_UU_Conv W8  W64 | not is32Bit -> integerExtend W8  W64 MOVZxL x
      MO_UU_Conv W16 W64 | not is32Bit -> integerExtend W16 W64 MOVZxL x
      MO_UU_Conv W32 W64 | not is32Bit -> integerExtend W32 W64 MOVZxL x
      MO_SS_Conv W8  W64 | not is32Bit -> integerExtend W8  W64 MOVSxL x
      MO_SS_Conv W16 W64 | not is32Bit -> integerExtend W16 W64 MOVSxL x
      MO_SS_Conv W32 W64 | not is32Bit -> integerExtend W32 W64 MOVSxL x
        -- for 32-to-64 bit zero extension, amd64 uses an ordinary movl.
        -- However, we don't want the register allocator to throw it
        -- away as an unnecessary reg-to-reg move, so we keep it in
        -- the form of a movzl and print it as a movl later.

      MO_FF_Conv W32 W64
        | sse2      -> coerceFP2FP W64 x
        | otherwise -> conversionNop FF80 x

      MO_FF_Conv W64 W32 -> coerceFP2FP W32 x

      MO_FS_Conv from to -> coerceFP2Int from to x
      MO_SF_Conv from to -> coerceInt2FP from to x

      _other -> pprPanic "getRegister" (pprMachOp mop)
   where
        triv_ucode :: (Size -> Operand -> Instr) -> Size -> NatM Register
        triv_ucode instr size = trivialUCode size (instr size) x

        -- signed or unsigned extension.
        integerExtend :: Width -> Width
                      -> (Size -> Operand -> Operand -> Instr)
                      -> CmmExpr -> NatM Register
        integerExtend from to instr expr = do
            (reg,e_code) <- if from == W8 then getByteReg expr
                                          else getSomeReg expr
            let
                code dst =
                  e_code `snocOL`
                  instr (intSize from) (OpReg reg) (OpReg dst)
            return (Any (intSize to) code)

        toI8Reg :: Width -> CmmExpr -> NatM Register
        toI8Reg new_rep expr
            = do codefn <- getAnyReg expr
                 return (Any (intSize new_rep) codefn)
                -- HACK: use getAnyReg to get a byte-addressable register.
                -- If the source was a Fixed register, this will add the
                -- mov instruction to put it into the desired destination.
                -- We're assuming that the destination won't be a fixed
                -- non-byte-addressable register; it won't be, because all
                -- fixed registers are word-sized.

        toI16Reg = toI8Reg -- for now

        conversionNop :: Size -> CmmExpr -> NatM Register
        conversionNop new_size expr
            = do e_code <- getRegister' is32Bit expr
                 return (swizzleRegisterRep e_code new_size)


getRegister' _ (CmmMachOp mop [x, y]) = do -- dyadic MachOps
  sse2 <- sse2Enabled
  case mop of
      MO_F_Eq _ -> condFltReg EQQ x y
      MO_F_Ne _ -> condFltReg NE  x y
      MO_F_Gt _ -> condFltReg GTT x y
      MO_F_Ge _ -> condFltReg GE  x y
      MO_F_Lt _ -> condFltReg LTT x y
      MO_F_Le _ -> condFltReg LE  x y

      MO_Eq _   -> condIntReg EQQ x y
      MO_Ne _   -> condIntReg NE  x y

      MO_S_Gt _ -> condIntReg GTT x y
      MO_S_Ge _ -> condIntReg GE  x y
      MO_S_Lt _ -> condIntReg LTT x y
      MO_S_Le _ -> condIntReg LE  x y

      MO_U_Gt _ -> condIntReg GU  x y
      MO_U_Ge _ -> condIntReg GEU x y
      MO_U_Lt _ -> condIntReg LU  x y
      MO_U_Le _ -> condIntReg LEU x y

      MO_F_Add w  | sse2      -> trivialFCode_sse2 w ADD  x y
                  | otherwise -> trivialFCode_x87    GADD x y
      MO_F_Sub w  | sse2      -> trivialFCode_sse2 w SUB  x y
                  | otherwise -> trivialFCode_x87    GSUB x y
      MO_F_Quot w | sse2      -> trivialFCode_sse2 w FDIV x y
                  | otherwise -> trivialFCode_x87    GDIV x y
      MO_F_Mul w  | sse2      -> trivialFCode_sse2 w MUL x y
                  | otherwise -> trivialFCode_x87    GMUL x y

      MO_Add rep -> add_code rep x y
      MO_Sub rep -> sub_code rep x y

      MO_S_Quot rep -> div_code rep True  True  x y
      MO_S_Rem  rep -> div_code rep True  False x y
      MO_U_Quot rep -> div_code rep False True  x y
      MO_U_Rem  rep -> div_code rep False False x y

      MO_S_MulMayOflo rep -> imulMayOflo rep x y

      MO_Mul rep -> triv_op rep IMUL
      MO_And rep -> triv_op rep AND
      MO_Or  rep -> triv_op rep OR
      MO_Xor rep -> triv_op rep XOR

        {- Shift ops on x86s have constraints on their source, it
           either has to be Imm, CL or 1
            => trivialCode is not restrictive enough (sigh.)
        -}
      MO_Shl rep   -> shift_code rep SHL x y {-False-}
      MO_U_Shr rep -> shift_code rep SHR x y {-False-}
      MO_S_Shr rep -> shift_code rep SAR x y {-False-}

      _other -> pprPanic "getRegister(x86) - binary CmmMachOp (1)" (pprMachOp mop)
  where
    --------------------
    triv_op width instr = trivialCode width op (Just op) x y
                        where op   = instr (intSize width)

    imulMayOflo :: Width -> CmmExpr -> CmmExpr -> NatM Register
    imulMayOflo rep a b = do
         (a_reg, a_code) <- getNonClobberedReg a
         b_code <- getAnyReg b
         let
             shift_amt  = case rep of
                           W32 -> 31
                           W64 -> 63
                           _ -> panic "shift_amt"

             size = intSize rep
             code = a_code `appOL` b_code eax `appOL`
                        toOL [
                           IMUL2 size (OpReg a_reg),   -- result in %edx:%eax
                           SAR size (OpImm (ImmInt shift_amt)) (OpReg eax),
                                -- sign extend lower part
                           SUB size (OpReg edx) (OpReg eax)
                                -- compare against upper
                           -- eax==0 if high part == sign extended low part
                        ]
         -- in
         return (Fixed size eax code)

    --------------------
    shift_code :: Width
               -> (Size -> Operand -> Operand -> Instr)
               -> CmmExpr
               -> CmmExpr
               -> NatM Register

    {- Case1: shift length as immediate -}
    shift_code width instr x (CmmLit lit) = do
          x_code <- getAnyReg x
          let
               size = intSize width
               code dst
                  = x_code dst `snocOL`
                    instr size (OpImm (litToImm lit)) (OpReg dst)
          -- in
          return (Any size code)

    {- Case2: shift length is complex (non-immediate)
      * y must go in %ecx.
      * we cannot do y first *and* put its result in %ecx, because
        %ecx might be clobbered by x.
      * if we do y second, then x cannot be
        in a clobbered reg.  Also, we cannot clobber x's reg
        with the instruction itself.
      * so we can either:
        - do y first, put its result in a fresh tmp, then copy it to %ecx later
        - do y second and put its result into %ecx.  x gets placed in a fresh
          tmp.  This is likely to be better, because the reg alloc can
          eliminate this reg->reg move here (it won't eliminate the other one,
          because the move is into the fixed %ecx).
    -}
    shift_code width instr x y{-amount-} = do
        x_code <- getAnyReg x
        let size = intSize width
        tmp <- getNewRegNat size
        y_code <- getAnyReg y
        let
           code = x_code tmp `appOL`
                  y_code ecx `snocOL`
                  instr size (OpReg ecx) (OpReg tmp)
        -- in
        return (Fixed size tmp code)

    --------------------
    add_code :: Width -> CmmExpr -> CmmExpr -> NatM Register
    add_code rep x (CmmLit (CmmInt y _))
        | is32BitInteger y = add_int rep x y
    add_code rep x y = trivialCode rep (ADD size) (Just (ADD size)) x y
      where size = intSize rep

    --------------------
    sub_code :: Width -> CmmExpr -> CmmExpr -> NatM Register
    sub_code rep x (CmmLit (CmmInt y _))
        | is32BitInteger (-y) = add_int rep x (-y)
    sub_code rep x y = trivialCode rep (SUB (intSize rep)) Nothing x y

    -- our three-operand add instruction:
    add_int width x y = do
        (x_reg, x_code) <- getSomeReg x
        let
            size = intSize width
            imm = ImmInt (fromInteger y)
            code dst
               = x_code `snocOL`
                 LEA size
                        (OpAddr (AddrBaseIndex (EABaseReg x_reg) EAIndexNone imm))
                        (OpReg dst)
        --
        return (Any size code)

    ----------------------
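    -- x86 division takes its dividend in %edx:%eax: 'widen' fills %edx
    -- (sign-extending with CLTD when signed, zeroing it via XOR when
    -- unsigned), and IDIV/DIV then leave the quotient in %eax and the
    -- remainder in %edx, which is why 'result' picks one or the other.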
    div_code width signed quotient x y = do
           (y_op, y_code) <- getRegOrMem y -- cannot be clobbered
           x_code <- getAnyReg x
           let
             size = intSize width
             widen | signed    = CLTD size
                   | otherwise = XOR size (OpReg edx) (OpReg edx)

             instr | signed    = IDIV
                   | otherwise = DIV

             code = y_code `appOL`
                    x_code eax `appOL`
                    toOL [widen, instr size y_op]

             result | quotient  = eax
                    | otherwise = edx

           -- in
           return (Fixed size result code)


getRegister' _ (CmmLoad mem pk)
  | isFloatType pk
  = do
    Amode addr mem_code <- getAmode mem
    use_sse2 <- sse2Enabled
    loadFloatAmode use_sse2 (typeWidth pk) addr mem_code

getRegister' is32Bit (CmmLoad mem pk)
  | is32Bit && not (isWord64 pk)
  = do
    code <- intLoadCode instr mem
    return (Any size code)
  where
    width = typeWidth pk
    size = intSize width
    instr = case width of
                W8     -> MOVZxL II8
                _other -> MOV size
        -- We always zero-extend 8-bit loads, if we
        -- can't think of anything better.  This is because
        -- we can't guarantee access to an 8-bit variant of every register
        -- (esi and edi don't have 8-bit variants), so to make things
        -- simpler we do our 8-bit arithmetic with full 32-bit registers.

-- Simpler memory load code on x86_64
getRegister' is32Bit (CmmLoad mem pk)
 | not is32Bit
  = do
    code <- intLoadCode (MOV size) mem
    return (Any size code)
  where size = intSize $ typeWidth pk

getRegister' _ (CmmLit (CmmInt 0 width))
  = let
        size = intSize width

        -- x86_64: 32-bit xor is one byte shorter, and zero-extends to 64 bits
        size1 = IF_ARCH_i386( size, case size of II64 -> II32; _ -> size )
        code dst
           = unitOL (XOR size1 (OpReg dst) (OpReg dst))
    in
        return (Any size code)

  -- optimisation for loading small literals on x86_64: take advantage
  -- of the automatic zero-extension from 32 to 64 bits, because the 32-bit
  -- instruction forms are shorter.
getRegister' is32Bit (CmmLit lit)
  | not is32Bit, isWord64 (cmmLitType lit), not (isBigLit lit)
  = let
        imm = litToImm lit
        code dst = unitOL (MOV II32 (OpImm imm) (OpReg dst))
    in
        return (Any II64 code)
  where
   isBigLit (CmmInt i _) = i < 0 || i > 0xffffffff
   isBigLit _ = False
        -- note1: not the same as (not.is32BitLit), because that checks for
        -- signed literals that fit in 32 bits, but we want unsigned
        -- literals here.
        -- note2: all labels are small, because we're assuming the
        -- small memory model (see gcc docs, -mcmodel=small).

getRegister' _ (CmmLit lit)
  = let
        size = cmmTypeSize (cmmLitType lit)
        imm = litToImm lit
        code dst = unitOL (MOV size (OpImm imm) (OpReg dst))
    in
        return (Any size code)

getRegister' _ other = pprPanic "getRegister(x86)" (ppr other)


intLoadCode :: (Operand -> Operand -> Instr) -> CmmExpr
   -> NatM (Reg -> InstrBlock)
intLoadCode instr mem = do
  Amode src mem_code <- getAmode mem
  return (\dst -> mem_code `snocOL` instr (OpAddr src) (OpReg dst))

-- Compute an expression into *any* register, adding the appropriate
-- move instruction if necessary.
getAnyReg :: CmmExpr -> NatM (Reg -> InstrBlock)
getAnyReg expr = do
  r <- getRegister expr
  anyReg r

anyReg :: Register -> NatM (Reg -> InstrBlock)
anyReg (Any _ code)          = return code
anyReg (Fixed rep reg fcode) = return (\dst -> fcode `snocOL` reg2reg rep reg dst)

-- A bit like getSomeReg, but we want a reg that can be byte-addressed.
-- Fixed registers might not be byte-addressable, so we make sure we've
-- got a temporary, inserting an extra reg copy if necessary.
getByteReg :: CmmExpr -> NatM (Reg, InstrBlock)
getByteReg expr = do
  dflags <- getDynFlagsNat
  if target32Bit (targetPlatform dflags)
      then do r <- getRegister expr
              case r of
                Any rep code -> do
                    tmp <- getNewRegNat rep
                    return (tmp, code tmp)
                Fixed rep reg code
                    | isVirtualReg reg -> return (reg,code)
                    | otherwise -> do
                        tmp <- getNewRegNat rep
                        return (tmp, code `snocOL` reg2reg rep reg tmp)
                    -- ToDo: could optimise slightly by checking for
                    -- byte-addressable real registers, but that will
                    -- happen very rarely if at all.
      else getSomeReg expr -- all regs are byte-addressable on x86_64

-- Another variant: this time we want the result in a register that cannot
-- be modified by code to evaluate an arbitrary expression.
getNonClobberedReg :: CmmExpr -> NatM (Reg, InstrBlock)
getNonClobberedReg expr = do
  r <- getRegister expr
  case r of
    Any rep code -> do
        tmp <- getNewRegNat rep
        return (tmp, code tmp)
    Fixed rep reg code
        -- only free regs can be clobbered
        | RegReal (RealRegSingle rr) <- reg
        , isFastTrue (freeReg rr)
        -> do
                tmp <- getNewRegNat rep
                return (tmp, code `snocOL` reg2reg rep reg tmp)
        | otherwise ->
                return (reg, code)

reg2reg :: Size -> Reg -> Reg -> Instr
reg2reg size src dst
  | size == FF80 = GMOV src dst
  | otherwise    = MOV size (OpReg src) (OpReg dst)


--------------------------------------------------------------------------------
getAmode :: CmmExpr -> NatM Amode
getAmode e = do dflags <- getDynFlagsNat
                getAmode' (target32Bit (targetPlatform dflags)) e

getAmode' :: Bool -> CmmExpr -> NatM Amode
getAmode' _ (CmmRegOff r n) = getAmode $ mangleIndexTree r n

getAmode' is32Bit (CmmMachOp (MO_Add W64) [CmmReg (CmmGlobal PicBaseReg),
                                                  CmmLit displacement])
 | not is32Bit
    = return $ Amode (ripRel (litToImm displacement)) nilOL


-- This is all just ridiculous, since it carefully undoes
-- what mangleIndexTree has just done.
getAmode' _ (CmmMachOp (MO_Sub _rep) [x, CmmLit lit@(CmmInt i _)])
  | is32BitLit lit
  -- ASSERT(rep == II32)???
  = do (x_reg, x_code) <- getSomeReg x
       let off = ImmInt (-(fromInteger i))
       return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)

getAmode' _ (CmmMachOp (MO_Add _rep) [x, CmmLit lit])
  | is32BitLit lit
  -- ASSERT(rep == II32)???
  = do (x_reg, x_code) <- getSomeReg x
       let off = litToImm lit
       return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)

-- Turn (lit1 << n  + lit2) into  (lit2 + lit1 << n) so it will be
-- recognised by the next rule.
getAmode' is32Bit (CmmMachOp (MO_Add rep) [a@(CmmMachOp (MO_Shl _) _),
                                  b@(CmmLit _)])
  = getAmode' is32Bit (CmmMachOp (MO_Add rep) [b,a])

getAmode' _ (CmmMachOp (MO_Add _) [x, CmmMachOp (MO_Shl _)
                                        [y, CmmLit (CmmInt shift _)]])
  | shift == 0 || shift == 1 || shift == 2 || shift == 3
  = x86_complex_amode x y shift 0

getAmode' _ (CmmMachOp (MO_Add _)
                [x, CmmMachOp (MO_Add _)
                        [CmmMachOp (MO_Shl _) [y, CmmLit (CmmInt shift _)],
                         CmmLit (CmmInt offset _)]])
  | (shift == 0 || shift == 1 || shift == 2 || shift == 3)
    && is32BitInteger offset
  = x86_complex_amode x y shift offset

getAmode' _ (CmmMachOp (MO_Add _) [x,y])
  = x86_complex_amode x y 0 0

getAmode' _ (CmmLit lit) | is32BitLit lit
  = return (Amode (ImmAddr (litToImm lit) 0) nilOL)

getAmode' _ expr = do
  (reg,code) <- getSomeReg expr
  return (Amode (AddrBaseIndex (EABaseReg reg) EAIndexNone (ImmInt 0)) code)


x86_complex_amode :: CmmExpr -> CmmExpr -> Integer -> Integer -> NatM Amode
x86_complex_amode base index shift offset
  = do (x_reg, x_code) <- getNonClobberedReg base
        -- x must be in a temp, because it has to stay live over y_code
        -- we could compare x_reg and y_reg and do something better here...
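        -- the amode built here is [x_reg + y_reg*2^shift + offset]; the
        -- local 'base' below is the x86 scale factor (1, 2, 4 or 8)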
       (y_reg, y_code) <- getSomeReg index
       let
           code = x_code `appOL` y_code
           base = case shift of 0 -> 1; 1 -> 2; 2 -> 4; 3 -> 8;
                                n -> panic $ "x86_complex_amode: unhandled shift! (" ++ show n ++ ")"
       return (Amode (AddrBaseIndex (EABaseReg x_reg) (EAIndex y_reg base) (ImmInt (fromIntegral offset)))
               code)




-- -----------------------------------------------------------------------------
-- getOperand: sometimes any operand will do.

-- getNonClobberedOperand: the value of the operand will remain valid across
-- the computation of an arbitrary expression, unless the expression
-- is computed directly into a register which the operand refers to
-- (see trivialCode where this function is used for an example).

getNonClobberedOperand :: CmmExpr -> NatM (Operand, InstrBlock)
getNonClobberedOperand (CmmLit lit) = do
  use_sse2 <- sse2Enabled
  if use_sse2 && isSuitableFloatingPointLit lit
    then do
      let CmmFloat _ w = lit
      Amode addr code <- memConstant (widthInBytes w) lit
      return (OpAddr addr, code)
     else do

  if is32BitLit lit && not (isFloatType (cmmLitType lit))
    then return (OpImm (litToImm lit), nilOL)
    else getNonClobberedOperand_generic (CmmLit lit)

getNonClobberedOperand (CmmLoad mem pk) = do
  use_sse2 <- sse2Enabled
  if (not (isFloatType pk) || use_sse2)
      && IF_ARCH_i386(not (isWord64 pk), True)
    then do
      Amode src mem_code <- getAmode mem
      (src',save_code) <-
        if (amodeCouldBeClobbered src)
                then do
                   tmp <- getNewRegNat archWordSize
                   return (AddrBaseIndex (EABaseReg tmp) EAIndexNone (ImmInt 0),
                           unitOL (LEA II32 (OpAddr src) (OpReg tmp)))
                else
                   return (src, nilOL)
      return (OpAddr src', save_code `appOL` mem_code)
    else do
      getNonClobberedOperand_generic (CmmLoad mem pk)

getNonClobberedOperand e = getNonClobberedOperand_generic e

getNonClobberedOperand_generic :: CmmExpr -> NatM (Operand, InstrBlock)
getNonClobberedOperand_generic e = do
    (reg, code) <- getNonClobberedReg e
    return (OpReg reg, code)

amodeCouldBeClobbered :: AddrMode -> Bool
amodeCouldBeClobbered amode = any regClobbered (addrModeRegs amode)

regClobbered :: Reg -> Bool
regClobbered (RegReal (RealRegSingle rr)) = isFastTrue (freeReg rr)
regClobbered _ = False

-- getOperand: the operand is not required to remain valid across the
-- computation of an arbitrary expression.
getOperand :: CmmExpr -> NatM (Operand, InstrBlock)

getOperand (CmmLit lit) = do
  use_sse2 <- sse2Enabled
  if (use_sse2 && isSuitableFloatingPointLit lit)
    then do
      let CmmFloat _ w = lit
      Amode addr code <- memConstant (widthInBytes w) lit
      return (OpAddr addr, code)
    else do

  if is32BitLit lit && not (isFloatType (cmmLitType lit))
    then return (OpImm (litToImm lit), nilOL)
    else getOperand_generic (CmmLit lit)

getOperand (CmmLoad mem pk) = do
  use_sse2 <- sse2Enabled
  if (not (isFloatType pk) || use_sse2) && IF_ARCH_i386(not (isWord64 pk), True)
     then do
       Amode src mem_code <- getAmode mem
       return (OpAddr src, mem_code)
     else
       getOperand_generic (CmmLoad mem pk)

getOperand e = getOperand_generic e

getOperand_generic :: CmmExpr -> NatM (Operand, InstrBlock)
getOperand_generic e = do
    (reg, code) <- getSomeReg e
    return (OpReg reg, code)

isOperand :: CmmExpr -> Bool
isOperand (CmmLoad _ _) = True
isOperand (CmmLit lit)  = is32BitLit lit
                          || isSuitableFloatingPointLit lit
isOperand _             = False

memConstant :: Int -> CmmLit -> NatM Amode
memConstant align lit = do
  lbl <- getNewLabelNat
  dflags <- getDynFlagsNat
  (addr, addr_code) <- if target32Bit (targetPlatform dflags)
                       then do dynRef <- cmmMakeDynamicReference
                                             dflags
                                             addImportNat
                                             DataReference
                                             lbl
                               Amode addr addr_code <- getAmode dynRef
                               return (addr, addr_code)
                       else return (ripRel (ImmCLbl lbl), nilOL)
  let code =
        LDATA ReadOnlyData (align, Statics lbl [CmmStaticLit lit])
        `consOL` addr_code
  return (Amode addr code)


loadFloatAmode :: Bool -> Width -> AddrMode -> InstrBlock -> NatM Register
loadFloatAmode use_sse2 w addr addr_code = do
  let size = floatSize w
      code dst = addr_code `snocOL`
                 if use_sse2
                    then MOV size (OpAddr addr) (OpReg dst)
                    else GLD size addr dst
  -- in
  return (Any (if use_sse2 then size else FF80) code)


-- if we want a floating-point literal as an operand, we can
-- use it directly from memory.  However, if the literal is
-- zero, we're better off generating it into a register using
-- xor.
isSuitableFloatingPointLit :: CmmLit -> Bool
isSuitableFloatingPointLit (CmmFloat f _) = f /= 0.0
isSuitableFloatingPointLit _ = False

getRegOrMem :: CmmExpr -> NatM (Operand, InstrBlock)
getRegOrMem e@(CmmLoad mem pk) = do
  use_sse2 <- sse2Enabled
  if (not (isFloatType pk) || use_sse2) && IF_ARCH_i386(not (isWord64 pk), True)
     then do
       Amode src mem_code <- getAmode mem
       return (OpAddr src, mem_code)
     else do
       (reg, code) <- getNonClobberedReg e
       return (OpReg reg, code)
getRegOrMem e = do
    (reg, code) <- getNonClobberedReg e
    return (OpReg reg, code)

is32BitLit :: CmmLit -> Bool
#if x86_64_TARGET_ARCH
is32BitLit (CmmInt i W64) = is32BitInteger i
   -- assume that labels are in the range 0-2^31-1: this assumes the
   -- small memory model (see gcc docs, -mcmodel=small).
#endif
is32BitLit _ = True




-- Set up a condition code for a conditional branch.

getCondCode :: CmmExpr -> NatM CondCode

-- yes, they really do seem to want exactly the same!

getCondCode (CmmMachOp mop [x, y])
  =
    case mop of
      MO_F_Eq W32 -> condFltCode EQQ x y
      MO_F_Ne W32 -> condFltCode NE  x y
      MO_F_Gt W32 -> condFltCode GTT x y
      MO_F_Ge W32 -> condFltCode GE  x y
      MO_F_Lt W32 -> condFltCode LTT x y
      MO_F_Le W32 -> condFltCode LE  x y

      MO_F_Eq W64 -> condFltCode EQQ x y
      MO_F_Ne W64 -> condFltCode NE  x y
      MO_F_Gt W64 -> condFltCode GTT x y
      MO_F_Ge W64 -> condFltCode GE  x y
      MO_F_Lt W64 -> condFltCode LTT x y
      MO_F_Le W64 -> condFltCode LE  x y

      MO_Eq _ -> condIntCode EQQ x y
      MO_Ne _ -> condIntCode NE  x y

      MO_S_Gt _ -> condIntCode GTT x y
      MO_S_Ge _ -> condIntCode GE  x y
      MO_S_Lt _ -> condIntCode LTT x y
      MO_S_Le _ -> condIntCode LE  x y

      MO_U_Gt _ -> condIntCode GU  x y
      MO_U_Ge _ -> condIntCode GEU x y
      MO_U_Lt _ -> condIntCode LU  x y
      MO_U_Le _ -> condIntCode LEU x y

      _other -> pprPanic "getCondCode(x86,x86_64,sparc)" (ppr (CmmMachOp mop [x,y]))

getCondCode other =  pprPanic "getCondCode(2)(x86,sparc)" (ppr other)




-- @cond(Int|Flt)Code@: Turn a boolean expression into a condition, to be
-- passed back up the tree.

condIntCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode

-- memory vs immediate
condIntCode cond (CmmLoad x pk) (CmmLit lit) | is32BitLit lit = do
    Amode x_addr x_code <- getAmode x
    let
        imm  = litToImm lit
        code = x_code `snocOL`
                  CMP (cmmTypeSize pk) (OpImm imm) (OpAddr x_addr)
    --
    return (CondCode False cond code)

-- anything vs zero, using a mask
-- TODO: Add some sanity checking!!!!