MachCodeGen.hs 158 KB
Newer Older
1 2 3 4 5 6 7
{-# OPTIONS -w #-}
-- The above warning supression flag is a temporary kludge.
-- While working on this module you are encouraged to remove it and fix
-- any warnings in the module. See
--     http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings
-- for details

8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
-----------------------------------------------------------------------------
--
-- Generating machine code (instruction selection)
--
-- (c) The University of Glasgow 1996-2004
--
-----------------------------------------------------------------------------

-- This is a big module, but, if you pay attention to
-- (a) the sectioning, (b) the type signatures, and
-- (c) the #if blah_TARGET_ARCH} things, the
-- structure should not be too overwhelming.

module MachCodeGen ( cmmTopCodeGen, InstrBlock ) where

#include "HsVersions.h"
#include "nativeGen/NCG.h"
25
#include "MachDeps.h"
26 27 28 29 30

-- NCG stuff:
import MachInstrs
import MachRegs
import NCGMonad
31
import PositionIndependentCode
32 33
import RegAllocInfo 	( mkBranchInstr, mkRegRegMoveInstr )
import MachRegs
34
import PprMach
35 36

-- Our intermediate code:
37
import BlockId
38 39 40
import PprCmm		( pprExpr )
import Cmm
import CLabel
41
import ClosureInfo	( C_SRT(..) )
42 43

-- The rest:
44
import StaticFlags	( opt_PIC )
45 46 47
import ForeignCall	( CCallConv(..) )
import OrdList
import Pretty
48
import qualified Outputable as O
49 50
import Outputable
import FastString
Ian Lynagh's avatar
Ian Lynagh committed
51
import FastBool		( isFastTrue )
52
import Constants	( wORD_SIZE )
53

54
import Debug.Trace	( trace )
55 56

import Control.Monad	( mapAndUnzipM )
Simon Marlow's avatar
Simon Marlow committed
57 58 59
import Data.Maybe	( fromJust )
import Data.Bits
import Data.Word
60
import Data.Int
61

62

63 64 65 66 67 68 69 70 71 72
-- -----------------------------------------------------------------------------
-- Top-level of the instruction selector

-- | 'InstrBlock's are the insn sequences generated by the insn selectors.
-- They are really trees of insns to facilitate fast appending, where a
-- left-to-right traversal (pre-order?) yields the insns in the correct
-- order.

type InstrBlock = OrdList Instr

73
cmmTopCodeGen :: RawCmmTop -> NatM [NatCmmTop]
74
cmmTopCodeGen (CmmProc info lab params (ListGraph blocks)) = do
75
  (nat_blocks,statics) <- mapAndUnzipM basicBlockCodeGen blocks
76
  picBaseMb <- getPicBaseMaybeNat
77
  let proc = CmmProc info lab params (ListGraph $ concat nat_blocks)
78 79 80 81 82
      tops = proc : concat statics
  case picBaseMb of
      Just picBase -> initializePicBase picBase tops
      Nothing -> return tops
  
83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115
cmmTopCodeGen (CmmData sec dat) = do
  return [CmmData sec dat]  -- no translation, we just use CmmStatic

basicBlockCodeGen :: CmmBasicBlock -> NatM ([NatBasicBlock],[NatCmmTop])
basicBlockCodeGen (BasicBlock id stmts) = do
  instrs <- stmtsToInstrs stmts
  -- code generation may introduce new basic block boundaries, which
  -- are indicated by the NEWBLOCK instruction.  We must split up the
  -- instruction stream into basic blocks again.  Also, we extract
  -- LDATAs here too.
  let
	(top,other_blocks,statics) = foldrOL mkBlocks ([],[],[]) instrs
	
	mkBlocks (NEWBLOCK id) (instrs,blocks,statics) 
	  = ([], BasicBlock id instrs : blocks, statics)
	mkBlocks (LDATA sec dat) (instrs,blocks,statics) 
	  = (instrs, blocks, CmmData sec dat:statics)
	mkBlocks instr (instrs,blocks,statics)
	  = (instr:instrs, blocks, statics)
  -- in
  return (BasicBlock id top : other_blocks, statics)

stmtsToInstrs :: [CmmStmt] -> NatM InstrBlock
stmtsToInstrs stmts
   = do instrss <- mapM stmtToInstrs stmts
        return (concatOL instrss)

stmtToInstrs :: CmmStmt -> NatM InstrBlock
stmtToInstrs stmt = case stmt of
    CmmNop	   -> return nilOL
    CmmComment s   -> return (unitOL (COMMENT s))

    CmmAssign reg src
116
      | isFloatType ty -> assignReg_FltCode size reg src
117
#if WORD_SIZE_IN_BITS==32
118
      | isWord64 ty    -> assignReg_I64Code      reg src
119
#endif
120 121 122
      | otherwise	 -> assignReg_IntCode size reg src
	where ty = cmmRegType reg
	      size = cmmTypeSize ty
123 124

    CmmStore addr src
125
      | isFloatType ty -> assignMem_FltCode size addr src
126
#if WORD_SIZE_IN_BITS==32
127
      | isWord64 ty 	 -> assignMem_I64Code      addr src
128
#endif
129 130 131
      | otherwise	 -> assignMem_IntCode size addr src
	where ty = cmmExprType src
	      size = cmmTypeSize ty
132

133
    CmmCall target result_regs args _ _
134
       -> genCCall target result_regs args
135 136 137 138 139

    CmmBranch id	  -> genBranch id
    CmmCondBranch arg id  -> genCondJump id arg
    CmmSwitch arg ids     -> genSwitch arg ids
    CmmJump arg params	  -> genJump arg
dias@eecs.harvard.edu's avatar
dias@eecs.harvard.edu committed
140 141
    CmmReturn params	  ->
      panic "stmtToInstrs: return statement should have been cps'd away"
142 143 144 145 146 147 148 149

-- -----------------------------------------------------------------------------
-- General things for putting together code sequences

-- Expand CmmRegOff.  ToDo: should we do it this way around, or convert
-- CmmExprs into CmmRegOff?
mangleIndexTree :: CmmExpr -> CmmExpr
mangleIndexTree (CmmRegOff reg off)
150 151
  = CmmMachOp (MO_Add width) [CmmReg reg, CmmLit (CmmInt (fromIntegral off) width)]
  where width = typeWidth (cmmRegType reg)
152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177

-- -----------------------------------------------------------------------------
--  Code gen for 64-bit arithmetic on 32-bit platforms

{-
Simple support for generating 64-bit code (ie, 64 bit values and 64
bit assignments) on 32-bit platforms.  Unlike the main code generator
we merely shoot for generating working code as simply as possible, and
pay little attention to code quality.  Specifically, there is no
attempt to deal cleverly with the fixed-vs-floating register
distinction; all values are generated into (pairs of) floating
registers, even if this would mean some redundant reg-reg moves as a
result.  Only one of the VRegUniques is returned, since it will be
of the VRegUniqueLo form, and the upper-half VReg can be determined
by applying getHiVRegFromLo to it.
-}

data ChildCode64 	-- a.k.a "Register64"
   = ChildCode64 
        InstrBlock 	-- code
        Reg	 	-- the lower 32-bit temporary which contains the
			-- result; use getHiVRegFromLo to find the other
			-- VRegUnique.  Rules of this simplified insn
			-- selection game are therefore that the returned
			-- Reg may be modified

178
#if WORD_SIZE_IN_BITS==32
179 180
assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock
assignReg_I64Code :: CmmReg  -> CmmExpr -> NatM InstrBlock
181 182 183
#endif

#ifndef x86_64_TARGET_ARCH
184
iselExpr64        :: CmmExpr -> NatM ChildCode64
185
#endif
186 187 188 189 190 191 192 193 194 195 196 197

-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

#if i386_TARGET_ARCH

assignMem_I64Code addrTree valueTree = do
  Amode addr addr_code <- getAmode addrTree
  ChildCode64 vcode rlo <- iselExpr64 valueTree
  let 
        rhi = getHiVRegFromLo rlo

        -- Little-endian store
198 199
        mov_lo = MOV II32 (OpReg rlo) (OpAddr addr)
        mov_hi = MOV II32 (OpReg rhi) (OpAddr (fromJust (addrOffset addr 4)))
200 201 202 203
  -- in
  return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)


204
assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
205 206
   ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
   let 
207
         r_dst_lo = mkVReg u_dst II32
208 209
         r_dst_hi = getHiVRegFromLo r_dst_lo
         r_src_hi = getHiVRegFromLo r_src_lo
210 211
         mov_lo = MOV II32 (OpReg r_src_lo) (OpReg r_dst_lo)
         mov_hi = MOV II32 (OpReg r_src_hi) (OpReg r_dst_hi)
212 213 214 215 216 217 218 219 220 221 222
   -- in
   return (
        vcode `snocOL` mov_lo `snocOL` mov_hi
     )

assignReg_I64Code lvalue valueTree
   = panic "assignReg_I64Code(i386): invalid lvalue"

------------

iselExpr64 (CmmLit (CmmInt i _)) = do
223
  (rlo,rhi) <- getNewRegPairNat II32
224 225 226 227
  let
	r = fromIntegral (fromIntegral i :: Word32)
	q = fromIntegral ((fromIntegral i `shiftR` 32) :: Word32)
	code = toOL [
228 229
		MOV II32 (OpImm (ImmInteger r)) (OpReg rlo),
		MOV II32 (OpImm (ImmInteger q)) (OpReg rhi)
230 231 232 233
		]
  -- in
  return (ChildCode64 code rlo)

234
iselExpr64 (CmmLoad addrTree ty) | isWord64 ty = do
235
   Amode addr addr_code <- getAmode addrTree
236
   (rlo,rhi) <- getNewRegPairNat II32
237
   let 
238 239
        mov_lo = MOV II32 (OpAddr addr) (OpReg rlo)
        mov_hi = MOV II32 (OpAddr (fromJust (addrOffset addr 4))) (OpReg rhi)
240 241 242 243 244 245
   -- in
   return (
            ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi) 
                        rlo
     )

246 247
iselExpr64 (CmmReg (CmmLocal (LocalReg vu ty))) | isWord64 ty
   = return (ChildCode64 nilOL (mkVReg vu II32))
248 249 250 251
         
-- we handle addition, but rather badly
iselExpr64 (CmmMachOp (MO_Add _) [e1, CmmLit (CmmInt i _)]) = do
   ChildCode64 code1 r1lo <- iselExpr64 e1
252
   (rlo,rhi) <- getNewRegPairNat II32
253 254 255 256 257
   let
	r = fromIntegral (fromIntegral i :: Word32)
	q = fromIntegral ((fromIntegral i `shiftR` 32) :: Word32)
	r1hi = getHiVRegFromLo r1lo
	code =  code1 `appOL`
258 259 260 261
		toOL [ MOV II32 (OpReg r1lo) (OpReg rlo),
		       ADD II32 (OpImm (ImmInteger r)) (OpReg rlo),
		       MOV II32 (OpReg r1hi) (OpReg rhi),
		       ADC II32 (OpImm (ImmInteger q)) (OpReg rhi) ]
262 263 264 265 266 267
   -- in
   return (ChildCode64 code rlo)

iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
   ChildCode64 code1 r1lo <- iselExpr64 e1
   ChildCode64 code2 r2lo <- iselExpr64 e2
268
   (rlo,rhi) <- getNewRegPairNat II32
269 270 271 272 273
   let
	r1hi = getHiVRegFromLo r1lo
	r2hi = getHiVRegFromLo r2lo
	code =  code1 `appOL`
		code2 `appOL`
274 275 276 277
		toOL [ MOV II32 (OpReg r1lo) (OpReg rlo),
		       ADD II32 (OpReg r2lo) (OpReg rlo),
		       MOV II32 (OpReg r1hi) (OpReg rhi),
		       ADC II32 (OpReg r2hi) (OpReg rhi) ]
278 279 280
   -- in
   return (ChildCode64 code rlo)

281
iselExpr64 (CmmMachOp (MO_UU_Conv _ W64) [expr]) = do
282
     fn <- getAnyReg expr
283
     r_dst_lo <-  getNewRegNat II32
284 285 286 287
     let r_dst_hi = getHiVRegFromLo r_dst_lo
         code = fn r_dst_lo
     return (
             ChildCode64 (code `snocOL` 
288
                          MOV II32 (OpImm (ImmInt 0)) (OpReg r_dst_hi))
289 290 291
                          r_dst_lo
            )

292 293 294 295 296 297 298 299 300
iselExpr64 expr
   = pprPanic "iselExpr64(i386)" (ppr expr)

#endif /* i386_TARGET_ARCH */

-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

#if sparc_TARGET_ARCH

301 302 303 304 305
assignMem_I64Code addrTree valueTree = do
     Amode addr addr_code <- getAmode addrTree
     ChildCode64 vcode rlo <- iselExpr64 valueTree  
     (src, code) <- getSomeReg addrTree
     let 
306 307
         rhi = getHiVRegFromLo rlo
         -- Big-endian store
308 309
         mov_hi = ST II32 rhi (AddrRegImm src (ImmInt 0))
         mov_lo = ST II32 rlo (AddrRegImm src (ImmInt 4))
310
     return (vcode `appOL` code `snocOL` mov_hi `snocOL` mov_lo)
311

312
assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
313
     ChildCode64 vcode r_src_lo <- iselExpr64 valueTree    
314
     let 
315
         r_dst_lo = mkVReg u_dst (cmmTypeSize pk)
316 317 318 319 320
         r_dst_hi = getHiVRegFromLo r_dst_lo
         r_src_hi = getHiVRegFromLo r_src_lo
         mov_lo = mkMOV r_src_lo r_dst_lo
         mov_hi = mkMOV r_src_hi r_dst_hi
         mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
321
     return (vcode `snocOL` mov_hi `snocOL` mov_lo)
322
assignReg_I64Code lvalue valueTree
323
   = panic "assignReg_I64Code(sparc): invalid lvalue"
324 325 326 327


-- Don't delete this -- it's very handy for debugging.
--iselExpr64 expr 
328
--   | trace ("iselExpr64: " ++ showSDoc (ppr expr)) False
329 330
--   = panic "iselExpr64(???)"

331
iselExpr64 (CmmLoad addrTree ty) | isWord64 ty = do
332
     Amode (AddrRegReg r1 r2) addr_code <- getAmode addrTree
333
     rlo <- getNewRegNat II32
334
     let rhi = getHiVRegFromLo rlo
335 336
         mov_hi = LD II32 (AddrRegImm r1 (ImmInt 0)) rhi
         mov_lo = LD II32 (AddrRegImm r1 (ImmInt 4)) rlo
337 338 339 340
     return (
            ChildCode64 (addr_code `snocOL` mov_hi `snocOL` mov_lo) 
                         rlo
          )
341

342 343
iselExpr64 (CmmReg (CmmLocal (LocalReg uq ty))) | isWord64 ty = do
     r_dst_lo <-  getNewRegNat II32
344
     let r_dst_hi = getHiVRegFromLo r_dst_lo
345
         r_src_lo = mkVReg uq II32
346 347 348 349
         r_src_hi = getHiVRegFromLo r_src_lo
         mov_lo = mkMOV r_src_lo r_dst_lo
         mov_hi = mkMOV r_src_hi r_dst_hi
         mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
350 351
     return (
            ChildCode64 (toOL [mov_hi, mov_lo]) r_dst_lo
352 353 354
         )

iselExpr64 expr
355
   = pprPanic "iselExpr64(sparc)" (ppr expr)
356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379

#endif /* sparc_TARGET_ARCH */

-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

#if powerpc_TARGET_ARCH

getI64Amodes :: CmmExpr -> NatM (AddrMode, AddrMode, InstrBlock)
getI64Amodes addrTree = do
    Amode hi_addr addr_code <- getAmode addrTree
    case addrOffset hi_addr 4 of
        Just lo_addr -> return (hi_addr, lo_addr, addr_code)
        Nothing      -> do (hi_ptr, code) <- getSomeReg addrTree
                           return (AddrRegImm hi_ptr (ImmInt 0),
                                   AddrRegImm hi_ptr (ImmInt 4),
                                   code)

assignMem_I64Code addrTree valueTree = do
        (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
	ChildCode64 vcode rlo <- iselExpr64 valueTree
	let 
		rhi = getHiVRegFromLo rlo

		-- Big-endian store
380 381
		mov_hi = ST II32 rhi hi_addr
		mov_lo = ST II32 rlo lo_addr
382 383 384
	-- in
	return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)

385
assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
386 387
   ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
   let 
388
         r_dst_lo = mkVReg u_dst II32
389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406
         r_dst_hi = getHiVRegFromLo r_dst_lo
         r_src_hi = getHiVRegFromLo r_src_lo
         mov_lo = MR r_dst_lo r_src_lo
         mov_hi = MR r_dst_hi r_src_hi
   -- in
   return (
        vcode `snocOL` mov_lo `snocOL` mov_hi
     )

assignReg_I64Code lvalue valueTree
   = panic "assignReg_I64Code(powerpc): invalid lvalue"


-- Don't delete this -- it's very handy for debugging.
--iselExpr64 expr 
--   | trace ("iselExpr64: " ++ showSDoc (pprCmmExpr expr)) False
--   = panic "iselExpr64(???)"

407
iselExpr64 (CmmLoad addrTree ty) | isWord64 ty = do
408
    (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
409 410 411
    (rlo, rhi) <- getNewRegPairNat II32
    let mov_hi = LD II32 rhi hi_addr
        mov_lo = LD II32 rlo lo_addr
412 413 414
    return $ ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi) 
                         rlo

415 416
iselExpr64 (CmmReg (CmmLocal (LocalReg vu ty))) | isWord64 ty
   = return (ChildCode64 nilOL (mkVReg vu II32))
417 418

iselExpr64 (CmmLit (CmmInt i _)) = do
419
  (rlo,rhi) <- getNewRegPairNat II32
420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437
  let
	half0 = fromIntegral (fromIntegral i :: Word16)
	half1 = fromIntegral ((fromIntegral i `shiftR` 16) :: Word16)
	half2 = fromIntegral ((fromIntegral i `shiftR` 32) :: Word16)
	half3 = fromIntegral ((fromIntegral i `shiftR` 48) :: Word16)
	
	code = toOL [
		LIS rlo (ImmInt half1),
		OR rlo rlo (RIImm $ ImmInt half0),
		LIS rhi (ImmInt half3),
		OR rlo rlo (RIImm $ ImmInt half2)
		]
  -- in
  return (ChildCode64 code rlo)

iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
   ChildCode64 code1 r1lo <- iselExpr64 e1
   ChildCode64 code2 r2lo <- iselExpr64 e2
438
   (rlo,rhi) <- getNewRegPairNat II32
439 440 441 442 443 444 445 446 447 448
   let
	r1hi = getHiVRegFromLo r1lo
	r2hi = getHiVRegFromLo r2lo
	code =  code1 `appOL`
		code2 `appOL`
		toOL [ ADDC rlo r1lo r2lo,
		       ADDE rhi r1hi r2hi ]
   -- in
   return (ChildCode64 code rlo)

449
iselExpr64 (CmmMachOp (MO_UU_Conv W32 W64) [expr]) = do
450
    (expr_reg,expr_code) <- getSomeReg expr
451
    (rlo, rhi) <- getNewRegPairNat II32
452 453 454 455
    let mov_hi = LI rhi (ImmInt 0)
        mov_lo = MR rlo expr_reg
    return $ ChildCode64 (expr_code `snocOL` mov_lo `snocOL` mov_hi)
                         rlo
456 457 458 459 460 461 462 463 464 465 466 467 468 469 470
iselExpr64 expr
   = pprPanic "iselExpr64(powerpc)" (ppr expr)

#endif /* powerpc_TARGET_ARCH */


-- -----------------------------------------------------------------------------
-- The 'Register' type

-- 'Register's passed up the tree.  If the stix code forces the register
-- to live in a pre-decided machine register, it comes out as @Fixed@;
-- otherwise, it comes out as @Any@, and the parent can decide which
-- register to put it in.

data Register
471 472
  = Fixed   Size Reg InstrBlock
  | Any	    Size (Reg -> InstrBlock)
473

474 475 476 477
swizzleRegisterRep :: Register -> Size -> Register
-- Change the width; it's a no-op
swizzleRegisterRep (Fixed _ reg code) size = Fixed size reg code
swizzleRegisterRep (Any _ codefn)     size = Any   size codefn
478 479


480 481 482 483 484 485 486 487 488 489 490 491 492 493 494
-- -----------------------------------------------------------------------------
-- Utils based on getRegister, below

-- The dual to getAnyReg: compute an expression into a register, but
-- we don't mind which one it is.
getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
getSomeReg expr = do
  r <- getRegister expr
  case r of
    Any rep code -> do
	tmp <- getNewRegNat rep
	return (tmp, code tmp)
    Fixed _ reg code -> 
	return (reg, code)

495 496 497 498 499
-- -----------------------------------------------------------------------------
-- Grab the Reg for a CmmReg

getRegisterReg :: CmmReg -> Reg

500 501
getRegisterReg (CmmLocal (LocalReg u pk))
  = mkVReg u (cmmTypeSize pk)
502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521

getRegisterReg (CmmGlobal mid)
  = case get_GlobalReg_reg_or_addr mid of
       Left (RealReg rrno) -> RealReg rrno
       _other -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid)
          -- By this stage, the only MagicIds remaining should be the
          -- ones which map to a real machine register on this
          -- platform.  Hence ...


-- -----------------------------------------------------------------------------
-- Generate code to get a subtree into a Register

-- Don't delete this -- it's very handy for debugging.
--getRegister expr 
--   | trace ("getRegister: " ++ showSDoc (pprCmmExpr expr)) False
--   = panic "getRegister(???)"

getRegister :: CmmExpr -> NatM Register

522 523 524
#if !x86_64_TARGET_ARCH
    -- on x86_64, we have %rip for PicBaseReg, but it's not a full-featured
    -- register, it can only be used for rip-relative addressing.
525 526
getRegister (CmmReg (CmmGlobal PicBaseReg))
  = do
527 528
      reg <- getPicBaseNat wordSize
      return (Fixed wordSize reg nilOL)
529
#endif
530

531
getRegister (CmmReg reg) 
532 533
  = return (Fixed (cmmTypeSize (cmmRegType reg)) 
		  (getRegisterReg reg) nilOL)
534 535 536 537

getRegister tree@(CmmRegOff _ _) 
  = getRegister (mangleIndexTree tree)

538 539 540 541 542

#if WORD_SIZE_IN_BITS==32
    -- for 32-bit architectuers, support some 64 -> 32 bit conversions:
    -- TO_W_(x), TO_W_(x >> 32)

543 544
getRegister (CmmMachOp (MO_UU_Conv W64 W32)
             [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]]) = do
545
  ChildCode64 code rlo <- iselExpr64 x
546
  return $ Fixed II32 (getHiVRegFromLo rlo) code
547

548 549
getRegister (CmmMachOp (MO_SS_Conv W64 W32)
             [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]]) = do
550
  ChildCode64 code rlo <- iselExpr64 x
551
  return $ Fixed II32 (getHiVRegFromLo rlo) code
552

553
getRegister (CmmMachOp (MO_UU_Conv W64 W32) [x]) = do
554
  ChildCode64 code rlo <- iselExpr64 x
555
  return $ Fixed II32 rlo code
556

557
getRegister (CmmMachOp (MO_SS_Conv W64 W32) [x]) = do
558
  ChildCode64 code rlo <- iselExpr64 x
559
  return $ Fixed II32 rlo code       
560 561 562

#endif

563 564 565 566 567 568 569 570 571 572 573 574 575 576
-- end of machine-"independent" bit; here we go on the rest...

#if alpha_TARGET_ARCH

getRegister (StDouble d)
  = getBlockIdNat 	    	    `thenNat` \ lbl ->
    getNewRegNat PtrRep    	    `thenNat` \ tmp ->
    let code dst = mkSeqInstrs [
	    LDATA RoDataSegment lbl [
		    DATA TF [ImmLab (rational d)]
		],
	    LDA tmp (AddrImm (ImmCLbl lbl)),
	    LD TF dst (AddrReg tmp)]
    in
577
    	return (Any FF64 code)
578 579 580 581 582 583 584

getRegister (StPrim primop [x]) -- unary PrimOps
  = case primop of
      IntNegOp -> trivialUCode (NEG Q False) x

      NotOp    -> trivialUCode NOT x

585 586
      FloatNegOp  -> trivialUFCode FloatRep (FNEG TF) x
      DoubleNegOp -> trivialUFCode FF64 (FNEG TF) x
587 588 589 590 591 592 593 594 595 596 597 598

      OrdOp -> coerceIntCode IntRep x
      ChrOp -> chrCode x

      Float2IntOp  -> coerceFP2Int    x
      Int2FloatOp  -> coerceInt2FP pr x
      Double2IntOp -> coerceFP2Int    x
      Int2DoubleOp -> coerceInt2FP pr x

      Double2FloatOp -> coerceFltCode x
      Float2DoubleOp -> coerceFltCode x

599
      other_op -> getRegister (StCall fn CCallConv FF64 [x])
600 601
	where
	  fn = case other_op of
Ian Lynagh's avatar
Ian Lynagh committed
602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625
		 FloatExpOp    -> fsLit "exp"
		 FloatLogOp    -> fsLit "log"
		 FloatSqrtOp   -> fsLit "sqrt"
		 FloatSinOp    -> fsLit "sin"
		 FloatCosOp    -> fsLit "cos"
		 FloatTanOp    -> fsLit "tan"
		 FloatAsinOp   -> fsLit "asin"
		 FloatAcosOp   -> fsLit "acos"
		 FloatAtanOp   -> fsLit "atan"
		 FloatSinhOp   -> fsLit "sinh"
		 FloatCoshOp   -> fsLit "cosh"
		 FloatTanhOp   -> fsLit "tanh"
		 DoubleExpOp   -> fsLit "exp"
		 DoubleLogOp   -> fsLit "log"
		 DoubleSqrtOp  -> fsLit "sqrt"
		 DoubleSinOp   -> fsLit "sin"
		 DoubleCosOp   -> fsLit "cos"
		 DoubleTanOp   -> fsLit "tan"
		 DoubleAsinOp  -> fsLit "asin"
		 DoubleAcosOp  -> fsLit "acos"
		 DoubleAtanOp  -> fsLit "atan"
		 DoubleSinhOp  -> fsLit "sinh"
		 DoubleCoshOp  -> fsLit "cosh"
		 DoubleTanhOp  -> fsLit "tanh"
626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684
  where
    pr = panic "MachCode.getRegister: no primrep needed for Alpha"

getRegister (StPrim primop [x, y]) -- dyadic PrimOps
  = case primop of
      CharGtOp -> trivialCode (CMP LTT) y x
      CharGeOp -> trivialCode (CMP LE) y x
      CharEqOp -> trivialCode (CMP EQQ) x y
      CharNeOp -> int_NE_code x y
      CharLtOp -> trivialCode (CMP LTT) x y
      CharLeOp -> trivialCode (CMP LE) x y

      IntGtOp  -> trivialCode (CMP LTT) y x
      IntGeOp  -> trivialCode (CMP LE) y x
      IntEqOp  -> trivialCode (CMP EQQ) x y
      IntNeOp  -> int_NE_code x y
      IntLtOp  -> trivialCode (CMP LTT) x y
      IntLeOp  -> trivialCode (CMP LE) x y

      WordGtOp -> trivialCode (CMP ULT) y x
      WordGeOp -> trivialCode (CMP ULE) x y
      WordEqOp -> trivialCode (CMP EQQ)  x y
      WordNeOp -> int_NE_code x y
      WordLtOp -> trivialCode (CMP ULT) x y
      WordLeOp -> trivialCode (CMP ULE) x y

      AddrGtOp -> trivialCode (CMP ULT) y x
      AddrGeOp -> trivialCode (CMP ULE) y x
      AddrEqOp -> trivialCode (CMP EQQ)  x y
      AddrNeOp -> int_NE_code x y
      AddrLtOp -> trivialCode (CMP ULT) x y
      AddrLeOp -> trivialCode (CMP ULE) x y
	
      FloatGtOp -> cmpF_code (FCMP TF LE) EQQ x y
      FloatGeOp -> cmpF_code (FCMP TF LTT) EQQ x y
      FloatEqOp -> cmpF_code (FCMP TF EQQ) NE x y
      FloatNeOp -> cmpF_code (FCMP TF EQQ) EQQ x y
      FloatLtOp -> cmpF_code (FCMP TF LTT) NE x y
      FloatLeOp -> cmpF_code (FCMP TF LE) NE x y

      DoubleGtOp -> cmpF_code (FCMP TF LE) EQQ x y
      DoubleGeOp -> cmpF_code (FCMP TF LTT) EQQ x y
      DoubleEqOp -> cmpF_code (FCMP TF EQQ) NE x y
      DoubleNeOp -> cmpF_code (FCMP TF EQQ) EQQ x y
      DoubleLtOp -> cmpF_code (FCMP TF LTT) NE x y
      DoubleLeOp -> cmpF_code (FCMP TF LE) NE x y

      IntAddOp  -> trivialCode (ADD Q False) x y
      IntSubOp  -> trivialCode (SUB Q False) x y
      IntMulOp  -> trivialCode (MUL Q False) x y
      IntQuotOp -> trivialCode (DIV Q False) x y
      IntRemOp  -> trivialCode (REM Q False) x y

      WordAddOp  -> trivialCode (ADD Q False) x y
      WordSubOp  -> trivialCode (SUB Q False) x y
      WordMulOp  -> trivialCode (MUL Q False) x y
      WordQuotOp -> trivialCode (DIV Q True) x y
      WordRemOp  -> trivialCode (REM Q True) x y

685 686 687 688
      FloatAddOp -> trivialFCode  W32 (FADD TF) x y
      FloatSubOp -> trivialFCode  W32 (FSUB TF) x y
      FloatMulOp -> trivialFCode  W32 (FMUL TF) x y
      FloatDivOp -> trivialFCode  W32 (FDIV TF) x y
689

690 691 692 693
      DoubleAddOp -> trivialFCode  W64 (FADD TF) x y
      DoubleSubOp -> trivialFCode  W64 (FSUB TF) x y
      DoubleMulOp -> trivialFCode  W64 (FMUL TF) x y
      DoubleDivOp -> trivialFCode  W64 (FDIV TF) x y
694 695 696 697 698 699 700 701 702 703 704 705 706 707 708

      AddrAddOp  -> trivialCode (ADD Q False) x y
      AddrSubOp  -> trivialCode (SUB Q False) x y
      AddrRemOp  -> trivialCode (REM Q True) x y

      AndOp  -> trivialCode AND x y
      OrOp   -> trivialCode OR  x y
      XorOp  -> trivialCode XOR x y
      SllOp  -> trivialCode SLL x y
      SrlOp  -> trivialCode SRL x y

      ISllOp -> trivialCode SLL x y -- was: panic "AlphaGen:isll"
      ISraOp -> trivialCode SRA x y -- was: panic "AlphaGen:isra"
      ISrlOp -> trivialCode SRL x y -- was: panic "AlphaGen:isrl"

709 710
      FloatPowerOp  -> getRegister (StCall (fsLit "pow") CCallConv FF64 [x,y])
      DoublePowerOp -> getRegister (StCall (fsLit "pow") CCallConv FF64 [x,y])
711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742
  where
    {- ------------------------------------------------------------
	Some bizarre special code for getting condition codes into
	registers.  Integer non-equality is a test for equality
	followed by an XOR with 1.  (Integer comparisons always set
	the result register to 0 or 1.)  Floating point comparisons of
	any kind leave the result in a floating point register, so we
	need to wrangle an integer register out of things.
    -}
    int_NE_code :: StixTree -> StixTree -> NatM Register

    int_NE_code x y
      = trivialCode (CMP EQQ) x y	`thenNat` \ register ->
	getNewRegNat IntRep		`thenNat` \ tmp ->
	let
	    code = registerCode register tmp
	    src  = registerName register tmp
	    code__2 dst = code . mkSeqInstr (XOR src (RIImm (ImmInt 1)) dst)
	in
	return (Any IntRep code__2)

    {- ------------------------------------------------------------
	Comments for int_NE_code also apply to cmpF_code
    -}
    cmpF_code
	:: (Reg -> Reg -> Reg -> Instr)
	-> Cond
	-> StixTree -> StixTree
	-> NatM Register

    cmpF_code instr cond x y
      = trivialFCode pr instr x y	`thenNat` \ register ->
743
	getNewRegNat FF64		`thenNat` \ tmp ->
744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799
	getBlockIdNat			`thenNat` \ lbl ->
	let
	    code = registerCode register tmp
	    result  = registerName register tmp

	    code__2 dst = code . mkSeqInstrs [
		OR zeroh (RIImm (ImmInt 1)) dst,
		BF cond  result (ImmCLbl lbl),
		OR zeroh (RIReg zeroh) dst,
		NEWBLOCK lbl]
	in
	return (Any IntRep code__2)
      where
	pr = panic "trivialU?FCode: does not use PrimRep on Alpha"
      ------------------------------------------------------------

getRegister (CmmLoad pk mem)
  = getAmode mem    	    	    `thenNat` \ amode ->
    let
    	code = amodeCode amode
    	src   = amodeAddr amode
    	size = primRepToSize pk
    	code__2 dst = code . mkSeqInstr (LD size dst src)
    in
    return (Any pk code__2)

getRegister (StInt i)
  | fits8Bits i
  = let
    	code dst = mkSeqInstr (OR zeroh (RIImm src) dst)
    in
    return (Any IntRep code)
  | otherwise
  = let
    	code dst = mkSeqInstr (LDI Q dst src)
    in
    return (Any IntRep code)
  where
    src = ImmInt (fromInteger i)

getRegister leaf
  | isJust imm
  = let
    	code dst = mkSeqInstr (LDA dst (AddrImm imm__2))
    in
    return (Any PtrRep code)
  where
    imm = maybeImm leaf
    imm__2 = case imm of Just x -> x

#endif /* alpha_TARGET_ARCH */

-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

#if i386_TARGET_ARCH

800
getRegister (CmmLit (CmmFloat f W32)) = do
801
    lbl <- getNewLabelNat
802 803
    dflags <- getDynFlagsNat
    dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
804 805
    Amode addr addr_code <- getAmode dynRef
    let code dst =
806 807
	    LDATA ReadOnlyData
			[CmmDataLabel lbl,
808
			 CmmStaticLit (CmmFloat f W32)]
809
	    `consOL` (addr_code `snocOL`
810
	    GLD FF32 addr dst)
811
    -- in
812
    return (Any FF32 code)
813 814


815
getRegister (CmmLit (CmmFloat d W64))
816 817
  | d == 0.0
  = let code dst = unitOL (GLDZ dst)
818
    in  return (Any FF64 code)
819 820 821

  | d == 1.0
  = let code dst = unitOL (GLD1 dst)
822
    in  return (Any FF64 code)
823 824 825

  | otherwise = do
    lbl <- getNewLabelNat
826 827
    dflags <- getDynFlagsNat
    dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
828 829
    Amode addr addr_code <- getAmode dynRef
    let code dst =
830 831
	    LDATA ReadOnlyData
			[CmmDataLabel lbl,
832
			 CmmStaticLit (CmmFloat d W64)]
833
	    `consOL` (addr_code `snocOL`
834
	    GLD FF64 addr dst)
835
    -- in
836
    return (Any FF64 code)
837

838 839 840 841
#endif /* i386_TARGET_ARCH */

#if x86_64_TARGET_ARCH

842 843 844
getRegister (CmmLit (CmmFloat 0.0 w)) = do
   let size = floatSize w
       code dst = unitOL  (XOR size (OpReg dst) (OpReg dst))
845 846
	-- I don't know why there are xorpd, xorps, and pxor instructions.
	-- They all appear to do the same thing --SDM
847
   return (Any size code)
848

849
getRegister (CmmLit (CmmFloat f w)) = do
850 851 852 853
    lbl <- getNewLabelNat
    let code dst = toOL [
	    LDATA ReadOnlyData
			[CmmDataLabel lbl,
854 855
			 CmmStaticLit (CmmFloat f w)],
	    MOV size (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
856 857
	    ]
    -- in
858 859
    return (Any size code)
  where size = floatSize w
860 861 862 863

#endif /* x86_64_TARGET_ARCH */

#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
864 865

-- catch simple cases of zero- or sign-extended load
866 867 868
getRegister (CmmMachOp (MO_UU_Conv W8 W32) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVZxL II8) addr
  return (Any II32 code)
869

870 871 872
getRegister (CmmMachOp (MO_SS_Conv W8 W32) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVSxL II8) addr
  return (Any II32 code)
873

874 875 876
getRegister (CmmMachOp (MO_UU_Conv W16 W32) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVZxL II16) addr
  return (Any II32 code)
877

878 879 880
getRegister (CmmMachOp (MO_SS_Conv W16 W32) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVSxL II16) addr
  return (Any II32 code)
881

882 883 884 885 886
#endif

#if x86_64_TARGET_ARCH

-- catch simple cases of zero- or sign-extended load
887 888 889
getRegister (CmmMachOp (MO_UU_Conv W8 W64) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVZxL II8) addr
  return (Any II64 code)
890

891 892 893
getRegister (CmmMachOp (MO_SS_Conv W8 W64) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVSxL II8) addr
  return (Any II64 code)
894

895 896 897
getRegister (CmmMachOp (MO_UU_Conv W16 W64) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVZxL II16) addr
  return (Any II64 code)
898

899 900 901
getRegister (CmmMachOp (MO_SS_Conv W16 W64) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVSxL II16) addr
  return (Any II64 code)
902

903 904 905
getRegister (CmmMachOp (MO_UU_Conv W32 W64) [CmmLoad addr _]) = do
  code <- intLoadCode (MOV II32) addr -- 32-bit loads zero-extend
  return (Any II64 code)
906

907 908 909
getRegister (CmmMachOp (MO_SS_Conv W32 W64) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVSxL II32) addr
  return (Any II64 code)
910 911 912

#endif

913
#if x86_64_TARGET_ARCH
914
getRegister (CmmMachOp (MO_Add W64) [CmmReg (CmmGlobal PicBaseReg),
915
                                     CmmLit displacement])
916 917
    = return $ Any II64 (\dst -> unitOL $
        LEA II64 (OpAddr (ripRel (litToImm displacement))) (OpReg dst))
918 919
#endif

920
#if x86_64_TARGET_ARCH
921
getRegister (CmmMachOp (MO_F_Neg W32) [x]) = do
922
  x_code <- getAnyReg x
923 924
  lbl <- getNewLabelNat
  let
925
    code dst = x_code dst `appOL` toOL [
926 927 928 929
	-- This is how gcc does it, so it can't be that bad:
	LDATA ReadOnlyData16 [
		CmmAlign 16,
		CmmDataLabel lbl,
930 931 932 933
		CmmStaticLit (CmmInt 0x80000000 W32),
		CmmStaticLit (CmmInt 0 W32),
		CmmStaticLit (CmmInt 0 W32),
		CmmStaticLit (CmmInt 0 W32)
934
	],
935
	XOR FF32 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
936 937 938 939
		-- xorps, so we need the 128-bit constant
		-- ToDo: rip-relative
	]
  --
940
  return (Any FF32 code)
941

942
getRegister (CmmMachOp (MO_F_Neg W64) [x]) = do
943
  x_code <- getAnyReg x
944 945 946
  lbl <- getNewLabelNat
  let
	-- This is how gcc does it, so it can't be that bad:
947
    code dst = x_code dst `appOL` toOL [
948 949 950
	LDATA ReadOnlyData16 [
		CmmAlign 16,
		CmmDataLabel lbl,
951 952
		CmmStaticLit (CmmInt 0x8000000000000000 W64),
		CmmStaticLit (CmmInt 0 W64)
953 954
	],
		-- gcc puts an unpck here.  Wonder if we need it.
955
	XOR FF64 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
956 957 958
		-- xorpd, so we need the 128-bit constant
	]
  --
959
  return (Any FF64 code)
960 961 962
#endif

#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
963 964 965

getRegister (CmmMachOp mop [x]) -- unary MachOps
  = case mop of
966
#if i386_TARGET_ARCH
967 968
      MO_F_Neg W32 -> trivialUFCode FF32 (GNEG FF32) x
      MO_F_Neg W64 -> trivialUFCode FF64 (GNEG FF64) x
969
#endif
970

971 972 973
      MO_S_Neg w -> triv_ucode NEGI (intSize w)
      MO_F_Neg w -> triv_ucode NEGI (floatSize w)
      MO_Not w   -> triv_ucode NOT  (intSize w)
974 975

      -- Nop conversions
976 977 978 979 980 981 982
      MO_UU_Conv W32 W8  -> toI8Reg  W32 x
      MO_SS_Conv W32 W8  -> toI8Reg  W32 x
      MO_UU_Conv W16 W8  -> toI8Reg  W16 x
      MO_SS_Conv W16 W8  -> toI8Reg  W16 x
      MO_UU_Conv W32 W16 -> toI16Reg W32 x
      MO_SS_Conv W32 W16 -> toI16Reg W32 x

983
#if x86_64_TARGET_ARCH
984 985 986 987 988 989
      MO_UU_Conv W64 W32 -> conversionNop II64 x
      MO_SS_Conv W64 W32 -> conversionNop II64 x
      MO_UU_Conv W64 W16 -> toI16Reg W64 x
      MO_SS_Conv W64 W16 -> toI16Reg W64 x
      MO_UU_Conv W64 W8  -> toI8Reg  W64 x
      MO_SS_Conv W64 W8  -> toI8Reg  W64 x
990 991
#endif

992 993
      MO_UU_Conv rep1 rep2 | rep1 == rep2 -> conversionNop (intSize rep1) x
      MO_SS_Conv rep1 rep2 | rep1 == rep2 -> conversionNop (intSize rep1) x
994 995

      -- widenings
996 997 998
      MO_UU_Conv W8  W32 -> integerExtend W8  W32 MOVZxL x
      MO_UU_Conv W16 W32 -> integerExtend W16 W32 MOVZxL x
      MO_UU_Conv W8  W16 -> integerExtend W8  W16 MOVZxL x
999

1000 1001 1002
      MO_SS_Conv W8  W32 -> integerExtend W8  W32 MOVSxL x
      MO_SS_Conv W16 W32 -> integerExtend W16 W32 MOVSxL x
      MO_SS_Conv W8  W16 -> integerExtend W8  W16 MOVSxL x
1003

1004
#if x86_64_TARGET_ARCH
1005 1006 1007 1008 1009 1010
      MO_UU_Conv W8  W64 -> integerExtend W8  W64 MOVZxL x
      MO_UU_Conv W16 W64 -> integerExtend W16 W64 MOVZxL x
      MO_UU_Conv W32 W64 -> integerExtend W32 W64 MOVZxL x
      MO_SS_Conv W8  W64 -> integerExtend W8  W64 MOVSxL x
      MO_SS_Conv W16 W64 -> integerExtend W16 W64 MOVSxL x
      MO_SS_Conv W32 W64 -> integerExtend W32 W64 MOVSxL x
1011 1012 1013 1014 1015 1016 1017
	-- for 32-to-64 bit zero extension, amd64 uses an ordinary movl.
	-- However, we don't want the register allocator to throw it
	-- away as an unnecessary reg-to-reg move, so we keep it in
	-- the form of a movzl and print it as a movl later.
#endif

#if i386_TARGET_ARCH
1018 1019
      MO_FF_Conv W32 W64 -> conversionNop FF64 x
      MO_FF_Conv W64 W32 -> conversionNop FF32 x
1020
#else
1021 1022
      MO_FF_Conv W32 W64 -> coerceFP2FP W64 x
      MO_FF_Conv W64 W32 -> coerceFP2FP W32 x
1023 1024
#endif

1025 1026
      MO_FS_Conv from to -> coerceFP2Int from to x
      MO_SF_Conv from to -> coerceInt2FP from to x
1027

1028
      other -> pprPanic "getRegister" (pprMachOp mop)
1029
   where
1030 1031 1032
	triv_ucode :: (Size -> Operand -> Instr) -> Size -> NatM Register
	triv_ucode instr size = trivialUCode size (instr size) x

1033
	-- signed or unsigned extension.
1034 1035 1036
	integerExtend :: Width -> Width
		      -> (Size -> Operand -> Operand -> Instr)
		      -> CmmExpr -> NatM Register
1037
	integerExtend from to instr expr = do
1038
	    (reg,e_code) <- if from == W8 then getByteReg expr
1039 1040 1041 1042
					  else getSomeReg expr
	    let 
		code dst = 
		  e_code `snocOL`
1043 1044
		  instr (intSize from) (OpReg reg) (OpReg dst)
	    return (Any (intSize to) code)
1045