{-# OPTIONS -w #-}
-- The above warning supression flag is a temporary kludge.
-- While working on this module you are encouraged to remove it and fix
-- any warnings in the module. See
--     http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings
-- for details

-----------------------------------------------------------------------------
--
-- Generating machine code (instruction selection)
--
-- (c) The University of Glasgow 1996-2004
--
-----------------------------------------------------------------------------

-- This is a big module, but, if you pay attention to
-- (a) the sectioning, (b) the type signatures, and
-- (c) the #if blah_TARGET_ARCH} things, the
-- structure should not be too overwhelming.

module MachCodeGen ( cmmTopCodeGen, InstrBlock ) where

#include "HsVersions.h"
#include "nativeGen/NCG.h"
#include "MachDeps.h"

-- NCG stuff:
import Instrs
import Regs
import NCGMonad
import PositionIndependentCode
import RegAllocInfo	( mkBranchInstr, mkRegRegMoveInstr )
import PprMach

-- Our intermediate code:
import BlockId
import PprCmm		( pprExpr )
import Cmm
import CLabel
import ClosureInfo	( C_SRT(..) )

-- The rest:
import StaticFlags	( opt_PIC )
import ForeignCall	( CCallConv(..) )
import OrdList
import Pretty
import qualified Outputable as O
import Outputable
import FastString
import FastBool		( isFastTrue )
import Constants	( wORD_SIZE )

import Debug.Trace	( trace )

import Control.Monad	( mapAndUnzipM )
import Data.Maybe	( fromJust )
import Data.Bits
import Data.Word
import Data.Int
60

61

62 63 64 65 66 67 68 69 70 71
-- -----------------------------------------------------------------------------
-- Top-level of the instruction selector

-- | 'InstrBlock's are the insn sequences generated by the insn selectors.
-- They are really trees of insns to facilitate fast appending, where a
-- left-to-right traversal yields the insns in the correct order.
-- (Backed by 'OrdList', which gives O(1) append/cons/snoc.)

type InstrBlock = OrdList Instr

-- | Native-code-generate one top-level Cmm item.
--
-- A 'CmmProc' is translated basic block by basic block; any extra
-- top-level data extracted during translation is appended after the
-- procedure.  When a PIC base register is in use, its initialisation
-- is spliced in afterwards.  'CmmData' passes through untranslated,
-- since 'CmmStatic' is used directly in the native representation.
cmmTopCodeGen :: RawCmmTop -> NatM [NatCmmTop]
cmmTopCodeGen (CmmProc info lab params (ListGraph blocks)) = do
  (nat_blocks, statics) <- mapAndUnzipM basicBlockCodeGen blocks
  picBaseMb <- getPicBaseMaybeNat
  let proc = CmmProc info lab params (ListGraph (concat nat_blocks))
      tops = proc : concat statics
  case picBaseMb of
      Nothing      -> return tops
      Just picBase -> initializePicBase picBase tops

cmmTopCodeGen (CmmData sec dat)
  = return [CmmData sec dat]  -- no translation, we just use CmmStatic

-- | Generate native code for a single basic block.
--
-- Instruction selection may introduce fresh basic-block boundaries
-- (marked by NEWBLOCK pseudo-instructions) and out-of-line data
-- (LDATA), so the flat instruction stream is re-partitioned here:
-- a NEWBLOCK starts a new block, and LDATA is hived off as a
-- top-level data item.
basicBlockCodeGen :: CmmBasicBlock -> NatM ([NatBasicBlock],[NatCmmTop])
basicBlockCodeGen (BasicBlock id stmts) = do
  instrs <- stmtsToInstrs stmts
  let (first_block, extra_blocks, top_data)
        = foldrOL splitOne ([],[],[]) instrs

      splitOne (NEWBLOCK bid) (cur, blocks, datas)
        = ([], BasicBlock bid cur : blocks, datas)
      splitOne (LDATA sec dat) (cur, blocks, datas)
        = (cur, blocks, CmmData sec dat : datas)
      splitOne instr (cur, blocks, datas)
        = (instr : cur, blocks, datas)
  return (BasicBlock id first_block : extra_blocks, top_data)

-- | Translate a statement sequence, concatenating the per-statement code.
stmtsToInstrs :: [CmmStmt] -> NatM InstrBlock
stmtsToInstrs stmts = do
    instr_lists <- mapM stmtToInstrs stmts
    return (concatOL instr_lists)

-- | Translate a single Cmm statement into native instructions.
-- Dispatch is purely on the statement constructor; assignments and
-- stores additionally dispatch on the type of the value (float,
-- 64-bit-on-32-bit, or plain integer).
stmtToInstrs :: CmmStmt -> NatM InstrBlock
stmtToInstrs stmt = case stmt of
    CmmNop         -> return nilOL
    CmmComment s   -> return (unitOL (COMMENT s))

    CmmAssign reg src
      | isFloatType ty -> assignReg_FltCode size reg src
#if WORD_SIZE_IN_BITS==32
      | isWord64 ty    -> assignReg_I64Code      reg src
#endif
      | otherwise      -> assignReg_IntCode size reg src
        where ty   = cmmRegType reg
              size = cmmTypeSize ty

    CmmStore addr src
      | isFloatType ty -> assignMem_FltCode size addr src
#if WORD_SIZE_IN_BITS==32
      | isWord64 ty    -> assignMem_I64Code      addr src
#endif
      | otherwise      -> assignMem_IntCode size addr src
        where ty   = cmmExprType src
              size = cmmTypeSize ty

    CmmCall target result_regs args _ _
       -> genCCall target result_regs args

    CmmBranch id          -> genBranch id
    CmmCondBranch arg id  -> genCondJump id arg
    CmmSwitch arg ids     -> genSwitch arg ids
    CmmJump arg params    -> genJump arg
    CmmReturn params      ->
      panic "stmtToInstrs: return statement should have been cps'd away"
141 142 143 144 145 146 147 148

-- -----------------------------------------------------------------------------
-- General things for putting together code sequences

-- | Expand a 'CmmRegOff' into the equivalent explicit addition
-- @reg + off@ so the ordinary MachOp paths can handle it.
-- ToDo: should we do it this way around, or convert CmmExprs into
-- CmmRegOff?
-- NOTE(review): only CmmRegOff is handled; callers must not pass any
-- other expression form.
mangleIndexTree :: CmmExpr -> CmmExpr
mangleIndexTree (CmmRegOff reg off) =
    let width = typeWidth (cmmRegType reg)
    in  CmmMachOp (MO_Add width)
                  [CmmReg reg, CmmLit (CmmInt (fromIntegral off) width)]
151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176

-- -----------------------------------------------------------------------------
--  Code gen for 64-bit arithmetic on 32-bit platforms

{-
Simple support for generating 64-bit code (ie, 64 bit values and 64
bit assignments) on 32-bit platforms.  Unlike the main code generator
we merely shoot for generating working code as simply as possible, and
pay little attention to code quality.  Specifically, there is no
attempt to deal cleverly with the fixed-vs-floating register
distinction; all values are generated into (pairs of) floating
registers, even if this would mean some redundant reg-reg moves as a
result.  Only one of the VRegUniques is returned, since it will be
of the VRegUniqueLo form, and the upper-half VReg can be determined
by applying getHiVRegFromLo to it.
-}

-- | Result of instruction selection for a 64-bit value on a 32-bit
-- platform: the code, plus the virtual register holding the low half.
data ChildCode64 	-- a.k.a "Register64"
   = ChildCode64 
        InstrBlock 	-- code to compute the 64-bit value
        Reg	 	-- the lower 32-bit temporary which contains the
			-- result; apply getHiVRegFromLo to it to obtain
			-- the register holding the upper half.  Rules of
			-- this simplified insn selection game are
			-- therefore that the returned Reg may be modified

-- Type signatures for the 64-bit-on-32-bit helpers.  The
-- implementations are per-architecture, below.
#if WORD_SIZE_IN_BITS==32
assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock
assignReg_I64Code :: CmmReg  -> CmmExpr -> NatM InstrBlock
#endif

#ifndef x86_64_TARGET_ARCH
iselExpr64        :: CmmExpr -> NatM ChildCode64
#endif
185 186 187 188 189 190 191 192 193 194 195 196

-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

#if i386_TARGET_ARCH

-- | Store a 64-bit value to memory (i386): two 32-bit moves, low word
-- at the base address, high word 4 bytes above (little-endian layout).
assignMem_I64Code addrTree valueTree = do
  Amode addr addr_code <- getAmode addrTree
  ChildCode64 vcode rlo <- iselExpr64 valueTree
  let rhi      = getHiVRegFromLo rlo
      -- Little-endian store
      store_lo = MOV II32 (OpReg rlo) (OpAddr addr)
      store_hi = MOV II32 (OpReg rhi) (OpAddr (fromJust (addrOffset addr 4)))
  return (vcode `appOL` addr_code `snocOL` store_lo `snocOL` store_hi)


203
-- | Assign a 64-bit value to a local register pair (i386): two
-- reg-to-reg moves, one per half.
assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
   ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
   let r_dst_lo = mkVReg u_dst II32
       r_dst_hi = getHiVRegFromLo r_dst_lo
       r_src_hi = getHiVRegFromLo r_src_lo
       mov_lo   = MOV II32 (OpReg r_src_lo) (OpReg r_dst_lo)
       mov_hi   = MOV II32 (OpReg r_src_hi) (OpReg r_dst_hi)
   return (vcode `snocOL` mov_lo `snocOL` mov_hi)

assignReg_I64Code lvalue valueTree
   = panic "assignReg_I64Code(i386): invalid lvalue"

------------

-- Select code for a 64-bit expression (i386).  The result register is
-- the low half; the high half is getHiVRegFromLo of it.

-- 64-bit literal: load each 32-bit half as an immediate.
iselExpr64 (CmmLit (CmmInt i _)) = do
  (rlo, rhi) <- getNewRegPairNat II32
  let lo_word = fromIntegral (fromIntegral i :: Word32)
      hi_word = fromIntegral ((fromIntegral i `shiftR` 32) :: Word32)
      code = toOL [ MOV II32 (OpImm (ImmInteger lo_word)) (OpReg rlo)
                  , MOV II32 (OpImm (ImmInteger hi_word)) (OpReg rhi) ]
  return (ChildCode64 code rlo)

-- 64-bit load: two 32-bit loads, little-endian.
iselExpr64 (CmmLoad addrTree ty) | isWord64 ty = do
   Amode addr addr_code <- getAmode addrTree
   (rlo, rhi) <- getNewRegPairNat II32
   let load_lo = MOV II32 (OpAddr addr) (OpReg rlo)
       load_hi = MOV II32 (OpAddr (fromJust (addrOffset addr 4))) (OpReg rhi)
   return (ChildCode64 (addr_code `snocOL` load_lo `snocOL` load_hi) rlo)

-- A 64-bit local already lives in a register pair.
iselExpr64 (CmmReg (CmmLocal (LocalReg vu ty))) | isWord64 ty
   = return (ChildCode64 nilOL (mkVReg vu II32))

-- we handle addition, but rather badly: plain add on the low halves,
-- then add-with-carry on the high halves.
iselExpr64 (CmmMachOp (MO_Add _) [e1, CmmLit (CmmInt i _)]) = do
   ChildCode64 code1 r1lo <- iselExpr64 e1
   (rlo, rhi) <- getNewRegPairNat II32
   let lo_imm = fromIntegral (fromIntegral i :: Word32)
       hi_imm = fromIntegral ((fromIntegral i `shiftR` 32) :: Word32)
       r1hi   = getHiVRegFromLo r1lo
       code   = code1 `appOL`
                toOL [ MOV II32 (OpReg r1lo) (OpReg rlo)
                     , ADD II32 (OpImm (ImmInteger lo_imm)) (OpReg rlo)
                     , MOV II32 (OpReg r1hi) (OpReg rhi)
                     , ADC II32 (OpImm (ImmInteger hi_imm)) (OpReg rhi) ]
   return (ChildCode64 code rlo)

iselExpr64 (CmmMachOp (MO_Add _) [e1, e2]) = do
   ChildCode64 code1 r1lo <- iselExpr64 e1
   ChildCode64 code2 r2lo <- iselExpr64 e2
   (rlo, rhi) <- getNewRegPairNat II32
   let r1hi = getHiVRegFromLo r1lo
       r2hi = getHiVRegFromLo r2lo
       code = code1 `appOL` code2 `appOL`
              toOL [ MOV II32 (OpReg r1lo) (OpReg rlo)
                   , ADD II32 (OpReg r2lo) (OpReg rlo)
                   , MOV II32 (OpReg r1hi) (OpReg rhi)
                   , ADC II32 (OpReg r2hi) (OpReg rhi) ]
   return (ChildCode64 code rlo)

-- Zero-extension to 64 bits: compute into the low half, zero the high.
iselExpr64 (CmmMachOp (MO_UU_Conv _ W64) [expr]) = do
     fn <- getAnyReg expr
     r_dst_lo <- getNewRegNat II32
     let r_dst_hi = getHiVRegFromLo r_dst_lo
         code     = fn r_dst_lo
     return (ChildCode64
               (code `snocOL` MOV II32 (OpImm (ImmInt 0)) (OpReg r_dst_hi))
               r_dst_lo)

iselExpr64 expr
   = pprPanic "iselExpr64(i386)" (ppr expr)

#endif /* i386_TARGET_ARCH */

-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

#if sparc_TARGET_ARCH

300 301 302 303 304
-- | Store a 64-bit value to memory (sparc): two 32-bit stores through
-- a register holding the address; high word first (big-endian layout).
--
-- FIX: the previous version also ran @getAmode addrTree@ and bound the
-- resulting amode and its code, but never used either (the stores go
-- through the register obtained from 'getSomeReg').  That duplicated
-- the address computation for no effect, so the dead call is removed.
assignMem_I64Code addrTree valueTree = do
     ChildCode64 vcode rlo <- iselExpr64 valueTree
     (src, code) <- getSomeReg addrTree
     let rhi    = getHiVRegFromLo rlo
         -- Big-endian store: high half at offset 0, low half at offset 4
         mov_hi = ST II32 rhi (AddrRegImm src (ImmInt 0))
         mov_lo = ST II32 rlo (AddrRegImm src (ImmInt 4))
     return (vcode `appOL` code `snocOL` mov_hi `snocOL` mov_lo)
310

311
-- | Assign a 64-bit value to a local register pair (sparc).
-- NOTE(review): r_dst_lo is sized with @cmmTypeSize pk@ here, unlike
-- the i386/powerpc versions which use II32 -- confirm this is intended.
assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
     ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
     let r_dst_lo = mkVReg u_dst (cmmTypeSize pk)
         r_dst_hi = getHiVRegFromLo r_dst_lo
         r_src_hi = getHiVRegFromLo r_src_lo
         mov_lo   = mkMOV r_src_lo r_dst_lo
         mov_hi   = mkMOV r_src_hi r_dst_hi
         -- a reg-to-reg move on sparc: or %g0, src, dst
         mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
     return (vcode `snocOL` mov_hi `snocOL` mov_lo)

assignReg_I64Code lvalue valueTree
   = panic "assignReg_I64Code(sparc): invalid lvalue"
323 324


325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370
-- Load a 64 bit word (sparc): two 32-bit loads, big-endian, so the
-- high word comes from the lower address.
iselExpr64 (CmmLoad addrTree ty)
 | isWord64 ty
 = do   Amode amode addr_code <- getAmode addrTree
        case amode of
          AddrRegReg r1 r2
            -> do rlo <- getNewRegNat II32
                  tmp <- getNewRegNat II32
                  let rhi = getHiVRegFromLo rlo
                  -- materialise r1+r2 once, then load at offsets 0 and 4
                  return $ ChildCode64
                         (        addr_code
                         `appOL`  toOL
                                  [ ADD False False r1 (RIReg r2) tmp
                                  , LD II32 (AddrRegImm tmp (ImmInt 0)) rhi
                                  , LD II32 (AddrRegImm tmp (ImmInt 4)) rlo ])
                         rlo

          AddrRegImm r1 (ImmInt i)
            -> do rlo <- getNewRegNat II32
                  let rhi = getHiVRegFromLo rlo
                  return $ ChildCode64
                         (        addr_code
                         `appOL`  toOL
                                  [ LD II32 (AddrRegImm r1 (ImmInt $ 0 + i)) rhi
                                  , LD II32 (AddrRegImm r1 (ImmInt $ 4 + i)) rlo ])
                         rlo


-- Add a literal to a 64 bit integer: add to the low half, then add the
-- carry into the high half (ADD True = add-with-carry).
--
-- FIX: the instructions in code1 -- which compute e1 into r1_lo/r1_hi --
-- were being dropped from the returned ChildCode64; they are now
-- prepended, matching every other iselExpr64 clause.
-- NOTE(review): @RIImm (ImmInteger i)@ assumes i fits a SPARC simm13;
-- confirm callers guarantee this.
iselExpr64 (CmmMachOp (MO_Add _) [e1, CmmLit (CmmInt i _)])
 = do   ChildCode64 code1 r1_lo <- iselExpr64 e1
        let r1_hi       = getHiVRegFromLo r1_lo

        r_dst_lo        <- getNewRegNat II32
        let r_dst_hi    = getHiVRegFromLo r_dst_lo

        return  $ ChildCode64
                        ( code1
                        `appOL` toOL
                        [ ADD False False r1_lo (RIImm (ImmInteger i)) r_dst_lo
                        , ADD True  False r1_hi (RIReg g0)             r_dst_hi ])
                        r_dst_lo
371 372


373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392
-- 64-bit addition (sparc): plain add on the low halves, then
-- add-with-carry on the high halves.
iselExpr64 (CmmMachOp (MO_Add width) [e1, e2])
 = do   ChildCode64 code1 r1_lo <- iselExpr64 e1
        let r1_hi = getHiVRegFromLo r1_lo

        ChildCode64 code2 r2_lo <- iselExpr64 e2
        let r2_hi = getHiVRegFromLo r2_lo

        r_dst_lo <- getNewRegNat II32
        let r_dst_hi = getHiVRegFromLo r_dst_lo

            code =  code1
                `appOL` code2
                `appOL` toOL
                        [ ADD False False r1_lo (RIReg r2_lo) r_dst_lo
                        , ADD True  False r1_hi (RIReg r2_hi) r_dst_hi ]

        return  $ ChildCode64 code r_dst_lo


393 394
-- A 64-bit local: copy both halves into a fresh register pair.
iselExpr64 (CmmReg (CmmLocal (LocalReg uq ty))) | isWord64 ty = do
     r_dst_lo <- getNewRegNat II32
     let r_dst_hi = getHiVRegFromLo r_dst_lo
         r_src_lo = mkVReg uq II32
         r_src_hi = getHiVRegFromLo r_src_lo
         mov_lo   = mkMOV r_src_lo r_dst_lo
         mov_hi   = mkMOV r_src_hi r_dst_hi
         -- reg-to-reg move on sparc: or %g0, src, dst
         mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
     return (
            ChildCode64 (toOL [mov_hi, mov_lo]) r_dst_lo
         )

405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421
-- Zero-extend to 64 bits: compute the value into the low register and
-- clear the high register.
iselExpr64 (CmmMachOp (MO_UU_Conv _ W64) [expr])
 = do
        r_dst_lo <- getNewRegNat II32
        let r_dst_hi = getHiVRegFromLo r_dst_lo

        -- compute expr and load it into r_dst_lo
        (a_reg, a_code) <- getSomeReg expr

        let code = a_code
                `appOL` toOL
                        [ mkRegRegMoveInstr g0    r_dst_hi  -- clear high 32 bits
                        , mkRegRegMoveInstr a_reg r_dst_lo ]

        return  $ ChildCode64 code r_dst_lo


iselExpr64 expr
   = pprPanic "iselExpr64(sparc)" (ppr expr)
424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447

#endif /* sparc_TARGET_ARCH */

-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

#if powerpc_TARGET_ARCH

-- | Addressing modes for the high and low words of a 64-bit memory
-- operand (powerpc).  If the base amode cannot simply be offset by 4,
-- force the address into a register and use reg+0 / reg+4.
getI64Amodes :: CmmExpr -> NatM (AddrMode, AddrMode, InstrBlock)
getI64Amodes addrTree = do
    Amode hi_addr addr_code <- getAmode addrTree
    case addrOffset hi_addr 4 of
        Just lo_addr -> return (hi_addr, lo_addr, addr_code)
        Nothing      -> do
            (hi_ptr, code) <- getSomeReg addrTree
            return ( AddrRegImm hi_ptr (ImmInt 0)
                   , AddrRegImm hi_ptr (ImmInt 4)
                   , code )

-- | Store a 64-bit value to memory (powerpc): two 32-bit stores, with
-- the high word at the lower address (big-endian layout).
assignMem_I64Code addrTree valueTree = do
        (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
        ChildCode64 vcode rlo <- iselExpr64 valueTree
        let rhi    = getHiVRegFromLo rlo
            -- Big-endian store
            mov_hi = ST II32 rhi hi_addr
            mov_lo = ST II32 rlo lo_addr
        return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)

453
-- | Assign a 64-bit value to a local register pair (powerpc): one MR
-- (move-register) per half.
assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
   ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
   let r_dst_lo = mkVReg u_dst II32
       r_dst_hi = getHiVRegFromLo r_dst_lo
       r_src_hi = getHiVRegFromLo r_src_lo
       mov_lo   = MR r_dst_lo r_src_lo
       mov_hi   = MR r_dst_hi r_src_hi
   return (vcode `snocOL` mov_lo `snocOL` mov_hi)

assignReg_I64Code lvalue valueTree
   = panic "assignReg_I64Code(powerpc): invalid lvalue"


-- Don't delete this -- it's very handy for debugging.
--iselExpr64 expr 
--   | trace ("iselExpr64: " ++ showSDoc (pprCmmExpr expr)) False
--   = panic "iselExpr64(???)"

475
-- 64-bit load (powerpc): one 32-bit load per half via the hi/lo amodes.
iselExpr64 (CmmLoad addrTree ty) | isWord64 ty = do
    (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
    (rlo, rhi) <- getNewRegPairNat II32
    let load_hi = LD II32 rhi hi_addr
        load_lo = LD II32 rlo lo_addr
    return $ ChildCode64 (addr_code `snocOL` load_lo `snocOL` load_hi)
                         rlo

-- A 64-bit local already lives in a register pair.
iselExpr64 (CmmReg (CmmLocal (LocalReg vu ty))) | isWord64 ty
   = return (ChildCode64 nilOL (mkVReg vu II32))
485 486

-- 64-bit integer literal (powerpc): build each 32-bit half with lis
-- (load-immediate-shifted, setting bits 16..31 of the half) followed
-- by an or-immediate of the low 16 bits.
--
-- FIX: the final instruction used to be @OR rlo rlo … half2@, which
-- clobbered the already-built low word with half2 (bits 32..47 of the
-- literal) and left the low 16 bits of the high word zero.  half2
-- belongs in rhi: @OR rhi rhi … half2@.
iselExpr64 (CmmLit (CmmInt i _)) = do
  (rlo,rhi) <- getNewRegPairNat II32
  let
        half0 = fromIntegral (fromIntegral i :: Word16)
        half1 = fromIntegral ((fromIntegral i `shiftR` 16) :: Word16)
        half2 = fromIntegral ((fromIntegral i `shiftR` 32) :: Word16)
        half3 = fromIntegral ((fromIntegral i `shiftR` 48) :: Word16)

        code = toOL [
                LIS rlo (ImmInt half1),
                OR rlo rlo (RIImm $ ImmInt half0),
                LIS rhi (ImmInt half3),
                OR rhi rhi (RIImm $ ImmInt half2)
                ]
  -- in
  return (ChildCode64 code rlo)

-- 64-bit addition (powerpc): add-carrying on the low halves, then
-- add-extended (consuming the carry) on the high halves.
iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
   ChildCode64 code1 r1lo <- iselExpr64 e1
   ChildCode64 code2 r2lo <- iselExpr64 e2
   (rlo, rhi) <- getNewRegPairNat II32
   let r1hi = getHiVRegFromLo r1lo
       r2hi = getHiVRegFromLo r2lo
       code = code1 `appOL` code2 `appOL`
              toOL [ ADDC rlo r1lo r2lo
                   , ADDE rhi r1hi r2hi ]
   return (ChildCode64 code rlo)

-- Zero-extend a 32-bit value: move it into the low register and zero
-- the high register.
iselExpr64 (CmmMachOp (MO_UU_Conv W32 W64) [expr]) = do
    (expr_reg, expr_code) <- getSomeReg expr
    (rlo, rhi) <- getNewRegPairNat II32
    let mov_hi = LI rhi (ImmInt 0)
        mov_lo = MR rlo expr_reg
    return $ ChildCode64 (expr_code `snocOL` mov_lo `snocOL` mov_hi)
                         rlo

iselExpr64 expr
   = pprPanic "iselExpr64(powerpc)" (ppr expr)

#endif /* powerpc_TARGET_ARCH */


-- -----------------------------------------------------------------------------
-- The 'Register' type

-- 'Register's passed up the tree.  If the stix code forces the register
-- to live in a pre-decided machine register, it comes out as @Fixed@;
-- otherwise, it comes out as @Any@, and the parent can decide which
-- register to put it in.

-- | A value passed up the expression tree during instruction selection.
data Register
  = Fixed   Size Reg InstrBlock   -- value is forced into a pre-decided register
  | Any	    Size (Reg -> InstrBlock)
                                  -- parent chooses the destination register

542 543 544 545
-- | Change the width annotation of a 'Register'; the code is untouched,
-- so this is a no-op at the instruction level.
swizzleRegisterRep :: Register -> Size -> Register
swizzleRegisterRep (Fixed _ r c) sz = Fixed sz r c
swizzleRegisterRep (Any _ f)     sz = Any   sz f
546 547


548 549 550 551 552 553 554 555 556 557 558 559 560 561 562
-- -----------------------------------------------------------------------------
-- Utils based on getRegister, below

-- | The dual to getAnyReg: compute an expression into a register, but
-- we don't mind which one it is.  A 'Fixed' result is used as-is; an
-- 'Any' result is materialised into a fresh virtual register.
getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
getSomeReg expr = do
  r <- getRegister expr
  case r of
    Fixed _ reg code ->
        return (reg, code)
    Any rep code -> do
        tmp <- getNewRegNat rep
        return (tmp, code tmp)

563 564 565 566 567
-- -----------------------------------------------------------------------------
-- Grab the Reg for a CmmReg

-- | The (virtual or real) register corresponding to a CmmReg.
getRegisterReg :: CmmReg -> Reg

getRegisterReg (CmmLocal (LocalReg u pk))
  = mkVReg u (cmmTypeSize pk)

getRegisterReg (CmmGlobal mid)
  = case get_GlobalReg_reg_or_addr mid of
       Left (RealReg rrno) -> RealReg rrno
       _other -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid)
          -- By this stage, the only MagicIds remaining should be the
          -- ones which map to a real machine register on this
          -- platform.  Hence the panic for anything else.


-- -----------------------------------------------------------------------------
-- Generate code to get a subtree into a Register

-- Don't delete this -- it's very handy for debugging.
--getRegister expr 
--   | trace ("getRegister: " ++ showSDoc (pprCmmExpr expr)) False
--   = panic "getRegister(???)"

getRegister :: CmmExpr -> NatM Register

590 591 592
#if !x86_64_TARGET_ARCH
    -- on x86_64, we have %rip for PicBaseReg, but it's not a full-featured
    -- register, it can only be used for rip-relative addressing.
getRegister (CmmReg (CmmGlobal PicBaseReg))
  = do reg <- getPicBaseNat wordSize
       return (Fixed wordSize reg nilOL)
#endif
598

599
-- A plain register reference is already Fixed in its home register.
getRegister (CmmReg reg)
  = return (Fixed (cmmTypeSize (cmmRegType reg))
                  (getRegisterReg reg) nilOL)

-- Register-plus-offset: expand to an explicit addition first.
getRegister tree@(CmmRegOff _ _)
  = getRegister (mangleIndexTree tree)

606 607 608 609 610

#if WORD_SIZE_IN_BITS==32
    -- for 32-bit architectures, support some 64 -> 32 bit conversions:
    -- TO_W_(x), TO_W_(x >> 32)

-- Truncating after shifting right by 32 selects the high half register.
getRegister (CmmMachOp (MO_UU_Conv W64 W32)
             [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]]) = do
  ChildCode64 code rlo <- iselExpr64 x
  return $ Fixed II32 (getHiVRegFromLo rlo) code

getRegister (CmmMachOp (MO_SS_Conv W64 W32)
             [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]]) = do
  ChildCode64 code rlo <- iselExpr64 x
  return $ Fixed II32 (getHiVRegFromLo rlo) code

-- Plain 64 -> 32 truncation selects the low half register.
getRegister (CmmMachOp (MO_UU_Conv W64 W32) [x]) = do
  ChildCode64 code rlo <- iselExpr64 x
  return $ Fixed II32 rlo code

getRegister (CmmMachOp (MO_SS_Conv W64 W32) [x]) = do
  ChildCode64 code rlo <- iselExpr64 x
  return $ Fixed II32 rlo code

#endif

631 632 633 634 635 636 637 638 639 640 641 642 643 644
-- end of machine-"independent" bit; here we go on the rest...

#if alpha_TARGET_ARCH

getRegister (StDouble d)
  = getBlockIdNat 	    	    `thenNat` \ lbl ->
    getNewRegNat PtrRep    	    `thenNat` \ tmp ->
    let code dst = mkSeqInstrs [
	    LDATA RoDataSegment lbl [
		    DATA TF [ImmLab (rational d)]
		],
	    LDA tmp (AddrImm (ImmCLbl lbl)),
	    LD TF dst (AddrReg tmp)]
    in
645
    	return (Any FF64 code)
646 647 648 649 650 651 652

getRegister (StPrim primop [x]) -- unary PrimOps
  = case primop of
      IntNegOp -> trivialUCode (NEG Q False) x

      NotOp    -> trivialUCode NOT x

653 654
      FloatNegOp  -> trivialUFCode FloatRep (FNEG TF) x
      DoubleNegOp -> trivialUFCode FF64 (FNEG TF) x
655 656 657 658 659 660 661 662 663 664 665 666

      OrdOp -> coerceIntCode IntRep x
      ChrOp -> chrCode x

      Float2IntOp  -> coerceFP2Int    x
      Int2FloatOp  -> coerceInt2FP pr x
      Double2IntOp -> coerceFP2Int    x
      Int2DoubleOp -> coerceInt2FP pr x

      Double2FloatOp -> coerceFltCode x
      Float2DoubleOp -> coerceFltCode x

667
      other_op -> getRegister (StCall fn CCallConv FF64 [x])
668 669
	where
	  fn = case other_op of
670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693
		 FloatExpOp    -> fsLit "exp"
		 FloatLogOp    -> fsLit "log"
		 FloatSqrtOp   -> fsLit "sqrt"
		 FloatSinOp    -> fsLit "sin"
		 FloatCosOp    -> fsLit "cos"
		 FloatTanOp    -> fsLit "tan"
		 FloatAsinOp   -> fsLit "asin"
		 FloatAcosOp   -> fsLit "acos"
		 FloatAtanOp   -> fsLit "atan"
		 FloatSinhOp   -> fsLit "sinh"
		 FloatCoshOp   -> fsLit "cosh"
		 FloatTanhOp   -> fsLit "tanh"
		 DoubleExpOp   -> fsLit "exp"
		 DoubleLogOp   -> fsLit "log"
		 DoubleSqrtOp  -> fsLit "sqrt"
		 DoubleSinOp   -> fsLit "sin"
		 DoubleCosOp   -> fsLit "cos"
		 DoubleTanOp   -> fsLit "tan"
		 DoubleAsinOp  -> fsLit "asin"
		 DoubleAcosOp  -> fsLit "acos"
		 DoubleAtanOp  -> fsLit "atan"
		 DoubleSinhOp  -> fsLit "sinh"
		 DoubleCoshOp  -> fsLit "cosh"
		 DoubleTanhOp  -> fsLit "tanh"
694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752
  where
    pr = panic "MachCode.getRegister: no primrep needed for Alpha"

getRegister (StPrim primop [x, y]) -- dyadic PrimOps
  = case primop of
      CharGtOp -> trivialCode (CMP LTT) y x
      CharGeOp -> trivialCode (CMP LE) y x
      CharEqOp -> trivialCode (CMP EQQ) x y
      CharNeOp -> int_NE_code x y
      CharLtOp -> trivialCode (CMP LTT) x y
      CharLeOp -> trivialCode (CMP LE) x y

      IntGtOp  -> trivialCode (CMP LTT) y x
      IntGeOp  -> trivialCode (CMP LE) y x
      IntEqOp  -> trivialCode (CMP EQQ) x y
      IntNeOp  -> int_NE_code x y
      IntLtOp  -> trivialCode (CMP LTT) x y
      IntLeOp  -> trivialCode (CMP LE) x y

      WordGtOp -> trivialCode (CMP ULT) y x
      WordGeOp -> trivialCode (CMP ULE) x y
      WordEqOp -> trivialCode (CMP EQQ)  x y
      WordNeOp -> int_NE_code x y
      WordLtOp -> trivialCode (CMP ULT) x y
      WordLeOp -> trivialCode (CMP ULE) x y

      AddrGtOp -> trivialCode (CMP ULT) y x
      AddrGeOp -> trivialCode (CMP ULE) y x
      AddrEqOp -> trivialCode (CMP EQQ)  x y
      AddrNeOp -> int_NE_code x y
      AddrLtOp -> trivialCode (CMP ULT) x y
      AddrLeOp -> trivialCode (CMP ULE) x y
	
      FloatGtOp -> cmpF_code (FCMP TF LE) EQQ x y
      FloatGeOp -> cmpF_code (FCMP TF LTT) EQQ x y
      FloatEqOp -> cmpF_code (FCMP TF EQQ) NE x y
      FloatNeOp -> cmpF_code (FCMP TF EQQ) EQQ x y
      FloatLtOp -> cmpF_code (FCMP TF LTT) NE x y
      FloatLeOp -> cmpF_code (FCMP TF LE) NE x y

      DoubleGtOp -> cmpF_code (FCMP TF LE) EQQ x y
      DoubleGeOp -> cmpF_code (FCMP TF LTT) EQQ x y
      DoubleEqOp -> cmpF_code (FCMP TF EQQ) NE x y
      DoubleNeOp -> cmpF_code (FCMP TF EQQ) EQQ x y
      DoubleLtOp -> cmpF_code (FCMP TF LTT) NE x y
      DoubleLeOp -> cmpF_code (FCMP TF LE) NE x y

      IntAddOp  -> trivialCode (ADD Q False) x y
      IntSubOp  -> trivialCode (SUB Q False) x y
      IntMulOp  -> trivialCode (MUL Q False) x y
      IntQuotOp -> trivialCode (DIV Q False) x y
      IntRemOp  -> trivialCode (REM Q False) x y

      WordAddOp  -> trivialCode (ADD Q False) x y
      WordSubOp  -> trivialCode (SUB Q False) x y
      WordMulOp  -> trivialCode (MUL Q False) x y
      WordQuotOp -> trivialCode (DIV Q True) x y
      WordRemOp  -> trivialCode (REM Q True) x y

753 754 755 756
      FloatAddOp -> trivialFCode  W32 (FADD TF) x y
      FloatSubOp -> trivialFCode  W32 (FSUB TF) x y
      FloatMulOp -> trivialFCode  W32 (FMUL TF) x y
      FloatDivOp -> trivialFCode  W32 (FDIV TF) x y
757

758 759 760 761
      DoubleAddOp -> trivialFCode  W64 (FADD TF) x y
      DoubleSubOp -> trivialFCode  W64 (FSUB TF) x y
      DoubleMulOp -> trivialFCode  W64 (FMUL TF) x y
      DoubleDivOp -> trivialFCode  W64 (FDIV TF) x y
762 763 764 765 766 767 768 769 770 771 772 773 774 775 776

      AddrAddOp  -> trivialCode (ADD Q False) x y
      AddrSubOp  -> trivialCode (SUB Q False) x y
      AddrRemOp  -> trivialCode (REM Q True) x y

      AndOp  -> trivialCode AND x y
      OrOp   -> trivialCode OR  x y
      XorOp  -> trivialCode XOR x y
      SllOp  -> trivialCode SLL x y
      SrlOp  -> trivialCode SRL x y

      ISllOp -> trivialCode SLL x y -- was: panic "AlphaGen:isll"
      ISraOp -> trivialCode SRA x y -- was: panic "AlphaGen:isra"
      ISrlOp -> trivialCode SRL x y -- was: panic "AlphaGen:isrl"

777 778
      FloatPowerOp  -> getRegister (StCall (fsLit "pow") CCallConv FF64 [x,y])
      DoublePowerOp -> getRegister (StCall (fsLit "pow") CCallConv FF64 [x,y])
779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810
  where
    {- ------------------------------------------------------------
	Some bizarre special code for getting condition codes into
	registers.  Integer non-equality is a test for equality
	followed by an XOR with 1.  (Integer comparisons always set
	the result register to 0 or 1.)  Floating point comparisons of
	any kind leave the result in a floating point register, so we
	need to wrangle an integer register out of things.
    -}
    int_NE_code :: StixTree -> StixTree -> NatM Register

    int_NE_code x y
      = trivialCode (CMP EQQ) x y	`thenNat` \ register ->
	getNewRegNat IntRep		`thenNat` \ tmp ->
	let
	    code = registerCode register tmp
	    src  = registerName register tmp
	    code__2 dst = code . mkSeqInstr (XOR src (RIImm (ImmInt 1)) dst)
	in
	return (Any IntRep code__2)

    {- ------------------------------------------------------------
	Comments for int_NE_code also apply to cmpF_code
    -}
    cmpF_code
	:: (Reg -> Reg -> Reg -> Instr)
	-> Cond
	-> StixTree -> StixTree
	-> NatM Register

    cmpF_code instr cond x y
      = trivialFCode pr instr x y	`thenNat` \ register ->
811
	getNewRegNat FF64		`thenNat` \ tmp ->
812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867
	getBlockIdNat			`thenNat` \ lbl ->
	let
	    code = registerCode register tmp
	    result  = registerName register tmp

	    code__2 dst = code . mkSeqInstrs [
		OR zeroh (RIImm (ImmInt 1)) dst,
		BF cond  result (ImmCLbl lbl),
		OR zeroh (RIReg zeroh) dst,
		NEWBLOCK lbl]
	in
	return (Any IntRep code__2)
      where
	pr = panic "trivialU?FCode: does not use PrimRep on Alpha"
      ------------------------------------------------------------

getRegister (CmmLoad pk mem)
  = getAmode mem    	    	    `thenNat` \ amode ->
    let
    	code = amodeCode amode
    	src   = amodeAddr amode
    	size = primRepToSize pk
    	code__2 dst = code . mkSeqInstr (LD size dst src)
    in
    return (Any pk code__2)

getRegister (StInt i)
  | fits8Bits i
  = let
    	code dst = mkSeqInstr (OR zeroh (RIImm src) dst)
    in
    return (Any IntRep code)
  | otherwise
  = let
    	code dst = mkSeqInstr (LDI Q dst src)
    in
    return (Any IntRep code)
  where
    src = ImmInt (fromInteger i)

getRegister leaf
  | isJust imm
  = let
    	code dst = mkSeqInstr (LDA dst (AddrImm imm__2))
    in
    return (Any PtrRep code)
  where
    imm = maybeImm leaf
    imm__2 = case imm of Just x -> x

#endif /* alpha_TARGET_ARCH */

-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

#if i386_TARGET_ARCH

868
-- Float literal (i386): emit the constant into the read-only data
-- section and load it through the x87 FPU.
getRegister (CmmLit (CmmFloat f W32)) = do
    lbl <- getNewLabelNat
    dflags <- getDynFlagsNat
    dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
    Amode addr addr_code <- getAmode dynRef
    let code dst =
            LDATA ReadOnlyData
                        [CmmDataLabel lbl,
                         CmmStaticLit (CmmFloat f W32)]
            `consOL` (addr_code `snocOL`
            GLD FF32 addr dst)
    return (Any FF32 code)
881 882


883
-- Double literal (i386): 0.0 and 1.0 have dedicated x87 load
-- instructions (fldz / fld1); anything else goes through the data
-- section like floats do.  (CmmFloat carries a Rational, so equality
-- with 0.0 here cannot confuse a negative zero.)
getRegister (CmmLit (CmmFloat d W64))
  | d == 0.0
  = let code dst = unitOL (GLDZ dst)
    in  return (Any FF64 code)

  | d == 1.0
  = let code dst = unitOL (GLD1 dst)
    in  return (Any FF64 code)

  | otherwise = do
    lbl <- getNewLabelNat
    dflags <- getDynFlagsNat
    dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
    Amode addr addr_code <- getAmode dynRef
    let code dst =
            LDATA ReadOnlyData
                        [CmmDataLabel lbl,
                         CmmStaticLit (CmmFloat d W64)]
            `consOL` (addr_code `snocOL`
            GLD FF64 addr dst)
    return (Any FF64 code)
905

906 907 908 909
#endif /* i386_TARGET_ARCH */

#if x86_64_TARGET_ARCH

-- Zero float/double literal (x86_64/SSE): xor the destination with
-- itself instead of loading from memory.
-- NOTE(review): the 0.0 pattern matches via (==); this assumes the
-- literal payload cannot carry a distinct -0.0, whose sign bit an
-- xor-with-self would lose -- confirm against the CmmFloat type.
getRegister (CmmLit (CmmFloat 0.0 w)) = do
   let size = floatSize w
       code dst = unitOL  (XOR size (OpReg dst) (OpReg dst))
        -- I don't know why there are xorpd, xorps, and pxor instructions.
        -- They all appear to do the same thing --SDM
   return (Any size code)
-- Non-zero float/double literal (x86_64/SSE): emit the value into a
-- read-only data section and load it with a RIP-relative MOV.
getRegister (CmmLit (CmmFloat f w)) = do
    lbl <- getNewLabelNat
    let code dst = toOL [
            LDATA ReadOnlyData
                        [CmmDataLabel lbl,
                         CmmStaticLit (CmmFloat f w)],
            MOV size (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
            ]
    return (Any size code)
  where size = floatSize w

#endif /* x86_64_TARGET_ARCH */

#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-- catch simple cases of zero- or sign-extended load: fuse the load
-- and the widening conversion into a single MOVZx/MOVSx instruction
-- instead of a plain load followed by a separate extend.

getRegister (CmmMachOp (MO_UU_Conv W8 W32) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVZxL II8) addr
  return (Any II32 code)

getRegister (CmmMachOp (MO_SS_Conv W8 W32) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVSxL II8) addr
  return (Any II32 code)

getRegister (CmmMachOp (MO_UU_Conv W16 W32) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVZxL II16) addr
  return (Any II32 code)

getRegister (CmmMachOp (MO_SS_Conv W16 W32) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVSxL II16) addr
  return (Any II32 code)
#endif

#if x86_64_TARGET_ARCH

-- catch simple cases of zero- or sign-extended load (64-bit results):
-- fuse the load and the widening into one MOVZx/MOVSx/MOV instruction.

getRegister (CmmMachOp (MO_UU_Conv W8 W64) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVZxL II8) addr
  return (Any II64 code)

getRegister (CmmMachOp (MO_SS_Conv W8 W64) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVSxL II8) addr
  return (Any II64 code)

getRegister (CmmMachOp (MO_UU_Conv W16 W64) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVZxL II16) addr
  return (Any II64 code)

getRegister (CmmMachOp (MO_SS_Conv W16 W64) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVSxL II16) addr
  return (Any II64 code)

getRegister (CmmMachOp (MO_UU_Conv W32 W64) [CmmLoad addr _]) = do
  code <- intLoadCode (MOV II32) addr -- 32-bit loads zero-extend
  return (Any II64 code)

getRegister (CmmMachOp (MO_SS_Conv W32 W64) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVSxL II32) addr
  return (Any II64 code)

#endif

#if x86_64_TARGET_ARCH
-- PIC: "picbase + constant displacement" collapses into a single
-- RIP-relative LEA of the displacement's label.
getRegister (CmmMachOp (MO_Add W64) [CmmReg (CmmGlobal PicBaseReg),
                                     CmmLit displacement])
    = return $ Any II64 (\dst -> unitOL $
        LEA II64 (OpAddr (ripRel (litToImm displacement))) (OpReg dst))
#endif

#if x86_64_TARGET_ARCH
-- Float negation (SSE): flip the sign bit by xor-ing with a 128-bit
-- sign-mask constant kept in a 16-byte-aligned read-only section
-- (xorps/xorpd operate on the full 128-bit register).
getRegister (CmmMachOp (MO_F_Neg W32) [x]) = do
  x_code <- getAnyReg x
  lbl <- getNewLabelNat
  let
    code dst = x_code dst `appOL` toOL [
        -- This is how gcc does it, so it can't be that bad:
        LDATA ReadOnlyData16 [
                CmmAlign 16,
                CmmDataLabel lbl,
                CmmStaticLit (CmmInt 0x80000000 W32),
                CmmStaticLit (CmmInt 0 W32),
                CmmStaticLit (CmmInt 0 W32),
                CmmStaticLit (CmmInt 0 W32)
        ],
        XOR FF32 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
                -- xorps, so we need the 128-bit constant
                -- ToDo: rip-relative
        ]
  return (Any FF32 code)

getRegister (CmmMachOp (MO_F_Neg W64) [x]) = do
  x_code <- getAnyReg x
  lbl <- getNewLabelNat
  let
        -- This is how gcc does it, so it can't be that bad:
    code dst = x_code dst `appOL` toOL [
        LDATA ReadOnlyData16 [
                CmmAlign 16,
                CmmDataLabel lbl,
                CmmStaticLit (CmmInt 0x8000000000000000 W64),
                CmmStaticLit (CmmInt 0 W64)
        ],
                -- gcc puts an unpck here.  Wonder if we need it.
        XOR FF64 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
                -- xorpd, so we need the 128-bit constant
        ]
  return (Any FF64 code)
#endif

#if i386_TARGET_ARCH || x86_64_TARGET_ARCH

-- Unary MachOps: negation, complement, and the various integer/float
-- width conversions.  Conversions are either fused into the operand
-- (see the load cases above), no-ops (narrowing within a register),
-- or explicit MOVZx/MOVSx widenings.
getRegister (CmmMachOp mop [x]) -- unary MachOps
  = case mop of
#if i386_TARGET_ARCH
      MO_F_Neg W32 -> trivialUFCode FF32 (GNEG FF32) x
      MO_F_Neg W64 -> trivialUFCode FF64 (GNEG FF64) x
#endif

      MO_S_Neg w -> triv_ucode NEGI (intSize w)
      -- NOTE(review): this generic MO_F_Neg alternative pairs NEGI
      -- with a float size; presumably unreachable because the
      -- arch-specific MO_F_Neg equations above fire first -- confirm.
      MO_F_Neg w -> triv_ucode NEGI (floatSize w)
      MO_Not w   -> triv_ucode NOT  (intSize w)

      -- Nop conversions
      MO_UU_Conv W32 W8  -> toI8Reg  W32 x
      MO_SS_Conv W32 W8  -> toI8Reg  W32 x
      MO_UU_Conv W16 W8  -> toI8Reg  W16 x
      MO_SS_Conv W16 W8  -> toI8Reg  W16 x
      MO_UU_Conv W32 W16 -> toI16Reg W32 x
      MO_SS_Conv W32 W16 -> toI16Reg W32 x

#if x86_64_TARGET_ARCH
      MO_UU_Conv W64 W32 -> conversionNop II64 x
      MO_SS_Conv W64 W32 -> conversionNop II64 x
      MO_UU_Conv W64 W16 -> toI16Reg W64 x
      MO_SS_Conv W64 W16 -> toI16Reg W64 x
      MO_UU_Conv W64 W8  -> toI8Reg  W64 x
      MO_SS_Conv W64 W8  -> toI8Reg  W64 x
#endif

      MO_UU_Conv rep1 rep2 | rep1 == rep2 -> conversionNop (intSize rep1) x
      MO_SS_Conv rep1 rep2 | rep1 == rep2 -> conversionNop (intSize rep1) x

      -- widenings
      MO_UU_Conv W8  W32 -> integerExtend W8  W32 MOVZxL x
      MO_UU_Conv W16 W32 -> integerExtend W16 W32 MOVZxL x
      MO_UU_Conv W8  W16 -> integerExtend W8  W16 MOVZxL x

      MO_SS_Conv W8  W32 -> integerExtend W8  W32 MOVSxL x
      MO_SS_Conv W16 W32 -> integerExtend W16 W32 MOVSxL x
      MO_SS_Conv W8  W16 -> integerExtend W8  W16 MOVSxL x

#if x86_64_TARGET_ARCH
      MO_UU_Conv W8  W64 -> integerExtend W8  W64 MOVZxL x
      MO_UU_Conv W16 W64 -> integerExtend W16 W64 MOVZxL x
      MO_UU_Conv W32 W64 -> integerExtend W32 W64 MOVZxL x
      MO_SS_Conv W8  W64 -> integerExtend W8  W64 MOVSxL x
      MO_SS_Conv W16 W64 -> integerExtend W16 W64 MOVSxL x
      MO_SS_Conv W32 W64 -> integerExtend W32 W64 MOVSxL x
        -- for 32-to-64 bit zero extension, amd64 uses an ordinary movl.
        -- However, we don't want the register allocator to throw it
        -- away as an unnecessary reg-to-reg move, so we keep it in
        -- the form of a movzl and print it as a movl later.
#endif

#if i386_TARGET_ARCH
      -- x87 registers hold one internal format, so float<->double
      -- conversions are no-ops here.
      MO_FF_Conv W32 W64 -> conversionNop FF64 x
      MO_FF_Conv W64 W32 -> conversionNop FF32 x
#else
      MO_FF_Conv W32 W64 -> coerceFP2FP W64 x
      MO_FF_Conv W64 W32 -> coerceFP2FP W32 x
#endif

      MO_FS_Conv from to -> coerceFP2Int from to x
      MO_SF_Conv from to -> coerceInt2FP from to x

      other -> pprPanic "getRegister" (pprMachOp mop)
   where
        -- Apply a size-parameterised single-operand instruction to x.
        triv_ucode :: (Size -> Operand -> Instr) -> Size -> NatM Register
        triv_ucode instr size = trivialUCode size (instr size) x

        -- signed or unsigned extension.  8-bit sources must live in a
        -- byte-addressable register, hence getByteReg for W8.
        integerExtend :: Width -> Width
                      -> (Size -> Operand -> Operand -> Instr)
                      -> CmmExpr -> NatM Register
        integerExtend from to instr expr = do
            (reg,e_code) <- if from == W8 then getByteReg expr
                                          else getSomeReg expr
            let
                code dst =
                  e_code `snocOL`
                  instr (intSize from) (OpReg reg) (OpReg dst)
            return (Any (intSize to) code)

        toI8Reg :: Width -> CmmExpr -> NatM Register
        toI8Reg new_rep expr
            = do codefn <- getAnyReg expr
                 return (Any (intSize new_rep) codefn)
                -- HACK: use getAnyReg to get a byte-addressable register.
                -- If the source was a Fixed register, this will add the
                -- mov instruction to put it into the desired destination.
                -- We're assuming that the destination won't be a fixed
                -- non-byte-addressable register; it won't be, because all
                -- fixed registers are word-sized.

        toI16Reg = toI8Reg -- for now

        -- Reinterpret the result at a different size without emitting
        -- any code.
        conversionNop :: Size -> CmmExpr -> NatM Register
        conversionNop new_size expr
            = do e_code <- getRegister expr
                 return (swizzleRegisterRep e_code new_size)


getRegister e@(CmmMachOp mop [x, y]) -- dyadic MachOps
  = case mop of
1135 1136 1137 1138 1139 1140
      MO_F_Eq w -> condFltReg EQQ x y
      MO_F_Ne w -> condFltReg NE x y
      MO_F_Gt w -> condFltReg GTT x y
      MO_F_Ge w -> condFltReg GE x y
      MO_F_Lt w -> condFltReg LTT x y
      MO_F_Le w -> condFltReg LE x y
1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154

      MO_Eq rep   -> condIntReg EQQ x y
      MO_Ne rep   -> condIntReg NE x y

      MO_S_Gt rep -> condIntReg GTT x y
      MO_S_Ge rep -> condIntReg GE x y
      MO_S_Lt rep -> condIntReg LTT x y
      MO_S_Le rep -> condIntReg LE x y

      MO_U_Gt rep -> condIntReg GU  x y
      MO_U_Ge rep -> condIntReg GEU x y
      MO_U_Lt rep -> condIntReg LU  x y
      MO_U_Le rep -> condIntReg LEU x y

1155
#if i386_TARGET_ARCH
1156 1157 1158 1159
      MO_F_Add w -> trivialFCode w GADD x y
      MO_F_Sub w -> trivialFCode w GSUB x y
      MO_F_Quot w -> trivialFCode w GDIV x y
      MO_F_Mul w -> trivialFCode w GMUL x y
1160 1161 1162
#endif

#if x86_64_TARGET_ARCH
1163 1164 1165 1166
      MO_F_Add w -> trivialFCode w ADD x y
      MO_F_Sub w -> trivialFCode w SUB x y
      MO_F_Quot w -> trivialFCode w FDIV x y
      MO_F_Mul w -> trivialFCode w MUL x y
1167
#endif
1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178

      MO_Add rep -> add_code rep x y
      MO_Sub rep -> sub_code rep x y

      MO_S_Quot rep -> div_code rep True  True  x y
      MO_S_Rem  rep -> div_code rep True  False x y
      MO_U_Quot rep -> div_code rep False True  x y
      MO_U_Rem  rep -> div_code rep False False x y

      MO_S_MulMayOflo rep -> imulMayOflo rep x y

1179 1180 1181 1182
      MO_Mul rep -> triv_op rep IMUL
      MO_And rep -> triv_op rep AND
      MO_Or  rep -> triv_op rep OR
      MO_Xor rep -> triv_op rep XOR
1183 1184 1185 1186 1187

	{- Shift ops on x86s have constraints on their source, it
	   either has to be Imm, CL or 1
	    => trivialCode is not restrictive enough (sigh.)
	-}	   
1188 1189 1190
      MO_Shl rep   -> shift_code rep SHL x y {-False-}
      MO_U_Shr rep -> shift_code rep SHR x y {-False-}
      MO_S_Shr rep -> shift_code rep SAR x y {-False-}
1191 1192 1193 1194

      other -> pprPanic "getRegister(x86) - binary CmmMachOp (1)" (pprMachOp mop)
  where
    --------------------
1195 1196 1197 1198
    triv_op width instr = trivialCode width op (Just op) x y
			where op   = instr (intSize width)

    imulMayOflo :: Width -> CmmExpr -> CmmExpr -> NatM Register
1199
    imulMayOflo rep a b = do
1200
         (a_reg, a_code) <- getNonClobberedReg a
1201
         b_code <- getAnyReg b
1202
         let 
1203
	     shift_amt  = case rep of
1204 1205
			   W32 -> 31
			   W64 -> 63
1206 1207
			   _ -> panic "shift_amt"

1208
	     size = intSize rep
1209
             code = a_code `appOL` b_code eax `appOL`
1210
                        toOL [
1211 1212
			   IMUL2 size (OpReg a_reg),   -- result in %edx:%eax
                           SAR size (OpImm (ImmInt shift_amt)) (OpReg eax),
1213
				-- sign extend lower part
1214
                           SUB size (OpReg edx) (OpReg eax)
1215 1216
				-- compare against upper
                           -- eax==0 if high part == sign extended low part
1217 1218
                        ]
         -- in
1219
	 return (Fixed size eax code)
1220 1221

    --------------------
1222 1223
    shift_code :: Width
	       -> (Size -> Operand -> Operand -> Instr)
1224 1225 1226 1227 1228
	       -> CmmExpr
	       -> CmmExpr
	       -> NatM Register

    {- Case1: shift length as immediate -}
1229
    shift_code width instr x y@(CmmLit lit) = do
1230 1231
	  x_code <- getAnyReg x
	  let
1232
	       size = intSize width
1233 1234
	       code dst
		  = x_code dst `snocOL` 
1235
		    instr size (OpImm (litToImm lit)) (OpReg dst)
1236
	  -- in
1237
	  return (Any size code)
1238
        
1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252
    {- Case2: shift length is complex (non-immediate)
      * y must go in %ecx.
      * we cannot do y first *and* put its result in %ecx, because
        %ecx might be clobbered by x.
      * if we do y second, then x cannot be 
        in a clobbered reg.  Also, we cannot clobber x's reg
        with the instruction itself.
      * so we can either:
        - do y first, put its result in a fresh tmp, then copy it to %ecx later
        - do y second and put its result into %ecx.  x gets placed in a fresh
          tmp.  This is likely to be better, becuase the reg alloc can
          eliminate this reg->reg move here (it won't eliminate the other one,
          because the move is into the fixed %ecx).
    -}
1253
    shift_code width instr x y{-amount-} = do
1254
        x_code <- getAnyReg x
1255 1256
	let size = intSize width
	tmp <- getNewRegNat size
1257 1258
        y_code <- getAnyReg y
	let 
1259
	   code = x_code tmp `appOL`
1260
		  y_code ecx `snocOL`
1261
		  instr size (OpReg ecx) (OpReg tmp)
1262
        -- in
1263
        return (Fixed size tmp code)
1264 1265

    --------------------
1266
    add_code :: Width -> CmmExpr -> CmmExpr -> NatM Register
1267
    add_code rep x (CmmLit (CmmInt y _))
1268
	| is32BitInteger y = add_int rep x y
1269 1270
    add_code rep x y = trivialCode rep (ADD size) (Just (ADD size)) x y
      where size = intSize rep
1271 1272

    --------------------
1273
    sub_code :: Width -> CmmExpr -> CmmExpr -> NatM Register
1274
    sub_code rep x (CmmLit (CmmInt y _))
1275
	| is32BitInteger (-y) = add_int rep x (-y)
1276
    sub_code rep x y = trivialCode rep (SUB (intSize rep)) Nothing x y
1277 1278

    -- our three-operand add instruction:
1279
    add_int width x y = do
1280 1281
	(x_reg, x_code) <- getSomeReg x
	let
1282
	    size = intSize width
1283 1284 1285
	    imm = ImmInt (fromInteger y)
	    code dst
               = x_code `snocOL`
1286
		 LEA size
1287
			(OpAddr (AddrBaseIndex (EABaseReg x_reg) EAIndexNone imm))
1288