MachCodeGen.hs 164 KB
Newer Older
1 2 3 4 5 6 7
{-# OPTIONS -w #-}
-- The above warning supression flag is a temporary kludge.
-- While working on this module you are encouraged to remove it and fix
-- any warnings in the module. See
--     http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings
-- for details

8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
-----------------------------------------------------------------------------
--
-- Generating machine code (instruction selection)
--
-- (c) The University of Glasgow 1996-2004
--
-----------------------------------------------------------------------------

-- This is a big module, but, if you pay attention to
-- (a) the sectioning, (b) the type signatures, and
-- (c) the #if blah_TARGET_ARCH} things, the
-- structure should not be too overwhelming.

module MachCodeGen ( cmmTopCodeGen, InstrBlock ) where

#include "HsVersions.h"
#include "nativeGen/NCG.h"
25
#include "MachDeps.h"
26 27 28 29 30

-- NCG stuff:
import MachInstrs
import MachRegs
import NCGMonad
31
import PositionIndependentCode
32 33
import RegAllocInfo 	( mkBranchInstr, mkRegRegMoveInstr )
import MachRegs
34
import PprMach
35 36

-- Our intermediate code:
37
import BlockId
38 39 40
import PprCmm		( pprExpr )
import Cmm
import CLabel
41
import ClosureInfo	( C_SRT(..) )
42 43

-- The rest:
44
import StaticFlags	( opt_PIC )
45 46 47
import ForeignCall	( CCallConv(..) )
import OrdList
import Pretty
48
import qualified Outputable as O
49 50
import Outputable
import FastString
Ian Lynagh's avatar
Ian Lynagh committed
51
import FastBool		( isFastTrue )
52
import Constants	( wORD_SIZE )
53

54
import Debug.Trace	( trace )
55 56

import Control.Monad	( mapAndUnzipM )
Simon Marlow's avatar
Simon Marlow committed
57 58 59
import Data.Maybe	( fromJust )
import Data.Bits
import Data.Word
60
import Data.Int
61

62

63 64 65 66 67 68 69 70 71 72
-- -----------------------------------------------------------------------------
-- Top-level of the instruction selector

-- | 'InstrBlock's are the insn sequences generated by the insn selectors.
-- They are really trees of insns to facilitate fast appending, where a
-- left-to-right traversal (pre-order?) yields the insns in the correct
-- order.

type InstrBlock = OrdList Instr

73
cmmTopCodeGen :: RawCmmTop -> NatM [NatCmmTop]
74
cmmTopCodeGen (CmmProc info lab params (ListGraph blocks)) = do
75
  (nat_blocks,statics) <- mapAndUnzipM basicBlockCodeGen blocks
76
  picBaseMb <- getPicBaseMaybeNat
77
  let proc = CmmProc info lab params (ListGraph $ concat nat_blocks)
78 79 80 81 82
      tops = proc : concat statics
  case picBaseMb of
      Just picBase -> initializePicBase picBase tops
      Nothing -> return tops
  
83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115
cmmTopCodeGen (CmmData sec dat) = do
  return [CmmData sec dat]  -- no translation, we just use CmmStatic

basicBlockCodeGen :: CmmBasicBlock -> NatM ([NatBasicBlock],[NatCmmTop])
basicBlockCodeGen (BasicBlock id stmts) = do
  instrs <- stmtsToInstrs stmts
  -- code generation may introduce new basic block boundaries, which
  -- are indicated by the NEWBLOCK instruction.  We must split up the
  -- instruction stream into basic blocks again.  Also, we extract
  -- LDATAs here too.
  let
	(top,other_blocks,statics) = foldrOL mkBlocks ([],[],[]) instrs
	
	mkBlocks (NEWBLOCK id) (instrs,blocks,statics) 
	  = ([], BasicBlock id instrs : blocks, statics)
	mkBlocks (LDATA sec dat) (instrs,blocks,statics) 
	  = (instrs, blocks, CmmData sec dat:statics)
	mkBlocks instr (instrs,blocks,statics)
	  = (instr:instrs, blocks, statics)
  -- in
  return (BasicBlock id top : other_blocks, statics)

stmtsToInstrs :: [CmmStmt] -> NatM InstrBlock
stmtsToInstrs stmts
   = do instrss <- mapM stmtToInstrs stmts
        return (concatOL instrss)

stmtToInstrs :: CmmStmt -> NatM InstrBlock
stmtToInstrs stmt = case stmt of
    CmmNop	   -> return nilOL
    CmmComment s   -> return (unitOL (COMMENT s))

    CmmAssign reg src
116
      | isFloatType ty -> assignReg_FltCode size reg src
117
#if WORD_SIZE_IN_BITS==32
118
      | isWord64 ty    -> assignReg_I64Code      reg src
119
#endif
120 121 122
      | otherwise	 -> assignReg_IntCode size reg src
	where ty = cmmRegType reg
	      size = cmmTypeSize ty
123 124

    CmmStore addr src
125
      | isFloatType ty -> assignMem_FltCode size addr src
126
#if WORD_SIZE_IN_BITS==32
127
      | isWord64 ty 	 -> assignMem_I64Code      addr src
128
#endif
129 130 131
      | otherwise	 -> assignMem_IntCode size addr src
	where ty = cmmExprType src
	      size = cmmTypeSize ty
132

133
    CmmCall target result_regs args _ _
134
       -> genCCall target result_regs args
135 136 137 138 139

    CmmBranch id	  -> genBranch id
    CmmCondBranch arg id  -> genCondJump id arg
    CmmSwitch arg ids     -> genSwitch arg ids
    CmmJump arg params	  -> genJump arg
dias@eecs.harvard.edu's avatar
dias@eecs.harvard.edu committed
140 141
    CmmReturn params	  ->
      panic "stmtToInstrs: return statement should have been cps'd away"
142 143 144 145 146 147 148 149

-- -----------------------------------------------------------------------------
-- General things for putting together code sequences

-- Expand CmmRegOff.  ToDo: should we do it this way around, or convert
-- CmmExprs into CmmRegOff?
mangleIndexTree :: CmmExpr -> CmmExpr
mangleIndexTree (CmmRegOff reg off)
150 151
  = CmmMachOp (MO_Add width) [CmmReg reg, CmmLit (CmmInt (fromIntegral off) width)]
  where width = typeWidth (cmmRegType reg)
152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177

-- -----------------------------------------------------------------------------
--  Code gen for 64-bit arithmetic on 32-bit platforms

{-
Simple support for generating 64-bit code (ie, 64 bit values and 64
bit assignments) on 32-bit platforms.  Unlike the main code generator
we merely shoot for generating working code as simply as possible, and
pay little attention to code quality.  Specifically, there is no
attempt to deal cleverly with the fixed-vs-floating register
distinction; all values are generated into (pairs of) floating
registers, even if this would mean some redundant reg-reg moves as a
result.  Only one of the VRegUniques is returned, since it will be
of the VRegUniqueLo form, and the upper-half VReg can be determined
by applying getHiVRegFromLo to it.
-}

data ChildCode64 	-- a.k.a "Register64"
   = ChildCode64 
        InstrBlock 	-- code
        Reg	 	-- the lower 32-bit temporary which contains the
			-- result; use getHiVRegFromLo to find the other
			-- VRegUnique.  Rules of this simplified insn
			-- selection game are therefore that the returned
			-- Reg may be modified

178
#if WORD_SIZE_IN_BITS==32
179 180
assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock
assignReg_I64Code :: CmmReg  -> CmmExpr -> NatM InstrBlock
181 182 183
#endif

#ifndef x86_64_TARGET_ARCH
184
iselExpr64        :: CmmExpr -> NatM ChildCode64
185
#endif
186 187 188 189 190 191 192 193 194 195 196 197

-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

#if i386_TARGET_ARCH

assignMem_I64Code addrTree valueTree = do
  Amode addr addr_code <- getAmode addrTree
  ChildCode64 vcode rlo <- iselExpr64 valueTree
  let 
        rhi = getHiVRegFromLo rlo

        -- Little-endian store
198 199
        mov_lo = MOV II32 (OpReg rlo) (OpAddr addr)
        mov_hi = MOV II32 (OpReg rhi) (OpAddr (fromJust (addrOffset addr 4)))
200 201 202 203
  -- in
  return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)


204
assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
205 206
   ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
   let 
207
         r_dst_lo = mkVReg u_dst II32
208 209
         r_dst_hi = getHiVRegFromLo r_dst_lo
         r_src_hi = getHiVRegFromLo r_src_lo
210 211
         mov_lo = MOV II32 (OpReg r_src_lo) (OpReg r_dst_lo)
         mov_hi = MOV II32 (OpReg r_src_hi) (OpReg r_dst_hi)
212 213 214 215 216 217 218 219 220 221 222
   -- in
   return (
        vcode `snocOL` mov_lo `snocOL` mov_hi
     )

assignReg_I64Code lvalue valueTree
   = panic "assignReg_I64Code(i386): invalid lvalue"

------------

iselExpr64 (CmmLit (CmmInt i _)) = do
223
  (rlo,rhi) <- getNewRegPairNat II32
224 225 226 227
  let
	r = fromIntegral (fromIntegral i :: Word32)
	q = fromIntegral ((fromIntegral i `shiftR` 32) :: Word32)
	code = toOL [
228 229
		MOV II32 (OpImm (ImmInteger r)) (OpReg rlo),
		MOV II32 (OpImm (ImmInteger q)) (OpReg rhi)
230 231 232 233
		]
  -- in
  return (ChildCode64 code rlo)

234
iselExpr64 (CmmLoad addrTree ty) | isWord64 ty = do
235
   Amode addr addr_code <- getAmode addrTree
236
   (rlo,rhi) <- getNewRegPairNat II32
237
   let 
238 239
        mov_lo = MOV II32 (OpAddr addr) (OpReg rlo)
        mov_hi = MOV II32 (OpAddr (fromJust (addrOffset addr 4))) (OpReg rhi)
240 241 242 243 244 245
   -- in
   return (
            ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi) 
                        rlo
     )

246 247
iselExpr64 (CmmReg (CmmLocal (LocalReg vu ty))) | isWord64 ty
   = return (ChildCode64 nilOL (mkVReg vu II32))
248 249 250 251
         
-- we handle addition, but rather badly
iselExpr64 (CmmMachOp (MO_Add _) [e1, CmmLit (CmmInt i _)]) = do
   ChildCode64 code1 r1lo <- iselExpr64 e1
252
   (rlo,rhi) <- getNewRegPairNat II32
253 254 255 256 257
   let
	r = fromIntegral (fromIntegral i :: Word32)
	q = fromIntegral ((fromIntegral i `shiftR` 32) :: Word32)
	r1hi = getHiVRegFromLo r1lo
	code =  code1 `appOL`
258 259 260 261
		toOL [ MOV II32 (OpReg r1lo) (OpReg rlo),
		       ADD II32 (OpImm (ImmInteger r)) (OpReg rlo),
		       MOV II32 (OpReg r1hi) (OpReg rhi),
		       ADC II32 (OpImm (ImmInteger q)) (OpReg rhi) ]
262 263 264 265 266 267
   -- in
   return (ChildCode64 code rlo)

iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
   ChildCode64 code1 r1lo <- iselExpr64 e1
   ChildCode64 code2 r2lo <- iselExpr64 e2
268
   (rlo,rhi) <- getNewRegPairNat II32
269 270 271 272 273
   let
	r1hi = getHiVRegFromLo r1lo
	r2hi = getHiVRegFromLo r2lo
	code =  code1 `appOL`
		code2 `appOL`
274 275 276 277
		toOL [ MOV II32 (OpReg r1lo) (OpReg rlo),
		       ADD II32 (OpReg r2lo) (OpReg rlo),
		       MOV II32 (OpReg r1hi) (OpReg rhi),
		       ADC II32 (OpReg r2hi) (OpReg rhi) ]
278 279 280
   -- in
   return (ChildCode64 code rlo)

281
iselExpr64 (CmmMachOp (MO_UU_Conv _ W64) [expr]) = do
282
     fn <- getAnyReg expr
283
     r_dst_lo <-  getNewRegNat II32
284 285 286 287
     let r_dst_hi = getHiVRegFromLo r_dst_lo
         code = fn r_dst_lo
     return (
             ChildCode64 (code `snocOL` 
288
                          MOV II32 (OpImm (ImmInt 0)) (OpReg r_dst_hi))
289 290 291
                          r_dst_lo
            )

292 293 294 295 296 297 298 299 300
iselExpr64 expr
   = pprPanic "iselExpr64(i386)" (ppr expr)

#endif /* i386_TARGET_ARCH */

-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

#if sparc_TARGET_ARCH

301 302 303 304 305
assignMem_I64Code addrTree valueTree = do
     Amode addr addr_code <- getAmode addrTree
     ChildCode64 vcode rlo <- iselExpr64 valueTree  
     (src, code) <- getSomeReg addrTree
     let 
306 307
         rhi = getHiVRegFromLo rlo
         -- Big-endian store
308 309
         mov_hi = ST II32 rhi (AddrRegImm src (ImmInt 0))
         mov_lo = ST II32 rlo (AddrRegImm src (ImmInt 4))
310
     return (vcode `appOL` code `snocOL` mov_hi `snocOL` mov_lo)
311

312
assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
313
     ChildCode64 vcode r_src_lo <- iselExpr64 valueTree    
314
     let 
315
         r_dst_lo = mkVReg u_dst (cmmTypeSize pk)
316 317 318 319 320
         r_dst_hi = getHiVRegFromLo r_dst_lo
         r_src_hi = getHiVRegFromLo r_src_lo
         mov_lo = mkMOV r_src_lo r_dst_lo
         mov_hi = mkMOV r_src_hi r_dst_hi
         mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
321
     return (vcode `snocOL` mov_hi `snocOL` mov_lo)
322
assignReg_I64Code lvalue valueTree
323
   = panic "assignReg_I64Code(sparc): invalid lvalue"
324 325


326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371
-- Load a 64 bit word
iselExpr64 (CmmLoad addrTree ty) 
 | isWord64 ty
 = do	Amode amode addr_code	<- getAmode addrTree
 	let result

		| AddrRegReg r1 r2	<- amode
		= do	rlo 	<- getNewRegNat II32
			tmp	<- getNewRegNat II32
			let rhi = getHiVRegFromLo rlo

			return	$ ChildCode64 
				(	 addr_code 
				`appOL`	 toOL
					 [ ADD False False r1 (RIReg r2) tmp
					 , LD II32 (AddrRegImm tmp (ImmInt 0)) rhi
					 , LD II32 (AddrRegImm tmp (ImmInt 4)) rlo ])
				rlo

		| AddrRegImm r1 (ImmInt i) <- amode
		= do	rlo	<- getNewRegNat II32
			let rhi = getHiVRegFromLo rlo
			
			return	$ ChildCode64 
				(	 addr_code 
				`appOL`	 toOL
					 [ LD II32 (AddrRegImm r1 (ImmInt $ 0 + i)) rhi
					 , LD II32 (AddrRegImm r1 (ImmInt $ 4 + i)) rlo ])
				rlo
		
	result


-- Add a literal to a 64 bit integer
iselExpr64 (CmmMachOp (MO_Add _) [e1, CmmLit (CmmInt i _)]) 
 = do	ChildCode64 code1 r1_lo <- iselExpr64 e1
 	let r1_hi	= getHiVRegFromLo r1_lo
	
	r_dst_lo	<- getNewRegNat II32
	let r_dst_hi	=  getHiVRegFromLo r_dst_lo 
	
	return	$ ChildCode64
			( toOL
			[ ADD False False r1_lo (RIImm (ImmInteger i)) r_dst_lo
			, ADD True  False r1_hi (RIReg g0)	   r_dst_hi ])
			r_dst_lo
372 373


374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393
-- Addition of II64
iselExpr64 (CmmMachOp (MO_Add width) [e1, e2])
 = do	ChildCode64 code1 r1_lo	<- iselExpr64 e1
	let r1_hi	= getHiVRegFromLo r1_lo

 	ChildCode64 code2 r2_lo	<- iselExpr64 e2
	let r2_hi	= getHiVRegFromLo r2_lo
	
	r_dst_lo	<- getNewRegNat II32
	let r_dst_hi	= getHiVRegFromLo r_dst_lo
	
	let code =	code1
		`appOL`	code2
		`appOL`	toOL
			[ ADD False False r1_lo (RIReg r2_lo) r_dst_lo
			, ADD True  False r1_hi (RIReg r2_hi) r_dst_hi ]
	
	return	$ ChildCode64 code r_dst_lo


394 395
iselExpr64 (CmmReg (CmmLocal (LocalReg uq ty))) | isWord64 ty = do
     r_dst_lo <-  getNewRegNat II32
396
     let r_dst_hi = getHiVRegFromLo r_dst_lo
397
         r_src_lo = mkVReg uq II32
398 399 400 401
         r_src_hi = getHiVRegFromLo r_src_lo
         mov_lo = mkMOV r_src_lo r_dst_lo
         mov_hi = mkMOV r_src_hi r_dst_hi
         mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
402 403
     return (
            ChildCode64 (toOL [mov_hi, mov_lo]) r_dst_lo
404 405
         )

406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422
-- Convert something into II64
iselExpr64 (CmmMachOp (MO_UU_Conv _ W64) [expr]) 
 = do
	r_dst_lo 	<- getNewRegNat II32
	let r_dst_hi	= getHiVRegFromLo r_dst_lo

	-- compute expr and load it into r_dst_lo
	(a_reg, a_code)	<- getSomeReg expr

	let code	= a_code
		`appOL`	toOL
			[ mkRegRegMoveInstr g0    r_dst_hi 	-- clear high 32 bits
			, mkRegRegMoveInstr a_reg r_dst_lo ]
			
	return	$ ChildCode64 code r_dst_lo


423
iselExpr64 expr
424
   = pprPanic "iselExpr64(sparc)" (ppr expr)
425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448

#endif /* sparc_TARGET_ARCH */

-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

#if powerpc_TARGET_ARCH

getI64Amodes :: CmmExpr -> NatM (AddrMode, AddrMode, InstrBlock)
getI64Amodes addrTree = do
    Amode hi_addr addr_code <- getAmode addrTree
    case addrOffset hi_addr 4 of
        Just lo_addr -> return (hi_addr, lo_addr, addr_code)
        Nothing      -> do (hi_ptr, code) <- getSomeReg addrTree
                           return (AddrRegImm hi_ptr (ImmInt 0),
                                   AddrRegImm hi_ptr (ImmInt 4),
                                   code)

assignMem_I64Code addrTree valueTree = do
        (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
	ChildCode64 vcode rlo <- iselExpr64 valueTree
	let 
		rhi = getHiVRegFromLo rlo

		-- Big-endian store
449 450
		mov_hi = ST II32 rhi hi_addr
		mov_lo = ST II32 rlo lo_addr
451 452 453
	-- in
	return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)

454
assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
455 456
   ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
   let 
457
         r_dst_lo = mkVReg u_dst II32
458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475
         r_dst_hi = getHiVRegFromLo r_dst_lo
         r_src_hi = getHiVRegFromLo r_src_lo
         mov_lo = MR r_dst_lo r_src_lo
         mov_hi = MR r_dst_hi r_src_hi
   -- in
   return (
        vcode `snocOL` mov_lo `snocOL` mov_hi
     )

assignReg_I64Code lvalue valueTree
   = panic "assignReg_I64Code(powerpc): invalid lvalue"


-- Don't delete this -- it's very handy for debugging.
--iselExpr64 expr 
--   | trace ("iselExpr64: " ++ showSDoc (pprCmmExpr expr)) False
--   = panic "iselExpr64(???)"

476
iselExpr64 (CmmLoad addrTree ty) | isWord64 ty = do
477
    (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
478 479 480
    (rlo, rhi) <- getNewRegPairNat II32
    let mov_hi = LD II32 rhi hi_addr
        mov_lo = LD II32 rlo lo_addr
481 482 483
    return $ ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi) 
                         rlo

484 485
iselExpr64 (CmmReg (CmmLocal (LocalReg vu ty))) | isWord64 ty
   = return (ChildCode64 nilOL (mkVReg vu II32))
486 487

iselExpr64 (CmmLit (CmmInt i _)) = do
488
  (rlo,rhi) <- getNewRegPairNat II32
489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506
  let
	half0 = fromIntegral (fromIntegral i :: Word16)
	half1 = fromIntegral ((fromIntegral i `shiftR` 16) :: Word16)
	half2 = fromIntegral ((fromIntegral i `shiftR` 32) :: Word16)
	half3 = fromIntegral ((fromIntegral i `shiftR` 48) :: Word16)
	
	code = toOL [
		LIS rlo (ImmInt half1),
		OR rlo rlo (RIImm $ ImmInt half0),
		LIS rhi (ImmInt half3),
		OR rlo rlo (RIImm $ ImmInt half2)
		]
  -- in
  return (ChildCode64 code rlo)

iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
   ChildCode64 code1 r1lo <- iselExpr64 e1
   ChildCode64 code2 r2lo <- iselExpr64 e2
507
   (rlo,rhi) <- getNewRegPairNat II32
508 509 510 511 512 513 514 515 516 517
   let
	r1hi = getHiVRegFromLo r1lo
	r2hi = getHiVRegFromLo r2lo
	code =  code1 `appOL`
		code2 `appOL`
		toOL [ ADDC rlo r1lo r2lo,
		       ADDE rhi r1hi r2hi ]
   -- in
   return (ChildCode64 code rlo)

518
iselExpr64 (CmmMachOp (MO_UU_Conv W32 W64) [expr]) = do
519
    (expr_reg,expr_code) <- getSomeReg expr
520
    (rlo, rhi) <- getNewRegPairNat II32
521 522 523 524
    let mov_hi = LI rhi (ImmInt 0)
        mov_lo = MR rlo expr_reg
    return $ ChildCode64 (expr_code `snocOL` mov_lo `snocOL` mov_hi)
                         rlo
525 526 527 528 529 530 531 532 533 534 535 536 537 538 539
iselExpr64 expr
   = pprPanic "iselExpr64(powerpc)" (ppr expr)

#endif /* powerpc_TARGET_ARCH */


-- -----------------------------------------------------------------------------
-- The 'Register' type

-- 'Register's passed up the tree.  If the stix code forces the register
-- to live in a pre-decided machine register, it comes out as @Fixed@;
-- otherwise, it comes out as @Any@, and the parent can decide which
-- register to put it in.

data Register
540 541
  = Fixed   Size Reg InstrBlock
  | Any	    Size (Reg -> InstrBlock)
542

543 544 545 546
swizzleRegisterRep :: Register -> Size -> Register
-- Change the width; it's a no-op
swizzleRegisterRep (Fixed _ reg code) size = Fixed size reg code
swizzleRegisterRep (Any _ codefn)     size = Any   size codefn
547 548


549 550 551 552 553 554 555 556 557 558 559 560 561 562 563
-- -----------------------------------------------------------------------------
-- Utils based on getRegister, below

-- The dual to getAnyReg: compute an expression into a register, but
-- we don't mind which one it is.
getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
getSomeReg expr = do
  r <- getRegister expr
  case r of
    Any rep code -> do
	tmp <- getNewRegNat rep
	return (tmp, code tmp)
    Fixed _ reg code -> 
	return (reg, code)

564 565 566 567 568
-- -----------------------------------------------------------------------------
-- Grab the Reg for a CmmReg

getRegisterReg :: CmmReg -> Reg

569 570
getRegisterReg (CmmLocal (LocalReg u pk))
  = mkVReg u (cmmTypeSize pk)
571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590

getRegisterReg (CmmGlobal mid)
  = case get_GlobalReg_reg_or_addr mid of
       Left (RealReg rrno) -> RealReg rrno
       _other -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid)
          -- By this stage, the only MagicIds remaining should be the
          -- ones which map to a real machine register on this
          -- platform.  Hence ...


-- -----------------------------------------------------------------------------
-- Generate code to get a subtree into a Register

-- Don't delete this -- it's very handy for debugging.
--getRegister expr 
--   | trace ("getRegister: " ++ showSDoc (pprCmmExpr expr)) False
--   = panic "getRegister(???)"

getRegister :: CmmExpr -> NatM Register

591 592 593
#if !x86_64_TARGET_ARCH
    -- on x86_64, we have %rip for PicBaseReg, but it's not a full-featured
    -- register, it can only be used for rip-relative addressing.
594 595
getRegister (CmmReg (CmmGlobal PicBaseReg))
  = do
596 597
      reg <- getPicBaseNat wordSize
      return (Fixed wordSize reg nilOL)
598
#endif
599

600
getRegister (CmmReg reg) 
601 602
  = return (Fixed (cmmTypeSize (cmmRegType reg)) 
		  (getRegisterReg reg) nilOL)
603 604 605 606

getRegister tree@(CmmRegOff _ _) 
  = getRegister (mangleIndexTree tree)

607 608 609 610 611

#if WORD_SIZE_IN_BITS==32
    -- for 32-bit architectuers, support some 64 -> 32 bit conversions:
    -- TO_W_(x), TO_W_(x >> 32)

612 613
getRegister (CmmMachOp (MO_UU_Conv W64 W32)
             [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]]) = do
614
  ChildCode64 code rlo <- iselExpr64 x
615
  return $ Fixed II32 (getHiVRegFromLo rlo) code
616

617 618
getRegister (CmmMachOp (MO_SS_Conv W64 W32)
             [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]]) = do
619
  ChildCode64 code rlo <- iselExpr64 x
620
  return $ Fixed II32 (getHiVRegFromLo rlo) code
621

622
getRegister (CmmMachOp (MO_UU_Conv W64 W32) [x]) = do
623
  ChildCode64 code rlo <- iselExpr64 x
624
  return $ Fixed II32 rlo code
625

626
getRegister (CmmMachOp (MO_SS_Conv W64 W32) [x]) = do
627
  ChildCode64 code rlo <- iselExpr64 x
628
  return $ Fixed II32 rlo code       
629 630 631

#endif

632 633 634 635 636 637 638 639 640 641 642 643 644 645
-- end of machine-"independent" bit; here we go on the rest...

#if alpha_TARGET_ARCH

getRegister (StDouble d)
  = getBlockIdNat 	    	    `thenNat` \ lbl ->
    getNewRegNat PtrRep    	    `thenNat` \ tmp ->
    let code dst = mkSeqInstrs [
	    LDATA RoDataSegment lbl [
		    DATA TF [ImmLab (rational d)]
		],
	    LDA tmp (AddrImm (ImmCLbl lbl)),
	    LD TF dst (AddrReg tmp)]
    in
646
    	return (Any FF64 code)
647 648 649 650 651 652 653

getRegister (StPrim primop [x]) -- unary PrimOps
  = case primop of
      IntNegOp -> trivialUCode (NEG Q False) x

      NotOp    -> trivialUCode NOT x

654 655
      FloatNegOp  -> trivialUFCode FloatRep (FNEG TF) x
      DoubleNegOp -> trivialUFCode FF64 (FNEG TF) x
656 657 658 659 660 661 662 663 664 665 666 667

      OrdOp -> coerceIntCode IntRep x
      ChrOp -> chrCode x

      Float2IntOp  -> coerceFP2Int    x
      Int2FloatOp  -> coerceInt2FP pr x
      Double2IntOp -> coerceFP2Int    x
      Int2DoubleOp -> coerceInt2FP pr x

      Double2FloatOp -> coerceFltCode x
      Float2DoubleOp -> coerceFltCode x

668
      other_op -> getRegister (StCall fn CCallConv FF64 [x])
669 670
	where
	  fn = case other_op of
Ian Lynagh's avatar
Ian Lynagh committed
671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694
		 FloatExpOp    -> fsLit "exp"
		 FloatLogOp    -> fsLit "log"
		 FloatSqrtOp   -> fsLit "sqrt"
		 FloatSinOp    -> fsLit "sin"
		 FloatCosOp    -> fsLit "cos"
		 FloatTanOp    -> fsLit "tan"
		 FloatAsinOp   -> fsLit "asin"
		 FloatAcosOp   -> fsLit "acos"
		 FloatAtanOp   -> fsLit "atan"
		 FloatSinhOp   -> fsLit "sinh"
		 FloatCoshOp   -> fsLit "cosh"
		 FloatTanhOp   -> fsLit "tanh"
		 DoubleExpOp   -> fsLit "exp"
		 DoubleLogOp   -> fsLit "log"
		 DoubleSqrtOp  -> fsLit "sqrt"
		 DoubleSinOp   -> fsLit "sin"
		 DoubleCosOp   -> fsLit "cos"
		 DoubleTanOp   -> fsLit "tan"
		 DoubleAsinOp  -> fsLit "asin"
		 DoubleAcosOp  -> fsLit "acos"
		 DoubleAtanOp  -> fsLit "atan"
		 DoubleSinhOp  -> fsLit "sinh"
		 DoubleCoshOp  -> fsLit "cosh"
		 DoubleTanhOp  -> fsLit "tanh"
695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753
  where
    pr = panic "MachCode.getRegister: no primrep needed for Alpha"

getRegister (StPrim primop [x, y]) -- dyadic PrimOps
  = case primop of
      CharGtOp -> trivialCode (CMP LTT) y x
      CharGeOp -> trivialCode (CMP LE) y x
      CharEqOp -> trivialCode (CMP EQQ) x y
      CharNeOp -> int_NE_code x y
      CharLtOp -> trivialCode (CMP LTT) x y
      CharLeOp -> trivialCode (CMP LE) x y

      IntGtOp  -> trivialCode (CMP LTT) y x
      IntGeOp  -> trivialCode (CMP LE) y x
      IntEqOp  -> trivialCode (CMP EQQ) x y
      IntNeOp  -> int_NE_code x y
      IntLtOp  -> trivialCode (CMP LTT) x y
      IntLeOp  -> trivialCode (CMP LE) x y

      WordGtOp -> trivialCode (CMP ULT) y x
      WordGeOp -> trivialCode (CMP ULE) x y
      WordEqOp -> trivialCode (CMP EQQ)  x y
      WordNeOp -> int_NE_code x y
      WordLtOp -> trivialCode (CMP ULT) x y
      WordLeOp -> trivialCode (CMP ULE) x y

      AddrGtOp -> trivialCode (CMP ULT) y x
      AddrGeOp -> trivialCode (CMP ULE) y x
      AddrEqOp -> trivialCode (CMP EQQ)  x y
      AddrNeOp -> int_NE_code x y
      AddrLtOp -> trivialCode (CMP ULT) x y
      AddrLeOp -> trivialCode (CMP ULE) x y
	
      FloatGtOp -> cmpF_code (FCMP TF LE) EQQ x y
      FloatGeOp -> cmpF_code (FCMP TF LTT) EQQ x y
      FloatEqOp -> cmpF_code (FCMP TF EQQ) NE x y
      FloatNeOp -> cmpF_code (FCMP TF EQQ) EQQ x y
      FloatLtOp -> cmpF_code (FCMP TF LTT) NE x y
      FloatLeOp -> cmpF_code (FCMP TF LE) NE x y

      DoubleGtOp -> cmpF_code (FCMP TF LE) EQQ x y
      DoubleGeOp -> cmpF_code (FCMP TF LTT) EQQ x y
      DoubleEqOp -> cmpF_code (FCMP TF EQQ) NE x y
      DoubleNeOp -> cmpF_code (FCMP TF EQQ) EQQ x y
      DoubleLtOp -> cmpF_code (FCMP TF LTT) NE x y
      DoubleLeOp -> cmpF_code (FCMP TF LE) NE x y

      IntAddOp  -> trivialCode (ADD Q False) x y
      IntSubOp  -> trivialCode (SUB Q False) x y
      IntMulOp  -> trivialCode (MUL Q False) x y
      IntQuotOp -> trivialCode (DIV Q False) x y
      IntRemOp  -> trivialCode (REM Q False) x y

      WordAddOp  -> trivialCode (ADD Q False) x y
      WordSubOp  -> trivialCode (SUB Q False) x y
      WordMulOp  -> trivialCode (MUL Q False) x y
      WordQuotOp -> trivialCode (DIV Q True) x y
      WordRemOp  -> trivialCode (REM Q True) x y

754 755 756 757
      FloatAddOp -> trivialFCode  W32 (FADD TF) x y
      FloatSubOp -> trivialFCode  W32 (FSUB TF) x y
      FloatMulOp -> trivialFCode  W32 (FMUL TF) x y
      FloatDivOp -> trivialFCode  W32 (FDIV TF) x y
758

759 760 761 762
      DoubleAddOp -> trivialFCode  W64 (FADD TF) x y
      DoubleSubOp -> trivialFCode  W64 (FSUB TF) x y
      DoubleMulOp -> trivialFCode  W64 (FMUL TF) x y
      DoubleDivOp -> trivialFCode  W64 (FDIV TF) x y
763 764 765 766 767 768 769 770 771 772 773 774 775 776 777

      AddrAddOp  -> trivialCode (ADD Q False) x y
      AddrSubOp  -> trivialCode (SUB Q False) x y
      AddrRemOp  -> trivialCode (REM Q True) x y

      AndOp  -> trivialCode AND x y
      OrOp   -> trivialCode OR  x y
      XorOp  -> trivialCode XOR x y
      SllOp  -> trivialCode SLL x y
      SrlOp  -> trivialCode SRL x y

      ISllOp -> trivialCode SLL x y -- was: panic "AlphaGen:isll"
      ISraOp -> trivialCode SRA x y -- was: panic "AlphaGen:isra"
      ISrlOp -> trivialCode SRL x y -- was: panic "AlphaGen:isrl"

778 779
      FloatPowerOp  -> getRegister (StCall (fsLit "pow") CCallConv FF64 [x,y])
      DoublePowerOp -> getRegister (StCall (fsLit "pow") CCallConv FF64 [x,y])
780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811
  where
    {- ------------------------------------------------------------
	Some bizarre special code for getting condition codes into
	registers.  Integer non-equality is a test for equality
	followed by an XOR with 1.  (Integer comparisons always set
	the result register to 0 or 1.)  Floating point comparisons of
	any kind leave the result in a floating point register, so we
	need to wrangle an integer register out of things.
    -}
    int_NE_code :: StixTree -> StixTree -> NatM Register

    int_NE_code x y
      = trivialCode (CMP EQQ) x y	`thenNat` \ register ->
	getNewRegNat IntRep		`thenNat` \ tmp ->
	let
	    code = registerCode register tmp
	    src  = registerName register tmp
	    code__2 dst = code . mkSeqInstr (XOR src (RIImm (ImmInt 1)) dst)
	in
	return (Any IntRep code__2)

    {- ------------------------------------------------------------
	Comments for int_NE_code also apply to cmpF_code
    -}
    cmpF_code
	:: (Reg -> Reg -> Reg -> Instr)
	-> Cond
	-> StixTree -> StixTree
	-> NatM Register

    cmpF_code instr cond x y
      = trivialFCode pr instr x y	`thenNat` \ register ->
812
	getNewRegNat FF64		`thenNat` \ tmp ->
813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868
	getBlockIdNat			`thenNat` \ lbl ->
	let
	    code = registerCode register tmp
	    result  = registerName register tmp

	    code__2 dst = code . mkSeqInstrs [
		OR zeroh (RIImm (ImmInt 1)) dst,
		BF cond  result (ImmCLbl lbl),
		OR zeroh (RIReg zeroh) dst,
		NEWBLOCK lbl]
	in
	return (Any IntRep code__2)
      where
	pr = panic "trivialU?FCode: does not use PrimRep on Alpha"
      ------------------------------------------------------------

getRegister (CmmLoad pk mem)
  = getAmode mem    	    	    `thenNat` \ amode ->
    let
    	code = amodeCode amode
    	src   = amodeAddr amode
    	size = primRepToSize pk
    	code__2 dst = code . mkSeqInstr (LD size dst src)
    in
    return (Any pk code__2)

getRegister (StInt i)
  | fits8Bits i
  = let
    	code dst = mkSeqInstr (OR zeroh (RIImm src) dst)
    in
    return (Any IntRep code)
  | otherwise
  = let
    	code dst = mkSeqInstr (LDI Q dst src)
    in
    return (Any IntRep code)
  where
    src = ImmInt (fromInteger i)

getRegister leaf
  | isJust imm
  = let
    	code dst = mkSeqInstr (LDA dst (AddrImm imm__2))
    in
    return (Any PtrRep code)
  where
    imm = maybeImm leaf
    imm__2 = case imm of Just x -> x

#endif /* alpha_TARGET_ARCH */

-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

#if i386_TARGET_ARCH

869
getRegister (CmmLit (CmmFloat f W32)) = do
870
    lbl <- getNewLabelNat
871 872
    dflags <- getDynFlagsNat
    dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
873 874
    Amode addr addr_code <- getAmode dynRef
    let code dst =
875 876
	    LDATA ReadOnlyData
			[CmmDataLabel lbl,
877
			 CmmStaticLit (CmmFloat f W32)]
878
	    `consOL` (addr_code `snocOL`
879
	    GLD FF32 addr dst)
880
    -- in
881
    return (Any FF32 code)
882 883


884
getRegister (CmmLit (CmmFloat d W64))
885 886
  | d == 0.0
  = let code dst = unitOL (GLDZ dst)
887
    in  return (Any FF64 code)
888 889 890

  | d == 1.0
  = let code dst = unitOL (GLD1 dst)
891
    in  return (Any FF64 code)
892 893 894

  | otherwise = do
    lbl <- getNewLabelNat
895 896
    dflags <- getDynFlagsNat
    dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
897 898
    Amode addr addr_code <- getAmode dynRef
    let code dst =
899 900
	    LDATA ReadOnlyData
			[CmmDataLabel lbl,
901
			 CmmStaticLit (CmmFloat d W64)]
902
	    `consOL` (addr_code `snocOL`
903
	    GLD FF64 addr dst)
904
    -- in
905
    return (Any FF64 code)
906

907 908 909 910
#endif /* i386_TARGET_ARCH */

#if x86_64_TARGET_ARCH

911 912 913
getRegister (CmmLit (CmmFloat 0.0 w)) = do
   let size = floatSize w
       code dst = unitOL  (XOR size (OpReg dst) (OpReg dst))
914 915
	-- I don't know why there are xorpd, xorps, and pxor instructions.
	-- They all appear to do the same thing --SDM
916
   return (Any size code)
917

918
getRegister (CmmLit (CmmFloat f w)) = do
919 920 921 922
    lbl <- getNewLabelNat
    let code dst = toOL [
	    LDATA ReadOnlyData
			[CmmDataLabel lbl,
923 924
			 CmmStaticLit (CmmFloat f w)],
	    MOV size (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
925 926
	    ]
    -- in
927 928
    return (Any size code)
  where size = floatSize w
929 930 931 932

#endif /* x86_64_TARGET_ARCH */

#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
933 934

-- catch simple cases of zero- or sign-extended load
935 936 937
getRegister (CmmMachOp (MO_UU_Conv W8 W32) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVZxL II8) addr
  return (Any II32 code)
938

939 940 941
getRegister (CmmMachOp (MO_SS_Conv W8 W32) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVSxL II8) addr
  return (Any II32 code)
942

943 944 945
getRegister (CmmMachOp (MO_UU_Conv W16 W32) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVZxL II16) addr
  return (Any II32 code)
946

947 948 949
getRegister (CmmMachOp (MO_SS_Conv W16 W32) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVSxL II16) addr
  return (Any II32 code)
950

951 952 953 954 955
#endif

#if x86_64_TARGET_ARCH

-- catch simple cases of zero- or sign-extended load
956 957 958
getRegister (CmmMachOp (MO_UU_Conv W8 W64) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVZxL II8) addr
  return (Any II64 code)
959

960 961 962
getRegister (CmmMachOp (MO_SS_Conv W8 W64) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVSxL II8) addr
  return (Any II64 code)
963

964 965 966
getRegister (CmmMachOp (MO_UU_Conv W16 W64) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVZxL II16) addr
  return (Any II64 code)
967

968 969 970
getRegister (CmmMachOp (MO_SS_Conv W16 W64) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVSxL II16) addr
  return (Any II64 code)
971

972 973 974
getRegister (CmmMachOp (MO_UU_Conv W32 W64) [CmmLoad addr _]) = do
  code <- intLoadCode (MOV II32) addr -- 32-bit loads zero-extend
  return (Any II64 code)
975

976 977 978
getRegister (CmmMachOp (MO_SS_Conv W32 W64) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVSxL II32) addr
  return (Any II64 code)
979 980 981

#endif

982
#if x86_64_TARGET_ARCH
983
getRegister (CmmMachOp (MO_Add W64) [CmmReg (CmmGlobal PicBaseReg),
984
                                     CmmLit displacement])
985 986
    = return $ Any II64 (\dst -> unitOL $
        LEA II64 (OpAddr (ripRel (litToImm displacement))) (OpReg dst))
987 988
#endif

989
#if x86_64_TARGET_ARCH
990
getRegister (CmmMachOp (MO_F_Neg W32) [x]) = do
991
  x_code <- getAnyReg x
992 993
  lbl <- getNewLabelNat
  let
994
    code dst = x_code dst `appOL` toOL [
995 996 997 998
	-- This is how gcc does it, so it can't be that bad:
	LDATA ReadOnlyData16 [
		CmmAlign 16,
		CmmDataLabel lbl,
999 1000 1001 1002
		CmmStaticLit (CmmInt 0x80000000 W32),
		CmmStaticLit (CmmInt 0 W32),
		CmmStaticLit (CmmInt 0 W32),
		CmmStaticLit (CmmInt 0 W32)
1003
	],
1004
	XOR FF32 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
1005 1006 1007 1008
		-- xorps, so we need the 128-bit constant
		-- ToDo: rip-relative
	]
  --
1009
  return (Any FF32 code)
1010

1011
getRegister (CmmMachOp (MO_F_Neg W64) [x]) = do
1012
  x_code <- getAnyReg x
1013 1014 1015
  lbl <- getNewLabelNat
  let
	-- This is how gcc does it, so it can't be that bad:
1016
    code dst = x_code dst `appOL` toOL [
1017 1018 1019
	LDATA ReadOnlyData16 [
		CmmAlign 16,
		CmmDataLabel lbl,
1020 1021
		CmmStaticLit (CmmInt 0x8000000000000000 W64),
		CmmStaticLit (CmmInt 0 W64)
1022 1023
	],
		-- gcc puts an unpck here.  Wonder if we need it.
1024
	XOR FF64 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
1025 1026 1027
		-- xorpd, so we need the 128-bit constant
	]
  --
1028
  return (Any FF64 code)
1029 1030 1031
#endif

#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
1032 1033 1034

getRegister (CmmMachOp mop [x]) -- unary MachOps
  = case mop of
1035
#if i386_TARGET_ARCH
1036 1037
      MO_F_Neg W32 -> trivialUFCode FF32 (GNEG FF32) x
      MO_F_Neg W64 -> trivialUFCode FF64 (GNEG FF64) x
1038
#endif
1039

1040 1041 1042
      MO_S_Neg w -> triv_ucode NEGI (intSize w)
      MO_F_Neg w -> triv_ucode NEGI (floatSize w)
      MO_Not w   -> triv_ucode NOT  (intSize w)
1043 1044

      -- Nop conversions
1045 1046 1047 1048 1049 1050 1051
      MO_UU_Conv W32 W8  -> toI8Reg  W32 x
      MO_SS_Conv W32 W8  -> toI8Reg  W32 x
      MO_UU_Conv W16 W8  -> toI8Reg  W16 x
      MO_SS_Conv W16 W8  -> toI8Reg  W16 x
      MO_UU_Conv W32 W16 -> toI16Reg W32 x
      MO_SS_Conv W32 W16 -> toI16Reg W32 x

1052
#if x86_64_TARGET_ARCH
1053 1054 1055 1056 1057 1058
      MO_UU_Conv W64 W32 -> conversionNop II64 x
      MO_SS_Conv W64 W32 -> conversionNop II64 x
      MO_UU_Conv W64 W16 -> toI16Reg W64 x
      MO_SS_Conv W64 W16 -> toI16Reg W64 x
      MO_UU_Conv W64 W8  -> toI8Reg  W64 x
      MO_SS_Conv W64 W8  -> toI8Reg  W64 x
1059 1060
#endif

1061 1062
      MO_UU_Conv rep1 rep2 | rep1 == rep2 -> conversionNop (intSize rep1) x
      MO_SS_Conv rep1 rep2 | rep1 == rep2 -> conversionNop (intSize rep1) x
1063 1064

      -- widenings
1065 1066 1067
      MO_UU_Conv W8  W32 -> integerExtend W8  W32 MOVZxL x
      MO_UU_Conv W16 W32 -> integerExtend W16 W32 MOVZxL x
      MO_UU_Conv W8  W16 -> integerExtend W8  W16 MOVZxL x
1068

1069 1070 1071
      MO_SS_Conv W8  W32 -> integerExtend W8  W32 MOVSxL x
      MO_SS_Conv W16 W32 -> integerExtend W16 W32 MOVSxL x
      MO_SS_Conv W8  W16 -> integerExtend W8  W16 MOVSxL x
1072

1073
#if x86_64_TARGET_ARCH
1074 1075 1076 1077 1078 1079
      MO_UU_Conv W8  W64 -> integerExtend W8  W64 MOVZxL x
      MO_UU_Conv W16 W64 -> integerExtend W16 W64 MOVZxL x
      MO_UU_Conv W32 W64 -> integerExtend W32 W64 MOVZxL x
      MO_SS_Conv W8  W64 -> integerExtend W8  W64 MOVSxL x
      MO_SS_Conv W16 W64 -> integerExtend W16 W64 MOVSxL x
      MO_SS_Conv W32 W64 -> integerExtend W32 W64 MOVSxL x
1080 1081 1082 1083 1084 1085 1086
	-- for 32-to-64 bit zero extension, amd64 uses an ordinary movl.
	-- However, we don't want the register allocator to throw it
	-- away as an unnecessary reg-to-reg move, so we keep it in
	-- the form of a movzl and print it as a movl later.
#endif

#if i386_TARGET_ARCH
1087 1088
      MO_FF_Conv W32 W64 -> conversionNop FF64 x
      MO_FF_Conv W64 W32 -> conversionNop FF32 x
1089
#else
1090 1091
      MO_FF_Conv W32 W64 -> coerceFP2FP W64 x
      MO_FF_Conv W64 W32 -> coerceFP2FP W32 x
1092 1093
#endif

1094 1095
      MO_FS_Conv from to -> coerceFP2Int from to x
      MO_SF_Conv from to -> coerceInt2FP from to x
1096

1097
      other -> pprPanic "getRegister" (pprMachOp mop)
1098
   where
1099 1100 1101
	triv_ucode :: (Size -> Operand -> Instr) -> Size -> NatM Register
	triv_ucode instr size = trivialUCode size (instr size) x

1102
	-- signed or unsigned extension.
1103 1104 1105
	integerExtend :: Width -> Width
		      -> (Size -> Operand -> Operand -> Instr)
		      -> CmmExpr -> NatM Register
1106
	integerExtend from to instr expr = do
1107
	    (reg,e_code) <- if from == W8 then getByteReg expr
1108 1109 1110 1111
					  else getSomeReg expr
	    let 
		code dst = 
		  e_code `snocOL`
1112 1113
		  instr (intSize from) (OpReg reg) (OpReg dst)
	    return (Any (intSize to) code)
1114

1115
	toI8Reg :: Width -> CmmExpr -> NatM Register
1116 1117
	toI8Reg new_rep expr
            = do codefn <- getAnyReg expr
1118
		 return (Any (intSize new_rep) codefn)
1119 1120 1121 1122 1123 1124 1125 1126 1127
		-- HACK: use getAnyReg to get a byte-addressable register.
		-- If the source was a Fixed register, this will add the
		-- mov instruction to put it into the desired destination.
		-- We're assuming that the destination won't be a fixed
		-- non-byte-addressable register; it won't be, because all
		-- fixed registers are word-sized.

	toI16Reg = toI8Reg -- for now

1128 1129
	conversionNop :: Size -> CmmExpr -> NatM Register
        conversionNop new_size expr
1130
            = do e_code <- getRegister expr
1131
                 return (swizzleRegisterRep e_code new_size)
1132 1133 1134


getRegister e@(CmmMachOp mop [x, y]) -- dyadic MachOps
Simon Marlow's avatar
Simon Marlow committed
1135
  = case mop of
1136 1137 1138 1139 1140 1141
      MO_F_Eq w -> condFltReg EQQ x y
      MO_F_Ne w -> condFltReg NE x y
      MO_F_Gt w -> condFltReg GTT x y
      MO_F_Ge w -> condFltReg GE x y
      MO_F_Lt w -> condFltReg LTT x y
      MO_F_Le w -> condFltReg LE x y
1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155

      MO_Eq rep   -> condIntReg EQQ x y
      MO_Ne rep   -> condIntReg NE x y

      MO_S_Gt rep -> condIntReg GTT x y
      MO_S_Ge rep -> condIntReg GE x y
      MO_S_Lt rep -> condIntReg LTT x y
      MO_S_Le rep -> condIntReg LE x y

      MO_U_Gt rep -> condIntReg GU  x y
      MO_U_Ge rep -> condIntReg GEU x y
      MO_U_Lt rep -> condIntReg LU  x y
      MO_U_Le rep -> condIntReg LEU x y

1156
#if i386_TARGET_ARCH
1157 1158 1159 1160
      MO_F_Add w -> trivialFCode w GADD x y
      MO_F_Sub w -> trivialFCode w GSUB x y
      MO_F_Quot w -> trivialFCode w GDIV x y
      MO_F_Mul w -> trivialFCode w GMUL x y
1161 1162 1163
#endif

#if x86_64_TARGET_ARCH
1164 1165 1166 1167
      MO_F_Add w -> trivialFCode w ADD x y
      MO_F_Sub w -> trivialFCode w SUB x y
      MO_F_Quot w -> trivialFCode w FDIV x y
      MO_F_Mul w -> trivialFCode w MUL x y
1168
#endif
1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179

      MO_Add rep -> add_code rep x y
      MO_Sub rep -> sub_code rep x y

      MO_S_Quot rep -> div_code rep True  True  x y
      MO_S_Rem  rep -> div_code rep True  False x y
      MO_U_Quot rep -> div_code rep False True  x y
      MO_U_Rem  rep -> div_code rep False False x y

      MO_S_MulMayOflo rep -> imulMayOflo rep x