StgCmmPrim.hs 80.5 KB
Newer Older
1 2 3 4 5 6 7 8 9
-----------------------------------------------------------------------------
--
-- Stg to C--: primitive operations
--
-- (c) The University of Glasgow 2004-2006
--
-----------------------------------------------------------------------------

module StgCmmPrim (
10 11 12
   cgOpApp,
   cgPrimOp -- internal(ish), used by cgCase to get code for a
            -- comparison without also turning it into a Bool.
13 14 15 16 17 18 19 20 21
 ) where

#include "HsVersions.h"

import StgCmmLayout
import StgCmmForeign
import StgCmmEnv
import StgCmmMonad
import StgCmmUtils
22 23
import StgCmmTicky
import StgCmmHeap
24
import StgCmmProf ( costCentreFrom, curCCS )
25

26 27
import DynFlags
import Platform
28
import BasicTypes
29
import MkGraph
30
import StgSyn
31
import Cmm
32
import CmmInfo
33
import Type     ( Type, tyConAppTyCon )
34 35 36 37 38 39 40
import TyCon
import CLabel
import CmmUtils
import PrimOp
import SMRep
import FastString
import Outputable
41
import Util
42

43
import Control.Monad (liftM, when)
44
import Data.Bits
45

46
------------------------------------------------------------------------
47
--      Primitive operations and foreign calls
48 49 50 51 52 53
------------------------------------------------------------------------

{- Note [Foreign call results]
   ~~~~~~~~~~~~~~~~~~~~~~~~~~~
A foreign call always returns an unboxed tuple of results, one
of which is the state token.  This seems to happen even for pure
54
calls.
55 56 57 58 59 60

Even if we returned a single result for pure calls, it'd still be
right to wrap it in a singleton unboxed tuple, because the result
might be a Haskell closure pointer, we don't want to evaluate it. -}

----------------------------------
61 62 63
cgOpApp :: StgOp        -- The op
        -> [StgArg]     -- Arguments
        -> Type         -- Result type (always an unboxed tuple)
64
        -> FCode ReturnKind
65

66 67
-- Foreign calls
cgOpApp (StgFCallOp fcall _) stg_args res_ty
68 69 70
  = cgForeignCall fcall stg_args res_ty
      -- Note [Foreign call results]

71
-- tagToEnum# is special: we need to pull the constructor
72 73
-- out of the table, and perform an appropriate return.

74
cgOpApp (StgPrimOp TagToEnumOp) [arg] res_ty
75
  = ASSERT(isEnumerationTyCon tycon)
76
    do  { dflags <- getDynFlags
77
        ; args' <- getNonVoidArgAmodes [arg]
78 79
        ; let amode = case args' of [amode] -> amode
                                    _ -> panic "TagToEnumOp had void arg"
80
        ; emitReturn [tagToClosure dflags tycon amode] }
81
   where
82 83 84 85 86 87
          -- If you're reading this code in the attempt to figure
          -- out why the compiler panic'ed here, it is probably because
          -- you used tagToEnum# in a non-monomorphic setting, e.g.,
          --         intToTg :: Enum a => Int -> a ; intToTg (I# x#) = tagToEnum# x#
          -- That won't work.
        tycon = tyConAppTyCon res_ty
88

89 90 91 92
cgOpApp (StgPrimOp primop) args res_ty = do
    dflags <- getDynFlags
    cmm_args <- getNonVoidArgAmodes args
    case shouldInlinePrimOp dflags primop cmm_args of
93 94 95
        Nothing -> do  -- out-of-line
          let fun = CmmLit (CmmLabel (mkRtsPrimOpLabel primop))
          emitCall (NativeNodeCall, NativeReturn) fun cmm_args
96

97
        Just f  -- inline
98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115
          | ReturnsPrim VoidRep <- result_info
          -> do f []
                emitReturn []

          | ReturnsPrim rep <- result_info
          -> do dflags <- getDynFlags
                res <- newTemp (primRepCmmType dflags rep)
                f [res]
                emitReturn [CmmReg (CmmLocal res)]

          | ReturnsAlg tycon <- result_info, isUnboxedTupleTyCon tycon
          -> do (regs, _hints) <- newUnboxedTupleRegs res_ty
                f regs
                emitReturn (map (CmmReg . CmmLocal) regs)

          | otherwise -> panic "cgPrimop"
          where
             result_info = getPrimOpResultInfo primop
116

117
cgOpApp (StgPrimCallOp primcall) args _res_ty
118
  = do  { cmm_args <- getNonVoidArgAmodes args
119
        ; let fun = CmmLit (CmmLabel (mkPrimCallLabel primcall))
120
        ; emitCall (NativeNodeCall, NativeReturn) fun cmm_args }
121

122 123 124 125 126 127 128 129 130 131 132 133 134
-- | Decide whether an out-of-line primop should be replaced by an
-- inline implementation. This might happen e.g. if there's enough
-- static information, such as statically know arguments, to emit a
-- more efficient implementation inline.
--
-- Returns 'Nothing' if this primop should use its out-of-line
-- implementation (defined elsewhere) and 'Just' together with a code
-- generating function that takes the output regs as arguments
-- otherwise.
shouldInlinePrimOp :: DynFlags
                   -> PrimOp     -- ^ The primop
                   -> [CmmExpr]  -- ^ The primop arguments
                   -> Maybe ([LocalReg] -> FCode ())
135 136 137
shouldInlinePrimOp _ NewByteArrayOp_Char [(CmmLit (CmmInt n _))]
  | fromInteger n <= maxInlineAllocThreshold =
      Just $ \ [res] -> doNewByteArrayOp res (fromInteger n)
138
shouldInlinePrimOp dflags NewArrayOp [(CmmLit (CmmInt n _)), init]
139 140
  | wordsToBytes dflags (fromInteger n) <= maxInlineAllocThreshold =
      Just $ \ [res] -> doNewArrayOp res (fromInteger n) init
141 142 143 144 145 146 147 148 149 150
shouldInlinePrimOp dflags primop args
  | primOpOutOfLine primop = Nothing
  | otherwise = Just $ \ regs -> emitPrimOp dflags regs primop args

-- TODO: Several primops, such as 'copyArray#', only have an inline
-- implementation (below) but could possibly have both an inline
-- implementation and an out-of-line implementation, just like
-- 'newArray#'. This would lower the amount of code generated,
-- hopefully without a performance impact (needs to be measured).

151
---------------------------------------------------
152 153 154 155
cgPrimOp   :: [LocalReg]        -- where to put the results
           -> PrimOp            -- the op
           -> [StgArg]          -- arguments
           -> FCode ()
156 157

cgPrimOp results op args
158 159 160
  = do dflags <- getDynFlags
       arg_exprs <- getNonVoidArgAmodes args
       emitPrimOp dflags results op arg_exprs
161 162 163


------------------------------------------------------------------------
164
--      Emitting code for a primop
165 166
------------------------------------------------------------------------

167
emitPrimOp :: DynFlags
168 169 170 171
           -> [LocalReg]        -- where to put the results
           -> PrimOp            -- the op
           -> [CmmExpr]         -- arguments
           -> FCode ()
172 173 174 175

-- First we handle various awkward cases specially.  The remaining
-- easy cases are then handled by translateOp, defined below.

176
emitPrimOp dflags [res_r,res_c] IntAddCOp [aa,bb]
177
{-
178 179 180
   With some bit-twiddling, we can define int{Add,Sub}Czh portably in
   C, and without needing any comparisons.  This may not be the
   fastest way to do it - if you have better code, please send it! --SDM
181

182
   Return : r = a + b,  c = 0 if no overflow, 1 on overflow.
183 184

   We currently don't make use of the r value if c is != 0 (i.e.
185 186
   overflow), we just convert to big integers and try again.  This
   could be improved by making r and c the correct values for
187 188 189 190 191 192
   plugging into a new J#.

   { r = ((I_)(a)) + ((I_)(b));                                 \
     c = ((StgWord)(~(((I_)(a))^((I_)(b))) & (((I_)(a))^r)))    \
         >> (BITS_IN (I_) - 1);                                 \
   }
193 194 195 196 197
   Wading through the mass of bracketry, it seems to reduce to:
   c = ( (~(a^b)) & (a^r) ) >>unsigned (BITS_IN(I_)-1)

-}
   = emit $ catAGraphs [
198
        mkAssign (CmmLocal res_r) (CmmMachOp (mo_wordAdd dflags) [aa,bb]),
199
        mkAssign (CmmLocal res_c) $
200 201 202 203 204
          CmmMachOp (mo_wordUShr dflags) [
                CmmMachOp (mo_wordAnd dflags) [
                    CmmMachOp (mo_wordNot dflags) [CmmMachOp (mo_wordXor dflags) [aa,bb]],
                    CmmMachOp (mo_wordXor dflags) [aa, CmmReg (CmmLocal res_r)]
                ],
205
                mkIntExpr dflags (wORD_SIZE_IN_BITS dflags - 1)
206
          ]
207 208 209
     ]


210
emitPrimOp dflags [res_r,res_c] IntSubCOp [aa,bb]
211
{- Similarly:
212 213 214 215
   #define subIntCzh(r,c,a,b)                                   \
   { r = ((I_)(a)) - ((I_)(b));                                 \
     c = ((StgWord)((((I_)(a))^((I_)(b))) & (((I_)(a))^r)))     \
         >> (BITS_IN (I_) - 1);                                 \
216 217 218 219 220
   }

   c =  ((a^b) & (a^r)) >>unsigned (BITS_IN(I_)-1)
-}
   = emit $ catAGraphs [
221
        mkAssign (CmmLocal res_r) (CmmMachOp (mo_wordSub dflags) [aa,bb]),
222
        mkAssign (CmmLocal res_c) $
223 224 225 226 227
          CmmMachOp (mo_wordUShr dflags) [
                CmmMachOp (mo_wordAnd dflags) [
                    CmmMachOp (mo_wordXor dflags) [aa,bb],
                    CmmMachOp (mo_wordXor dflags) [aa, CmmReg (CmmLocal res_r)]
                ],
228
                mkIntExpr dflags (wORD_SIZE_IN_BITS dflags - 1)
229
          ]
230 231 232
     ]


233
emitPrimOp _ [res] ParOp [arg]
234 235 236
  =
        -- for now, just implement this in a C function
        -- later, we might want to inline it.
237
    emitCCall
238
        [(res,NoHint)]
239
        (CmmLit (CmmLabel (mkForeignLabel (fsLit "newSpark") Nothing ForeignLabelInExternalPackage IsFunction)))
240
        [(CmmReg (CmmGlobal BaseReg), AddrHint), (arg,AddrHint)]
241

242
emitPrimOp dflags [res] SparkOp [arg]
243 244 245 246 247
  = do
        -- returns the value of arg in res.  We're going to therefore
        -- refer to arg twice (once to pass to newSpark(), and once to
        -- assign to res), so put it in a temporary.
        tmp <- assignTemp arg
248
        tmp2 <- newTemp (bWord dflags)
249
        emitCCall
250
            [(tmp2,NoHint)]
251
            (CmmLit (CmmLabel (mkForeignLabel (fsLit "newSpark") Nothing ForeignLabelInExternalPackage IsFunction)))
252
            [(CmmReg (CmmGlobal BaseReg), AddrHint), ((CmmReg (CmmLocal tmp)), AddrHint)]
253
        emitAssign (CmmLocal res) (CmmReg (CmmLocal tmp))
254

255 256
emitPrimOp dflags [res] GetCCSOfOp [arg]
  = emitAssign (CmmLocal res) val
257
  where
258
    val
ian@well-typed.com's avatar
ian@well-typed.com committed
259
     | gopt Opt_SccProfilingOn dflags = costCentreFrom dflags (cmmUntag dflags arg)
260
     | otherwise                      = CmmLit (zeroCLit dflags)
261

262
emitPrimOp _ [res] GetCurrentCCSOp [_dummy_arg]
263
   = emitAssign (CmmLocal res) curCCS
264

265
emitPrimOp dflags [res] ReadMutVarOp [mutv]
266
   = emitAssign (CmmLocal res) (cmmLoadIndexW dflags mutv (fixedHdrSize dflags) (gcWord dflags))
267

268 269
emitPrimOp dflags [] WriteMutVarOp [mutv,var]
   = do emitStore (cmmOffsetW dflags mutv (fixedHdrSize dflags)) var
270 271 272 273
        emitCCall
                [{-no results-}]
                (CmmLit (CmmLabel mkDirty_MUT_VAR_Label))
                [(CmmReg (CmmGlobal BaseReg), AddrHint), (mutv,AddrHint)]
274 275

--  #define sizzeofByteArrayzh(r,a) \
276
--     r = ((StgArrWords *)(a))->bytes
277 278
emitPrimOp dflags [res] SizeofByteArrayOp [arg]
   = emit $ mkAssign (CmmLocal res) (cmmLoadIndexW dflags arg (fixedHdrSize dflags) (bWord dflags))
279 280

--  #define sizzeofMutableByteArrayzh(r,a) \
281
--      r = ((StgArrWords *)(a))->bytes
282 283
emitPrimOp dflags [res] SizeofMutableByteArrayOp [arg]
   = emitPrimOp dflags [res] SizeofByteArrayOp [arg]
284 285 286


--  #define touchzh(o)                  /* nothing */
287
emitPrimOp _ res@[] TouchOp args@[_arg]
288
   = do emitPrimCall res MO_Touch args
289 290

--  #define byteArrayContentszh(r,a) r = BYTE_ARR_CTS(a)
291 292
emitPrimOp dflags [res] ByteArrayContents_Char [arg]
   = emitAssign (CmmLocal res) (cmmOffsetB dflags arg (arrWordsHdrSize dflags))
293 294

--  #define stableNameToIntzh(r,s)   (r = ((StgStableName *)s)->sn)
295 296
emitPrimOp dflags [res] StableNameToIntOp [arg]
   = emitAssign (CmmLocal res) (cmmLoadIndexW dflags arg (fixedHdrSize dflags) (bWord dflags))
297

298
--  #define eqStableNamezh(r,sn1,sn2)                                   \
299
--    (r = (((StgStableName *)sn1)->sn == ((StgStableName *)sn2)->sn))
300
emitPrimOp dflags [res] EqStableNameOp [arg1,arg2]
301
   = emitAssign (CmmLocal res) (CmmMachOp (mo_wordEq dflags) [
302 303
                                   cmmLoadIndexW dflags arg1 (fixedHdrSize dflags) (bWord dflags),
                                   cmmLoadIndexW dflags arg2 (fixedHdrSize dflags) (bWord dflags)
304
                         ])
305 306


307 308
emitPrimOp dflags [res] ReallyUnsafePtrEqualityOp [arg1,arg2]
   = emitAssign (CmmLocal res) (CmmMachOp (mo_wordEq dflags) [arg1,arg2])
309 310

--  #define addrToHValuezh(r,a) r=(P_)a
311
emitPrimOp _      [res] AddrToAnyOp [arg]
312
   = emitAssign (CmmLocal res) arg
313 314 315

--  #define dataToTagzh(r,a)  r=(GET_TAG(((StgClosure *)a)->header.info))
--  Note: argument may be tagged!
316
emitPrimOp dflags [res] DataToTagOp [arg]
317
   = emitAssign (CmmLocal res) (getConstrTag dflags (cmmUntag dflags arg))
318 319 320 321

{- Freezing arrays-of-ptrs requires changing an info table, for the
   benefit of the generational collector.  It needs to scavenge mutable
   objects, even if they are in old space.  When they become immutable,
322
   they can be removed from this scavenge list.  -}
323 324

--  #define unsafeFreezzeArrayzh(r,a)
325
--      {
326
--        SET_INFO((StgClosure *)a,&stg_MUT_ARR_PTRS_FROZEN0_info);
327 328
--        r = a;
--      }
329
emitPrimOp _      [res] UnsafeFreezeArrayOp [arg]
330
   = emit $ catAGraphs
331 332
   [ setInfo arg (CmmLit (CmmLabel mkMAP_FROZEN_infoLabel)),
     mkAssign (CmmLocal res) arg ]
333
emitPrimOp _      [res] UnsafeFreezeArrayArrayOp [arg]
334 335 336
   = emit $ catAGraphs
   [ setInfo arg (CmmLit (CmmLabel mkMAP_FROZEN_infoLabel)),
     mkAssign (CmmLocal res) arg ]
337

338
--  #define unsafeFreezzeByteArrayzh(r,a)       r=(a)
339
emitPrimOp _      [res] UnsafeFreezeByteArrayOp [arg]
340
   = emitAssign (CmmLocal res) arg
341

342 343
-- Copying pointer arrays

344
emitPrimOp _      [] CopyArrayOp [src,src_off,dst,dst_off,n] =
345
    doCopyArrayOp src src_off dst dst_off n
346
emitPrimOp _      [] CopyMutableArrayOp [src,src_off,dst,dst_off,n] =
347
    doCopyMutableArrayOp src src_off dst dst_off n
348
emitPrimOp _      [res] CloneArrayOp [src,src_off,n] =
349
    emitCloneArray mkMAP_FROZEN_infoLabel res src src_off n
350
emitPrimOp _      [res] CloneMutableArrayOp [src,src_off,n] =
351
    emitCloneArray mkMAP_DIRTY_infoLabel res src src_off n
352
emitPrimOp _      [res] FreezeArrayOp [src,src_off,n] =
353
    emitCloneArray mkMAP_FROZEN_infoLabel res src src_off n
354
emitPrimOp _      [res] ThawArrayOp [src,src_off,n] =
355 356
    emitCloneArray mkMAP_DIRTY_infoLabel res src src_off n

357
emitPrimOp _      [] CopyArrayArrayOp [src,src_off,dst,dst_off,n] =
358
    doCopyArrayOp src src_off dst dst_off n
359
emitPrimOp _      [] CopyMutableArrayArrayOp [src,src_off,dst,dst_off,n] =
360 361
    doCopyMutableArrayOp src src_off dst dst_off n

362 363
-- Reading/writing pointer arrays

364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379
emitPrimOp _      [res] ReadArrayOp  [obj,ix]    = doReadPtrArrayOp res obj ix
emitPrimOp _      [res] IndexArrayOp [obj,ix]    = doReadPtrArrayOp res obj ix
emitPrimOp _      []  WriteArrayOp [obj,ix,v]  = doWritePtrArrayOp obj ix v

emitPrimOp _      [res] IndexArrayArrayOp_ByteArray         [obj,ix]   = doReadPtrArrayOp res obj ix
emitPrimOp _      [res] IndexArrayArrayOp_ArrayArray        [obj,ix]   = doReadPtrArrayOp res obj ix
emitPrimOp _      [res] ReadArrayArrayOp_ByteArray          [obj,ix]   = doReadPtrArrayOp res obj ix
emitPrimOp _      [res] ReadArrayArrayOp_MutableByteArray   [obj,ix]   = doReadPtrArrayOp res obj ix
emitPrimOp _      [res] ReadArrayArrayOp_ArrayArray         [obj,ix]   = doReadPtrArrayOp res obj ix
emitPrimOp _      [res] ReadArrayArrayOp_MutableArrayArray  [obj,ix]   = doReadPtrArrayOp res obj ix
emitPrimOp _      []  WriteArrayArrayOp_ByteArray         [obj,ix,v] = doWritePtrArrayOp obj ix v
emitPrimOp _      []  WriteArrayArrayOp_MutableByteArray  [obj,ix,v] = doWritePtrArrayOp obj ix v
emitPrimOp _      []  WriteArrayArrayOp_ArrayArray        [obj,ix,v] = doWritePtrArrayOp obj ix v
emitPrimOp _      []  WriteArrayArrayOp_MutableArrayArray [obj,ix,v] = doWritePtrArrayOp obj ix v

emitPrimOp dflags [res] SizeofArrayOp [arg]
380
   = emit $ mkAssign (CmmLocal res) (cmmLoadIndexW dflags arg (fixedHdrSize dflags + oFFSET_StgMutArrPtrs_ptrs dflags) (bWord dflags))
381 382 383 384 385 386
emitPrimOp dflags [res] SizeofMutableArrayOp [arg]
   = emitPrimOp dflags [res] SizeofArrayOp [arg]
emitPrimOp dflags [res] SizeofArrayArrayOp [arg]
   = emitPrimOp dflags [res] SizeofArrayOp [arg]
emitPrimOp dflags [res] SizeofMutableArrayArrayOp [arg]
   = emitPrimOp dflags [res] SizeofArrayOp [arg]
pumpkin's avatar
pumpkin committed
387

388 389
-- IndexXXXoffAddr

390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405
emitPrimOp dflags res IndexOffAddrOp_Char             args = doIndexOffAddrOp   (Just (mo_u_8ToWord dflags)) b8 res args
emitPrimOp dflags res IndexOffAddrOp_WideChar         args = doIndexOffAddrOp   (Just (mo_u_32ToWord dflags)) b32 res args
emitPrimOp dflags res IndexOffAddrOp_Int              args = doIndexOffAddrOp   Nothing (bWord dflags) res args
emitPrimOp dflags res IndexOffAddrOp_Word             args = doIndexOffAddrOp   Nothing (bWord dflags) res args
emitPrimOp dflags res IndexOffAddrOp_Addr             args = doIndexOffAddrOp   Nothing (bWord dflags) res args
emitPrimOp _      res IndexOffAddrOp_Float            args = doIndexOffAddrOp   Nothing f32 res args
emitPrimOp _      res IndexOffAddrOp_Double           args = doIndexOffAddrOp   Nothing f64 res args
emitPrimOp dflags res IndexOffAddrOp_StablePtr        args = doIndexOffAddrOp   Nothing (bWord dflags) res args
emitPrimOp dflags res IndexOffAddrOp_Int8             args = doIndexOffAddrOp   (Just (mo_s_8ToWord dflags)) b8  res args
emitPrimOp dflags res IndexOffAddrOp_Int16            args = doIndexOffAddrOp   (Just (mo_s_16ToWord dflags)) b16 res args
emitPrimOp dflags res IndexOffAddrOp_Int32            args = doIndexOffAddrOp   (Just (mo_s_32ToWord dflags)) b32 res args
emitPrimOp _      res IndexOffAddrOp_Int64            args = doIndexOffAddrOp   Nothing b64 res args
emitPrimOp dflags res IndexOffAddrOp_Word8            args = doIndexOffAddrOp   (Just (mo_u_8ToWord dflags)) b8  res args
emitPrimOp dflags res IndexOffAddrOp_Word16           args = doIndexOffAddrOp   (Just (mo_u_16ToWord dflags)) b16 res args
emitPrimOp dflags res IndexOffAddrOp_Word32           args = doIndexOffAddrOp   (Just (mo_u_32ToWord dflags)) b32 res args
emitPrimOp _      res IndexOffAddrOp_Word64           args = doIndexOffAddrOp   Nothing b64 res args
406 407 408

-- ReadXXXoffAddr, which are identical, for our purposes, to IndexXXXoffAddr.

409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424
emitPrimOp dflags res ReadOffAddrOp_Char             args = doIndexOffAddrOp   (Just (mo_u_8ToWord dflags)) b8 res args
emitPrimOp dflags res ReadOffAddrOp_WideChar         args = doIndexOffAddrOp   (Just (mo_u_32ToWord dflags)) b32 res args
emitPrimOp dflags res ReadOffAddrOp_Int              args = doIndexOffAddrOp   Nothing (bWord dflags) res args
emitPrimOp dflags res ReadOffAddrOp_Word             args = doIndexOffAddrOp   Nothing (bWord dflags) res args
emitPrimOp dflags res ReadOffAddrOp_Addr             args = doIndexOffAddrOp   Nothing (bWord dflags) res args
emitPrimOp _      res ReadOffAddrOp_Float            args = doIndexOffAddrOp   Nothing f32 res args
emitPrimOp _      res ReadOffAddrOp_Double           args = doIndexOffAddrOp   Nothing f64 res args
emitPrimOp dflags res ReadOffAddrOp_StablePtr        args = doIndexOffAddrOp   Nothing (bWord dflags) res args
emitPrimOp dflags res ReadOffAddrOp_Int8             args = doIndexOffAddrOp   (Just (mo_s_8ToWord dflags)) b8  res args
emitPrimOp dflags res ReadOffAddrOp_Int16            args = doIndexOffAddrOp   (Just (mo_s_16ToWord dflags)) b16 res args
emitPrimOp dflags res ReadOffAddrOp_Int32            args = doIndexOffAddrOp   (Just (mo_s_32ToWord dflags)) b32 res args
emitPrimOp _      res ReadOffAddrOp_Int64            args = doIndexOffAddrOp   Nothing b64 res args
emitPrimOp dflags res ReadOffAddrOp_Word8            args = doIndexOffAddrOp   (Just (mo_u_8ToWord dflags)) b8  res args
emitPrimOp dflags res ReadOffAddrOp_Word16           args = doIndexOffAddrOp   (Just (mo_u_16ToWord dflags)) b16 res args
emitPrimOp dflags res ReadOffAddrOp_Word32           args = doIndexOffAddrOp   (Just (mo_u_32ToWord dflags)) b32 res args
emitPrimOp _      res ReadOffAddrOp_Word64           args = doIndexOffAddrOp   Nothing b64 res args
425 426 427

-- IndexXXXArray

428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443
emitPrimOp dflags res IndexByteArrayOp_Char             args = doIndexByteArrayOp   (Just (mo_u_8ToWord dflags)) b8 res args
emitPrimOp dflags res IndexByteArrayOp_WideChar         args = doIndexByteArrayOp   (Just (mo_u_32ToWord dflags)) b32 res args
emitPrimOp dflags res IndexByteArrayOp_Int              args = doIndexByteArrayOp   Nothing (bWord dflags) res args
emitPrimOp dflags res IndexByteArrayOp_Word             args = doIndexByteArrayOp   Nothing (bWord dflags) res args
emitPrimOp dflags res IndexByteArrayOp_Addr             args = doIndexByteArrayOp   Nothing (bWord dflags) res args
emitPrimOp _      res IndexByteArrayOp_Float            args = doIndexByteArrayOp   Nothing f32 res args
emitPrimOp _      res IndexByteArrayOp_Double           args = doIndexByteArrayOp   Nothing f64 res args
emitPrimOp dflags res IndexByteArrayOp_StablePtr        args = doIndexByteArrayOp   Nothing (bWord dflags) res args
emitPrimOp dflags res IndexByteArrayOp_Int8             args = doIndexByteArrayOp   (Just (mo_s_8ToWord dflags)) b8  res args
emitPrimOp dflags res IndexByteArrayOp_Int16            args = doIndexByteArrayOp   (Just (mo_s_16ToWord dflags)) b16  res args
emitPrimOp dflags res IndexByteArrayOp_Int32            args = doIndexByteArrayOp   (Just (mo_s_32ToWord dflags)) b32  res args
emitPrimOp _      res IndexByteArrayOp_Int64            args = doIndexByteArrayOp   Nothing b64  res args
emitPrimOp dflags res IndexByteArrayOp_Word8            args = doIndexByteArrayOp   (Just (mo_u_8ToWord dflags)) b8  res args
emitPrimOp dflags res IndexByteArrayOp_Word16           args = doIndexByteArrayOp   (Just (mo_u_16ToWord dflags)) b16  res args
emitPrimOp dflags res IndexByteArrayOp_Word32           args = doIndexByteArrayOp   (Just (mo_u_32ToWord dflags)) b32  res args
emitPrimOp _      res IndexByteArrayOp_Word64           args = doIndexByteArrayOp   Nothing b64  res args
444 445 446

-- ReadXXXArray, identical to IndexXXXArray.

447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462
emitPrimOp dflags res ReadByteArrayOp_Char             args = doIndexByteArrayOp   (Just (mo_u_8ToWord dflags)) b8 res args
emitPrimOp dflags res ReadByteArrayOp_WideChar         args = doIndexByteArrayOp   (Just (mo_u_32ToWord dflags)) b32 res args
emitPrimOp dflags res ReadByteArrayOp_Int              args = doIndexByteArrayOp   Nothing (bWord dflags) res args
emitPrimOp dflags res ReadByteArrayOp_Word             args = doIndexByteArrayOp   Nothing (bWord dflags) res args
emitPrimOp dflags res ReadByteArrayOp_Addr             args = doIndexByteArrayOp   Nothing (bWord dflags) res args
emitPrimOp _      res ReadByteArrayOp_Float            args = doIndexByteArrayOp   Nothing f32 res args
emitPrimOp _      res ReadByteArrayOp_Double           args = doIndexByteArrayOp   Nothing f64 res args
emitPrimOp dflags res ReadByteArrayOp_StablePtr        args = doIndexByteArrayOp   Nothing (bWord dflags) res args
emitPrimOp dflags res ReadByteArrayOp_Int8             args = doIndexByteArrayOp   (Just (mo_s_8ToWord dflags)) b8  res args
emitPrimOp dflags res ReadByteArrayOp_Int16            args = doIndexByteArrayOp   (Just (mo_s_16ToWord dflags)) b16  res args
emitPrimOp dflags res ReadByteArrayOp_Int32            args = doIndexByteArrayOp   (Just (mo_s_32ToWord dflags)) b32  res args
emitPrimOp _      res ReadByteArrayOp_Int64            args = doIndexByteArrayOp   Nothing b64  res args
emitPrimOp dflags res ReadByteArrayOp_Word8            args = doIndexByteArrayOp   (Just (mo_u_8ToWord dflags)) b8  res args
emitPrimOp dflags res ReadByteArrayOp_Word16           args = doIndexByteArrayOp   (Just (mo_u_16ToWord dflags)) b16  res args
emitPrimOp dflags res ReadByteArrayOp_Word32           args = doIndexByteArrayOp   (Just (mo_u_32ToWord dflags)) b32  res args
emitPrimOp _      res ReadByteArrayOp_Word64           args = doIndexByteArrayOp   Nothing b64  res args
463 464 465

-- WriteXXXoffAddr

466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481
emitPrimOp dflags res WriteOffAddrOp_Char             args = doWriteOffAddrOp (Just (mo_WordTo8 dflags))  b8 res args
emitPrimOp dflags res WriteOffAddrOp_WideChar         args = doWriteOffAddrOp (Just (mo_WordTo32 dflags)) b32 res args
emitPrimOp dflags res WriteOffAddrOp_Int              args = doWriteOffAddrOp Nothing (bWord dflags) res args
emitPrimOp dflags res WriteOffAddrOp_Word             args = doWriteOffAddrOp Nothing (bWord dflags) res args
emitPrimOp dflags res WriteOffAddrOp_Addr             args = doWriteOffAddrOp Nothing (bWord dflags) res args
emitPrimOp _      res WriteOffAddrOp_Float            args = doWriteOffAddrOp Nothing f32 res args
emitPrimOp _      res WriteOffAddrOp_Double           args = doWriteOffAddrOp Nothing f64 res args
emitPrimOp dflags res WriteOffAddrOp_StablePtr        args = doWriteOffAddrOp Nothing (bWord dflags) res args
emitPrimOp dflags res WriteOffAddrOp_Int8             args = doWriteOffAddrOp (Just (mo_WordTo8 dflags))  b8 res args
emitPrimOp dflags res WriteOffAddrOp_Int16            args = doWriteOffAddrOp (Just (mo_WordTo16 dflags)) b16 res args
emitPrimOp dflags res WriteOffAddrOp_Int32            args = doWriteOffAddrOp (Just (mo_WordTo32 dflags)) b32 res args
emitPrimOp _      res WriteOffAddrOp_Int64            args = doWriteOffAddrOp Nothing b64 res args
emitPrimOp dflags res WriteOffAddrOp_Word8            args = doWriteOffAddrOp (Just (mo_WordTo8 dflags))  b8 res args
emitPrimOp dflags res WriteOffAddrOp_Word16           args = doWriteOffAddrOp (Just (mo_WordTo16 dflags)) b16 res args
emitPrimOp dflags res WriteOffAddrOp_Word32           args = doWriteOffAddrOp (Just (mo_WordTo32 dflags)) b32 res args
emitPrimOp _      res WriteOffAddrOp_Word64           args = doWriteOffAddrOp Nothing b64 res args
482 483 484

-- WriteXXXArray

485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500
emitPrimOp dflags res WriteByteArrayOp_Char             args = doWriteByteArrayOp (Just (mo_WordTo8 dflags))  b8 res args
emitPrimOp dflags res WriteByteArrayOp_WideChar         args = doWriteByteArrayOp (Just (mo_WordTo32 dflags)) b32 res args
emitPrimOp dflags res WriteByteArrayOp_Int              args = doWriteByteArrayOp Nothing (bWord dflags) res args
emitPrimOp dflags res WriteByteArrayOp_Word             args = doWriteByteArrayOp Nothing (bWord dflags) res args
emitPrimOp dflags res WriteByteArrayOp_Addr             args = doWriteByteArrayOp Nothing (bWord dflags) res args
emitPrimOp _      res WriteByteArrayOp_Float            args = doWriteByteArrayOp Nothing f32 res args
emitPrimOp _      res WriteByteArrayOp_Double           args = doWriteByteArrayOp Nothing f64 res args
emitPrimOp dflags res WriteByteArrayOp_StablePtr        args = doWriteByteArrayOp Nothing (bWord dflags) res args
emitPrimOp dflags res WriteByteArrayOp_Int8             args = doWriteByteArrayOp (Just (mo_WordTo8 dflags))  b8 res args
emitPrimOp dflags res WriteByteArrayOp_Int16            args = doWriteByteArrayOp (Just (mo_WordTo16 dflags)) b16 res args
emitPrimOp dflags res WriteByteArrayOp_Int32            args = doWriteByteArrayOp (Just (mo_WordTo32 dflags)) b32 res args
emitPrimOp _      res WriteByteArrayOp_Int64            args = doWriteByteArrayOp Nothing b64 res args
emitPrimOp dflags res WriteByteArrayOp_Word8            args = doWriteByteArrayOp (Just (mo_WordTo8 dflags))  b8  res args
emitPrimOp dflags res WriteByteArrayOp_Word16           args = doWriteByteArrayOp (Just (mo_WordTo16 dflags)) b16 res args
emitPrimOp dflags res WriteByteArrayOp_Word32           args = doWriteByteArrayOp (Just (mo_WordTo32 dflags)) b32 res args
emitPrimOp _      res WriteByteArrayOp_Word64           args = doWriteByteArrayOp Nothing b64 res args
501

Ian Lynagh's avatar
Ian Lynagh committed
502
-- Copying and setting byte arrays
503
emitPrimOp _      [] CopyByteArrayOp [src,src_off,dst,dst_off,n] =
504
    doCopyByteArrayOp src src_off dst dst_off n
505
emitPrimOp _      [] CopyMutableByteArrayOp [src,src_off,dst,dst_off,n] =
506
    doCopyMutableByteArrayOp src src_off dst dst_off n
507 508 509 510 511 512
emitPrimOp _      [] CopyByteArrayToAddrOp [src,src_off,dst,n] =
    doCopyByteArrayToAddrOp src src_off dst n
emitPrimOp _      [] CopyMutableByteArrayToAddrOp [src,src_off,dst,n] =
    doCopyMutableByteArrayToAddrOp src src_off dst n
emitPrimOp _      [] CopyAddrToByteArrayOp [src,dst,dst_off,n] =
    doCopyAddrToByteArrayOp src dst dst_off n
513
emitPrimOp _      [] SetByteArrayOp [ba,off,len,c] =
Ian Lynagh's avatar
Ian Lynagh committed
514
    doSetByteArrayOp ba off len c
515

516 517 518 519 520
emitPrimOp _      [res] BSwap16Op [w] = emitBSwapCall res w W16
emitPrimOp _      [res] BSwap32Op [w] = emitBSwapCall res w W32
emitPrimOp _      [res] BSwap64Op [w] = emitBSwapCall res w W64
emitPrimOp dflags [res] BSwapOp   [w] = emitBSwapCall res w (wordWidth dflags)

tibbe's avatar
tibbe committed
521
-- Population count
ian@well-typed.com's avatar
ian@well-typed.com committed
522 523 524 525 526
emitPrimOp _      [res] PopCnt8Op  [w] = emitPopCntCall res w W8
emitPrimOp _      [res] PopCnt16Op [w] = emitPopCntCall res w W16
emitPrimOp _      [res] PopCnt32Op [w] = emitPopCntCall res w W32
emitPrimOp _      [res] PopCnt64Op [w] = emitPopCntCall res w W64
emitPrimOp dflags [res] PopCntOp   [w] = emitPopCntCall res w (wordWidth dflags)
tibbe's avatar
tibbe committed
527

tibbe's avatar
tibbe committed
528 529 530 531 532 533
-- Unsigned int to floating point conversions
emitPrimOp _      [res] Word2FloatOp  [w] = emitPrimCall [res]
                                            (MO_UF_Conv W32) [w]
emitPrimOp _      [res] Word2DoubleOp [w] = emitPrimCall [res]
                                            (MO_UF_Conv W64) [w]

534
-- SIMD primops
535 536
emitPrimOp dflags [res] (VecBroadcastOp vcat n w) [e] = do
    checkVecCompatibility dflags vcat n w
537
    doVecPackOp (vecElemInjectCast dflags vcat w) ty zeros (replicate n e) res
538
  where
539 540 541 542 543 544 545 546 547 548 549 550 551
    zeros :: CmmExpr
    zeros = CmmLit $ CmmVec (replicate n zero)

    zero :: CmmLit
    zero = case vcat of
             IntVec   -> CmmInt 0 w
             WordVec  -> CmmInt 0 w
             FloatVec -> CmmFloat 0 w

    ty :: CmmType
    ty = vecVmmType vcat n w

emitPrimOp dflags [res] (VecPackOp vcat n w) es = do
552
    checkVecCompatibility dflags vcat n w
553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569
    when (length es /= n) $
        panic "emitPrimOp: VecPackOp has wrong number of arguments"
    doVecPackOp (vecElemInjectCast dflags vcat w) ty zeros es res
  where
    zeros :: CmmExpr
    zeros = CmmLit $ CmmVec (replicate n zero)

    zero :: CmmLit
    zero = case vcat of
             IntVec   -> CmmInt 0 w
             WordVec  -> CmmInt 0 w
             FloatVec -> CmmFloat 0 w

    ty :: CmmType
    ty = vecVmmType vcat n w

emitPrimOp dflags res (VecUnpackOp vcat n w) [arg] = do
570
    checkVecCompatibility dflags vcat n w
571 572 573 574 575 576
    when (length res /= n) $
        panic "emitPrimOp: VecUnpackOp has wrong number of results"
    doVecUnpackOp (vecElemProjectCast dflags vcat w) ty arg res
  where
    ty :: CmmType
    ty = vecVmmType vcat n w
577

578 579
emitPrimOp dflags [res] (VecInsertOp vcat n w) [v,e,i] = do
    checkVecCompatibility dflags vcat n w
580
    doVecInsertOp (vecElemInjectCast dflags vcat w) ty v e i res
581
  where
582 583
    ty :: CmmType
    ty = vecVmmType vcat n w
584

585 586
emitPrimOp dflags res (VecIndexByteArrayOp vcat n w) args = do
    checkVecCompatibility dflags vcat n w
587 588 589 590
    doIndexByteArrayOp Nothing ty res args
  where
    ty :: CmmType
    ty = vecVmmType vcat n w
591

592 593
emitPrimOp dflags res (VecReadByteArrayOp vcat n w) args = do
    checkVecCompatibility dflags vcat n w
594 595 596 597
    doIndexByteArrayOp Nothing ty res args
  where
    ty :: CmmType
    ty = vecVmmType vcat n w
598

599 600
emitPrimOp dflags res (VecWriteByteArrayOp vcat n w) args = do
    checkVecCompatibility dflags vcat n w
601
    doWriteByteArrayOp Nothing ty res args
602
  where
603 604
    ty :: CmmType
    ty = vecVmmType vcat n w
605

606 607
emitPrimOp dflags res (VecIndexOffAddrOp vcat n w) args = do
    checkVecCompatibility dflags vcat n w
608
    doIndexOffAddrOp Nothing ty res args
609
  where
610 611
    ty :: CmmType
    ty = vecVmmType vcat n w
612

613 614
emitPrimOp dflags res (VecReadOffAddrOp vcat n w) args = do
    checkVecCompatibility dflags vcat n w
615 616 617 618
    doIndexOffAddrOp Nothing ty res args
  where
    ty :: CmmType
    ty = vecVmmType vcat n w
619

620 621
emitPrimOp dflags res (VecWriteOffAddrOp vcat n w) args = do
    checkVecCompatibility dflags vcat n w
622 623 624 625
    doWriteOffAddrOp Nothing ty res args
  where
    ty :: CmmType
    ty = vecVmmType vcat n w
626

627 628
emitPrimOp dflags res (VecIndexScalarByteArrayOp vcat n w) args = do
    checkVecCompatibility dflags vcat n w
629
    doIndexByteArrayOpAs Nothing vecty ty res args
630
  where
631 632
    vecty :: CmmType
    vecty = vecVmmType vcat n w
633

634 635 636
    ty :: CmmType
    ty = vecCmmCat vcat w

637 638
emitPrimOp dflags res (VecReadScalarByteArrayOp vcat n w) args = do
    checkVecCompatibility dflags vcat n w
639
    doIndexByteArrayOpAs Nothing vecty ty res args
640
  where
641 642
    vecty :: CmmType
    vecty = vecVmmType vcat n w
643

644 645
    ty :: CmmType
    ty = vecCmmCat vcat w
646

647 648
emitPrimOp dflags res (VecWriteScalarByteArrayOp vcat n w) args = do
    checkVecCompatibility dflags vcat n w
649 650 651 652
    doWriteByteArrayOp Nothing ty res args
  where
    ty :: CmmType
    ty = vecCmmCat vcat w
653

654 655
emitPrimOp dflags res (VecIndexScalarOffAddrOp vcat n w) args = do
    checkVecCompatibility dflags vcat n w
656
    doIndexOffAddrOpAs Nothing vecty ty res args
657
  where
658 659
    vecty :: CmmType
    vecty = vecVmmType vcat n w
660

661 662 663
    ty :: CmmType
    ty = vecCmmCat vcat w

664 665
emitPrimOp dflags res (VecReadScalarOffAddrOp vcat n w) args = do
    checkVecCompatibility dflags vcat n w
666
    doIndexOffAddrOpAs Nothing vecty ty res args
667
  where
668 669
    vecty :: CmmType
    vecty = vecVmmType vcat n w
670

671 672
    ty :: CmmType
    ty = vecCmmCat vcat w
673

674 675
emitPrimOp dflags res (VecWriteScalarOffAddrOp vcat n w) args = do
    checkVecCompatibility dflags vcat n w
676 677 678 679
    doWriteOffAddrOp Nothing ty res args
  where
    ty :: CmmType
    ty = vecCmmCat vcat w
680

gmainland's avatar
gmainland committed
681
-- Prefetch
682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697
emitPrimOp _ res PrefetchByteArrayOp3        args = doPrefetchByteArrayOp 3 res args
emitPrimOp _ res PrefetchMutableByteArrayOp3 args = doPrefetchByteArrayOp 3 res args
emitPrimOp _ res PrefetchAddrOp3             args = doPrefetchAddrOp  3 res args

emitPrimOp _ res PrefetchByteArrayOp2        args = doPrefetchByteArrayOp 2 res args
emitPrimOp _ res PrefetchMutableByteArrayOp2 args = doPrefetchByteArrayOp 2 res args
emitPrimOp _ res PrefetchAddrOp2             args = doPrefetchAddrOp 2 res args

emitPrimOp _ res PrefetchByteArrayOp1        args = doPrefetchByteArrayOp 1 res args
emitPrimOp _ res PrefetchMutableByteArrayOp1 args = doPrefetchByteArrayOp 1 res args
emitPrimOp _ res PrefetchAddrOp1             args = doPrefetchAddrOp 1 res args

emitPrimOp _ res PrefetchByteArrayOp0        args = doPrefetchByteArrayOp 0 res args
emitPrimOp _ res PrefetchMutableByteArrayOp0 args = doPrefetchByteArrayOp 0 res args
emitPrimOp _ res PrefetchAddrOp0             args = doPrefetchAddrOp 0 res args

gmainland's avatar
gmainland committed
698

699
-- The rest just translate straightforwardly
700
emitPrimOp dflags [res] op [arg]
701
   | nopOp op
702
   = emitAssign (CmmLocal res) arg
703 704

   | Just (mop,rep) <- narrowOp op
705
   = emitAssign (CmmLocal res) $
706
           CmmMachOp (mop rep (wordWidth dflags)) [CmmMachOp (mop (wordWidth dflags) rep) [arg]]
707

708
emitPrimOp dflags r@[res] op args
709
   | Just prim <- callishOp op
710
   = do emitPrimCall r prim args
711

712
   | Just mop <- translateOp dflags op
713 714 715
   = let stmt = mkAssign (CmmLocal res) (CmmMachOp mop args) in
     emit stmt

716 717
emitPrimOp dflags results op args
   = case callishPrimOpSupported dflags op of
718 719 720 721 722 723 724 725
          Left op   -> emit $ mkUnsafeCall (PrimTarget op) results args
          Right gen -> gen results args

type GenericOp = [CmmFormal] -> [CmmActual] -> FCode ()

callishPrimOpSupported :: DynFlags -> PrimOp -> Either CallishMachOp GenericOp
callishPrimOpSupported dflags op
  = case op of
726 727
      IntQuotRemOp   | ncg && x86ish  -> Left (MO_S_QuotRem  (wordWidth dflags))
                     | otherwise      -> Right (genericIntQuotRemOp dflags)
728

729 730
      WordQuotRemOp  | ncg && x86ish  -> Left (MO_U_QuotRem  (wordWidth dflags))
                     | otherwise      -> Right (genericWordQuotRemOp dflags)
731

732
      WordQuotRem2Op | ncg && x86ish  -> Left (MO_U_QuotRem2 (wordWidth dflags))
733
                     | otherwise      -> Right (genericWordQuotRem2Op dflags)
734

735
      WordAdd2Op     | ncg && x86ish  -> Left (MO_Add2       (wordWidth dflags))
736 737
                     | otherwise      -> Right genericWordAdd2Op

738
      WordMul2Op     | ncg && x86ish  -> Left (MO_U_Mul2     (wordWidth dflags))
739 740
                     | otherwise      -> Right genericWordMul2Op

Edward Z. Yang's avatar
Edward Z. Yang committed
741
      _ -> pprPanic "emitPrimOp: can't translate PrimOp " (ppr op)
742 743 744 745 746 747 748 749 750 751
 where
  ncg = case hscTarget dflags of
           HscAsm -> True
           _      -> False

  x86ish = case platformArch (targetPlatform dflags) of
             ArchX86    -> True
             ArchX86_64 -> True
             _          -> False

752 753
genericIntQuotRemOp :: DynFlags -> GenericOp
genericIntQuotRemOp dflags [res_q, res_r] [arg_x, arg_y]
754
   = emit $ mkAssign (CmmLocal res_q)
755
              (CmmMachOp (MO_S_Quot (wordWidth dflags)) [arg_x, arg_y]) <*>
756
            mkAssign (CmmLocal res_r)
757 758
              (CmmMachOp (MO_S_Rem  (wordWidth dflags)) [arg_x, arg_y])
genericIntQuotRemOp _ _ _ = panic "genericIntQuotRemOp"
759

760 761
genericWordQuotRemOp :: DynFlags -> GenericOp
genericWordQuotRemOp dflags [res_q, res_r] [arg_x, arg_y]
762
    = emit $ mkAssign (CmmLocal res_q)
763
               (CmmMachOp (MO_U_Quot (wordWidth dflags)) [arg_x, arg_y]) <*>
764
             mkAssign (CmmLocal res_r)
765 766
               (CmmMachOp (MO_U_Rem  (wordWidth dflags)) [arg_x, arg_y])
genericWordQuotRemOp _ _ _ = panic "genericWordQuotRemOp"
767

768 769
genericWordQuotRem2Op :: DynFlags -> GenericOp
genericWordQuotRem2Op dflags [res_q, res_r] [arg_x_high, arg_x_low, arg_y]
770
    = emit =<< f (widthInBits (wordWidth dflags)) zero arg_x_high arg_x_low
771
    where    ty = cmmExprType dflags arg_x_high
772 773 774 775 776 777 778
             shl   x i = CmmMachOp (MO_Shl   (wordWidth dflags)) [x, i]
             shr   x i = CmmMachOp (MO_U_Shr (wordWidth dflags)) [x, i]
             or    x y = CmmMachOp (MO_Or    (wordWidth dflags)) [x, y]
             ge    x y = CmmMachOp (MO_U_Ge  (wordWidth dflags)) [x, y]
             ne    x y = CmmMachOp (MO_Ne    (wordWidth dflags)) [x, y]
             minus x y = CmmMachOp (MO_Sub   (wordWidth dflags)) [x, y]
             times x y = CmmMachOp (MO_Mul   (wordWidth dflags)) [x, y]
779 780
             zero   = lit 0
             one    = lit 1
781 782
             negone = lit (fromIntegral (widthInBits (wordWidth dflags)) - 1)
             lit i = CmmLit (CmmInt i (wordWidth dflags))
783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814

             f :: Int -> CmmExpr -> CmmExpr -> CmmExpr -> FCode CmmAGraph
             f 0 acc high _ = return (mkAssign (CmmLocal res_q) acc <*>
                                      mkAssign (CmmLocal res_r) high)
             f i acc high low =
                 do roverflowedBit <- newTemp ty
                    rhigh'         <- newTemp ty
                    rhigh''        <- newTemp ty
                    rlow'          <- newTemp ty
                    risge          <- newTemp ty
                    racc'          <- newTemp ty
                    let high'         = CmmReg (CmmLocal rhigh')
                        isge          = CmmReg (CmmLocal risge)
                        overflowedBit = CmmReg (CmmLocal roverflowedBit)
                    let this = catAGraphs
                               [mkAssign (CmmLocal roverflowedBit)
                                          (shr high negone),
                                mkAssign (CmmLocal rhigh')
                                          (or (shl high one) (shr low negone)),
                                mkAssign (CmmLocal rlow')
                                          (shl low one),
                                mkAssign (CmmLocal risge)
                                          (or (overflowedBit `ne` zero)
                                              (high' `ge` arg_y)),
                                mkAssign (CmmLocal rhigh'')
                                          (high' `minus` (arg_y `times` isge)),
                                mkAssign (CmmLocal racc')
                                          (or (shl acc one) isge)]
                    rest <- f (i - 1) (CmmReg (CmmLocal racc'))
                                      (CmmReg (CmmLocal rhigh''))
                                      (CmmReg (CmmLocal rlow'))
                    return (this <*> rest)
815
genericWordQuotRem2Op _ _ _ = panic "genericWordQuotRem2Op"
816 817 818

genericWordAdd2Op :: GenericOp
genericWordAdd2Op [res_h, res_l] [arg_x, arg_y]
819
  = do dflags <- getDynFlags
820 821
       r1 <- newTemp (cmmExprType dflags arg_x)
       r2 <- newTemp (cmmExprType dflags arg_x)
822 823 824 825 826
       let topHalf x = CmmMachOp (MO_U_Shr (wordWidth dflags)) [x, hww]
           toTopHalf x = CmmMachOp (MO_Shl (wordWidth dflags)) [x, hww]
           bottomHalf x = CmmMachOp (MO_And (wordWidth dflags)) [x, hwm]
           add x y = CmmMachOp (MO_Add (wordWidth dflags)) [x, y]
           or x y = CmmMachOp (MO_Or (wordWidth dflags)) [x, y]
827
           hww = CmmLit (CmmInt (fromIntegral (widthInBits (halfWordWidth dflags)))
828 829
                                (wordWidth dflags))
           hwm = CmmLit (CmmInt (halfWordMask dflags) (wordWidth dflags))
830 831 832 833 834 835 836 837 838 839 840 841 842 843 844
       emit $ catAGraphs
          [mkAssign (CmmLocal r1)
               (add (bottomHalf arg_x) (bottomHalf arg_y)),
           mkAssign (CmmLocal r2)
               (add (topHalf (CmmReg (CmmLocal r1)))
                    (add (topHalf arg_x) (topHalf arg_y))),
           mkAssign (CmmLocal res_h)
               (topHalf (CmmReg (CmmLocal r2))),
           mkAssign (CmmLocal res_l)
               (or (toTopHalf (CmmReg (CmmLocal r2)))
                   (bottomHalf (CmmReg (CmmLocal r1))))]
genericWordAdd2Op _ _ = panic "genericWordAdd2Op"

genericWordMul2Op :: GenericOp
genericWordMul2Op [res_h, res_l] [arg_x, arg_y]
845
 = do dflags <- getDynFlags
846
      let t = cmmExprType dflags arg_x
847 848 849 850 851 852