StgCmmPrim.hs 98.7 KB
Newer Older
1 2
{-# LANGUAGE CPP #-}

3
----------------------------------------------------------------------------
4 5 6 7 8 9 10 11
--
-- Stg to C--: primitive operations
--
-- (c) The University of Glasgow 2004-2006
--
-----------------------------------------------------------------------------

module StgCmmPrim (
12
   cgOpApp,
13 14 15
   cgPrimOp, -- internal(ish), used by cgCase to get code for a
             -- comparison without also turning it into a Bool.
   shouldInlinePrimOp
16 17 18 19
 ) where

#include "HsVersions.h"

20 21
import GhcPrelude hiding ((<*>))

22 23 24 25 26
import StgCmmLayout
import StgCmmForeign
import StgCmmEnv
import StgCmmMonad
import StgCmmUtils
27 28
import StgCmmTicky
import StgCmmHeap
29
import StgCmmProf ( costCentreFrom, curCCS )
30

31 32
import DynFlags
import Platform
33
import BasicTypes
34
import BlockId
35
import MkGraph
36
import StgSyn
37
import Cmm
38
import CmmInfo
39
import Type     ( Type, tyConAppTyCon )
40 41 42 43 44 45 46
import TyCon
import CLabel
import CmmUtils
import PrimOp
import SMRep
import FastString
import Outputable
47
import Util
48

tibbe's avatar
tibbe committed
49
import Data.Bits ((.&.), bit)
50
import Control.Monad (liftM, when, unless)
51

52
------------------------------------------------------------------------
53
--      Primitive operations and foreign calls
54 55 56 57 58 59
------------------------------------------------------------------------

{- Note [Foreign call results]
   ~~~~~~~~~~~~~~~~~~~~~~~~~~~
A foreign call always returns an unboxed tuple of results, one
of which is the state token.  This seems to happen even for pure
60
calls.
61 62 63 64 65 66

Even if we returned a single result for pure calls, it'd still be
right to wrap it in a singleton unboxed tuple, because the result
might be a Haskell closure pointer, we don't want to evaluate it. -}

----------------------------------
67 68 69
cgOpApp :: StgOp        -- The op
        -> [StgArg]     -- Arguments
        -> Type         -- Result type (always an unboxed tuple)
70
        -> FCode ReturnKind
71

72 73
-- Foreign calls
cgOpApp (StgFCallOp fcall _) stg_args res_ty
74 75 76
  = cgForeignCall fcall stg_args res_ty
      -- Note [Foreign call results]

77
-- tagToEnum# is special: we need to pull the constructor
78 79
-- out of the table, and perform an appropriate return.

80
cgOpApp (StgPrimOp TagToEnumOp) [arg] res_ty
81
  = ASSERT(isEnumerationTyCon tycon)
82
    do  { dflags <- getDynFlags
83
        ; args' <- getNonVoidArgAmodes [arg]
84 85
        ; let amode = case args' of [amode] -> amode
                                    _ -> panic "TagToEnumOp had void arg"
86
        ; emitReturn [tagToClosure dflags tycon amode] }
87
   where
88 89 90 91 92 93
          -- If you're reading this code in the attempt to figure
          -- out why the compiler panic'ed here, it is probably because
          -- you used tagToEnum# in a non-monomorphic setting, e.g.,
          --         intToTg :: Enum a => Int -> a ; intToTg (I# x#) = tagToEnum# x#
          -- That won't work.
        tycon = tyConAppTyCon res_ty
94

95 96
cgOpApp (StgPrimOp primop) args res_ty = do
    dflags <- getDynFlags
97
    cmm_args <- getNonVoidArgAmodes args
98
    case shouldInlinePrimOp dflags primop cmm_args of
99 100
        Nothing -> do  -- out-of-line
          let fun = CmmLit (CmmLabel (mkRtsPrimOpLabel primop))
101
          emitCall (NativeNodeCall, NativeReturn) fun cmm_args
102

103
        Just f  -- inline
104 105 106 107 108 109 110 111
          | ReturnsPrim VoidRep <- result_info
          -> do f []
                emitReturn []

          | ReturnsPrim rep <- result_info
          -> do dflags <- getDynFlags
                res <- newTemp (primRepCmmType dflags rep)
                f [res]
112
                emitReturn [CmmReg (CmmLocal res)]
113 114 115 116

          | ReturnsAlg tycon <- result_info, isUnboxedTupleTyCon tycon
          -> do (regs, _hints) <- newUnboxedTupleRegs res_ty
                f regs
117
                emitReturn (map (CmmReg . CmmLocal) regs)
118 119 120 121

          | otherwise -> panic "cgPrimop"
          where
             result_info = getPrimOpResultInfo primop
122

123
cgOpApp (StgPrimCallOp primcall) args _res_ty
124
  = do  { cmm_args <- getNonVoidArgAmodes args
125
        ; let fun = CmmLit (CmmLabel (mkPrimCallLabel primcall))
126
        ; emitCall (NativeNodeCall, NativeReturn) fun cmm_args }
127

tibbe's avatar
tibbe committed
128 129 130 131 132 133 134 135 136 137 138 139 140 141 142
-- | Interpret the argument as an unsigned value, assuming the value
-- is given in two-complement form in the given width.
--
-- Example: @asUnsigned W64 (-1)@ is 18446744073709551615.
--
-- This function is used to work around the fact that many array
-- primops take Int# arguments, but we interpret them as unsigned
-- quantities in the code gen. This means that we have to be careful
-- every time we work on e.g. a CmmInt literal that corresponds to the
-- array size, as it might contain a negative Integer value if the
-- user passed a value larger than 2^(wORD_SIZE_IN_BITS-1) as the Int#
-- literal.
asUnsigned :: Width -> Integer -> Integer
asUnsigned w n = n .&. (bit (widthInBits w) - 1)

143 144 145 146 147
-- TODO: Several primop implementations (e.g. 'doNewByteArrayOp') use
--     ByteOff (or some other fixed width signed type) to represent
--     array sizes or indices. This means that these will overflow for
--     large enough sizes.

148 149 150 151 152 153 154 155 156 157 158 159 160
-- | Decide whether an out-of-line primop should be replaced by an
-- inline implementation. This might happen e.g. if there's enough
-- static information, such as statically know arguments, to emit a
-- more efficient implementation inline.
--
-- Returns 'Nothing' if this primop should use its out-of-line
-- implementation (defined elsewhere) and 'Just' together with a code
-- generating function that takes the output regs as arguments
-- otherwise.
shouldInlinePrimOp :: DynFlags
                   -> PrimOp     -- ^ The primop
                   -> [CmmExpr]  -- ^ The primop arguments
                   -> Maybe ([LocalReg] -> FCode ())
161

tibbe's avatar
tibbe committed
162 163
shouldInlinePrimOp dflags NewByteArrayOp_Char [(CmmLit (CmmInt n w))]
  | asUnsigned w n <= fromIntegral (maxInlineAllocSize dflags) =
164
      Just $ \ [res] -> doNewByteArrayOp res (fromInteger n)
165

tibbe's avatar
tibbe committed
166 167
shouldInlinePrimOp dflags NewArrayOp [(CmmLit (CmmInt n w)), init]
  | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
168 169 170 171 172 173 174 175
      Just $ \ [res] ->
      doNewArrayOp res (arrPtrsRep dflags (fromInteger n)) mkMAP_DIRTY_infoLabel
      [ (mkIntExpr dflags (fromInteger n),
         fixedHdrSize dflags + oFFSET_StgMutArrPtrs_ptrs dflags)
      , (mkIntExpr dflags (nonHdrSizeW (arrPtrsRep dflags (fromInteger n))),
         fixedHdrSize dflags + oFFSET_StgMutArrPtrs_size dflags)
      ]
      (fromInteger n) init
176

177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192
shouldInlinePrimOp _ CopyArrayOp
    [src, src_off, dst, dst_off, (CmmLit (CmmInt n _))] =
        Just $ \ [] -> doCopyArrayOp src src_off dst dst_off (fromInteger n)

shouldInlinePrimOp _ CopyMutableArrayOp
    [src, src_off, dst, dst_off, (CmmLit (CmmInt n _))] =
        Just $ \ [] -> doCopyMutableArrayOp src src_off dst dst_off (fromInteger n)

shouldInlinePrimOp _ CopyArrayArrayOp
    [src, src_off, dst, dst_off, (CmmLit (CmmInt n _))] =
        Just $ \ [] -> doCopyArrayOp src src_off dst dst_off (fromInteger n)

shouldInlinePrimOp _ CopyMutableArrayArrayOp
    [src, src_off, dst, dst_off, (CmmLit (CmmInt n _))] =
        Just $ \ [] -> doCopyMutableArrayOp src src_off dst dst_off (fromInteger n)

tibbe's avatar
tibbe committed
193 194
shouldInlinePrimOp dflags CloneArrayOp [src, src_off, (CmmLit (CmmInt n w))]
  | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
195 196
      Just $ \ [res] -> emitCloneArray mkMAP_FROZEN_infoLabel res src src_off (fromInteger n)

tibbe's avatar
tibbe committed
197 198
shouldInlinePrimOp dflags CloneMutableArrayOp [src, src_off, (CmmLit (CmmInt n w))]
  | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
199 200
      Just $ \ [res] -> emitCloneArray mkMAP_DIRTY_infoLabel res src src_off (fromInteger n)

tibbe's avatar
tibbe committed
201 202
shouldInlinePrimOp dflags FreezeArrayOp [src, src_off, (CmmLit (CmmInt n w))]
  | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
203 204
      Just $ \ [res] -> emitCloneArray mkMAP_FROZEN_infoLabel res src src_off (fromInteger n)

tibbe's avatar
tibbe committed
205 206
shouldInlinePrimOp dflags ThawArrayOp [src, src_off, (CmmLit (CmmInt n w))]
  | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
207 208
      Just $ \ [res] -> emitCloneArray mkMAP_DIRTY_infoLabel res src src_off (fromInteger n)

tibbe's avatar
tibbe committed
209 210
shouldInlinePrimOp dflags NewSmallArrayOp [(CmmLit (CmmInt n w)), init]
  | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
211 212 213 214 215 216 217
      Just $ \ [res] ->
      doNewArrayOp res (smallArrPtrsRep (fromInteger n)) mkSMAP_DIRTY_infoLabel
      [ (mkIntExpr dflags (fromInteger n),
         fixedHdrSize dflags + oFFSET_StgSmallMutArrPtrs_ptrs dflags)
      ]
      (fromInteger n) init

218 219 220 221 222 223 224 225
shouldInlinePrimOp _ CopySmallArrayOp
    [src, src_off, dst, dst_off, (CmmLit (CmmInt n _))] =
        Just $ \ [] -> doCopySmallArrayOp src src_off dst dst_off (fromInteger n)

shouldInlinePrimOp _ CopySmallMutableArrayOp
    [src, src_off, dst, dst_off, (CmmLit (CmmInt n _))] =
        Just $ \ [] -> doCopySmallMutableArrayOp src src_off dst dst_off (fromInteger n)

tibbe's avatar
tibbe committed
226 227
shouldInlinePrimOp dflags CloneSmallArrayOp [src, src_off, (CmmLit (CmmInt n w))]
  | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
228 229
      Just $ \ [res] -> emitCloneSmallArray mkSMAP_FROZEN_infoLabel res src src_off (fromInteger n)

tibbe's avatar
tibbe committed
230 231
shouldInlinePrimOp dflags CloneSmallMutableArrayOp [src, src_off, (CmmLit (CmmInt n w))]
  | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
232 233
      Just $ \ [res] -> emitCloneSmallArray mkSMAP_DIRTY_infoLabel res src src_off (fromInteger n)

tibbe's avatar
tibbe committed
234 235
shouldInlinePrimOp dflags FreezeSmallArrayOp [src, src_off, (CmmLit (CmmInt n w))]
  | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
236 237
      Just $ \ [res] -> emitCloneSmallArray mkSMAP_FROZEN_infoLabel res src src_off (fromInteger n)

tibbe's avatar
tibbe committed
238 239
shouldInlinePrimOp dflags ThawSmallArrayOp [src, src_off, (CmmLit (CmmInt n w))]
  | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
240 241
      Just $ \ [res] -> emitCloneSmallArray mkSMAP_DIRTY_infoLabel res src src_off (fromInteger n)

242 243 244 245 246 247 248 249 250 251
shouldInlinePrimOp dflags primop args
  | primOpOutOfLine primop = Nothing
  | otherwise = Just $ \ regs -> emitPrimOp dflags regs primop args

-- TODO: Several primops, such as 'copyArray#', only have an inline
-- implementation (below) but could possibly have both an inline
-- implementation and an out-of-line implementation, just like
-- 'newArray#'. This would lower the amount of code generated,
-- hopefully without a performance impact (needs to be measured).

252
---------------------------------------------------
253 254 255 256
cgPrimOp   :: [LocalReg]        -- where to put the results
           -> PrimOp            -- the op
           -> [StgArg]          -- arguments
           -> FCode ()
257 258

cgPrimOp results op args
259
  = do dflags <- getDynFlags
260
       arg_exprs <- getNonVoidArgAmodes args
261
       emitPrimOp dflags results op arg_exprs
262 263 264


------------------------------------------------------------------------
265
--      Emitting code for a primop
266 267
------------------------------------------------------------------------

268
emitPrimOp :: DynFlags
269 270 271 272
           -> [LocalReg]        -- where to put the results
           -> PrimOp            -- the op
           -> [CmmExpr]         -- arguments
           -> FCode ()
273 274 275 276

-- First we handle various awkward cases specially.  The remaining
-- easy cases are then handled by translateOp, defined below.

277
emitPrimOp _ [res] ParOp [arg]
278 279 280
  =
        -- for now, just implement this in a C function
        -- later, we might want to inline it.
281
    emitCCall
282
        [(res,NoHint)]
283
        (CmmLit (CmmLabel (mkForeignLabel (fsLit "newSpark") Nothing ForeignLabelInExternalPackage IsFunction)))
284
        [(CmmReg (CmmGlobal BaseReg), AddrHint), (arg,AddrHint)]
285

286
emitPrimOp dflags [res] SparkOp [arg]
287 288 289 290 291
  = do
        -- returns the value of arg in res.  We're going to therefore
        -- refer to arg twice (once to pass to newSpark(), and once to
        -- assign to res), so put it in a temporary.
        tmp <- assignTemp arg
292
        tmp2 <- newTemp (bWord dflags)
293
        emitCCall
294
            [(tmp2,NoHint)]
295
            (CmmLit (CmmLabel (mkForeignLabel (fsLit "newSpark") Nothing ForeignLabelInExternalPackage IsFunction)))
296
            [(CmmReg (CmmGlobal BaseReg), AddrHint), ((CmmReg (CmmLocal tmp)), AddrHint)]
297
        emitAssign (CmmLocal res) (CmmReg (CmmLocal tmp))
298

299 300
emitPrimOp dflags [res] GetCCSOfOp [arg]
  = emitAssign (CmmLocal res) val
301
  where
302
    val
ian@well-typed.com's avatar
ian@well-typed.com committed
303
     | gopt Opt_SccProfilingOn dflags = costCentreFrom dflags (cmmUntag dflags arg)
304
     | otherwise                      = CmmLit (zeroCLit dflags)
305

306
emitPrimOp _ [res] GetCurrentCCSOp [_dummy_arg]
307
   = emitAssign (CmmLocal res) curCCS
308

309
emitPrimOp dflags [res] ReadMutVarOp [mutv]
310
   = emitAssign (CmmLocal res) (cmmLoadIndexW dflags mutv (fixedHdrSizeW dflags) (gcWord dflags))
311

312 313 314 315 316
emitPrimOp dflags res@[] WriteMutVarOp [mutv,var]
   = do -- Without this write barrier, other CPUs may see this pointer before
        -- the writes for the closure it points to have occurred.
        emitPrimCall res MO_WriteBarrier []
        emitStore (cmmOffsetW dflags mutv (fixedHdrSizeW dflags)) var
317 318 319 320
        emitCCall
                [{-no results-}]
                (CmmLit (CmmLabel mkDirty_MUT_VAR_Label))
                [(CmmReg (CmmGlobal BaseReg), AddrHint), (mutv,AddrHint)]
321 322

--  #define sizzeofByteArrayzh(r,a) \
siddhanathan's avatar
siddhanathan committed
323
--     r = ((StgArrBytes *)(a))->bytes
324
emitPrimOp dflags [res] SizeofByteArrayOp [arg]
325
   = emit $ mkAssign (CmmLocal res) (cmmLoadIndexW dflags arg (fixedHdrSizeW dflags) (bWord dflags))
326 327

--  #define sizzeofMutableByteArrayzh(r,a) \
siddhanathan's avatar
siddhanathan committed
328
--      r = ((StgArrBytes *)(a))->bytes
329 330
emitPrimOp dflags [res] SizeofMutableByteArrayOp [arg]
   = emitPrimOp dflags [res] SizeofByteArrayOp [arg]
331

332
--  #define getSizzeofMutableByteArrayzh(r,a) \
siddhanathan's avatar
siddhanathan committed
333
--      r = ((StgArrBytes *)(a))->bytes
334 335 336
emitPrimOp dflags [res] GetSizeofMutableByteArrayOp [arg]
   = emitAssign (CmmLocal res) (cmmLoadIndexW dflags arg (fixedHdrSizeW dflags) (bWord dflags))

337 338

--  #define touchzh(o)                  /* nothing */
339
emitPrimOp _ res@[] TouchOp args@[_arg]
340
   = do emitPrimCall res MO_Touch args
341 342

--  #define byteArrayContentszh(r,a) r = BYTE_ARR_CTS(a)
343 344
emitPrimOp dflags [res] ByteArrayContents_Char [arg]
   = emitAssign (CmmLocal res) (cmmOffsetB dflags arg (arrWordsHdrSize dflags))
345 346

--  #define stableNameToIntzh(r,s)   (r = ((StgStableName *)s)->sn)
347
emitPrimOp dflags [res] StableNameToIntOp [arg]
348
   = emitAssign (CmmLocal res) (cmmLoadIndexW dflags arg (fixedHdrSizeW dflags) (bWord dflags))
349

350
--  #define eqStableNamezh(r,sn1,sn2)                                   \
351
--    (r = (((StgStableName *)sn1)->sn == ((StgStableName *)sn2)->sn))
352
emitPrimOp dflags [res] EqStableNameOp [arg1,arg2]
353
   = emitAssign (CmmLocal res) (CmmMachOp (mo_wordEq dflags) [
354 355
                                   cmmLoadIndexW dflags arg1 (fixedHdrSizeW dflags) (bWord dflags),
                                   cmmLoadIndexW dflags arg2 (fixedHdrSizeW dflags) (bWord dflags)
356
                         ])
357

358 359
emitPrimOp dflags [res] ReallyUnsafePtrEqualityOp [arg1,arg2]
   = emitAssign (CmmLocal res) (CmmMachOp (mo_wordEq dflags) [arg1,arg2])
360 361

--  #define addrToHValuezh(r,a) r=(P_)a
362
emitPrimOp _      [res] AddrToAnyOp [arg]
363
   = emitAssign (CmmLocal res) arg
364

gcampax's avatar
gcampax committed
365 366 367 368
--  #define hvalueToAddrzh(r, a) r=(W_)a
emitPrimOp _      [res] AnyToAddrOp [arg]
   = emitAssign (CmmLocal res) arg

369 370
--  #define dataToTagzh(r,a)  r=(GET_TAG(((StgClosure *)a)->header.info))
--  Note: argument may be tagged!
371
emitPrimOp dflags [res] DataToTagOp [arg]
372
   = emitAssign (CmmLocal res) (getConstrTag dflags (cmmUntag dflags arg))
373 374 375 376

{- Freezing arrays-of-ptrs requires changing an info table, for the
   benefit of the generational collector.  It needs to scavenge mutable
   objects, even if they are in old space.  When they become immutable,
377
   they can be removed from this scavenge list.  -}
378 379

--  #define unsafeFreezzeArrayzh(r,a)
380
--      {
381
--        SET_INFO((StgClosure *)a,&stg_MUT_ARR_PTRS_FROZEN0_info);
382 383
--        r = a;
--      }
384
emitPrimOp _      [res] UnsafeFreezeArrayOp [arg]
385
   = emit $ catAGraphs
386
   [ setInfo arg (CmmLit (CmmLabel mkMAP_FROZEN0_infoLabel)),
387
     mkAssign (CmmLocal res) arg ]
388
emitPrimOp _      [res] UnsafeFreezeArrayArrayOp [arg]
389
   = emit $ catAGraphs
390
   [ setInfo arg (CmmLit (CmmLabel mkMAP_FROZEN0_infoLabel)),
391
     mkAssign (CmmLocal res) arg ]
392 393 394 395
emitPrimOp _      [res] UnsafeFreezeSmallArrayOp [arg]
   = emit $ catAGraphs
   [ setInfo arg (CmmLit (CmmLabel mkSMAP_FROZEN0_infoLabel)),
     mkAssign (CmmLocal res) arg ]
396

397
--  #define unsafeFreezzeByteArrayzh(r,a)       r=(a)
398
emitPrimOp _      [res] UnsafeFreezeByteArrayOp [arg]
399
   = emitAssign (CmmLocal res) arg
400 401 402

-- Reading/writing pointer arrays

403 404 405 406 407 408 409 410 411 412 413 414 415 416 417
emitPrimOp _      [res] ReadArrayOp  [obj,ix]    = doReadPtrArrayOp res obj ix
emitPrimOp _      [res] IndexArrayOp [obj,ix]    = doReadPtrArrayOp res obj ix
emitPrimOp _      []  WriteArrayOp [obj,ix,v]  = doWritePtrArrayOp obj ix v

emitPrimOp _      [res] IndexArrayArrayOp_ByteArray         [obj,ix]   = doReadPtrArrayOp res obj ix
emitPrimOp _      [res] IndexArrayArrayOp_ArrayArray        [obj,ix]   = doReadPtrArrayOp res obj ix
emitPrimOp _      [res] ReadArrayArrayOp_ByteArray          [obj,ix]   = doReadPtrArrayOp res obj ix
emitPrimOp _      [res] ReadArrayArrayOp_MutableByteArray   [obj,ix]   = doReadPtrArrayOp res obj ix
emitPrimOp _      [res] ReadArrayArrayOp_ArrayArray         [obj,ix]   = doReadPtrArrayOp res obj ix
emitPrimOp _      [res] ReadArrayArrayOp_MutableArrayArray  [obj,ix]   = doReadPtrArrayOp res obj ix
emitPrimOp _      []  WriteArrayArrayOp_ByteArray         [obj,ix,v] = doWritePtrArrayOp obj ix v
emitPrimOp _      []  WriteArrayArrayOp_MutableByteArray  [obj,ix,v] = doWritePtrArrayOp obj ix v
emitPrimOp _      []  WriteArrayArrayOp_ArrayArray        [obj,ix,v] = doWritePtrArrayOp obj ix v
emitPrimOp _      []  WriteArrayArrayOp_MutableArrayArray [obj,ix,v] = doWritePtrArrayOp obj ix v

418 419 420 421 422 423
emitPrimOp _      [res] ReadSmallArrayOp  [obj,ix] = doReadSmallPtrArrayOp res obj ix
emitPrimOp _      [res] IndexSmallArrayOp [obj,ix] = doReadSmallPtrArrayOp res obj ix
emitPrimOp _      []  WriteSmallArrayOp [obj,ix,v] = doWriteSmallPtrArrayOp obj ix v

-- Getting the size of pointer arrays

424
emitPrimOp dflags [res] SizeofArrayOp [arg]
Ben Gamari's avatar
Ben Gamari committed
425 426 427
   = emit $ mkAssign (CmmLocal res) (cmmLoadIndexW dflags arg
    (fixedHdrSizeW dflags + bytesToWordsRoundUp dflags (oFFSET_StgMutArrPtrs_ptrs dflags))
        (bWord dflags))
428 429 430 431 432 433
emitPrimOp dflags [res] SizeofMutableArrayOp [arg]
   = emitPrimOp dflags [res] SizeofArrayOp [arg]
emitPrimOp dflags [res] SizeofArrayArrayOp [arg]
   = emitPrimOp dflags [res] SizeofArrayOp [arg]
emitPrimOp dflags [res] SizeofMutableArrayArrayOp [arg]
   = emitPrimOp dflags [res] SizeofArrayOp [arg]
pumpkin's avatar
pumpkin committed
434

435 436 437
emitPrimOp dflags [res] SizeofSmallArrayOp [arg] =
    emit $ mkAssign (CmmLocal res)
    (cmmLoadIndexW dflags arg
Ben Gamari's avatar
Ben Gamari committed
438 439
     (fixedHdrSizeW dflags + bytesToWordsRoundUp dflags (oFFSET_StgSmallMutArrPtrs_ptrs dflags))
        (bWord dflags))
440 441 442
emitPrimOp dflags [res] SizeofSmallMutableArrayOp [arg] =
    emitPrimOp dflags [res] SizeofSmallArrayOp [arg]

443 444
-- IndexXXXoffAddr

445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460
emitPrimOp dflags res IndexOffAddrOp_Char             args = doIndexOffAddrOp   (Just (mo_u_8ToWord dflags)) b8 res args
emitPrimOp dflags res IndexOffAddrOp_WideChar         args = doIndexOffAddrOp   (Just (mo_u_32ToWord dflags)) b32 res args
emitPrimOp dflags res IndexOffAddrOp_Int              args = doIndexOffAddrOp   Nothing (bWord dflags) res args
emitPrimOp dflags res IndexOffAddrOp_Word             args = doIndexOffAddrOp   Nothing (bWord dflags) res args
emitPrimOp dflags res IndexOffAddrOp_Addr             args = doIndexOffAddrOp   Nothing (bWord dflags) res args
emitPrimOp _      res IndexOffAddrOp_Float            args = doIndexOffAddrOp   Nothing f32 res args
emitPrimOp _      res IndexOffAddrOp_Double           args = doIndexOffAddrOp   Nothing f64 res args
emitPrimOp dflags res IndexOffAddrOp_StablePtr        args = doIndexOffAddrOp   Nothing (bWord dflags) res args
emitPrimOp dflags res IndexOffAddrOp_Int8             args = doIndexOffAddrOp   (Just (mo_s_8ToWord dflags)) b8  res args
emitPrimOp dflags res IndexOffAddrOp_Int16            args = doIndexOffAddrOp   (Just (mo_s_16ToWord dflags)) b16 res args
emitPrimOp dflags res IndexOffAddrOp_Int32            args = doIndexOffAddrOp   (Just (mo_s_32ToWord dflags)) b32 res args
emitPrimOp _      res IndexOffAddrOp_Int64            args = doIndexOffAddrOp   Nothing b64 res args
emitPrimOp dflags res IndexOffAddrOp_Word8            args = doIndexOffAddrOp   (Just (mo_u_8ToWord dflags)) b8  res args
emitPrimOp dflags res IndexOffAddrOp_Word16           args = doIndexOffAddrOp   (Just (mo_u_16ToWord dflags)) b16 res args
emitPrimOp dflags res IndexOffAddrOp_Word32           args = doIndexOffAddrOp   (Just (mo_u_32ToWord dflags)) b32 res args
emitPrimOp _      res IndexOffAddrOp_Word64           args = doIndexOffAddrOp   Nothing b64 res args
461 462 463

-- ReadXXXoffAddr, which are identical, for our purposes, to IndexXXXoffAddr.

464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479
emitPrimOp dflags res ReadOffAddrOp_Char             args = doIndexOffAddrOp   (Just (mo_u_8ToWord dflags)) b8 res args
emitPrimOp dflags res ReadOffAddrOp_WideChar         args = doIndexOffAddrOp   (Just (mo_u_32ToWord dflags)) b32 res args
emitPrimOp dflags res ReadOffAddrOp_Int              args = doIndexOffAddrOp   Nothing (bWord dflags) res args
emitPrimOp dflags res ReadOffAddrOp_Word             args = doIndexOffAddrOp   Nothing (bWord dflags) res args
emitPrimOp dflags res ReadOffAddrOp_Addr             args = doIndexOffAddrOp   Nothing (bWord dflags) res args
emitPrimOp _      res ReadOffAddrOp_Float            args = doIndexOffAddrOp   Nothing f32 res args
emitPrimOp _      res ReadOffAddrOp_Double           args = doIndexOffAddrOp   Nothing f64 res args
emitPrimOp dflags res ReadOffAddrOp_StablePtr        args = doIndexOffAddrOp   Nothing (bWord dflags) res args
emitPrimOp dflags res ReadOffAddrOp_Int8             args = doIndexOffAddrOp   (Just (mo_s_8ToWord dflags)) b8  res args
emitPrimOp dflags res ReadOffAddrOp_Int16            args = doIndexOffAddrOp   (Just (mo_s_16ToWord dflags)) b16 res args
emitPrimOp dflags res ReadOffAddrOp_Int32            args = doIndexOffAddrOp   (Just (mo_s_32ToWord dflags)) b32 res args
emitPrimOp _      res ReadOffAddrOp_Int64            args = doIndexOffAddrOp   Nothing b64 res args
emitPrimOp dflags res ReadOffAddrOp_Word8            args = doIndexOffAddrOp   (Just (mo_u_8ToWord dflags)) b8  res args
emitPrimOp dflags res ReadOffAddrOp_Word16           args = doIndexOffAddrOp   (Just (mo_u_16ToWord dflags)) b16 res args
emitPrimOp dflags res ReadOffAddrOp_Word32           args = doIndexOffAddrOp   (Just (mo_u_32ToWord dflags)) b32 res args
emitPrimOp _      res ReadOffAddrOp_Word64           args = doIndexOffAddrOp   Nothing b64 res args
480 481 482

-- IndexXXXArray

483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498
emitPrimOp dflags res IndexByteArrayOp_Char             args = doIndexByteArrayOp   (Just (mo_u_8ToWord dflags)) b8 res args
emitPrimOp dflags res IndexByteArrayOp_WideChar         args = doIndexByteArrayOp   (Just (mo_u_32ToWord dflags)) b32 res args
emitPrimOp dflags res IndexByteArrayOp_Int              args = doIndexByteArrayOp   Nothing (bWord dflags) res args
emitPrimOp dflags res IndexByteArrayOp_Word             args = doIndexByteArrayOp   Nothing (bWord dflags) res args
emitPrimOp dflags res IndexByteArrayOp_Addr             args = doIndexByteArrayOp   Nothing (bWord dflags) res args
emitPrimOp _      res IndexByteArrayOp_Float            args = doIndexByteArrayOp   Nothing f32 res args
emitPrimOp _      res IndexByteArrayOp_Double           args = doIndexByteArrayOp   Nothing f64 res args
emitPrimOp dflags res IndexByteArrayOp_StablePtr        args = doIndexByteArrayOp   Nothing (bWord dflags) res args
emitPrimOp dflags res IndexByteArrayOp_Int8             args = doIndexByteArrayOp   (Just (mo_s_8ToWord dflags)) b8  res args
emitPrimOp dflags res IndexByteArrayOp_Int16            args = doIndexByteArrayOp   (Just (mo_s_16ToWord dflags)) b16  res args
emitPrimOp dflags res IndexByteArrayOp_Int32            args = doIndexByteArrayOp   (Just (mo_s_32ToWord dflags)) b32  res args
emitPrimOp _      res IndexByteArrayOp_Int64            args = doIndexByteArrayOp   Nothing b64  res args
emitPrimOp dflags res IndexByteArrayOp_Word8            args = doIndexByteArrayOp   (Just (mo_u_8ToWord dflags)) b8  res args
emitPrimOp dflags res IndexByteArrayOp_Word16           args = doIndexByteArrayOp   (Just (mo_u_16ToWord dflags)) b16  res args
emitPrimOp dflags res IndexByteArrayOp_Word32           args = doIndexByteArrayOp   (Just (mo_u_32ToWord dflags)) b32  res args
emitPrimOp _      res IndexByteArrayOp_Word64           args = doIndexByteArrayOp   Nothing b64  res args
499 500 501

-- ReadXXXArray, identical to IndexXXXArray.

502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517
emitPrimOp dflags res ReadByteArrayOp_Char             args = doIndexByteArrayOp   (Just (mo_u_8ToWord dflags)) b8 res args
emitPrimOp dflags res ReadByteArrayOp_WideChar         args = doIndexByteArrayOp   (Just (mo_u_32ToWord dflags)) b32 res args
emitPrimOp dflags res ReadByteArrayOp_Int              args = doIndexByteArrayOp   Nothing (bWord dflags) res args
emitPrimOp dflags res ReadByteArrayOp_Word             args = doIndexByteArrayOp   Nothing (bWord dflags) res args
emitPrimOp dflags res ReadByteArrayOp_Addr             args = doIndexByteArrayOp   Nothing (bWord dflags) res args
emitPrimOp _      res ReadByteArrayOp_Float            args = doIndexByteArrayOp   Nothing f32 res args
emitPrimOp _      res ReadByteArrayOp_Double           args = doIndexByteArrayOp   Nothing f64 res args
emitPrimOp dflags res ReadByteArrayOp_StablePtr        args = doIndexByteArrayOp   Nothing (bWord dflags) res args
emitPrimOp dflags res ReadByteArrayOp_Int8             args = doIndexByteArrayOp   (Just (mo_s_8ToWord dflags)) b8  res args
emitPrimOp dflags res ReadByteArrayOp_Int16            args = doIndexByteArrayOp   (Just (mo_s_16ToWord dflags)) b16  res args
emitPrimOp dflags res ReadByteArrayOp_Int32            args = doIndexByteArrayOp   (Just (mo_s_32ToWord dflags)) b32  res args
emitPrimOp _      res ReadByteArrayOp_Int64            args = doIndexByteArrayOp   Nothing b64  res args
emitPrimOp dflags res ReadByteArrayOp_Word8            args = doIndexByteArrayOp   (Just (mo_u_8ToWord dflags)) b8  res args
emitPrimOp dflags res ReadByteArrayOp_Word16           args = doIndexByteArrayOp   (Just (mo_u_16ToWord dflags)) b16  res args
emitPrimOp dflags res ReadByteArrayOp_Word32           args = doIndexByteArrayOp   (Just (mo_u_32ToWord dflags)) b32  res args
emitPrimOp _      res ReadByteArrayOp_Word64           args = doIndexByteArrayOp   Nothing b64  res args
518 519 520

-- WriteXXXoffAddr

521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536
emitPrimOp dflags res WriteOffAddrOp_Char             args = doWriteOffAddrOp (Just (mo_WordTo8 dflags))  b8 res args
emitPrimOp dflags res WriteOffAddrOp_WideChar         args = doWriteOffAddrOp (Just (mo_WordTo32 dflags)) b32 res args
emitPrimOp dflags res WriteOffAddrOp_Int              args = doWriteOffAddrOp Nothing (bWord dflags) res args
emitPrimOp dflags res WriteOffAddrOp_Word             args = doWriteOffAddrOp Nothing (bWord dflags) res args
emitPrimOp dflags res WriteOffAddrOp_Addr             args = doWriteOffAddrOp Nothing (bWord dflags) res args
emitPrimOp _      res WriteOffAddrOp_Float            args = doWriteOffAddrOp Nothing f32 res args
emitPrimOp _      res WriteOffAddrOp_Double           args = doWriteOffAddrOp Nothing f64 res args
emitPrimOp dflags res WriteOffAddrOp_StablePtr        args = doWriteOffAddrOp Nothing (bWord dflags) res args
emitPrimOp dflags res WriteOffAddrOp_Int8             args = doWriteOffAddrOp (Just (mo_WordTo8 dflags))  b8 res args
emitPrimOp dflags res WriteOffAddrOp_Int16            args = doWriteOffAddrOp (Just (mo_WordTo16 dflags)) b16 res args
emitPrimOp dflags res WriteOffAddrOp_Int32            args = doWriteOffAddrOp (Just (mo_WordTo32 dflags)) b32 res args
emitPrimOp _      res WriteOffAddrOp_Int64            args = doWriteOffAddrOp Nothing b64 res args
emitPrimOp dflags res WriteOffAddrOp_Word8            args = doWriteOffAddrOp (Just (mo_WordTo8 dflags))  b8 res args
emitPrimOp dflags res WriteOffAddrOp_Word16           args = doWriteOffAddrOp (Just (mo_WordTo16 dflags)) b16 res args
emitPrimOp dflags res WriteOffAddrOp_Word32           args = doWriteOffAddrOp (Just (mo_WordTo32 dflags)) b32 res args
emitPrimOp _      res WriteOffAddrOp_Word64           args = doWriteOffAddrOp Nothing b64 res args
537 538 539

-- WriteXXXArray

540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555
emitPrimOp dflags res WriteByteArrayOp_Char             args = doWriteByteArrayOp (Just (mo_WordTo8 dflags))  b8 res args
emitPrimOp dflags res WriteByteArrayOp_WideChar         args = doWriteByteArrayOp (Just (mo_WordTo32 dflags)) b32 res args
emitPrimOp dflags res WriteByteArrayOp_Int              args = doWriteByteArrayOp Nothing (bWord dflags) res args
emitPrimOp dflags res WriteByteArrayOp_Word             args = doWriteByteArrayOp Nothing (bWord dflags) res args
emitPrimOp dflags res WriteByteArrayOp_Addr             args = doWriteByteArrayOp Nothing (bWord dflags) res args
emitPrimOp _      res WriteByteArrayOp_Float            args = doWriteByteArrayOp Nothing f32 res args
emitPrimOp _      res WriteByteArrayOp_Double           args = doWriteByteArrayOp Nothing f64 res args
emitPrimOp dflags res WriteByteArrayOp_StablePtr        args = doWriteByteArrayOp Nothing (bWord dflags) res args
emitPrimOp dflags res WriteByteArrayOp_Int8             args = doWriteByteArrayOp (Just (mo_WordTo8 dflags))  b8 res args
emitPrimOp dflags res WriteByteArrayOp_Int16            args = doWriteByteArrayOp (Just (mo_WordTo16 dflags)) b16 res args
emitPrimOp dflags res WriteByteArrayOp_Int32            args = doWriteByteArrayOp (Just (mo_WordTo32 dflags)) b32 res args
emitPrimOp _      res WriteByteArrayOp_Int64            args = doWriteByteArrayOp Nothing b64 res args
emitPrimOp dflags res WriteByteArrayOp_Word8            args = doWriteByteArrayOp (Just (mo_WordTo8 dflags))  b8  res args
emitPrimOp dflags res WriteByteArrayOp_Word16           args = doWriteByteArrayOp (Just (mo_WordTo16 dflags)) b16 res args
emitPrimOp dflags res WriteByteArrayOp_Word32           args = doWriteByteArrayOp (Just (mo_WordTo32 dflags)) b32 res args
emitPrimOp _      res WriteByteArrayOp_Word64           args = doWriteByteArrayOp Nothing b64 res args
556

Ian Lynagh's avatar
Ian Lynagh committed
557
-- Copying and setting byte arrays
558
emitPrimOp _      [] CopyByteArrayOp [src,src_off,dst,dst_off,n] =
559
    doCopyByteArrayOp src src_off dst dst_off n
560
emitPrimOp _      [] CopyMutableByteArrayOp [src,src_off,dst,dst_off,n] =
561
    doCopyMutableByteArrayOp src src_off dst dst_off n
562 563 564 565 566 567
emitPrimOp _      [] CopyByteArrayToAddrOp [src,src_off,dst,n] =
    doCopyByteArrayToAddrOp src src_off dst n
emitPrimOp _      [] CopyMutableByteArrayToAddrOp [src,src_off,dst,n] =
    doCopyMutableByteArrayToAddrOp src src_off dst n
emitPrimOp _      [] CopyAddrToByteArrayOp [src,dst,dst_off,n] =
    doCopyAddrToByteArrayOp src dst dst_off n
568
emitPrimOp _      [] SetByteArrayOp [ba,off,len,c] =
Ian Lynagh's avatar
Ian Lynagh committed
569
    doSetByteArrayOp ba off len c
570

571 572 573 574
-- Comparing byte arrays
emitPrimOp _      [res] CompareByteArraysOp [ba1,ba1_off,ba2,ba2_off,n] =
    doCompareByteArraysOp res ba1 ba1_off ba2 ba2_off n

575 576 577 578 579
emitPrimOp _      [res] BSwap16Op [w] = emitBSwapCall res w W16
emitPrimOp _      [res] BSwap32Op [w] = emitBSwapCall res w W32
emitPrimOp _      [res] BSwap64Op [w] = emitBSwapCall res w W64
emitPrimOp dflags [res] BSwapOp   [w] = emitBSwapCall res w (wordWidth dflags)

tibbe's avatar
tibbe committed
580
-- Population count
ian@well-typed.com's avatar
ian@well-typed.com committed
581 582 583 584 585
emitPrimOp _      [res] PopCnt8Op  [w] = emitPopCntCall res w W8
emitPrimOp _      [res] PopCnt16Op [w] = emitPopCntCall res w W16
emitPrimOp _      [res] PopCnt32Op [w] = emitPopCntCall res w W32
emitPrimOp _      [res] PopCnt64Op [w] = emitPopCntCall res w W64
emitPrimOp dflags [res] PopCntOp   [w] = emitPopCntCall res w (wordWidth dflags)
tibbe's avatar
tibbe committed
586

587 588 589 590 591 592 593 594 595 596 597 598 599 600
-- count leading zeros
emitPrimOp _      [res] Clz8Op  [w] = emitClzCall res w W8
emitPrimOp _      [res] Clz16Op [w] = emitClzCall res w W16
emitPrimOp _      [res] Clz32Op [w] = emitClzCall res w W32
emitPrimOp _      [res] Clz64Op [w] = emitClzCall res w W64
emitPrimOp dflags [res] ClzOp   [w] = emitClzCall res w (wordWidth dflags)

-- count trailing zeros
emitPrimOp _      [res] Ctz8Op [w]  = emitCtzCall res w W8
emitPrimOp _      [res] Ctz16Op [w] = emitCtzCall res w W16
emitPrimOp _      [res] Ctz32Op [w] = emitCtzCall res w W32
emitPrimOp _      [res] Ctz64Op [w] = emitCtzCall res w W64
emitPrimOp dflags [res] CtzOp   [w] = emitCtzCall res w (wordWidth dflags)

tibbe's avatar
tibbe committed
601 602 603 604 605 606
-- Unsigned int to floating point conversions
emitPrimOp _      [res] Word2FloatOp  [w] = emitPrimCall [res]
                                            (MO_UF_Conv W32) [w]
emitPrimOp _      [res] Word2DoubleOp [w] = emitPrimCall [res]
                                            (MO_UF_Conv W64) [w]

607
-- SIMD primops
608 609
emitPrimOp dflags [res] (VecBroadcastOp vcat n w) [e] = do
    checkVecCompatibility dflags vcat n w
610
    doVecPackOp (vecElemInjectCast dflags vcat w) ty zeros (replicate n e) res
611
  where
612 613 614 615 616 617 618 619 620 621 622 623 624
    zeros :: CmmExpr
    zeros = CmmLit $ CmmVec (replicate n zero)

    zero :: CmmLit
    zero = case vcat of
             IntVec   -> CmmInt 0 w
             WordVec  -> CmmInt 0 w
             FloatVec -> CmmFloat 0 w

    ty :: CmmType
    ty = vecVmmType vcat n w

emitPrimOp dflags [res] (VecPackOp vcat n w) es = do
625
    checkVecCompatibility dflags vcat n w
626
    when (es `lengthIsNot` n) $
627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642
        panic "emitPrimOp: VecPackOp has wrong number of arguments"
    doVecPackOp (vecElemInjectCast dflags vcat w) ty zeros es res
  where
    zeros :: CmmExpr
    zeros = CmmLit $ CmmVec (replicate n zero)

    zero :: CmmLit
    zero = case vcat of
             IntVec   -> CmmInt 0 w
             WordVec  -> CmmInt 0 w
             FloatVec -> CmmFloat 0 w

    ty :: CmmType
    ty = vecVmmType vcat n w

emitPrimOp dflags res (VecUnpackOp vcat n w) [arg] = do
643
    checkVecCompatibility dflags vcat n w
644
    when (res `lengthIsNot` n) $
645 646 647 648 649
        panic "emitPrimOp: VecUnpackOp has wrong number of results"
    doVecUnpackOp (vecElemProjectCast dflags vcat w) ty arg res
  where
    ty :: CmmType
    ty = vecVmmType vcat n w
650

651 652
emitPrimOp dflags [res] (VecInsertOp vcat n w) [v,e,i] = do
    checkVecCompatibility dflags vcat n w
653
    doVecInsertOp (vecElemInjectCast dflags vcat w) ty v e i res
654
  where
655 656
    ty :: CmmType
    ty = vecVmmType vcat n w
657

658 659
emitPrimOp dflags res (VecIndexByteArrayOp vcat n w) args = do
    checkVecCompatibility dflags vcat n w
660 661 662 663
    doIndexByteArrayOp Nothing ty res args
  where
    ty :: CmmType
    ty = vecVmmType vcat n w
664

665 666
emitPrimOp dflags res (VecReadByteArrayOp vcat n w) args = do
    checkVecCompatibility dflags vcat n w
667 668 669 670
    doIndexByteArrayOp Nothing ty res args
  where
    ty :: CmmType
    ty = vecVmmType vcat n w
671

672 673
emitPrimOp dflags res (VecWriteByteArrayOp vcat n w) args = do
    checkVecCompatibility dflags vcat n w
674
    doWriteByteArrayOp Nothing ty res args
675
  where
676 677
    ty :: CmmType
    ty = vecVmmType vcat n w
678

679 680
emitPrimOp dflags res (VecIndexOffAddrOp vcat n w) args = do
    checkVecCompatibility dflags vcat n w
681
    doIndexOffAddrOp Nothing ty res args
682
  where
683 684
    ty :: CmmType
    ty = vecVmmType vcat n w
685

686 687
emitPrimOp dflags res (VecReadOffAddrOp vcat n w) args = do
    checkVecCompatibility dflags vcat n w
688 689 690 691
    doIndexOffAddrOp Nothing ty res args
  where
    ty :: CmmType
    ty = vecVmmType vcat n w
692

693 694
emitPrimOp dflags res (VecWriteOffAddrOp vcat n w) args = do
    checkVecCompatibility dflags vcat n w
695 696 697 698
    doWriteOffAddrOp Nothing ty res args
  where
    ty :: CmmType
    ty = vecVmmType vcat n w
699

700 701
emitPrimOp dflags res (VecIndexScalarByteArrayOp vcat n w) args = do
    checkVecCompatibility dflags vcat n w
702
    doIndexByteArrayOpAs Nothing vecty ty res args
703
  where
704 705
    vecty :: CmmType
    vecty = vecVmmType vcat n w
706

707 708 709
    ty :: CmmType
    ty = vecCmmCat vcat w

710 711
emitPrimOp dflags res (VecReadScalarByteArrayOp vcat n w) args = do
    checkVecCompatibility dflags vcat n w
712
    doIndexByteArrayOpAs Nothing vecty ty res args
713
  where
714 715
    vecty :: CmmType
    vecty = vecVmmType vcat n w
716

717 718
    ty :: CmmType
    ty = vecCmmCat vcat w
719

720 721
emitPrimOp dflags res (VecWriteScalarByteArrayOp vcat n w) args = do
    checkVecCompatibility dflags vcat n w
722 723 724 725
    doWriteByteArrayOp Nothing ty res args
  where
    ty :: CmmType
    ty = vecCmmCat vcat w
726

727 728
emitPrimOp dflags res (VecIndexScalarOffAddrOp vcat n w) args = do
    checkVecCompatibility dflags vcat n w
729
    doIndexOffAddrOpAs Nothing vecty ty res args
730
  where
731 732
    vecty :: CmmType
    vecty = vecVmmType vcat n w
733

734 735 736
    ty :: CmmType
    ty = vecCmmCat vcat w

737 738
emitPrimOp dflags res (VecReadScalarOffAddrOp vcat n w) args = do
    checkVecCompatibility dflags vcat n w
739
    doIndexOffAddrOpAs Nothing vecty ty res args
740
  where
741 742
    vecty :: CmmType
    vecty = vecVmmType vcat n w
743

744 745
    ty :: CmmType
    ty = vecCmmCat vcat w
746

747 748
emitPrimOp dflags res (VecWriteScalarOffAddrOp vcat n w) args = do
    checkVecCompatibility dflags vcat n w
749 750 751 752
    doWriteOffAddrOp Nothing ty res args
  where
    ty :: CmmType
    ty = vecCmmCat vcat w
753

gmainland's avatar
gmainland committed
754
-- Prefetch
755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773
emitPrimOp _ [] PrefetchByteArrayOp3        args = doPrefetchByteArrayOp 3  args
emitPrimOp _ [] PrefetchMutableByteArrayOp3 args = doPrefetchMutableByteArrayOp 3  args
emitPrimOp _ [] PrefetchAddrOp3             args = doPrefetchAddrOp  3  args
emitPrimOp _ [] PrefetchValueOp3            args = doPrefetchValueOp 3 args

emitPrimOp _ [] PrefetchByteArrayOp2        args = doPrefetchByteArrayOp 2  args
emitPrimOp _ [] PrefetchMutableByteArrayOp2 args = doPrefetchMutableByteArrayOp 2  args
emitPrimOp _ [] PrefetchAddrOp2             args = doPrefetchAddrOp 2  args
emitPrimOp _ [] PrefetchValueOp2           args = doPrefetchValueOp 2 args

emitPrimOp _ [] PrefetchByteArrayOp1        args = doPrefetchByteArrayOp 1  args
emitPrimOp _ [] PrefetchMutableByteArrayOp1 args = doPrefetchMutableByteArrayOp 1  args
emitPrimOp _ [] PrefetchAddrOp1             args = doPrefetchAddrOp 1  args
emitPrimOp _ [] PrefetchValueOp1            args = doPrefetchValueOp 1 args

emitPrimOp _ [] PrefetchByteArrayOp0        args = doPrefetchByteArrayOp 0  args
emitPrimOp _ [] PrefetchMutableByteArrayOp0 args = doPrefetchMutableByteArrayOp 0  args
emitPrimOp _ [] PrefetchAddrOp0             args = doPrefetchAddrOp 0  args
emitPrimOp _ [] PrefetchValueOp0            args = doPrefetchValueOp 0 args
774

775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793
-- Atomic read-modify-write
emitPrimOp dflags [res] FetchAddByteArrayOp_Int [mba, ix, n] =
    doAtomicRMW res AMO_Add mba ix (bWord dflags) n
emitPrimOp dflags [res] FetchSubByteArrayOp_Int [mba, ix, n] =
    doAtomicRMW res AMO_Sub mba ix (bWord dflags) n
emitPrimOp dflags [res] FetchAndByteArrayOp_Int [mba, ix, n] =
    doAtomicRMW res AMO_And mba ix (bWord dflags) n
emitPrimOp dflags [res] FetchNandByteArrayOp_Int [mba, ix, n] =
    doAtomicRMW res AMO_Nand mba ix (bWord dflags) n
emitPrimOp dflags [res] FetchOrByteArrayOp_Int [mba, ix, n] =
    doAtomicRMW res AMO_Or mba ix (bWord dflags) n
emitPrimOp dflags [res] FetchXorByteArrayOp_Int [mba, ix, n] =
    doAtomicRMW res AMO_Xor mba ix (bWord dflags) n
emitPrimOp dflags [res] AtomicReadByteArrayOp_Int [mba, ix] =
    doAtomicReadByteArray res mba ix (bWord dflags)
emitPrimOp dflags [] AtomicWriteByteArrayOp_Int [mba, ix, val] =
    doAtomicWriteByteArray mba ix (bWord dflags) val
emitPrimOp dflags [res] CasByteArrayOp_Int [mba, ix, old, new] =
    doCasByteArray res mba ix (bWord dflags) old new
gmainland's avatar
gmainland committed
794

795
-- The rest just translate straightforwardly
796
emitPrimOp dflags [res] op [arg]
797
   | nopOp op
798
   = emitAssign (CmmLocal res) arg
799 800

   | Just (mop,rep) <- narrowOp op
801
   = emitAssign (CmmLocal res) $
802
           CmmMachOp (mop rep (wordWidth dflags)) [CmmMachOp (mop (wordWidth dflags) rep) [arg]]
803

804
emitPrimOp dflags r@[res] op args
805
   | Just prim <- callishOp op
806
   = do emitPrimCall r prim args
807

808
   | Just mop <- translateOp dflags op
809 810 811
   = let stmt = mkAssign (CmmLocal res) (CmmMachOp mop args) in
     emit stmt

812 813
emitPrimOp dflags results op args
   = case callishPrimOpSupported dflags op of
814 815 816 817 818 819 820 821
          Left op   -> emit $ mkUnsafeCall (PrimTarget op) results args
          Right gen -> gen results args

type GenericOp = [CmmFormal] -> [CmmActual] -> FCode ()

callishPrimOpSupported :: DynFlags -> PrimOp -> Either CallishMachOp GenericOp
callishPrimOpSupported dflags op
  = case op of
822 823
      IntQuotRemOp   | ncg && (x86ish
                              || ppc) -> Left (MO_S_QuotRem  (wordWidth dflags))
824
                     | otherwise      -> Right (genericIntQuotRemOp dflags)
825

826 827
      WordQuotRemOp  | ncg && (x86ish
                              || ppc) -> Left (MO_U_QuotRem  (wordWidth dflags))
828
                     | otherwise      -> Right (genericWordQuotRemOp dflags)
829

830 831
      WordQuotRem2Op | (ncg && (x86ish
                                || ppc))
832
                          || llvm     -> Left (MO_U_QuotRem2 (wordWidth dflags))
833
                     | otherwise      -> Right (genericWordQuotRem2Op dflags)
834

835 836
      WordAdd2Op     | (ncg && (x86ish
                                || ppc))
837
                         || llvm      -> Left (MO_Add2       (wordWidth dflags))
838 839
                     | otherwise      -> Right genericWordAdd2Op

840 841
      WordSubCOp     | (ncg && (x86ish
                                || ppc))
nkaretnikov's avatar
nkaretnikov committed
842 843 844
                         || llvm      -> Left (MO_SubWordC   (wordWidth dflags))
                     | otherwise      -> Right genericWordSubCOp

845 846
      IntAddCOp      | (ncg && (x86ish
                                || ppc))
847
                         || llvm      -> Left (MO_AddIntC    (wordWidth dflags))
848
                     | otherwise      -> Right genericIntAddCOp
849

850 851
      IntSubCOp      | (ncg && (x86ish
                                || ppc))
852
                         || llvm      -> Left (MO_SubIntC    (wordWidth dflags))
853
                     | otherwise      -> Right genericIntSubCOp
854

855 856
      WordMul2Op     | ncg && (x86ish
                               || ppc)
857
                         || llvm      -> Left (MO_U_Mul2     (wordWidth dflags))
858
                     | otherwise      -> Right genericWordMul2Op
859 860
      FloatFabsOp    | (ncg && x86ish
                               || ppc)
861 862
                         || llvm      -> Left MO_F32_Fabs
                     | otherwise      -> Right $ genericFabsOp W32
863 864
      DoubleFabsOp   | (ncg && x86ish
                               || ppc)
865 866
                         || llvm      -> Left MO_F64_Fabs
                     | otherwise      -> Right $ genericFabsOp W64
867

Edward Z. Yang's avatar
Edward Z. Yang committed
868
      _ -> pprPanic "emitPrimOp: can't translate PrimOp " (ppr op)
869 870 871 872
 where
  ncg = case hscTarget dflags of
           HscAsm -> True
           _      -> False
873 874 875
  llvm = case hscTarget dflags of
           HscLlvm -> True
           _       -> False
876 877 878 879
  x86ish = case platformArch (targetPlatform dflags) of
             ArchX86    -> True
             ArchX86_64 -> True
             _          -> False
880 881 882 883
  ppc = case platformArch (targetPlatform dflags) of
          ArchPPC      -> True
          ArchPPC_64 _ -> True
          _            -> False
884

885 886
genericIntQuotRemOp :: DynFlags ->