StgCmmPrim.hs 115 KB
Newer Older
1
{-# LANGUAGE CPP #-}
2 3
-- emitPrimOp is quite large
{-# OPTIONS_GHC -fmax-pmcheck-iterations=4000000 #-}
4

5
----------------------------------------------------------------------------
6 7 8 9 10 11 12 13
--
-- Stg to C--: primitive operations
--
-- (c) The University of Glasgow 2004-2006
--
-----------------------------------------------------------------------------

module StgCmmPrim (
14
   cgOpApp,
15 16 17
   cgPrimOp, -- internal(ish), used by cgCase to get code for a
             -- comparison without also turning it into a Bool.
   shouldInlinePrimOp
18 19 20 21
 ) where

#include "HsVersions.h"

22 23
import GhcPrelude hiding ((<*>))

24 25 26 27 28
import StgCmmLayout
import StgCmmForeign
import StgCmmEnv
import StgCmmMonad
import StgCmmUtils
29 30
import StgCmmTicky
import StgCmmHeap
31
import StgCmmProf ( costCentreFrom )
32

33
import DynFlags
John Ericson's avatar
John Ericson committed
34
import GHC.Platform
35
import BasicTypes
36
import BlockId
37
import MkGraph
38
import StgSyn
39
import Cmm
40
import Type     ( Type, tyConAppTyCon )
41 42 43 44 45 46 47
import TyCon
import CLabel
import CmmUtils
import PrimOp
import SMRep
import FastString
import Outputable
48
import Util
49

tibbe's avatar
tibbe committed
50
import Data.Bits ((.&.), bit)
51
import Control.Monad (liftM, when, unless)
52

53
------------------------------------------------------------------------
54
--      Primitive operations and foreign calls
55 56 57 58 59 60
------------------------------------------------------------------------

{- Note [Foreign call results]
   ~~~~~~~~~~~~~~~~~~~~~~~~~~~
A foreign call always returns an unboxed tuple of results, one
of which is the state token.  This seems to happen even for pure
61
calls.
62 63 64 65 66 67

Even if we returned a single result for pure calls, it'd still be
right to wrap it in a singleton unboxed tuple, because the result
might be a Haskell closure pointer, we don't want to evaluate it. -}

----------------------------------
68 69 70
cgOpApp :: StgOp        -- The op
        -> [StgArg]     -- Arguments
        -> Type         -- Result type (always an unboxed tuple)
71
        -> FCode ReturnKind
72

73
-- Foreign calls
74
cgOpApp (StgFCallOp fcall ty) stg_args res_ty
75
  = cgForeignCall fcall ty stg_args res_ty
76 77
      -- Note [Foreign call results]

78
-- tagToEnum# is special: we need to pull the constructor
79 80
-- out of the table, and perform an appropriate return.

81
cgOpApp (StgPrimOp TagToEnumOp) [arg] res_ty
82
  = ASSERT(isEnumerationTyCon tycon)
83
    do  { dflags <- getDynFlags
84
        ; args' <- getNonVoidArgAmodes [arg]
85 86
        ; let amode = case args' of [amode] -> amode
                                    _ -> panic "TagToEnumOp had void arg"
87
        ; emitReturn [tagToClosure dflags tycon amode] }
88
   where
89 90 91 92 93 94
          -- If you're reading this code in the attempt to figure
          -- out why the compiler panic'ed here, it is probably because
          -- you used tagToEnum# in a non-monomorphic setting, e.g.,
          --         intToTg :: Enum a => Int -> a ; intToTg (I# x#) = tagToEnum# x#
          -- That won't work.
        tycon = tyConAppTyCon res_ty
95

96 97
cgOpApp (StgPrimOp primop) args res_ty = do
    dflags <- getDynFlags
98
    cmm_args <- getNonVoidArgAmodes args
99
    case shouldInlinePrimOp dflags primop cmm_args of
100 101
        Nothing -> do  -- out-of-line
          let fun = CmmLit (CmmLabel (mkRtsPrimOpLabel primop))
102
          emitCall (NativeNodeCall, NativeReturn) fun cmm_args
103

104
        Just f  -- inline
105 106 107 108 109 110 111 112
          | ReturnsPrim VoidRep <- result_info
          -> do f []
                emitReturn []

          | ReturnsPrim rep <- result_info
          -> do dflags <- getDynFlags
                res <- newTemp (primRepCmmType dflags rep)
                f [res]
113
                emitReturn [CmmReg (CmmLocal res)]
114 115 116 117

          | ReturnsAlg tycon <- result_info, isUnboxedTupleTyCon tycon
          -> do (regs, _hints) <- newUnboxedTupleRegs res_ty
                f regs
118
                emitReturn (map (CmmReg . CmmLocal) regs)
119 120 121 122

          | otherwise -> panic "cgPrimop"
          where
             result_info = getPrimOpResultInfo primop
123

124
cgOpApp (StgPrimCallOp primcall) args _res_ty
125
  = do  { cmm_args <- getNonVoidArgAmodes args
126
        ; let fun = CmmLit (CmmLabel (mkPrimCallLabel primcall))
127
        ; emitCall (NativeNodeCall, NativeReturn) fun cmm_args }
128

tibbe's avatar
tibbe committed
129 130 131 132 133 134 135 136 137 138 139 140 141 142 143
-- | Interpret the argument as an unsigned value, assuming the value
-- is given in two-complement form in the given width.
--
-- Example: @asUnsigned W64 (-1)@ is 18446744073709551615.
--
-- This function is used to work around the fact that many array
-- primops take Int# arguments, but we interpret them as unsigned
-- quantities in the code gen. This means that we have to be careful
-- every time we work on e.g. a CmmInt literal that corresponds to the
-- array size, as it might contain a negative Integer value if the
-- user passed a value larger than 2^(wORD_SIZE_IN_BITS-1) as the Int#
-- literal.
asUnsigned :: Width -> Integer -> Integer
asUnsigned w n = n .&. (bit (widthInBits w) - 1)

144 145 146 147 148
-- TODO: Several primop implementations (e.g. 'doNewByteArrayOp') use
--     ByteOff (or some other fixed width signed type) to represent
--     array sizes or indices. This means that these will overflow for
--     large enough sizes.

149 150 151 152 153 154 155 156 157 158 159 160 161
-- | Decide whether an out-of-line primop should be replaced by an
-- inline implementation. This might happen e.g. if there's enough
-- static information, such as statically know arguments, to emit a
-- more efficient implementation inline.
--
-- Returns 'Nothing' if this primop should use its out-of-line
-- implementation (defined elsewhere) and 'Just' together with a code
-- generating function that takes the output regs as arguments
-- otherwise.
shouldInlinePrimOp :: DynFlags
                   -> PrimOp     -- ^ The primop
                   -> [CmmExpr]  -- ^ The primop arguments
                   -> Maybe ([LocalReg] -> FCode ())
162

tibbe's avatar
tibbe committed
163 164
shouldInlinePrimOp dflags NewByteArrayOp_Char [(CmmLit (CmmInt n w))]
  | asUnsigned w n <= fromIntegral (maxInlineAllocSize dflags) =
165
      Just $ \ [res] -> doNewByteArrayOp res (fromInteger n)
166

tibbe's avatar
tibbe committed
167 168
shouldInlinePrimOp dflags NewArrayOp [(CmmLit (CmmInt n w)), init]
  | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
169 170 171 172 173 174 175 176
      Just $ \ [res] ->
      doNewArrayOp res (arrPtrsRep dflags (fromInteger n)) mkMAP_DIRTY_infoLabel
      [ (mkIntExpr dflags (fromInteger n),
         fixedHdrSize dflags + oFFSET_StgMutArrPtrs_ptrs dflags)
      , (mkIntExpr dflags (nonHdrSizeW (arrPtrsRep dflags (fromInteger n))),
         fixedHdrSize dflags + oFFSET_StgMutArrPtrs_size dflags)
      ]
      (fromInteger n) init
177

178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193
shouldInlinePrimOp _ CopyArrayOp
    [src, src_off, dst, dst_off, (CmmLit (CmmInt n _))] =
        Just $ \ [] -> doCopyArrayOp src src_off dst dst_off (fromInteger n)

shouldInlinePrimOp _ CopyMutableArrayOp
    [src, src_off, dst, dst_off, (CmmLit (CmmInt n _))] =
        Just $ \ [] -> doCopyMutableArrayOp src src_off dst dst_off (fromInteger n)

shouldInlinePrimOp _ CopyArrayArrayOp
    [src, src_off, dst, dst_off, (CmmLit (CmmInt n _))] =
        Just $ \ [] -> doCopyArrayOp src src_off dst dst_off (fromInteger n)

shouldInlinePrimOp _ CopyMutableArrayArrayOp
    [src, src_off, dst, dst_off, (CmmLit (CmmInt n _))] =
        Just $ \ [] -> doCopyMutableArrayOp src src_off dst dst_off (fromInteger n)

tibbe's avatar
tibbe committed
194 195
shouldInlinePrimOp dflags CloneArrayOp [src, src_off, (CmmLit (CmmInt n w))]
  | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
196
      Just $ \ [res] -> emitCloneArray mkMAP_FROZEN_CLEAN_infoLabel res src src_off (fromInteger n)
197

tibbe's avatar
tibbe committed
198 199
shouldInlinePrimOp dflags CloneMutableArrayOp [src, src_off, (CmmLit (CmmInt n w))]
  | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
200 201
      Just $ \ [res] -> emitCloneArray mkMAP_DIRTY_infoLabel res src src_off (fromInteger n)

tibbe's avatar
tibbe committed
202 203
shouldInlinePrimOp dflags FreezeArrayOp [src, src_off, (CmmLit (CmmInt n w))]
  | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
204
      Just $ \ [res] -> emitCloneArray mkMAP_FROZEN_CLEAN_infoLabel res src src_off (fromInteger n)
205

tibbe's avatar
tibbe committed
206 207
shouldInlinePrimOp dflags ThawArrayOp [src, src_off, (CmmLit (CmmInt n w))]
  | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
208 209
      Just $ \ [res] -> emitCloneArray mkMAP_DIRTY_infoLabel res src src_off (fromInteger n)

tibbe's avatar
tibbe committed
210 211
shouldInlinePrimOp dflags NewSmallArrayOp [(CmmLit (CmmInt n w)), init]
  | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
212 213 214 215 216 217 218
      Just $ \ [res] ->
      doNewArrayOp res (smallArrPtrsRep (fromInteger n)) mkSMAP_DIRTY_infoLabel
      [ (mkIntExpr dflags (fromInteger n),
         fixedHdrSize dflags + oFFSET_StgSmallMutArrPtrs_ptrs dflags)
      ]
      (fromInteger n) init

219 220 221 222 223 224 225 226
shouldInlinePrimOp _ CopySmallArrayOp
    [src, src_off, dst, dst_off, (CmmLit (CmmInt n _))] =
        Just $ \ [] -> doCopySmallArrayOp src src_off dst dst_off (fromInteger n)

shouldInlinePrimOp _ CopySmallMutableArrayOp
    [src, src_off, dst, dst_off, (CmmLit (CmmInt n _))] =
        Just $ \ [] -> doCopySmallMutableArrayOp src src_off dst dst_off (fromInteger n)

tibbe's avatar
tibbe committed
227 228
shouldInlinePrimOp dflags CloneSmallArrayOp [src, src_off, (CmmLit (CmmInt n w))]
  | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
229
      Just $ \ [res] -> emitCloneSmallArray mkSMAP_FROZEN_CLEAN_infoLabel res src src_off (fromInteger n)
230

tibbe's avatar
tibbe committed
231 232
shouldInlinePrimOp dflags CloneSmallMutableArrayOp [src, src_off, (CmmLit (CmmInt n w))]
  | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
233 234
      Just $ \ [res] -> emitCloneSmallArray mkSMAP_DIRTY_infoLabel res src src_off (fromInteger n)

tibbe's avatar
tibbe committed
235 236
shouldInlinePrimOp dflags FreezeSmallArrayOp [src, src_off, (CmmLit (CmmInt n w))]
  | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
237
      Just $ \ [res] -> emitCloneSmallArray mkSMAP_FROZEN_CLEAN_infoLabel res src src_off (fromInteger n)
238

tibbe's avatar
tibbe committed
239 240
shouldInlinePrimOp dflags ThawSmallArrayOp [src, src_off, (CmmLit (CmmInt n w))]
  | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
241 242
      Just $ \ [res] -> emitCloneSmallArray mkSMAP_DIRTY_infoLabel res src src_off (fromInteger n)

243 244 245 246 247 248 249 250 251 252
shouldInlinePrimOp dflags primop args
  | primOpOutOfLine primop = Nothing
  | otherwise = Just $ \ regs -> emitPrimOp dflags regs primop args

-- TODO: Several primops, such as 'copyArray#', only have an inline
-- implementation (below) but could possibly have both an inline
-- implementation and an out-of-line implementation, just like
-- 'newArray#'. This would lower the amount of code generated,
-- hopefully without a performance impact (needs to be measured).

253
---------------------------------------------------
254 255 256 257
cgPrimOp   :: [LocalReg]        -- where to put the results
           -> PrimOp            -- the op
           -> [StgArg]          -- arguments
           -> FCode ()
258 259

cgPrimOp results op args
260
  = do dflags <- getDynFlags
261
       arg_exprs <- getNonVoidArgAmodes args
262
       emitPrimOp dflags results op arg_exprs
263 264 265


------------------------------------------------------------------------
266
--      Emitting code for a primop
267 268
------------------------------------------------------------------------

269
emitPrimOp :: DynFlags
270 271 272 273
           -> [LocalReg]        -- where to put the results
           -> PrimOp            -- the op
           -> [CmmExpr]         -- arguments
           -> FCode ()
274 275 276 277

-- First we handle various awkward cases specially.  The remaining
-- easy cases are then handled by translateOp, defined below.

278
emitPrimOp _ [res] ParOp [arg]
279 280 281
  =
        -- for now, just implement this in a C function
        -- later, we might want to inline it.
282
    emitCCall
283
        [(res,NoHint)]
284
        (CmmLit (CmmLabel (mkForeignLabel (fsLit "newSpark") Nothing ForeignLabelInExternalPackage IsFunction)))
285
        [(baseExpr, AddrHint), (arg,AddrHint)]
286

287
emitPrimOp dflags [res] SparkOp [arg]
288 289 290 291 292
  = do
        -- returns the value of arg in res.  We're going to therefore
        -- refer to arg twice (once to pass to newSpark(), and once to
        -- assign to res), so put it in a temporary.
        tmp <- assignTemp arg
293
        tmp2 <- newTemp (bWord dflags)
294
        emitCCall
295
            [(tmp2,NoHint)]
296
            (CmmLit (CmmLabel (mkForeignLabel (fsLit "newSpark") Nothing ForeignLabelInExternalPackage IsFunction)))
297
            [(baseExpr, AddrHint), ((CmmReg (CmmLocal tmp)), AddrHint)]
298
        emitAssign (CmmLocal res) (CmmReg (CmmLocal tmp))
299

300 301
emitPrimOp dflags [res] GetCCSOfOp [arg]
  = emitAssign (CmmLocal res) val
302
  where
303
    val
ian@well-typed.com's avatar
ian@well-typed.com committed
304
     | gopt Opt_SccProfilingOn dflags = costCentreFrom dflags (cmmUntag dflags arg)
305
     | otherwise                      = CmmLit (zeroCLit dflags)
306

307
emitPrimOp _ [res] GetCurrentCCSOp [_dummy_arg]
308
   = emitAssign (CmmLocal res) cccsExpr
309

310 311 312
emitPrimOp _ [res] MyThreadIdOp []
   = emitAssign (CmmLocal res) currentTSOExpr

313
emitPrimOp dflags [res] ReadMutVarOp [mutv]
314
   = emitAssign (CmmLocal res) (cmmLoadIndexW dflags mutv (fixedHdrSizeW dflags) (gcWord dflags))
315

316 317 318 319 320
emitPrimOp dflags res@[] WriteMutVarOp [mutv,var]
   = do -- Without this write barrier, other CPUs may see this pointer before
        -- the writes for the closure it points to have occurred.
        emitPrimCall res MO_WriteBarrier []
        emitStore (cmmOffsetW dflags mutv (fixedHdrSizeW dflags)) var
321 322 323
        emitCCall
                [{-no results-}]
                (CmmLit (CmmLabel mkDirty_MUT_VAR_Label))
324
                [(baseExpr, AddrHint), (mutv,AddrHint)]
325 326

--  #define sizzeofByteArrayzh(r,a) \
siddhanathan's avatar
siddhanathan committed
327
--     r = ((StgArrBytes *)(a))->bytes
328
emitPrimOp dflags [res] SizeofByteArrayOp [arg]
329
   = emit $ mkAssign (CmmLocal res) (cmmLoadIndexW dflags arg (fixedHdrSizeW dflags) (bWord dflags))
330 331

--  #define sizzeofMutableByteArrayzh(r,a) \
siddhanathan's avatar
siddhanathan committed
332
--      r = ((StgArrBytes *)(a))->bytes
333 334
emitPrimOp dflags [res] SizeofMutableByteArrayOp [arg]
   = emitPrimOp dflags [res] SizeofByteArrayOp [arg]
335

336
--  #define getSizzeofMutableByteArrayzh(r,a) \
siddhanathan's avatar
siddhanathan committed
337
--      r = ((StgArrBytes *)(a))->bytes
338 339 340
emitPrimOp dflags [res] GetSizeofMutableByteArrayOp [arg]
   = emitAssign (CmmLocal res) (cmmLoadIndexW dflags arg (fixedHdrSizeW dflags) (bWord dflags))

341 342

--  #define touchzh(o)                  /* nothing */
343
emitPrimOp _ res@[] TouchOp args@[_arg]
344
   = do emitPrimCall res MO_Touch args
345 346

--  #define byteArrayContentszh(r,a) r = BYTE_ARR_CTS(a)
347 348
emitPrimOp dflags [res] ByteArrayContents_Char [arg]
   = emitAssign (CmmLocal res) (cmmOffsetB dflags arg (arrWordsHdrSize dflags))
349 350

--  #define stableNameToIntzh(r,s)   (r = ((StgStableName *)s)->sn)
351
emitPrimOp dflags [res] StableNameToIntOp [arg]
352
   = emitAssign (CmmLocal res) (cmmLoadIndexW dflags arg (fixedHdrSizeW dflags) (bWord dflags))
353

354 355
emitPrimOp dflags [res] ReallyUnsafePtrEqualityOp [arg1,arg2]
   = emitAssign (CmmLocal res) (CmmMachOp (mo_wordEq dflags) [arg1,arg2])
356 357

--  #define addrToHValuezh(r,a) r=(P_)a
358
emitPrimOp _      [res] AddrToAnyOp [arg]
359
   = emitAssign (CmmLocal res) arg
360

gcampax's avatar
gcampax committed
361 362 363 364
--  #define hvalueToAddrzh(r, a) r=(W_)a
emitPrimOp _      [res] AnyToAddrOp [arg]
   = emitAssign (CmmLocal res) arg

365 366 367
{- Freezing arrays-of-ptrs requires changing an info table, for the
   benefit of the generational collector.  It needs to scavenge mutable
   objects, even if they are in old space.  When they become immutable,
368
   they can be removed from this scavenge list.  -}
369 370

--  #define unsafeFreezzeArrayzh(r,a)
371
--      {
372
--        SET_INFO((StgClosure *)a,&stg_MUT_ARR_PTRS_FROZEN_DIRTY_info);
373 374
--        r = a;
--      }
375
emitPrimOp _      [res] UnsafeFreezeArrayOp [arg]
376
   = emit $ catAGraphs
377
   [ setInfo arg (CmmLit (CmmLabel mkMAP_FROZEN_DIRTY_infoLabel)),
378
     mkAssign (CmmLocal res) arg ]
379
emitPrimOp _      [res] UnsafeFreezeArrayArrayOp [arg]
380
   = emit $ catAGraphs
381
   [ setInfo arg (CmmLit (CmmLabel mkMAP_FROZEN_DIRTY_infoLabel)),
382
     mkAssign (CmmLocal res) arg ]
383 384
emitPrimOp _      [res] UnsafeFreezeSmallArrayOp [arg]
   = emit $ catAGraphs
385
   [ setInfo arg (CmmLit (CmmLabel mkSMAP_FROZEN_DIRTY_infoLabel)),
386
     mkAssign (CmmLocal res) arg ]
387

388
--  #define unsafeFreezzeByteArrayzh(r,a)       r=(a)
389
emitPrimOp _      [res] UnsafeFreezeByteArrayOp [arg]
390
   = emitAssign (CmmLocal res) arg
391 392 393

-- Reading/writing pointer arrays

394 395 396 397 398 399 400 401 402 403 404 405 406 407 408
emitPrimOp _      [res] ReadArrayOp  [obj,ix]    = doReadPtrArrayOp res obj ix
emitPrimOp _      [res] IndexArrayOp [obj,ix]    = doReadPtrArrayOp res obj ix
emitPrimOp _      []  WriteArrayOp [obj,ix,v]  = doWritePtrArrayOp obj ix v

emitPrimOp _      [res] IndexArrayArrayOp_ByteArray         [obj,ix]   = doReadPtrArrayOp res obj ix
emitPrimOp _      [res] IndexArrayArrayOp_ArrayArray        [obj,ix]   = doReadPtrArrayOp res obj ix
emitPrimOp _      [res] ReadArrayArrayOp_ByteArray          [obj,ix]   = doReadPtrArrayOp res obj ix
emitPrimOp _      [res] ReadArrayArrayOp_MutableByteArray   [obj,ix]   = doReadPtrArrayOp res obj ix
emitPrimOp _      [res] ReadArrayArrayOp_ArrayArray         [obj,ix]   = doReadPtrArrayOp res obj ix
emitPrimOp _      [res] ReadArrayArrayOp_MutableArrayArray  [obj,ix]   = doReadPtrArrayOp res obj ix
emitPrimOp _      []  WriteArrayArrayOp_ByteArray         [obj,ix,v] = doWritePtrArrayOp obj ix v
emitPrimOp _      []  WriteArrayArrayOp_MutableByteArray  [obj,ix,v] = doWritePtrArrayOp obj ix v
emitPrimOp _      []  WriteArrayArrayOp_ArrayArray        [obj,ix,v] = doWritePtrArrayOp obj ix v
emitPrimOp _      []  WriteArrayArrayOp_MutableArrayArray [obj,ix,v] = doWritePtrArrayOp obj ix v

409 410 411 412 413 414
emitPrimOp _      [res] ReadSmallArrayOp  [obj,ix] = doReadSmallPtrArrayOp res obj ix
emitPrimOp _      [res] IndexSmallArrayOp [obj,ix] = doReadSmallPtrArrayOp res obj ix
emitPrimOp _      []  WriteSmallArrayOp [obj,ix,v] = doWriteSmallPtrArrayOp obj ix v

-- Getting the size of pointer arrays

415
emitPrimOp dflags [res] SizeofArrayOp [arg]
Ben Gamari's avatar
Ben Gamari committed
416 417 418
   = emit $ mkAssign (CmmLocal res) (cmmLoadIndexW dflags arg
    (fixedHdrSizeW dflags + bytesToWordsRoundUp dflags (oFFSET_StgMutArrPtrs_ptrs dflags))
        (bWord dflags))
419 420 421 422 423 424
emitPrimOp dflags [res] SizeofMutableArrayOp [arg]
   = emitPrimOp dflags [res] SizeofArrayOp [arg]
emitPrimOp dflags [res] SizeofArrayArrayOp [arg]
   = emitPrimOp dflags [res] SizeofArrayOp [arg]
emitPrimOp dflags [res] SizeofMutableArrayArrayOp [arg]
   = emitPrimOp dflags [res] SizeofArrayOp [arg]
425

426 427 428
emitPrimOp dflags [res] SizeofSmallArrayOp [arg] =
    emit $ mkAssign (CmmLocal res)
    (cmmLoadIndexW dflags arg
Ben Gamari's avatar
Ben Gamari committed
429 430
     (fixedHdrSizeW dflags + bytesToWordsRoundUp dflags (oFFSET_StgSmallMutArrPtrs_ptrs dflags))
        (bWord dflags))
431 432 433
emitPrimOp dflags [res] SizeofSmallMutableArrayOp [arg] =
    emitPrimOp dflags [res] SizeofSmallArrayOp [arg]

434 435
-- IndexXXXoffAddr

436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451
emitPrimOp dflags res IndexOffAddrOp_Char             args = doIndexOffAddrOp   (Just (mo_u_8ToWord dflags)) b8 res args
emitPrimOp dflags res IndexOffAddrOp_WideChar         args = doIndexOffAddrOp   (Just (mo_u_32ToWord dflags)) b32 res args
emitPrimOp dflags res IndexOffAddrOp_Int              args = doIndexOffAddrOp   Nothing (bWord dflags) res args
emitPrimOp dflags res IndexOffAddrOp_Word             args = doIndexOffAddrOp   Nothing (bWord dflags) res args
emitPrimOp dflags res IndexOffAddrOp_Addr             args = doIndexOffAddrOp   Nothing (bWord dflags) res args
emitPrimOp _      res IndexOffAddrOp_Float            args = doIndexOffAddrOp   Nothing f32 res args
emitPrimOp _      res IndexOffAddrOp_Double           args = doIndexOffAddrOp   Nothing f64 res args
emitPrimOp dflags res IndexOffAddrOp_StablePtr        args = doIndexOffAddrOp   Nothing (bWord dflags) res args
emitPrimOp dflags res IndexOffAddrOp_Int8             args = doIndexOffAddrOp   (Just (mo_s_8ToWord dflags)) b8  res args
emitPrimOp dflags res IndexOffAddrOp_Int16            args = doIndexOffAddrOp   (Just (mo_s_16ToWord dflags)) b16 res args
emitPrimOp dflags res IndexOffAddrOp_Int32            args = doIndexOffAddrOp   (Just (mo_s_32ToWord dflags)) b32 res args
emitPrimOp _      res IndexOffAddrOp_Int64            args = doIndexOffAddrOp   Nothing b64 res args
emitPrimOp dflags res IndexOffAddrOp_Word8            args = doIndexOffAddrOp   (Just (mo_u_8ToWord dflags)) b8  res args
emitPrimOp dflags res IndexOffAddrOp_Word16           args = doIndexOffAddrOp   (Just (mo_u_16ToWord dflags)) b16 res args
emitPrimOp dflags res IndexOffAddrOp_Word32           args = doIndexOffAddrOp   (Just (mo_u_32ToWord dflags)) b32 res args
emitPrimOp _      res IndexOffAddrOp_Word64           args = doIndexOffAddrOp   Nothing b64 res args
452 453 454

-- ReadXXXoffAddr, which are identical, for our purposes, to IndexXXXoffAddr.

455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470
emitPrimOp dflags res ReadOffAddrOp_Char             args = doIndexOffAddrOp   (Just (mo_u_8ToWord dflags)) b8 res args
emitPrimOp dflags res ReadOffAddrOp_WideChar         args = doIndexOffAddrOp   (Just (mo_u_32ToWord dflags)) b32 res args
emitPrimOp dflags res ReadOffAddrOp_Int              args = doIndexOffAddrOp   Nothing (bWord dflags) res args
emitPrimOp dflags res ReadOffAddrOp_Word             args = doIndexOffAddrOp   Nothing (bWord dflags) res args
emitPrimOp dflags res ReadOffAddrOp_Addr             args = doIndexOffAddrOp   Nothing (bWord dflags) res args
emitPrimOp _      res ReadOffAddrOp_Float            args = doIndexOffAddrOp   Nothing f32 res args
emitPrimOp _      res ReadOffAddrOp_Double           args = doIndexOffAddrOp   Nothing f64 res args
emitPrimOp dflags res ReadOffAddrOp_StablePtr        args = doIndexOffAddrOp   Nothing (bWord dflags) res args
emitPrimOp dflags res ReadOffAddrOp_Int8             args = doIndexOffAddrOp   (Just (mo_s_8ToWord dflags)) b8  res args
emitPrimOp dflags res ReadOffAddrOp_Int16            args = doIndexOffAddrOp   (Just (mo_s_16ToWord dflags)) b16 res args
emitPrimOp dflags res ReadOffAddrOp_Int32            args = doIndexOffAddrOp   (Just (mo_s_32ToWord dflags)) b32 res args
emitPrimOp _      res ReadOffAddrOp_Int64            args = doIndexOffAddrOp   Nothing b64 res args
emitPrimOp dflags res ReadOffAddrOp_Word8            args = doIndexOffAddrOp   (Just (mo_u_8ToWord dflags)) b8  res args
emitPrimOp dflags res ReadOffAddrOp_Word16           args = doIndexOffAddrOp   (Just (mo_u_16ToWord dflags)) b16 res args
emitPrimOp dflags res ReadOffAddrOp_Word32           args = doIndexOffAddrOp   (Just (mo_u_32ToWord dflags)) b32 res args
emitPrimOp _      res ReadOffAddrOp_Word64           args = doIndexOffAddrOp   Nothing b64 res args
471 472 473

-- IndexXXXArray

474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489
emitPrimOp dflags res IndexByteArrayOp_Char             args = doIndexByteArrayOp   (Just (mo_u_8ToWord dflags)) b8 res args
emitPrimOp dflags res IndexByteArrayOp_WideChar         args = doIndexByteArrayOp   (Just (mo_u_32ToWord dflags)) b32 res args
emitPrimOp dflags res IndexByteArrayOp_Int              args = doIndexByteArrayOp   Nothing (bWord dflags) res args
emitPrimOp dflags res IndexByteArrayOp_Word             args = doIndexByteArrayOp   Nothing (bWord dflags) res args
emitPrimOp dflags res IndexByteArrayOp_Addr             args = doIndexByteArrayOp   Nothing (bWord dflags) res args
emitPrimOp _      res IndexByteArrayOp_Float            args = doIndexByteArrayOp   Nothing f32 res args
emitPrimOp _      res IndexByteArrayOp_Double           args = doIndexByteArrayOp   Nothing f64 res args
emitPrimOp dflags res IndexByteArrayOp_StablePtr        args = doIndexByteArrayOp   Nothing (bWord dflags) res args
emitPrimOp dflags res IndexByteArrayOp_Int8             args = doIndexByteArrayOp   (Just (mo_s_8ToWord dflags)) b8  res args
emitPrimOp dflags res IndexByteArrayOp_Int16            args = doIndexByteArrayOp   (Just (mo_s_16ToWord dflags)) b16  res args
emitPrimOp dflags res IndexByteArrayOp_Int32            args = doIndexByteArrayOp   (Just (mo_s_32ToWord dflags)) b32  res args
emitPrimOp _      res IndexByteArrayOp_Int64            args = doIndexByteArrayOp   Nothing b64  res args
emitPrimOp dflags res IndexByteArrayOp_Word8            args = doIndexByteArrayOp   (Just (mo_u_8ToWord dflags)) b8  res args
emitPrimOp dflags res IndexByteArrayOp_Word16           args = doIndexByteArrayOp   (Just (mo_u_16ToWord dflags)) b16  res args
emitPrimOp dflags res IndexByteArrayOp_Word32           args = doIndexByteArrayOp   (Just (mo_u_32ToWord dflags)) b32  res args
emitPrimOp _      res IndexByteArrayOp_Word64           args = doIndexByteArrayOp   Nothing b64  res args
490 491 492

-- ReadXXXArray, identical to IndexXXXArray.

493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508
emitPrimOp dflags res ReadByteArrayOp_Char             args = doIndexByteArrayOp   (Just (mo_u_8ToWord dflags)) b8 res args
emitPrimOp dflags res ReadByteArrayOp_WideChar         args = doIndexByteArrayOp   (Just (mo_u_32ToWord dflags)) b32 res args
emitPrimOp dflags res ReadByteArrayOp_Int              args = doIndexByteArrayOp   Nothing (bWord dflags) res args
emitPrimOp dflags res ReadByteArrayOp_Word             args = doIndexByteArrayOp   Nothing (bWord dflags) res args
emitPrimOp dflags res ReadByteArrayOp_Addr             args = doIndexByteArrayOp   Nothing (bWord dflags) res args
emitPrimOp _      res ReadByteArrayOp_Float            args = doIndexByteArrayOp   Nothing f32 res args
emitPrimOp _      res ReadByteArrayOp_Double           args = doIndexByteArrayOp   Nothing f64 res args
emitPrimOp dflags res ReadByteArrayOp_StablePtr        args = doIndexByteArrayOp   Nothing (bWord dflags) res args
emitPrimOp dflags res ReadByteArrayOp_Int8             args = doIndexByteArrayOp   (Just (mo_s_8ToWord dflags)) b8  res args
emitPrimOp dflags res ReadByteArrayOp_Int16            args = doIndexByteArrayOp   (Just (mo_s_16ToWord dflags)) b16  res args
emitPrimOp dflags res ReadByteArrayOp_Int32            args = doIndexByteArrayOp   (Just (mo_s_32ToWord dflags)) b32  res args
emitPrimOp _      res ReadByteArrayOp_Int64            args = doIndexByteArrayOp   Nothing b64  res args
emitPrimOp dflags res ReadByteArrayOp_Word8            args = doIndexByteArrayOp   (Just (mo_u_8ToWord dflags)) b8  res args
emitPrimOp dflags res ReadByteArrayOp_Word16           args = doIndexByteArrayOp   (Just (mo_u_16ToWord dflags)) b16  res args
emitPrimOp dflags res ReadByteArrayOp_Word32           args = doIndexByteArrayOp   (Just (mo_u_32ToWord dflags)) b32  res args
emitPrimOp _      res ReadByteArrayOp_Word64           args = doIndexByteArrayOp   Nothing b64  res args
509

510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543
-- IndexWord8ArrayAsXXX

emitPrimOp dflags res IndexByteArrayOp_Word8AsChar      args = doIndexByteArrayOpAs   (Just (mo_u_8ToWord dflags)) b8 b8 res args
emitPrimOp dflags res IndexByteArrayOp_Word8AsWideChar  args = doIndexByteArrayOpAs   (Just (mo_u_32ToWord dflags)) b32 b8 res args
emitPrimOp dflags res IndexByteArrayOp_Word8AsInt       args = doIndexByteArrayOpAs   Nothing (bWord dflags) b8 res args
emitPrimOp dflags res IndexByteArrayOp_Word8AsWord      args = doIndexByteArrayOpAs   Nothing (bWord dflags) b8 res args
emitPrimOp dflags res IndexByteArrayOp_Word8AsAddr      args = doIndexByteArrayOpAs   Nothing (bWord dflags) b8 res args
emitPrimOp _      res IndexByteArrayOp_Word8AsFloat     args = doIndexByteArrayOpAs   Nothing f32 b8 res args
emitPrimOp _      res IndexByteArrayOp_Word8AsDouble    args = doIndexByteArrayOpAs   Nothing f64 b8 res args
emitPrimOp dflags res IndexByteArrayOp_Word8AsStablePtr args = doIndexByteArrayOpAs   Nothing (bWord dflags) b8 res args
emitPrimOp dflags res IndexByteArrayOp_Word8AsInt16     args = doIndexByteArrayOpAs   (Just (mo_s_16ToWord dflags)) b16 b8 res args
emitPrimOp dflags res IndexByteArrayOp_Word8AsInt32     args = doIndexByteArrayOpAs   (Just (mo_s_32ToWord dflags)) b32 b8 res args
emitPrimOp _      res IndexByteArrayOp_Word8AsInt64     args = doIndexByteArrayOpAs   Nothing b64 b8 res args
emitPrimOp dflags res IndexByteArrayOp_Word8AsWord16    args = doIndexByteArrayOpAs   (Just (mo_u_16ToWord dflags)) b16 b8 res args
emitPrimOp dflags res IndexByteArrayOp_Word8AsWord32    args = doIndexByteArrayOpAs   (Just (mo_u_32ToWord dflags)) b32 b8 res args
emitPrimOp _      res IndexByteArrayOp_Word8AsWord64    args = doIndexByteArrayOpAs   Nothing b64 b8 res args

-- ReadInt8ArrayAsXXX, identical to IndexInt8ArrayAsXXX

emitPrimOp dflags res ReadByteArrayOp_Word8AsChar      args = doIndexByteArrayOpAs   (Just (mo_u_8ToWord dflags)) b8 b8 res args
emitPrimOp dflags res ReadByteArrayOp_Word8AsWideChar  args = doIndexByteArrayOpAs   (Just (mo_u_32ToWord dflags)) b32 b8 res args
emitPrimOp dflags res ReadByteArrayOp_Word8AsInt       args = doIndexByteArrayOpAs   Nothing (bWord dflags) b8 res args
emitPrimOp dflags res ReadByteArrayOp_Word8AsWord      args = doIndexByteArrayOpAs   Nothing (bWord dflags) b8 res args
emitPrimOp dflags res ReadByteArrayOp_Word8AsAddr      args = doIndexByteArrayOpAs   Nothing (bWord dflags) b8 res args
emitPrimOp _      res ReadByteArrayOp_Word8AsFloat     args = doIndexByteArrayOpAs   Nothing f32 b8 res args
emitPrimOp _      res ReadByteArrayOp_Word8AsDouble    args = doIndexByteArrayOpAs   Nothing f64 b8 res args
emitPrimOp dflags res ReadByteArrayOp_Word8AsStablePtr args = doIndexByteArrayOpAs   Nothing (bWord dflags) b8 res args
emitPrimOp dflags res ReadByteArrayOp_Word8AsInt16     args = doIndexByteArrayOpAs   (Just (mo_s_16ToWord dflags)) b16 b8 res args
emitPrimOp dflags res ReadByteArrayOp_Word8AsInt32     args = doIndexByteArrayOpAs   (Just (mo_s_32ToWord dflags)) b32 b8 res args
emitPrimOp _      res ReadByteArrayOp_Word8AsInt64     args = doIndexByteArrayOpAs   Nothing b64 b8 res args
emitPrimOp dflags res ReadByteArrayOp_Word8AsWord16    args = doIndexByteArrayOpAs   (Just (mo_u_16ToWord dflags)) b16 b8 res args
emitPrimOp dflags res ReadByteArrayOp_Word8AsWord32    args = doIndexByteArrayOpAs   (Just (mo_u_32ToWord dflags)) b32 b8 res args
emitPrimOp _      res ReadByteArrayOp_Word8AsWord64    args = doIndexByteArrayOpAs   Nothing b64 b8 res args

544 545
-- WriteXXXoffAddr

546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561
emitPrimOp dflags res WriteOffAddrOp_Char             args = doWriteOffAddrOp (Just (mo_WordTo8 dflags))  b8 res args
emitPrimOp dflags res WriteOffAddrOp_WideChar         args = doWriteOffAddrOp (Just (mo_WordTo32 dflags)) b32 res args
emitPrimOp dflags res WriteOffAddrOp_Int              args = doWriteOffAddrOp Nothing (bWord dflags) res args
emitPrimOp dflags res WriteOffAddrOp_Word             args = doWriteOffAddrOp Nothing (bWord dflags) res args
emitPrimOp dflags res WriteOffAddrOp_Addr             args = doWriteOffAddrOp Nothing (bWord dflags) res args
emitPrimOp _      res WriteOffAddrOp_Float            args = doWriteOffAddrOp Nothing f32 res args
emitPrimOp _      res WriteOffAddrOp_Double           args = doWriteOffAddrOp Nothing f64 res args
emitPrimOp dflags res WriteOffAddrOp_StablePtr        args = doWriteOffAddrOp Nothing (bWord dflags) res args
emitPrimOp dflags res WriteOffAddrOp_Int8             args = doWriteOffAddrOp (Just (mo_WordTo8 dflags))  b8 res args
emitPrimOp dflags res WriteOffAddrOp_Int16            args = doWriteOffAddrOp (Just (mo_WordTo16 dflags)) b16 res args
emitPrimOp dflags res WriteOffAddrOp_Int32            args = doWriteOffAddrOp (Just (mo_WordTo32 dflags)) b32 res args
emitPrimOp _      res WriteOffAddrOp_Int64            args = doWriteOffAddrOp Nothing b64 res args
emitPrimOp dflags res WriteOffAddrOp_Word8            args = doWriteOffAddrOp (Just (mo_WordTo8 dflags))  b8 res args
emitPrimOp dflags res WriteOffAddrOp_Word16           args = doWriteOffAddrOp (Just (mo_WordTo16 dflags)) b16 res args
emitPrimOp dflags res WriteOffAddrOp_Word32           args = doWriteOffAddrOp (Just (mo_WordTo32 dflags)) b32 res args
emitPrimOp _      res WriteOffAddrOp_Word64           args = doWriteOffAddrOp Nothing b64 res args
562 563 564

-- WriteXXXArray

565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580
emitPrimOp dflags res WriteByteArrayOp_Char             args = doWriteByteArrayOp (Just (mo_WordTo8 dflags))  b8 res args
emitPrimOp dflags res WriteByteArrayOp_WideChar         args = doWriteByteArrayOp (Just (mo_WordTo32 dflags)) b32 res args
emitPrimOp dflags res WriteByteArrayOp_Int              args = doWriteByteArrayOp Nothing (bWord dflags) res args
emitPrimOp dflags res WriteByteArrayOp_Word             args = doWriteByteArrayOp Nothing (bWord dflags) res args
emitPrimOp dflags res WriteByteArrayOp_Addr             args = doWriteByteArrayOp Nothing (bWord dflags) res args
emitPrimOp _      res WriteByteArrayOp_Float            args = doWriteByteArrayOp Nothing f32 res args
emitPrimOp _      res WriteByteArrayOp_Double           args = doWriteByteArrayOp Nothing f64 res args
emitPrimOp dflags res WriteByteArrayOp_StablePtr        args = doWriteByteArrayOp Nothing (bWord dflags) res args
emitPrimOp dflags res WriteByteArrayOp_Int8             args = doWriteByteArrayOp (Just (mo_WordTo8 dflags))  b8 res args
emitPrimOp dflags res WriteByteArrayOp_Int16            args = doWriteByteArrayOp (Just (mo_WordTo16 dflags)) b16 res args
emitPrimOp dflags res WriteByteArrayOp_Int32            args = doWriteByteArrayOp (Just (mo_WordTo32 dflags)) b32 res args
emitPrimOp _      res WriteByteArrayOp_Int64            args = doWriteByteArrayOp Nothing b64 res args
emitPrimOp dflags res WriteByteArrayOp_Word8            args = doWriteByteArrayOp (Just (mo_WordTo8 dflags))  b8  res args
emitPrimOp dflags res WriteByteArrayOp_Word16           args = doWriteByteArrayOp (Just (mo_WordTo16 dflags)) b16 res args
emitPrimOp dflags res WriteByteArrayOp_Word32           args = doWriteByteArrayOp (Just (mo_WordTo32 dflags)) b32 res args
emitPrimOp _      res WriteByteArrayOp_Word64           args = doWriteByteArrayOp Nothing b64 res args
581

582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598
-- WriteInt8ArrayAsXXX

emitPrimOp dflags res WriteByteArrayOp_Word8AsChar       args = doWriteByteArrayOp (Just (mo_WordTo8 dflags))  b8 res args
emitPrimOp dflags res WriteByteArrayOp_Word8AsWideChar   args = doWriteByteArrayOp (Just (mo_WordTo32 dflags)) b8 res args
emitPrimOp _      res WriteByteArrayOp_Word8AsInt        args = doWriteByteArrayOp Nothing b8 res args
emitPrimOp _      res WriteByteArrayOp_Word8AsWord       args = doWriteByteArrayOp Nothing b8 res args
emitPrimOp _      res WriteByteArrayOp_Word8AsAddr       args = doWriteByteArrayOp Nothing b8 res args
emitPrimOp _      res WriteByteArrayOp_Word8AsFloat      args = doWriteByteArrayOp Nothing b8 res args
emitPrimOp _      res WriteByteArrayOp_Word8AsDouble     args = doWriteByteArrayOp Nothing b8 res args
emitPrimOp _      res WriteByteArrayOp_Word8AsStablePtr  args = doWriteByteArrayOp Nothing b8 res args
emitPrimOp dflags res WriteByteArrayOp_Word8AsInt16      args = doWriteByteArrayOp (Just (mo_WordTo16 dflags)) b8 res args
emitPrimOp dflags res WriteByteArrayOp_Word8AsInt32      args = doWriteByteArrayOp (Just (mo_WordTo32 dflags)) b8 res args
emitPrimOp _      res WriteByteArrayOp_Word8AsInt64      args = doWriteByteArrayOp Nothing b8 res args
emitPrimOp dflags res WriteByteArrayOp_Word8AsWord16     args = doWriteByteArrayOp (Just (mo_WordTo16 dflags)) b8 res args
emitPrimOp dflags res WriteByteArrayOp_Word8AsWord32     args = doWriteByteArrayOp (Just (mo_WordTo32 dflags)) b8 res args
emitPrimOp _      res WriteByteArrayOp_Word8AsWord64     args = doWriteByteArrayOp Nothing b8 res args

Ian Lynagh's avatar
Ian Lynagh committed
599
-- Copying and setting byte arrays
600
emitPrimOp _      [] CopyByteArrayOp [src,src_off,dst,dst_off,n] =
601
    doCopyByteArrayOp src src_off dst dst_off n
602
emitPrimOp _      [] CopyMutableByteArrayOp [src,src_off,dst,dst_off,n] =
603
    doCopyMutableByteArrayOp src src_off dst dst_off n
604 605 606 607 608 609
emitPrimOp _      [] CopyByteArrayToAddrOp [src,src_off,dst,n] =
    doCopyByteArrayToAddrOp src src_off dst n
emitPrimOp _      [] CopyMutableByteArrayToAddrOp [src,src_off,dst,n] =
    doCopyMutableByteArrayToAddrOp src src_off dst n
emitPrimOp _      [] CopyAddrToByteArrayOp [src,dst,dst_off,n] =
    doCopyAddrToByteArrayOp src dst dst_off n
610
emitPrimOp _      [] SetByteArrayOp [ba,off,len,c] =
Ian Lynagh's avatar
Ian Lynagh committed
611
    doSetByteArrayOp ba off len c
612

613 614 615 616
-- Comparing byte arrays
emitPrimOp _      [res] CompareByteArraysOp [ba1,ba1_off,ba2,ba2_off,n] =
    doCompareByteArraysOp res ba1 ba1_off ba2 ba2_off n

617 618 619 620 621
emitPrimOp _      [res] BSwap16Op [w] = emitBSwapCall res w W16
emitPrimOp _      [res] BSwap32Op [w] = emitBSwapCall res w W32
emitPrimOp _      [res] BSwap64Op [w] = emitBSwapCall res w W64
emitPrimOp dflags [res] BSwapOp   [w] = emitBSwapCall res w (wordWidth dflags)

622 623 624 625 626 627
emitPrimOp _      [res] BRev8Op  [w] = emitBRevCall res w W8
emitPrimOp _      [res] BRev16Op [w] = emitBRevCall res w W16
emitPrimOp _      [res] BRev32Op [w] = emitBRevCall res w W32
emitPrimOp _      [res] BRev64Op [w] = emitBRevCall res w W64
emitPrimOp dflags [res] BRevOp   [w] = emitBRevCall res w (wordWidth dflags)

tibbe's avatar
tibbe committed
628
-- Population count
ian@well-typed.com's avatar
ian@well-typed.com committed
629 630 631 632 633
emitPrimOp _      [res] PopCnt8Op  [w] = emitPopCntCall res w W8
emitPrimOp _      [res] PopCnt16Op [w] = emitPopCntCall res w W16
emitPrimOp _      [res] PopCnt32Op [w] = emitPopCntCall res w W32
emitPrimOp _      [res] PopCnt64Op [w] = emitPopCntCall res w W64
emitPrimOp dflags [res] PopCntOp   [w] = emitPopCntCall res w (wordWidth dflags)
tibbe's avatar
tibbe committed
634

635 636 637 638 639 640 641 642 643 644 645 646 647 648
-- Parallel bit deposit
emitPrimOp _      [res] Pdep8Op  [src, mask] = emitPdepCall res src mask W8
emitPrimOp _      [res] Pdep16Op [src, mask] = emitPdepCall res src mask W16
emitPrimOp _      [res] Pdep32Op [src, mask] = emitPdepCall res src mask W32
emitPrimOp _      [res] Pdep64Op [src, mask] = emitPdepCall res src mask W64
emitPrimOp dflags [res] PdepOp   [src, mask] = emitPdepCall res src mask (wordWidth dflags)

-- Parallel bit extract
emitPrimOp _      [res] Pext8Op  [src, mask] = emitPextCall res src mask W8
emitPrimOp _      [res] Pext16Op [src, mask] = emitPextCall res src mask W16
emitPrimOp _      [res] Pext32Op [src, mask] = emitPextCall res src mask W32
emitPrimOp _      [res] Pext64Op [src, mask] = emitPextCall res src mask W64
emitPrimOp dflags [res] PextOp   [src, mask] = emitPextCall res src mask (wordWidth dflags)

649 650 651 652 653 654 655 656 657 658 659 660 661 662
-- count leading zeros
emitPrimOp _      [res] Clz8Op  [w] = emitClzCall res w W8
emitPrimOp _      [res] Clz16Op [w] = emitClzCall res w W16
emitPrimOp _      [res] Clz32Op [w] = emitClzCall res w W32
emitPrimOp _      [res] Clz64Op [w] = emitClzCall res w W64
emitPrimOp dflags [res] ClzOp   [w] = emitClzCall res w (wordWidth dflags)

-- count trailing zeros
emitPrimOp _      [res] Ctz8Op [w]  = emitCtzCall res w W8
emitPrimOp _      [res] Ctz16Op [w] = emitCtzCall res w W16
emitPrimOp _      [res] Ctz32Op [w] = emitCtzCall res w W32
emitPrimOp _      [res] Ctz64Op [w] = emitCtzCall res w W64
emitPrimOp dflags [res] CtzOp   [w] = emitCtzCall res w (wordWidth dflags)

663 664 665 666 667 668
-- Unsigned int to floating point conversions
emitPrimOp _      [res] Word2FloatOp  [w] = emitPrimCall [res]
                                            (MO_UF_Conv W32) [w]
emitPrimOp _      [res] Word2DoubleOp [w] = emitPrimCall [res]
                                            (MO_UF_Conv W64) [w]

669
-- SIMD primops
670 671
emitPrimOp dflags [res] (VecBroadcastOp vcat n w) [e] = do
    checkVecCompatibility dflags vcat n w
672
    doVecPackOp (vecElemInjectCast dflags vcat w) ty zeros (replicate n e) res
673
  where
674 675 676 677 678 679 680 681 682 683 684 685 686
    zeros :: CmmExpr
    zeros = CmmLit $ CmmVec (replicate n zero)

    zero :: CmmLit
    zero = case vcat of
             IntVec   -> CmmInt 0 w
             WordVec  -> CmmInt 0 w
             FloatVec -> CmmFloat 0 w

    ty :: CmmType
    ty = vecVmmType vcat n w

emitPrimOp dflags [res] (VecPackOp vcat n w) es = do
687
    checkVecCompatibility dflags vcat n w
688
    when (es `lengthIsNot` n) $
689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704
        panic "emitPrimOp: VecPackOp has wrong number of arguments"
    doVecPackOp (vecElemInjectCast dflags vcat w) ty zeros es res
  where
    zeros :: CmmExpr
    zeros = CmmLit $ CmmVec (replicate n zero)

    zero :: CmmLit
    zero = case vcat of
             IntVec   -> CmmInt 0 w
             WordVec  -> CmmInt 0 w
             FloatVec -> CmmFloat 0 w

    ty :: CmmType
    ty = vecVmmType vcat n w

emitPrimOp dflags res (VecUnpackOp vcat n w) [arg] = do
705
    checkVecCompatibility dflags vcat n w
706
    when (res `lengthIsNot` n) $
707 708 709 710 711
        panic "emitPrimOp: VecUnpackOp has wrong number of results"
    doVecUnpackOp (vecElemProjectCast dflags vcat w) ty arg res
  where
    ty :: CmmType
    ty = vecVmmType vcat n w
712

713 714
emitPrimOp dflags [res] (VecInsertOp vcat n w) [v,e,i] = do
    checkVecCompatibility dflags vcat n w
715
    doVecInsertOp (vecElemInjectCast dflags vcat w) ty v e i res
716
  where
717 718
    ty :: CmmType
    ty = vecVmmType vcat n w
719

720 721
emitPrimOp dflags res (VecIndexByteArrayOp vcat n w) args = do
    checkVecCompatibility dflags vcat n w
722 723 724 725
    doIndexByteArrayOp Nothing ty res args
  where
    ty :: CmmType
    ty = vecVmmType vcat n w
726

727 728
emitPrimOp dflags res (VecReadByteArrayOp vcat n w) args = do
    checkVecCompatibility dflags vcat n w
729 730 731 732
    doIndexByteArrayOp Nothing ty res args
  where
    ty :: CmmType
    ty = vecVmmType vcat n w
733

734 735
emitPrimOp dflags res (VecWriteByteArrayOp vcat n w) args = do
    checkVecCompatibility dflags vcat n w
736
    doWriteByteArrayOp Nothing ty res args
737
  where
738 739
    ty :: CmmType
    ty = vecVmmType vcat n w
740

741 742
emitPrimOp dflags res (VecIndexOffAddrOp vcat n w) args = do
    checkVecCompatibility dflags vcat n w
743
    doIndexOffAddrOp Nothing ty res args
744
  where
745 746
    ty :: CmmType
    ty = vecVmmType vcat n w
747

748 749
emitPrimOp dflags res (VecReadOffAddrOp vcat n w) args = do
    checkVecCompatibility dflags vcat n w
750 751 752 753
    doIndexOffAddrOp Nothing ty res args
  where
    ty :: CmmType
    ty = vecVmmType vcat n w
754

755 756
emitPrimOp dflags res (VecWriteOffAddrOp vcat n w) args = do
    checkVecCompatibility dflags vcat n w
757 758 759 760
    doWriteOffAddrOp Nothing ty res args
  where
    ty :: CmmType
    ty = vecVmmType vcat n w
761

762 763
emitPrimOp dflags res (VecIndexScalarByteArrayOp vcat n w) args = do
    checkVecCompatibility dflags vcat n w
764
    doIndexByteArrayOpAs Nothing vecty ty res args
765
  where
766 767
    vecty :: CmmType
    vecty = vecVmmType vcat n w
768

769 770 771
    ty :: CmmType
    ty = vecCmmCat vcat w

772 773
emitPrimOp dflags res (VecReadScalarByteArrayOp vcat n w) args = do
    checkVecCompatibility dflags vcat n w
774
    doIndexByteArrayOpAs Nothing vecty ty res args
775
  where
776 777
    vecty :: CmmType
    vecty = vecVmmType vcat n w
778

779 780
    ty :: CmmType
    ty = vecCmmCat vcat w
781

782 783
emitPrimOp dflags res (VecWriteScalarByteArrayOp vcat n w) args = do
    checkVecCompatibility dflags vcat n w
784 785 786 787
    doWriteByteArrayOp Nothing ty res args
  where
    ty :: CmmType
    ty = vecCmmCat vcat w
788

789 790
emitPrimOp dflags res (VecIndexScalarOffAddrOp vcat n w) args = do
    checkVecCompatibility dflags vcat n w
791
    doIndexOffAddrOpAs Nothing vecty ty res args
792
  where
793 794
    vecty :: CmmType
    vecty = vecVmmType vcat n w
795

796 797 798
    ty :: CmmType
    ty = vecCmmCat vcat w

799 800
emitPrimOp dflags res (VecReadScalarOffAddrOp vcat n w) args = do
    checkVecCompatibility dflags vcat n w
801
    doIndexOffAddrOpAs Nothing vecty ty res args
802
  where
803 804
    vecty :: CmmType
    vecty = vecVmmType vcat n w
805

806 807
    ty :: CmmType
    ty = vecCmmCat vcat w
808

809 810
emitPrimOp dflags res (VecWriteScalarOffAddrOp vcat n w) args = do
    checkVecCompatibility dflags vcat n w
811 812 813 814
    doWriteOffAddrOp Nothing ty res args
  where
    ty :: CmmType
    ty = vecCmmCat vcat w
815

gmainland's avatar
gmainland committed
816
-- Prefetch
817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835
emitPrimOp _ [] PrefetchByteArrayOp3        args = doPrefetchByteArrayOp 3  args
emitPrimOp _ [] PrefetchMutableByteArrayOp3 args = doPrefetchMutableByteArrayOp 3  args
emitPrimOp _ [] PrefetchAddrOp3             args = doPrefetchAddrOp  3  args
emitPrimOp _ [] PrefetchValueOp3            args = doPrefetchValueOp 3 args

emitPrimOp _ [] PrefetchByteArrayOp2        args = doPrefetchByteArrayOp 2  args
emitPrimOp _ [] PrefetchMutableByteArrayOp2 args = doPrefetchMutableByteArrayOp 2  args
emitPrimOp _ [] PrefetchAddrOp2             args = doPrefetchAddrOp 2  args
emitPrimOp _ [] PrefetchValueOp2           args = doPrefetchValueOp 2 args

emitPrimOp _ [] PrefetchByteArrayOp1        args = doPrefetchByteArrayOp 1  args
emitPrimOp _ [] PrefetchMutableByteArrayOp1 args = doPrefetchMutableByteArrayOp 1  args
emitPrimOp _ [] PrefetchAddrOp1             args = doPrefetchAddrOp 1  args
emitPrimOp _ [] PrefetchValueOp1            args = doPrefetchValueOp 1 args

emitPrimOp _ [] PrefetchByteArrayOp0        args = doPrefetchByteArrayOp 0  args
emitPrimOp _ [] PrefetchMutableByteArrayOp0 args = doPrefetchMutableByteArrayOp 0  args
emitPrimOp _ [] PrefetchAddrOp0             args = doPrefetchAddrOp 0  args
emitPrimOp _ [] PrefetchValueOp0            args = doPrefetchValueOp 0 args
836

837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855
-- Atomic read-modify-write
emitPrimOp dflags [res] FetchAddByteArrayOp_Int [mba, ix, n] =
    doAtomicRMW res AMO_Add mba ix (bWord dflags) n
emitPrimOp dflags [res] FetchSubByteArrayOp_Int [mba, ix, n] =
    doAtomicRMW res AMO_Sub mba ix (bWord dflags) n
emitPrimOp dflags [res] FetchAndByteArrayOp_Int [mba, ix, n] =
    doAtomicRMW res AMO_And mba ix (bWord dflags) n
emitPrimOp dflags [res] FetchNandByteArrayOp_Int [mba, ix, n] =
    doAtomicRMW res AMO_Nand mba ix (bWord dflags) n
emitPrimOp dflags [res] FetchOrByteArrayOp_Int [mba, ix, n] =
    doAtomicRMW res AMO_Or mba ix (bWord dflags) n
emitPrimOp dflags [res] FetchXorByteArrayOp_Int [mba, ix, n] =
    doAtomicRMW res AMO_Xor mba ix (bWord dflags) n
emitPrimOp dflags [res] AtomicReadByteArrayOp_Int [mba, ix] =
    doAtomicReadByteArray res mba ix (bWord dflags)
emitPrimOp dflags [] AtomicWriteByteArrayOp_Int [mba, ix, val] =
    doAtomicWriteByteArray mba ix (bWord dflags) val
emitPrimOp dflags [res] CasByteArrayOp_Int [mba, ix, old, new] =
    doCasByteArray res mba ix (bWord dflags) old new
gmainland's avatar
gmainland committed
856

857
-- The rest just translate straightforwardly
858
emitPrimOp dflags [res] op [arg]
859
   | nopOp op
860
   = emitAssign (CmmLocal res) arg
861 862

   | Just (mop,rep) <- narrowOp op
863
   = emitAssign (CmmLocal res) $
864
           CmmMachOp (mop rep (wordWidth dflags)) [CmmMachOp (mop (wordWidth dflags) rep) [arg]]
865

866
emitPrimOp dflags r@[res] op args
867
   | Just prim <- callishOp op
868
   = do emitPrimCall r prim args
869

870
   | Just mop <- translateOp dflags op
871 872 873
   = let stmt = mkAssign (CmmLocal res) (CmmMachOp mop args) in
     emit stmt

874 875
emitPrimOp dflags results op args
   = case callishPrimOpSupported dflags op of
876 877 878 879 880 881 882 883
          Left op   -> emit $ mkUnsafeCall (PrimTarget op) results args
          Right gen -> gen results args

type GenericOp = [CmmFormal] -> [CmmActual] -> FCode ()

callishPrimOpSupported :: DynFlags -> PrimOp -> Either CallishMachOp GenericOp
callishPrimOpSupported dflags op
  = case op of
Michal Terepeta's avatar
Michal Terepeta committed
884 885 886 887
      IntQuotRemOp   | ncg && (x86ish || ppc) ->
                         Left (MO_S_QuotRem  (wordWidth dflags))
                     | otherwise              ->
                         Right (genericIntQuotRemOp (wordWidth dflags))
888

889
      Int8QuotRemOp  | ncg && (x86ish || ppc)
890
                                     -> Left (MO_S_QuotRem W8)
Michal Terepeta's avatar
Michal Terepeta committed
891 892
                     | otherwise     -> Right (genericIntQuotRemOp W8)

893
      Int16QuotRemOp | ncg && (x86ish || ppc)
894
                                     -> Left (MO_S_QuotRem W16)
895 896 897
                     | otherwise     -> Right (genericIntQuotRemOp W16)


Michal Terepeta's avatar
Michal Terepeta committed
898 899 900 901
      WordQuotRemOp  | ncg && (x86ish || ppc) ->
                         Left (MO_U_QuotRem  (wordWidth dflags))
                     | otherwise      ->
                         Right (genericWordQuotRemOp (wordWidth dflags))
902

903
      WordQuotRem2Op | (ncg && (x86ish || ppc))
904
                          || llvm     -> Left (MO_U_QuotRem2 (wordWidth dflags))
905
                     | otherwise      -> Right (genericWordQuotRem2Op dflags)
906

907
      Word8QuotRemOp | ncg && (x86ish || ppc)
908 909
                                      -> Left (MO_U_QuotRem W8)
                     | otherwise      -> Right (genericWordQuotRemOp W8)
Michal Terepeta's avatar
Michal Terepeta committed
910

911
      Word16QuotRemOp| ncg && (x86ish || ppc)
912
                                     -> Left (MO_U_QuotRem W16)
913 914
                     | otherwise     -> Right (genericWordQuotRemOp W16)

915
      WordAdd2Op     | (ncg && (x86ish || ppc))
916
                         || llvm      -> Left (MO_Add2       (wordWidth dflags))
917 918
                     | otherwise      -> Right genericWordAdd2Op

919
      WordAddCOp     | (ncg && (x86ish || ppc))
Sebastian Graf's avatar
Sebastian Graf committed
920 921 922
                         || llvm      -> Left (MO_AddWordC   (wordWidth dflags))
                     | otherwise      -> Right genericWordAddCOp

923
      WordSubCOp     | (ncg && (x86ish || ppc))
nkaretnikov's avatar
nkaretnikov committed
924 925 926
                         || llvm      -> Left (MO_SubWordC   (wordWidth dflags))
                     | otherwise      -> Right genericWordSubCOp

927
      IntAddCOp      | (ncg && (x86ish || ppc))
928
                         || llvm      -> Left (MO_AddIntC    (wordWidth dflags))
929
                     | otherwise      -> Right genericIntAddCOp
930

931
      IntSubCOp      | (ncg && (x86ish || ppc))
932
                         || llvm      -> Left (MO_SubIntC    (wordWidth dflags))
933
                     | otherwise      -> Right genericIntSubCOp
934

935
      WordMul2Op     | ncg && (x86ish || ppc)
936
                         || llvm      -> Left (MO_U_Mul2     (wordWidth dflags))
937
                     | otherwise      -> Right genericWordMul2Op
938
      FloatFabsOp    | (ncg && x86ish || ppc)
939 940
                         || llvm      -> Left MO_F32_Fabs
                     | otherwise      -> Right $ genericFabsOp W32
941
      DoubleFabsOp   | (ncg && x86ish || ppc)
942 943
                         || llvm      -> Left MO_F64_Fabs
                     | otherwise      -> Right $ genericFabsOp W64
944

Edward Z. Yang's avatar
Edward Z. Yang committed
945
      _ -> pprPanic "emitPrimOp: can't translate PrimOp " (ppr op)
946 947 948 949
 where
  ncg = case hscTarget dflags of
           HscAsm -> True
           _      -> False
950 951 952
  llvm = case hscTarget dflags of
           HscLlvm -> True
           _       -> False
953 954 955 956
  x86ish = case platformArch (targetPlatform dflags) of
             ArchX86    -> True
             ArchX86_64 -> True
             _          -> False
957 958 959 960
  ppc = case platformArch (targetPlatform dflags) of
          ArchPPC      -> True
          ArchPPC_64 _ -> True
          _            -> False
961

Michal Terepeta's avatar
Michal Terepeta committed
962 963
genericIntQuotRemOp :: Width -> GenericOp
genericIntQuotRemOp width [res_q, res_r] [arg_x, arg_y]
964
   = emit $ mkAssign (CmmLocal res_q)
Michal Terepeta's avatar
Michal Terepeta committed
965
              (CmmMachOp (MO_S_Quot width) [arg_x, arg_y]) <*>
966
            mkAssign (CmmLocal res_r)
Michal Terepeta's avatar
Michal Terepeta committed
967
              (CmmMachOp (MO_S_Rem  width) [arg_x, arg_y])
968
genericIntQuotRemOp _ _ _ = panic "genericIntQuotRemOp"
969

Michal Terepeta's avatar
Michal Terepeta committed
970 971
genericWordQuotRemOp :: Width -> GenericOp
genericWordQuotRemOp width [res_q, res_r] [arg_x, arg_y]
972
    = emit $ mkAssign (CmmLocal res_q)
Michal Terepeta's avatar
Michal Terepeta committed
973
               (CmmMachOp (MO_U_Quot width) [arg_x, arg_y]) <*>
974
             mkAssign (CmmLocal res_r)
Michal Terepeta's avatar
Michal Terepeta committed
975
               (CmmMachOp (MO_U_Rem  width) [arg_x, arg_y])
976
genericWordQuotRemOp _ _ _ = panic "genericWordQuotRemOp"
977

978 979
genericWordQuotRem2Op :: DynFlags -> GenericOp
genericWordQuotRem2Op dflags [res_q, res_r] [arg_x_high, arg_x_low, arg_y]
980
    = emit =<< f (widthInBits (wordWidth dflags)) zero arg_x_high arg_x_low
981
    where    ty = cmmExprType dflags arg_x_high
982 983 984 985 986 987 988
             shl   x i = CmmMachOp (MO_Shl   (wordWidth dflags)) [x, i]
             shr   x i = CmmMachOp (MO_U_Shr (wordWidth dflags)) [x, i]
             or    x y = CmmMachOp (MO_Or    (wordWidth dflags)) [x, y]
             ge    x y = CmmMachOp (MO_U_Ge  (wordWidth dflags)) [x, y]
             ne    x y = CmmMachOp (MO_Ne    (wordWidth dflags)) [x, y]
             minus x y = CmmMachOp (MO_Sub   (wordWidth dflags)) [x, y]
             times x y = CmmMachOp (MO_Mul   (wordWidth dflags)) [x, y]
989 990
             zero   = lit 0
             one    = lit 1
991 992
             negone = lit (fromIntegral (widthInBits (wordWidth dflags)) - 1)
             lit i = CmmLit (CmmInt i (wordWidth dflags))
993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024

             f :: Int -> CmmExpr -> CmmExpr -> CmmExpr -> FCode CmmAGraph
             f 0 acc high _ = return (mkAssign (CmmLocal res_q) acc <*>
                                      mkAssign (CmmLocal res_r) high)
             f i acc high low =
                 do roverflowedBit <- newTemp ty
                    rhigh'         <- newTemp ty
                    rhigh''        <- newTemp ty
                    rlow'          <- newTemp ty
                    risge          <- newTemp ty
                    racc'          <- newTemp ty
                    let high'         = CmmReg (CmmLocal rhigh')
                        isge          = CmmReg (CmmLocal risge)
                        overflowedBit = CmmReg (CmmLocal roverflowedBit)
                    let this = catAGraphs
                               [mkAssign (CmmLocal roverflowedBit)
                                          (shr high negone),
                                mkAssign (CmmLocal rhigh')
                                          (or (shl high one) (shr low negone)),
                                mkAssign (CmmLocal rlow')
                                          (shl low one),
                                mkAssign (CmmLocal