/* -*- tab-width: 8 -*- */
/* -----------------------------------------------------------------------------
 *
 * (c) The GHC Team, 1998-2012
 *
 * Out-of-line primitive operations
 *
 * This file contains the implementations of all the primitive
 * operations ("primops") which are not expanded inline.  See
 * ghc/compiler/prelude/primops.txt.pp for a list of all the primops;
 * this file contains code for most of those with the attribute
 * out_of_line=True.
 *
 * Entry convention: the entry convention for a primop is the
 * NativeNodeCall convention, and the return convention is
 * NativeReturn.  (see compiler/cmm/CmmCallConv.hs)
 *
 * This file is written in a subset of C--, extended with various
 * features specific to GHC.  It is compiled by GHC directly.  For the
 * syntax of .cmm files, see the parser in ghc/compiler/cmm/CmmParse.y.
 *
 * ---------------------------------------------------------------------------*/

#include "Cmm.h"
#include "MachDeps.h"
#include "SMPClosureOps.h"

#if defined(__PIC__)
import pthread_mutex_lock;
import pthread_mutex_unlock;
#endif
import CLOSURE base_ControlziExceptionziBase_nestedAtomically_closure;
import EnterCriticalSection;
import LeaveCriticalSection;
import CLOSURE ghczmprim_GHCziTypes_False_closure;
#if defined(USE_MINIINTERPRETER) || !defined(mingw32_HOST_OS)
import CLOSURE sm_mutex;
#endif
#if defined(PROFILING)
import CLOSURE CCS_MAIN;
#endif

/*-----------------------------------------------------------------------------
  Array Primitives

  Basically just new*Array - the others are all inline macros.

  The slow entry point is for returning from a heap check; the saved
  size argument must be re-loaded from the stack.
  -------------------------------------------------------------------------- */

/* for objects that are *less* than the size of a word, make sure we
 * round up to the nearest word for the size of the array.
 */

stg_newByteArrayzh ( W_ n )
{
    W_ words, payload_words;
    gcptr p;

    MAYBE_GC_N(stg_newByteArrayzh, n);

    payload_words = ROUNDUP_BYTES_TO_WDS(n);
    words = BYTES_TO_WDS(SIZEOF_StgArrBytes) + payload_words;
    ("ptr" p) = ccall allocateMightFail(MyCapability() "ptr", words);
    if (p == NULL) {
        jump stg_raisezh(base_GHCziIOziException_heapOverflow_closure);
    }
    TICK_ALLOC_PRIM(SIZEOF_StgArrBytes,WDS(payload_words),0);
    SET_HDR(p, stg_ARR_WORDS_info, CCCS);
    StgArrBytes_bytes(p) = n;
    return (p);
}

#define BA_ALIGN 16
#define BA_MASK  (BA_ALIGN-1)

stg_newPinnedByteArrayzh ( W_ n )
{
    W_ words, bytes, payload_words;
    gcptr p;

    MAYBE_GC_N(stg_newPinnedByteArrayzh, n);

    bytes = n;
    /* payload_words is what we will tell the profiler we had to allocate */
    payload_words = ROUNDUP_BYTES_TO_WDS(bytes);
    /* When we actually allocate memory, we need to allow space for the
       header: */
    bytes = bytes + SIZEOF_StgArrBytes;
    /* And we want to align to BA_ALIGN bytes, so we need to allow space
       to shift up to BA_ALIGN - 1 bytes: */
    bytes = bytes + BA_ALIGN - 1;
    /* Now we convert to a number of words: */
    words = ROUNDUP_BYTES_TO_WDS(bytes);

    ("ptr" p) = ccall allocatePinned(MyCapability() "ptr", words);
    if (p == NULL) {
        jump stg_raisezh(base_GHCziIOziException_heapOverflow_closure);
    }
    TICK_ALLOC_PRIM(SIZEOF_StgArrBytes,WDS(payload_words),0);

    /* Now we need to move p forward so that the payload is aligned
       to BA_ALIGN bytes: */
    p = p + ((-p - SIZEOF_StgArrBytes) & BA_MASK);
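    /* That is: advance p by (-(p + SIZEOF_StgArrBytes)) mod BA_ALIGN, the
       number of bytes needed to bring the payload (which starts at
       p + SIZEOF_StgArrBytes) up to the next 16-byte boundary.  E.g. if the
       payload would otherwise start 8 bytes past a boundary, the mask
       yields 8 and p moves up by 8 bytes. */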

    SET_HDR(p, stg_ARR_WORDS_info, CCCS);
    StgArrBytes_bytes(p) = n;
    return (p);
}

stg_newAlignedPinnedByteArrayzh ( W_ n, W_ alignment )
{
    W_ words, bytes, payload_words;
    gcptr p;

    again: MAYBE_GC(again);

    /* we always supply at least word-aligned memory, so there's no
       need to allow extra space for alignment if the requirement is less
       than a word.  This also prevents mischief with alignment == 0. */
    if (alignment <= SIZEOF_W) { alignment = 1; }

    bytes = n;

    /* payload_words is what we will tell the profiler we had to allocate */
    payload_words = ROUNDUP_BYTES_TO_WDS(bytes);

    /* When we actually allocate memory, we need to allow space for the
       header: */
    bytes = bytes + SIZEOF_StgArrBytes;
    /* And we want to align to <alignment> bytes, so we need to allow space
       to shift up to <alignment - 1> bytes: */
    bytes = bytes + alignment - 1;
    /* Now we convert to a number of words: */
    words = ROUNDUP_BYTES_TO_WDS(bytes);

    ("ptr" p) = ccall allocatePinned(MyCapability() "ptr", words);
    if (p == NULL) {
        jump stg_raisezh(base_GHCziIOziException_heapOverflow_closure);
    }
    TICK_ALLOC_PRIM(SIZEOF_StgArrBytes,WDS(payload_words),0);

    /* Now we need to move p forward so that the payload is aligned
       to <alignment> bytes. Note that we are assuming that
       <alignment> is a power of 2, which is technically not guaranteed */
    p = p + ((-p - SIZEOF_StgArrBytes) & (alignment - 1));
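    /* (The mask trick only computes "bytes needed mod <alignment>"
       correctly when <alignment> is a power of 2; a non-power-of-2
       alignment would silently misalign rather than fail.) */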

    SET_HDR(p, stg_ARR_WORDS_info, CCCS);
    StgArrBytes_bytes(p) = n;
    return (p);
}

stg_compareByteArrayszh ( gcptr src1, W_ src1_ofs, gcptr src2, W_ src2_ofs, W_ size )
// ByteArray# -> Int# -> ByteArray# -> Int# -> Int# -> Int#
{
    CInt res;
    W_ src1p, src2p;

    src1p = src1 + SIZEOF_StgHeader + OFFSET_StgArrBytes_payload + src1_ofs;
    src2p = src2 + SIZEOF_StgHeader + OFFSET_StgArrBytes_payload + src2_ofs;

    (res) = ccall memcmp(src1p "ptr", src2p "ptr", size);

    return (TO_W_(res));
}

stg_isByteArrayPinnedzh ( gcptr ba )
// ByteArray# s -> Int#
{
    W_ bd, flags;
    bd = Bdescr(ba);
    // Pinned byte arrays live in blocks with the BF_PINNED flag set.
    // We also consider BF_LARGE objects to be immovable. See #13894.
    // See the comment in Storage.c:allocatePinned.
    flags = TO_W_(bdescr_flags(bd));
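    // N.B. in C-- (unlike C) `&` binds more tightly than `!=`, so the
    // expression below parses as ((flags & (BF_PINNED | BF_LARGE)) != 0).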
    return (flags & (BF_PINNED | BF_LARGE) != 0);
}

stg_isMutableByteArrayPinnedzh ( gcptr mba )
// MutableByteArray# s -> Int#
{
    jump stg_isByteArrayPinnedzh(mba);
}

// shrink size of MutableByteArray in-place
stg_shrinkMutableByteArrayzh ( gcptr mba, W_ new_size )
// MutableByteArray# s -> Int# -> State# s -> State# s
{
   ASSERT(new_size >= 0);
   ASSERT(new_size <= StgArrBytes_bytes(mba));

   OVERWRITING_CLOSURE_OFS(mba, (BYTES_TO_WDS(SIZEOF_StgArrBytes) +
                                 ROUNDUP_BYTES_TO_WDS(new_size)));
   StgArrBytes_bytes(mba) = new_size;
   LDV_RECORD_CREATE(mba);

   return ();
}

// resize MutableByteArray
//
// The returned MutableByteArray is either the original
// MutableByteArray resized in-place or, if not possible, a newly
// allocated (unpinned) MutableByteArray (with the original content
// copied over)
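//
// Because of that, callers must continue with the returned array and
// treat the old one as dead.  A typical use from Haskell (a sketch):
//
//   case resizeMutableByteArray# mba new_sz s0 of
//     (# s1, mba' #) -> ... only mba' may be used from here on ...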
stg_resizzeMutableByteArrayzh ( gcptr mba, W_ new_size )
// MutableByteArray# s -> Int# -> State# s -> (# State# s,MutableByteArray# s #)
{
   W_ new_size_wds;

   ASSERT(new_size >= 0);

   new_size_wds = ROUNDUP_BYTES_TO_WDS(new_size);

   if (new_size_wds <= BYTE_ARR_WDS(mba)) {
      OVERWRITING_CLOSURE_OFS(mba, (BYTES_TO_WDS(SIZEOF_StgArrBytes) +
                                    new_size_wds));
      StgArrBytes_bytes(mba) = new_size;
      LDV_RECORD_CREATE(mba);

      return (mba);
   } else {
      (P_ new_mba) = call stg_newByteArrayzh(new_size);

      // maybe at some point in the future we may be able to grow the
      // MBA in-place w/o copying if we know the space after the
      // current MBA is still available, as often we want to grow the
      // MBA shortly after we allocated the original MBA. So maybe no
      // further allocations have occurred by then.

      // copy over old content
      prim %memcpy(BYTE_ARR_CTS(new_mba), BYTE_ARR_CTS(mba),
                   StgArrBytes_bytes(mba), SIZEOF_W);

      return (new_mba);
   }
}

// RRN: This one does not use the "ticketing" approach because it
// deals in unboxed scalars, not heap pointers.
stg_casIntArrayzh( gcptr arr, W_ ind, W_ old, W_ new )
/* MutableByteArray# s -> Int# -> Int# -> Int# -> State# s -> (# State# s, Int# #) */
{
    W_ p, h;

    p = arr + SIZEOF_StgArrBytes + WDS(ind);
    (h) = prim %cmpxchgW(p, old, new);
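    // h is the word that was in the cell when the cmpxchg ran; the write
    // happened iff h == old, so the caller detects success by comparing
    // the result against its 'old' argument.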

    return(h);
}


stg_newArrayzh ( W_ n /* words */, gcptr init )
{
    W_ words, size, p;
    gcptr arr;

    again: MAYBE_GC(again);

    // the mark area contains one byte for each 2^MUT_ARR_PTRS_CARD_BITS words
    // in the array, making sure we round up, and then rounding up to a whole
    // number of words.
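    // (For example, with 2^MUT_ARR_PTRS_CARD_BITS = 128 words per card,
    // a 200-element array needs ceil(200/128) = 2 card bytes, which
    // round up to one whole extra word on a 64-bit machine.)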
    size = n + mutArrPtrsCardWords(n);
    words = BYTES_TO_WDS(SIZEOF_StgMutArrPtrs) + size;
    ("ptr" arr) = ccall allocateMightFail(MyCapability() "ptr",words);
    if (arr == NULL) {
        jump stg_raisezh(base_GHCziIOziException_heapOverflow_closure);
    }
    TICK_ALLOC_PRIM(SIZEOF_StgMutArrPtrs, WDS(size), 0);

    SET_HDR(arr, stg_MUT_ARR_PTRS_DIRTY_info, CCCS);
    StgMutArrPtrs_ptrs(arr) = n;
    StgMutArrPtrs_size(arr) = size;

    // Initialise all elements of the array with the value init
    p = arr + SIZEOF_StgMutArrPtrs;
  for:
    if (p < arr + SIZEOF_StgMutArrPtrs + WDS(n)) {
        W_[p] = init;
        p = p + WDS(1);
        goto for;
    }

    return (arr);
}

stg_unsafeThawArrayzh ( gcptr arr )
{
    // SUBTLETY TO DO WITH THE OLD GEN MUTABLE LIST
    //
    // A MUT_ARR_PTRS lives on the mutable list, but a MUT_ARR_PTRS_FROZEN
    // normally doesn't.  However, when we freeze a MUT_ARR_PTRS, we leave
    // it on the mutable list for the GC to remove (removing something from
    // the mutable list is not easy).
    //
    // So that we can tell whether a MUT_ARR_PTRS_FROZEN is on the mutable list,
    // when we freeze it we set the info ptr to be MUT_ARR_PTRS_FROZEN0
    // to indicate that it is still on the mutable list.
    //
    // So, when we thaw a MUT_ARR_PTRS_FROZEN, we must cope with two cases:
    // either it is on a mut_list, or it isn't.  We adopt the convention that
    // the closure type is MUT_ARR_PTRS_FROZEN0 if it is on the mutable list,
    // and MUT_ARR_PTRS_FROZEN otherwise.  In fact it wouldn't matter if
    // we put it on the mutable list more than once, but it would get scavenged
    // multiple times during GC, which would be unnecessarily slow.
    //
    if (StgHeader_info(arr) != stg_MUT_ARR_PTRS_FROZEN0_info) {
        SET_INFO(arr,stg_MUT_ARR_PTRS_DIRTY_info);
        recordMutable(arr);
        // must be done after SET_INFO, because it ASSERTs closure_MUTABLE()
        return (arr);
    } else {
        SET_INFO(arr,stg_MUT_ARR_PTRS_DIRTY_info);
        return (arr);
    }
}

stg_copyArrayzh ( gcptr src, W_ src_off, gcptr dst, W_ dst_off, W_ n )
{
    copyArray(src, src_off, dst, dst_off, n)
}

stg_copyMutableArrayzh ( gcptr src, W_ src_off, gcptr dst, W_ dst_off, W_ n )
{
    copyMutableArray(src, src_off, dst, dst_off, n)
}

stg_copyArrayArrayzh ( gcptr src, W_ src_off, gcptr dst, W_ dst_off, W_ n )
{
    copyArray(src, src_off, dst, dst_off, n)
}

stg_copyMutableArrayArrayzh ( gcptr src, W_ src_off, gcptr dst, W_ dst_off, W_ n )
{
    copyMutableArray(src, src_off, dst, dst_off, n)
}

stg_cloneArrayzh ( gcptr src, W_ offset, W_ n )
{
    cloneArray(stg_MUT_ARR_PTRS_FROZEN_info, src, offset, n)
}

stg_cloneMutableArrayzh ( gcptr src, W_ offset, W_ n )
{
    cloneArray(stg_MUT_ARR_PTRS_DIRTY_info, src, offset, n)
}

// We have to escape the "z" in the name.
stg_freezzeArrayzh ( gcptr src, W_ offset, W_ n )
{
    cloneArray(stg_MUT_ARR_PTRS_FROZEN_info, src, offset, n)
}

stg_thawArrayzh ( gcptr src, W_ offset, W_ n )
{
    cloneArray(stg_MUT_ARR_PTRS_DIRTY_info, src, offset, n)
}

// RRN: Uses the ticketed approach; see casMutVar
stg_casArrayzh ( gcptr arr, W_ ind, gcptr old, gcptr new )
/* MutableArray# s a -> Int# -> a -> a -> State# s -> (# State# s, Int#, Any a #) */
{
    gcptr h;
    W_ p, len;

    p = arr + SIZEOF_StgMutArrPtrs + WDS(ind);
    (h) = prim %cmpxchgW(p, old, new);

    if (h != old) {
        // Failure, return what was there instead of 'old':
        return (1,h);
    } else {
        // Compare and Swap Succeeded:
        SET_HDR(arr, stg_MUT_ARR_PTRS_DIRTY_info, CCCS);
        len = StgMutArrPtrs_ptrs(arr);
        // The write barrier.  We must write a byte into the mark table:
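        // (The mark table sits just after the len element slots, one byte
        // per 2^MUT_ARR_PTRS_CARD_BITS elements, so this marks the card
        // containing the slot we just wrote.)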
        I8[arr + SIZEOF_StgMutArrPtrs + WDS(len) + (ind >> MUT_ARR_PTRS_CARD_BITS )] = 1;
        return (0,new);
    }
}

stg_newArrayArrayzh ( W_ n /* words */ )
{
    W_ words, size, p;
    gcptr arr;

    MAYBE_GC_N(stg_newArrayArrayzh, n);

    // the mark area contains one byte for each 2^MUT_ARR_PTRS_CARD_BITS words
    // in the array, making sure we round up, and then rounding up to a whole
    // number of words.
    size = n + mutArrPtrsCardWords(n);
    words = BYTES_TO_WDS(SIZEOF_StgMutArrPtrs) + size;
    ("ptr" arr) = ccall allocateMightFail(MyCapability() "ptr",words);
    if (arr == NULL) {
        jump stg_raisezh(base_GHCziIOziException_heapOverflow_closure);
    }
    TICK_ALLOC_PRIM(SIZEOF_StgMutArrPtrs, WDS(size), 0);

    SET_HDR(arr, stg_MUT_ARR_PTRS_DIRTY_info, CCCS);
    StgMutArrPtrs_ptrs(arr) = n;
    StgMutArrPtrs_size(arr) = size;

    // Initialise all elements of the array with a pointer to the new array
    p = arr + SIZEOF_StgMutArrPtrs;
  for:
    if (p < arr + SIZEOF_StgMutArrPtrs + WDS(n)) {
        W_[p] = arr;
        p = p + WDS(1);
        goto for;
    }

    return (arr);
}


/* -----------------------------------------------------------------------------
   SmallArray primitives
   -------------------------------------------------------------------------- */

stg_newSmallArrayzh ( W_ n /* words */, gcptr init )
{
    W_ words, size, p;
    gcptr arr;

    again: MAYBE_GC(again);

    words = BYTES_TO_WDS(SIZEOF_StgSmallMutArrPtrs) + n;
    ("ptr" arr) = ccall allocateMightFail(MyCapability() "ptr",words);
    if (arr == NULL) {
        jump stg_raisezh(base_GHCziIOziException_heapOverflow_closure);
    }
    TICK_ALLOC_PRIM(SIZEOF_StgSmallMutArrPtrs, WDS(n), 0);

    SET_HDR(arr, stg_SMALL_MUT_ARR_PTRS_DIRTY_info, CCCS);
    StgSmallMutArrPtrs_ptrs(arr) = n;

    // Initialise all elements of the array with the value init
    p = arr + SIZEOF_StgSmallMutArrPtrs;
  for:
    if (p < arr + SIZEOF_StgSmallMutArrPtrs + WDS(n)) {
        W_[p] = init;
        p = p + WDS(1);
        goto for;
    }

    return (arr);
}

stg_unsafeThawSmallArrayzh ( gcptr arr )
{
    // See stg_unsafeThawArrayzh
    if (StgHeader_info(arr) != stg_SMALL_MUT_ARR_PTRS_FROZEN0_info) {
        SET_INFO(arr, stg_SMALL_MUT_ARR_PTRS_DIRTY_info);
        recordMutable(arr);
        // must be done after SET_INFO, because it ASSERTs closure_MUTABLE()
        return (arr);
    } else {
        SET_INFO(arr, stg_SMALL_MUT_ARR_PTRS_DIRTY_info);
        return (arr);
    }
}

stg_cloneSmallArrayzh ( gcptr src, W_ offset, W_ n )
{
    cloneSmallArray(stg_SMALL_MUT_ARR_PTRS_FROZEN_info, src, offset, n)
}

stg_cloneSmallMutableArrayzh ( gcptr src, W_ offset, W_ n )
{
    cloneSmallArray(stg_SMALL_MUT_ARR_PTRS_DIRTY_info, src, offset, n)
}

// We have to escape the "z" in the name.
stg_freezzeSmallArrayzh ( gcptr src, W_ offset, W_ n )
{
    cloneSmallArray(stg_SMALL_MUT_ARR_PTRS_FROZEN_info, src, offset, n)
}

stg_thawSmallArrayzh ( gcptr src, W_ offset, W_ n )
{
    cloneSmallArray(stg_SMALL_MUT_ARR_PTRS_DIRTY_info, src, offset, n)
}

stg_copySmallArrayzh ( gcptr src, W_ src_off, gcptr dst, W_ dst_off, W_ n)
{
    W_ dst_p, src_p, bytes;

    SET_INFO(dst, stg_SMALL_MUT_ARR_PTRS_DIRTY_info);

    dst_p = dst + SIZEOF_StgSmallMutArrPtrs + WDS(dst_off);
    src_p = src + SIZEOF_StgSmallMutArrPtrs + WDS(src_off);
    bytes = WDS(n);
    prim %memcpy(dst_p, src_p, bytes, SIZEOF_W);

    return ();
}

stg_copySmallMutableArrayzh ( gcptr src, W_ src_off, gcptr dst, W_ dst_off, W_ n)
{
    W_ dst_p, src_p, bytes;

    SET_INFO(dst, stg_SMALL_MUT_ARR_PTRS_DIRTY_info);

    dst_p = dst + SIZEOF_StgSmallMutArrPtrs + WDS(dst_off);
    src_p = src + SIZEOF_StgSmallMutArrPtrs + WDS(src_off);
    bytes = WDS(n);
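    // The ranges may overlap when copying within a single array, so use
    // memmove there; two distinct arrays can never overlap, so the
    // (typically cheaper) memcpy is safe in that case.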
    if (src == dst) {
        prim %memmove(dst_p, src_p, bytes, SIZEOF_W);
    } else {
        prim %memcpy(dst_p, src_p, bytes, SIZEOF_W);
    }

    return ();
}

// RRN: Uses the ticketed approach; see casMutVar
stg_casSmallArrayzh ( gcptr arr, W_ ind, gcptr old, gcptr new )
/* SmallMutableArray# s a -> Int# -> a -> a -> State# s -> (# State# s, Int#, Any a #) */
{
    gcptr h;
    W_ p, len;

    p = arr + SIZEOF_StgSmallMutArrPtrs + WDS(ind);
    (h) = prim %cmpxchgW(p, old, new);

    if (h != old) {
        // Failure, return what was there instead of 'old':
        return (1,h);
    } else {
        // Compare and Swap Succeeded:
        SET_HDR(arr, stg_SMALL_MUT_ARR_PTRS_DIRTY_info, CCCS);
        return (0,new);
    }
}


/* -----------------------------------------------------------------------------
   MutVar primitives
   -------------------------------------------------------------------------- */

stg_newMutVarzh ( gcptr init )
{
    W_ mv;

    ALLOC_PRIM_P (SIZEOF_StgMutVar, stg_newMutVarzh, init);

    mv = Hp - SIZEOF_StgMutVar + WDS(1);
    SET_HDR(mv,stg_MUT_VAR_DIRTY_info,CCCS);
    StgMutVar_var(mv) = init;

    return (mv);
}

// RRN: To support the "ticketed" approach, we return the NEW rather
// than old value if the CAS is successful.  This is received in an
// opaque form in the Haskell code, preventing the compiler from
// changing its pointer identity.  The ticket can then be safely used
// in future CAS operations.
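//
// A sketch of that usage from the Haskell side (readForCAS, casIORef and
// peekTicket are from the atomic-primops package, not this file):
//
//   spinModify :: IORef a -> (a -> a) -> IO ()
//   spinModify ref f = do
//     tick <- readForCAS ref
//     (ok, _) <- casIORef ref tick (f (peekTicket tick))
//     if ok then pure () else spinModify ref f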
stg_casMutVarzh ( gcptr mv, gcptr old, gcptr new )
 /* MutVar# s a -> a -> a -> State# s -> (# State#, Int#, Any a #) */
{
#if defined(THREADED_RTS)
    gcptr h;

    (h) = prim %cmpxchgW(mv + SIZEOF_StgHeader + OFFSET_StgMutVar_var, old, new);
    if (h != old) {
        return (1,h);
    } else {
        if (GET_INFO(mv) == stg_MUT_VAR_CLEAN_info) {
            ccall dirty_MUT_VAR(BaseReg "ptr", mv "ptr");
        }
        return (0,new);
    }
#else
    gcptr prev_val;

    prev_val = StgMutVar_var(mv);
    if (prev_val != old) {
        return (1,prev_val);
    } else {
        StgMutVar_var(mv) = new;
        if (GET_INFO(mv) == stg_MUT_VAR_CLEAN_info) {
            ccall dirty_MUT_VAR(BaseReg "ptr", mv "ptr");
        }
        return (0,new);
    }
#endif
}

stg_atomicModifyMutVarzh ( gcptr mv, gcptr f )
{
    W_ z, x, y, r, h;

    /* If x is the current contents of the MutVar#, then
       we want to make the new contents point to

         (sel_0 (f x))

       and the return value is

         (sel_1 (f x))

        obviously we can share (f x).

         z = [stg_ap_2 f x]  (max (HS + 2) MIN_UPD_SIZE)
         y = [stg_sel_0 z]   (max (HS + 1) MIN_UPD_SIZE)
         r = [stg_sel_1 z]   (max (HS + 1) MIN_UPD_SIZE)
    */
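    /* In Haskell terms (a sketch): z is the unevaluated pair (f x), y is
       (fst z) and r is (snd z).  All three are allocated as thunks below;
       y is then installed as the new contents (by CAS on THREADED_RTS)
       and r is returned, without forcing any of them. */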

#if defined(MIN_UPD_SIZE) && MIN_UPD_SIZE > 1
#define THUNK_1_SIZE (SIZEOF_StgThunkHeader + WDS(MIN_UPD_SIZE))
#define TICK_ALLOC_THUNK_1() TICK_ALLOC_UP_THK(WDS(1),WDS(MIN_UPD_SIZE-1))
#else
#define THUNK_1_SIZE (SIZEOF_StgThunkHeader + WDS(1))
#define TICK_ALLOC_THUNK_1() TICK_ALLOC_UP_THK(WDS(1),0)
#endif

#if defined(MIN_UPD_SIZE) && MIN_UPD_SIZE > 2
#define THUNK_2_SIZE (SIZEOF_StgThunkHeader + WDS(MIN_UPD_SIZE))
#define TICK_ALLOC_THUNK_2() TICK_ALLOC_UP_THK(WDS(2),WDS(MIN_UPD_SIZE-2))
#else
#define THUNK_2_SIZE (SIZEOF_StgThunkHeader + WDS(2))
#define TICK_ALLOC_THUNK_2() TICK_ALLOC_UP_THK(WDS(2),0)
#endif

#define SIZE (THUNK_2_SIZE + THUNK_1_SIZE + THUNK_1_SIZE)

    HP_CHK_GEN_TICKY(SIZE);

    TICK_ALLOC_THUNK_2();
    CCCS_ALLOC(THUNK_2_SIZE);
    z = Hp - THUNK_2_SIZE + WDS(1);
    SET_HDR(z, stg_ap_2_upd_info, CCCS);
    LDV_RECORD_CREATE(z);
    StgThunk_payload(z,0) = f;

    TICK_ALLOC_THUNK_1();
    CCCS_ALLOC(THUNK_1_SIZE);
    y = z - THUNK_1_SIZE;
    SET_HDR(y, stg_sel_0_upd_info, CCCS);
    LDV_RECORD_CREATE(y);
    StgThunk_payload(y,0) = z;

    TICK_ALLOC_THUNK_1();
    CCCS_ALLOC(THUNK_1_SIZE);
    r = y - THUNK_1_SIZE;
    SET_HDR(r, stg_sel_1_upd_info, CCCS);
    LDV_RECORD_CREATE(r);
    StgThunk_payload(r,0) = z;

  retry:
    x = StgMutVar_var(mv);
    StgThunk_payload(z,1) = x;
#if defined(THREADED_RTS)
    (h) = prim %cmpxchgW(mv + SIZEOF_StgHeader + OFFSET_StgMutVar_var, x, y);
    if (h != x) { goto retry; }
#else
    StgMutVar_var(mv) = y;
#endif

    if (GET_INFO(mv) == stg_MUT_VAR_CLEAN_info) {
        ccall dirty_MUT_VAR(BaseReg "ptr", mv "ptr");
    }

    return (r);
}

/* -----------------------------------------------------------------------------
   Weak Pointer Primitives
   -------------------------------------------------------------------------- */

STRING(stg_weak_msg,"New weak pointer at %p\n")

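// This is the out-of-line implementation of the mkWeak# primop; e.g.
// System.Mem.Weak.mkWeak key value (Just finalizer) ends up here, with
// stg_NO_FINALIZER_closure in place of the finalizer when there is none.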
stg_mkWeakzh ( gcptr key,
               gcptr value,
               gcptr finalizer /* or stg_NO_FINALIZER_closure */ )
{
    gcptr w;

    ALLOC_PRIM (SIZEOF_StgWeak)

    w = Hp - SIZEOF_StgWeak + WDS(1);
    SET_HDR(w, stg_WEAK_info, CCCS);

    StgWeak_key(w)         = key;
    StgWeak_value(w)       = value;
    StgWeak_finalizer(w)   = finalizer;
    StgWeak_cfinalizers(w) = stg_NO_FINALIZER_closure;

    StgWeak_link(w) = Capability_weak_ptr_list_hd(MyCapability());
    Capability_weak_ptr_list_hd(MyCapability()) = w;
    if (Capability_weak_ptr_list_tl(MyCapability()) == NULL) {
        Capability_weak_ptr_list_tl(MyCapability()) = w;
    }

    IF_DEBUG(weak, ccall debugBelch(stg_weak_msg,w));

    return (w);
}

stg_mkWeakNoFinalizzerzh ( gcptr key, gcptr value )
{
    jump stg_mkWeakzh (key, value, stg_NO_FINALIZER_closure);
}

STRING(stg_cfinalizer_msg,"Adding a finalizer to %p\n")

stg_addCFinalizzerToWeakzh ( W_ fptr,   // finalizer
                             W_ ptr,
                             W_ flag,   // has environment (0 or 1)
                             W_ eptr,
                             gcptr w )
{
    W_ c, info;

    ALLOC_PRIM (SIZEOF_StgCFinalizerList)

    c = Hp - SIZEOF_StgCFinalizerList + WDS(1);
    SET_HDR(c, stg_C_FINALIZER_LIST_info, CCCS);

    StgCFinalizerList_fptr(c) = fptr;
    StgCFinalizerList_ptr(c) = ptr;
    StgCFinalizerList_eptr(c) = eptr;
    StgCFinalizerList_flag(c) = flag;

    LOCK_CLOSURE(w, info);

    if (info == stg_DEAD_WEAK_info) {
        // Already dead.
        unlockClosure(w, info);
        return (0);
    }

    StgCFinalizerList_link(c) = StgWeak_cfinalizers(w);
    StgWeak_cfinalizers(w) = c;

    unlockClosure(w, info);

    recordMutable(w);

    IF_DEBUG(weak, ccall debugBelch(stg_cfinalizer_msg,w));

    return (1);
}

stg_finalizzeWeakzh ( gcptr w )
{
    gcptr f, list;
    W_ info;

    LOCK_CLOSURE(w, info);

    // already dead?
    if (info == stg_DEAD_WEAK_info) {
        unlockClosure(w, info);
        return (0,stg_NO_FINALIZER_closure);
    }

    f    = StgWeak_finalizer(w);
    list = StgWeak_cfinalizers(w);

    // kill it
#if defined(PROFILING)
    // @LDV profiling
    // A weak pointer is inherently used, so we do not need to call
    // LDV_recordDead_FILL_SLOP_DYNAMIC():
    //    LDV_recordDead_FILL_SLOP_DYNAMIC((StgClosure *)w);
    // or, LDV_recordDead():
    //    LDV_recordDead((StgClosure *)w, sizeofW(StgWeak) - sizeofW(StgProfHeader));
    // Furthermore, when PROFILING is turned on, dead weak pointers are exactly as
    // large as weak pointers, so there is no need to fill the slop, either.
    // See stg_DEAD_WEAK_info in StgMiscClosures.cmm.
#endif

    //
    // Todo: maybe use SET_HDR() and remove LDV_recordCreate()?
    //
    unlockClosure(w, stg_DEAD_WEAK_info);

    LDV_RECORD_CREATE(w);

    if (list != stg_NO_FINALIZER_closure) {
      ccall runCFinalizers(list);
    }

    /* return the finalizer */
    if (f == stg_NO_FINALIZER_closure) {
        return (0,stg_NO_FINALIZER_closure);
    } else {
        return (1,f);
    }
}

stg_deRefWeakzh ( gcptr w )
{
    W_ code, info;
    gcptr val;

    info = GET_INFO(w);

    if (info == stg_WHITEHOLE_info) {
        // w is locked by another thread. Now it's not immediately clear if w is
        // alive or not. We use lockClosure to wait for the info pointer to become
        // something other than stg_WHITEHOLE_info.

        LOCK_CLOSURE(w, info);
        unlockClosure(w, info);
    }

    if (info == stg_WEAK_info) {
        code = 1;
        val = StgWeak_value(w);
    } else {
        code = 0;
        val = w;
    }
    return (code,val);
}

/* -----------------------------------------------------------------------------
   Floating point operations.
   -------------------------------------------------------------------------- */

stg_decodeFloatzuIntzh ( F_ arg )
{
    W_ p;
    W_ tmp, mp_tmp1, mp_tmp_w, r1, r2;

    STK_CHK_GEN_N (WDS(2));

    reserve 2 = tmp {

        mp_tmp1  = tmp + WDS(1);
        mp_tmp_w = tmp;

        /* Perform the operation */
        ccall __decodeFloat_Int(mp_tmp1 "ptr", mp_tmp_w "ptr", arg);

        r1 = W_[mp_tmp1];
        r2 = W_[mp_tmp_w];
    }

    /* returns: (Int# (mantissa), Int# (exponent)) */
    return (r1, r2);
}

stg_decodeDoublezu2Intzh ( D_ arg )
{
    W_ p, tmp;
    W_ mp_tmp1, mp_tmp2, mp_result1, mp_result2;
    W_ r1, r2, r3, r4;

    STK_CHK_GEN_N (WDS(4));

    reserve 4 = tmp {

        mp_tmp1    = tmp + WDS(3);
        mp_tmp2    = tmp + WDS(2);
        mp_result1 = tmp + WDS(1);
        mp_result2 = tmp;

        /* Perform the operation */
        ccall __decodeDouble_2Int(mp_tmp1 "ptr", mp_tmp2 "ptr",
                                  mp_result1 "ptr", mp_result2 "ptr",
                                  arg);

        r1 = W_[mp_tmp1];
        r2 = W_[mp_tmp2];
        r3 = W_[mp_result1];
        r4 = W_[mp_result2];
    }

    /* returns:
       (Int# (mant sign), Word# (mant high), Word# (mant low), Int# (expn)) */
    return (r1, r2, r3, r4);
}

/* Double# -> (# Int64#, Int# #) */
stg_decodeDoublezuInt64zh ( D_ arg )
{
    CInt exp;
    I64  mant;
    W_   mant_ptr;

    STK_CHK_GEN_N (SIZEOF_INT64);
    reserve BYTES_TO_WDS(SIZEOF_INT64) = mant_ptr {
        (exp) = ccall __decodeDouble_Int64(mant_ptr "ptr", arg);
        mant = I64[mant_ptr];
    }
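    /* Invariant (as for decodeDouble_2Int# above): for finite arg,
       arg == fromIntegral mant * 2^^exp exactly. */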

    return (mant, TO_W_(exp));
}

/* -----------------------------------------------------------------------------
 * Concurrency primitives
 * -------------------------------------------------------------------------- */

stg_forkzh ( gcptr closure )
{
    MAYBE_GC_P(stg_forkzh, closure);

    gcptr threadid;

    ("ptr" threadid) = ccall createIOThread( MyCapability() "ptr",
                                  RtsFlags_GcFlags_initialStkSize(RtsFlags),
                                  closure "ptr");

    /* start blocked if the current thread is blocked */
    StgTSO_flags(threadid) = %lobits16(
        TO_W_(StgTSO_flags(threadid)) |
        TO_W_(StgTSO_flags(CurrentTSO)) & (TSO_BLOCKEX | TSO_INTERRUPTIBLE));
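    // (This is also what makes a forkIO child start with its parent's
    // masking state; see Control.Exception.mask.)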

    ccall scheduleThread(MyCapability() "ptr", threadid "ptr");

    // context switch soon, but not immediately: we don't want every
    // forkIO to force a context-switch.
    Capability_context_switch(MyCapability()) = 1 :: CInt;

    return (threadid);
}

stg_forkOnzh ( W_ cpu, gcptr closure )
{
again: MAYBE_GC(again);

    gcptr threadid;

    ("ptr" threadid) = ccall createIOThread(
        MyCapability() "ptr",
        RtsFlags_GcFlags_initialStkSize(RtsFlags),
        closure "ptr");

    /* start blocked if the current thread is blocked */
    StgTSO_flags(threadid) = %lobits16(
        TO_W_(StgTSO_flags(threadid)) |
        TO_W_(StgTSO_flags(CurrentTSO)) & (TSO_BLOCKEX | TSO_INTERRUPTIBLE));

    ccall scheduleThreadOn(MyCapability() "ptr", cpu, threadid "ptr");

    // context switch soon, but not immediately: we don't want every
    // forkIO to force a context-switch.
    Capability_context_switch(MyCapability()) = 1 :: CInt;

    return (threadid);
}

stg_yieldzh ()
{
    // when we yield to the scheduler, we have to tell it to put the
    // current thread to the back of the queue by setting the
    // context_switch flag.  If we don't do this, it will run the same
    // thread again.
    Capability_context_switch(MyCapability()) = 1 :: CInt;
    jump stg_yield_noregs();
}

stg_myThreadIdzh ()
{
    return (CurrentTSO);
}

stg_labelThreadzh ( gcptr threadid, W_ addr )
{
#if defined(DEBUG) || defined(TRACING) || defined(DTRACE)
    ccall labelThread(MyCapability() "ptr", threadid "ptr", addr "ptr");
#endif
    return ();
}

stg_isCurrentThreadBoundzh (/* no args */)
{
    W_ r;
    (r) = ccall isThreadBound(CurrentTSO);
    return (r);