/* -*- tab-width: 8 -*- */

/* -----------------------------------------------------------------------------
 *
 * (c) The GHC Team, 1998-2012
 *
 * Out-of-line primitive operations
 *
 * This file contains the implementations of all the primitive
 * operations ("primops") which are not expanded inline.  See
 * ghc/compiler/prelude/primops.txt.pp for a list of all the primops;
 * this file contains code for most of those with the attribute
 * out_of_line=True.
 *
 * Entry convention: the entry convention for a primop is the
 * NativeNodeCall convention, and the return convention is
 * NativeReturn.  (see compiler/cmm/CmmCallConv.hs)
 *
 * This file is written in a subset of C--, extended with various
 * features specific to GHC.  It is compiled by GHC directly.  For the
 * syntax of .cmm files, see the parser in ghc/compiler/cmm/CmmParse.y.
 *
 * ---------------------------------------------------------------------------*/

#include "Cmm.h"
#include "MachDeps.h"
#include "SMPClosureOps.h"

#if defined(__PIC__)
import pthread_mutex_lock;
import pthread_mutex_unlock;
#endif
import CLOSURE base_ControlziExceptionziBase_nestedAtomically_closure;
import CLOSURE base_GHCziIOziException_heapOverflow_closure;
import EnterCriticalSection;
import LeaveCriticalSection;
import CLOSURE ghczmprim_GHCziTypes_False_closure;
#if defined(USE_MINIINTERPRETER) || !defined(mingw32_HOST_OS)
import CLOSURE sm_mutex;
#endif
#if defined(PROFILING)
import CLOSURE CCS_MAIN;
#endif

/*-----------------------------------------------------------------------------
  Array Primitives

  Basically just new*Array - the others are all inline macros.

  The slow entry point is for returning from a heap check, the saved
  size argument must be re-loaded from the stack.
  -------------------------------------------------------------------------- */

/* for objects that are *less* than the size of a word, make sure we
 * round up to the nearest word for the size of the array.
 */

stg_newByteArrayzh ( W_ n )
{
    W_ words, payload_words;
    gcptr p;

    MAYBE_GC_N(stg_newByteArrayzh, n);

    payload_words = ROUNDUP_BYTES_TO_WDS(n);
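    // A worked example of the rounding (a sketch, assuming a 64-bit
    // target where SIZEOF_W = 8): n = 3 gives payload_words = 1 and
    // n = 17 gives payload_words = 3, so the payload always occupies
    // a whole number of words.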
    words = BYTES_TO_WDS(SIZEOF_StgArrBytes) + payload_words;
    ("ptr" p) = ccall allocateMightFail(MyCapability() "ptr", words);
    if (p == NULL) {
        jump stg_raisezh(base_GHCziIOziException_heapOverflow_closure);
    }
    TICK_ALLOC_PRIM(SIZEOF_StgArrBytes,WDS(payload_words),0);
    SET_HDR(p, stg_ARR_WORDS_info, CCCS);
    StgArrBytes_bytes(p) = n;
    return (p);
}

#define BA_ALIGN 16
#define BA_MASK  (BA_ALIGN-1)

stg_newPinnedByteArrayzh ( W_ n )
{
    W_ words, bytes, payload_words;
    gcptr p;

    MAYBE_GC_N(stg_newPinnedByteArrayzh, n);

    bytes = n;
    /* payload_words is what we will tell the profiler we had to allocate */
    payload_words = ROUNDUP_BYTES_TO_WDS(bytes);
    /* When we actually allocate memory, we need to allow space for the
       header: */
    bytes = bytes + SIZEOF_StgArrBytes;
    /* And we want to align to BA_ALIGN bytes, so we need to allow space
       to shift up to BA_ALIGN - 1 bytes: */
    bytes = bytes + BA_ALIGN - 1;
    /* Now we convert to a number of words: */
    words = ROUNDUP_BYTES_TO_WDS(bytes);

    ("ptr" p) = ccall allocatePinned(MyCapability() "ptr", words);
    if (p == NULL) {
        jump stg_raisezh(base_GHCziIOziException_heapOverflow_closure);
    }
    TICK_ALLOC_PRIM(SIZEOF_StgArrBytes,WDS(payload_words),0);

    /* Now we need to move p forward so that the payload is aligned
       to BA_ALIGN bytes: */
    p = p + ((-p - SIZEOF_StgArrBytes) & BA_MASK);
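    /* Worked example (a sketch, assuming a 64-bit target without
       profiling, where SIZEOF_StgArrBytes = 16): if allocatePinned
       returns p = 0x1008, then (-p - 16) & 15 = 8, so p moves to
       0x1010 and the payload at p + SIZEOF_StgArrBytes = 0x1020 is
       16-byte aligned. */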

    SET_HDR(p, stg_ARR_WORDS_info, CCCS);
    StgArrBytes_bytes(p) = n;
    return (p);
}

stg_newAlignedPinnedByteArrayzh ( W_ n, W_ alignment )
{
    W_ words, bytes, payload_words;
    gcptr p;

    again: MAYBE_GC(again);

    /* we always supply at least word-aligned memory, so there's no
       need to allow extra space for alignment if the requirement is less
       than a word.  This also prevents mischief with alignment == 0. */
    if (alignment <= SIZEOF_W) { alignment = 1; }

    bytes = n;

    /* payload_words is what we will tell the profiler we had to allocate */
    payload_words = ROUNDUP_BYTES_TO_WDS(bytes);

    /* When we actually allocate memory, we need to allow space for the
       header: */
    bytes = bytes + SIZEOF_StgArrBytes;
    /* And we want to align to <alignment> bytes, so we need to allow space
       to shift up to <alignment - 1> bytes: */
    bytes = bytes + alignment - 1;
    /* Now we convert to a number of words: */
    words = ROUNDUP_BYTES_TO_WDS(bytes);

    ("ptr" p) = ccall allocatePinned(MyCapability() "ptr", words);
    if (p == NULL) {
        jump stg_raisezh(base_GHCziIOziException_heapOverflow_closure);
    }
    TICK_ALLOC_PRIM(SIZEOF_StgArrBytes,WDS(payload_words),0);

    /* Now we need to move p forward so that the payload is aligned
       to <alignment> bytes. Note that we are assuming that
       <alignment> is a power of 2, which is technically not guaranteed */
    p = p + ((-p - SIZEOF_StgArrBytes) & (alignment - 1));
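    /* E.g. (a sketch, again assuming SIZEOF_StgArrBytes = 16): with
       alignment = 32 and p = 0x1008, (-p - 16) & 31 = 8, so p moves to
       0x1010 and the payload at 0x1020 is 32-byte aligned.  If
       <alignment> were not a power of 2, this mask trick would compute
       the wrong offset. */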

    SET_HDR(p, stg_ARR_WORDS_info, CCCS);
    StgArrBytes_bytes(p) = n;
    return (p);
}

stg_isByteArrayPinnedzh ( gcptr ba )
// ByteArray# s -> Int#
{
    W_ bd, flags;
    bd = Bdescr(ba);
    // Pinned byte arrays live in blocks with the BF_PINNED flag set.
    // We also consider BF_LARGE objects to be immovable. See #13894.
    // See the comment in Storage.c:allocatePinned.
    flags = TO_W_(bdescr_flags(bd));
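    // N.B. C-- gives the comparison operators lower precedence than '&'
    // (see CmmParse.y), so the expression below parses as
    // ((flags & (BF_PINNED | BF_LARGE)) != 0), unlike the same text in C.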
    return (flags & (BF_PINNED | BF_LARGE) != 0);
}

stg_isMutableByteArrayPinnedzh ( gcptr mba )
// MutableByteArray# s -> Int#
{
    jump stg_isByteArrayPinnedzh(mba);
}

// shrink size of MutableByteArray in-place
stg_shrinkMutableByteArrayzh ( gcptr mba, W_ new_size )
// MutableByteArray# s -> Int# -> State# s -> State# s
{
   ASSERT(new_size >= 0);
   ASSERT(new_size <= StgArrBytes_bytes(mba));

   OVERWRITING_CLOSURE_OFS(mba, (BYTES_TO_WDS(SIZEOF_StgArrBytes) +
                                 ROUNDUP_BYTES_TO_WDS(new_size)));
   StgArrBytes_bytes(mba) = new_size;
   LDV_RECORD_CREATE(mba);

   return ();
}

// resize MutableByteArray
//
// The returned MutableByteArray is either the original
// MutableByteArray resized in-place or, if not possible, a newly
// allocated (unpinned) MutableByteArray (with the original content
// copied over)
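//
// A Haskell-side usage sketch (illustrative only, not part of this
// file):
//
//   case resizeMutableByteArray# mba new_size s of
//     (# s', mba' #) -> ...
//       -- use only mba' from here on; comparing it against mba
//       -- (e.g. with sameMutableByteArray#) reveals whether the
//       -- resize happened in-place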
stg_resizzeMutableByteArrayzh ( gcptr mba, W_ new_size )
// MutableByteArray# s -> Int# -> State# s -> (# State# s,MutableByteArray# s #)
{
   W_ new_size_wds;

   ASSERT(new_size >= 0);

   new_size_wds = ROUNDUP_BYTES_TO_WDS(new_size);

   if (new_size_wds <= BYTE_ARR_WDS(mba)) {
      OVERWRITING_CLOSURE_OFS(mba, (BYTES_TO_WDS(SIZEOF_StgArrBytes) +
                                    new_size_wds));
      StgArrBytes_bytes(mba) = new_size;
      LDV_RECORD_CREATE(mba);

      return (mba);
   } else {
      (P_ new_mba) = call stg_newByteArrayzh(new_size);

      // maybe at some point in the future we may be able to grow the
      // MBA in-place w/o copying if we know the space after the
      // current MBA is still available, as often we want to grow the
      // MBA shortly after we allocated the original MBA. So maybe no
      // further allocations have occurred by then.

      // copy over old content
      prim %memcpy(BYTE_ARR_CTS(new_mba), BYTE_ARR_CTS(mba),
                   StgArrBytes_bytes(mba), SIZEOF_W);

      return (new_mba);
   }
}

// RRN: This one does not use the "ticketing" approach because it
// deals in unboxed scalars, not heap pointers.
stg_casIntArrayzh( gcptr arr, W_ ind, W_ old, W_ new )
/* MutableByteArray# s -> Int# -> Int# -> Int# -> State# s -> (# State# s, Int# #) */
{
    W_ p, h;

    p = arr + SIZEOF_StgArrBytes + WDS(ind);
    (h) = prim %cmpxchgW(p, old, new);

    return(h);
}


stg_newArrayzh ( W_ n /* words */, gcptr init )
{
    W_ words, size, p;
    gcptr arr;

    again: MAYBE_GC(again);

    // the mark area contains one byte for each 2^MUT_ARR_PTRS_CARD_BITS words
    // in the array, making sure we round up, and then rounding up to a whole
    // number of words.
    size = n + mutArrPtrsCardWords(n);
    words = BYTES_TO_WDS(SIZEOF_StgMutArrPtrs) + size;
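    // Worked example (a sketch, assuming MUT_ARR_PTRS_CARD_BITS = 7 and
    // 8-byte words): for n = 300 the mark area needs
    // ceiling(300 / 128) = 3 card bytes, which round up to one whole
    // word, so size = 300 + 1 = 301 words plus the StgMutArrPtrs header.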
    ("ptr" arr) = ccall allocateMightFail(MyCapability() "ptr",words);
    if (arr == NULL) {
        jump stg_raisezh(base_GHCziIOziException_heapOverflow_closure);
    }
    TICK_ALLOC_PRIM(SIZEOF_StgMutArrPtrs, WDS(size), 0);

    SET_HDR(arr, stg_MUT_ARR_PTRS_DIRTY_info, CCCS);
    StgMutArrPtrs_ptrs(arr) = n;
    StgMutArrPtrs_size(arr) = size;

    // Initialise all elements of the array with the value init
    p = arr + SIZEOF_StgMutArrPtrs;
  for:
    if (p < arr + SIZEOF_StgMutArrPtrs + WDS(n)) {
        W_[p] = init;
        p = p + WDS(1);
        goto for;
    }

    return (arr);
}

stg_unsafeThawArrayzh ( gcptr arr )
{
    // SUBTLETY TO DO WITH THE OLD GEN MUTABLE LIST
    //
    // A MUT_ARR_PTRS lives on the mutable list, but a MUT_ARR_PTRS_FROZEN
    // normally doesn't.  However, when we freeze a MUT_ARR_PTRS, we leave
    // it on the mutable list for the GC to remove (removing something from
    // the mutable list is not easy).
    //
    // So that we can tell whether a MUT_ARR_PTRS_FROZEN is on the mutable list,
    // when we freeze it we set the info ptr to be MUT_ARR_PTRS_FROZEN0
    // to indicate that it is still on the mutable list.
    //
    // So, when we thaw a MUT_ARR_PTRS_FROZEN, we must cope with two cases:
    // either it is on a mut_list, or it isn't.  We adopt the convention that
    // the closure type is MUT_ARR_PTRS_FROZEN0 if it is on the mutable list,
    // and MUT_ARR_PTRS_FROZEN otherwise.  In fact it wouldn't matter if
    // we put it on the mutable list more than once, but it would get scavenged
    // multiple times during GC, which would be unnecessarily slow.
    //
    if (StgHeader_info(arr) != stg_MUT_ARR_PTRS_FROZEN0_info) {
        SET_INFO(arr,stg_MUT_ARR_PTRS_DIRTY_info);
        recordMutable(arr);
        // must be done after SET_INFO, because it ASSERTs closure_MUTABLE()
        return (arr);
    } else {
        SET_INFO(arr,stg_MUT_ARR_PTRS_DIRTY_info);
        return (arr);
    }
}

stg_copyArrayzh ( gcptr src, W_ src_off, gcptr dst, W_ dst_off, W_ n )
{
    copyArray(src, src_off, dst, dst_off, n)
}

stg_copyMutableArrayzh ( gcptr src, W_ src_off, gcptr dst, W_ dst_off, W_ n )
{
    copyMutableArray(src, src_off, dst, dst_off, n)
}

stg_copyArrayArrayzh ( gcptr src, W_ src_off, gcptr dst, W_ dst_off, W_ n )
{
    copyArray(src, src_off, dst, dst_off, n)
}

stg_copyMutableArrayArrayzh ( gcptr src, W_ src_off, gcptr dst, W_ dst_off, W_ n )
{
    copyMutableArray(src, src_off, dst, dst_off, n)
}

stg_cloneArrayzh ( gcptr src, W_ offset, W_ n )
{
    cloneArray(stg_MUT_ARR_PTRS_FROZEN_info, src, offset, n)
}

stg_cloneMutableArrayzh ( gcptr src, W_ offset, W_ n )
{
    cloneArray(stg_MUT_ARR_PTRS_DIRTY_info, src, offset, n)
}

// We have to escape the "z" in the name.
stg_freezzeArrayzh ( gcptr src, W_ offset, W_ n )
{
    cloneArray(stg_MUT_ARR_PTRS_FROZEN_info, src, offset, n)
}

stg_thawArrayzh ( gcptr src, W_ offset, W_ n )
{
    cloneArray(stg_MUT_ARR_PTRS_DIRTY_info, src, offset, n)
}

// RRN: Uses the ticketed approach; see casMutVar
stg_casArrayzh ( gcptr arr, W_ ind, gcptr old, gcptr new )
/* MutableArray# s a -> Int# -> a -> a -> State# s -> (# State# s, Int#, Any a #) */
{
    gcptr h;
    W_ p, len;

    p = arr + SIZEOF_StgMutArrPtrs + WDS(ind);
    (h) = prim %cmpxchgW(p, old, new);

    if (h != old) {
        // Failure, return what was there instead of 'old':
        return (1,h);
    } else {
        // Compare and Swap Succeeded:
        SET_HDR(arr, stg_MUT_ARR_PTRS_DIRTY_info, CCCS);
        len = StgMutArrPtrs_ptrs(arr);
        // The write barrier.  We must write a byte into the mark table:
        I8[arr + SIZEOF_StgMutArrPtrs + WDS(len) + (ind >> MUT_ARR_PTRS_CARD_BITS )] = 1;
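        // E.g. (a sketch, assuming MUT_ARR_PTRS_CARD_BITS = 7): a write
        // at ind = 300 dirties card byte 300 >> 7 = 2 of the mark table
        // that lives just after the len array elements.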
        return (0,new);
    }
}

stg_newArrayArrayzh ( W_ n /* words */ )
{
    W_ words, size, p;
    gcptr arr;

    MAYBE_GC_N(stg_newArrayArrayzh, n);

    // the mark area contains one byte for each 2^MUT_ARR_PTRS_CARD_BITS words
    // in the array, making sure we round up, and then rounding up to a whole
    // number of words.
    size = n + mutArrPtrsCardWords(n);
    words = BYTES_TO_WDS(SIZEOF_StgMutArrPtrs) + size;
    ("ptr" arr) = ccall allocateMightFail(MyCapability() "ptr",words);
    if (arr == NULL) {
        jump stg_raisezh(base_GHCziIOziException_heapOverflow_closure);
    }
    TICK_ALLOC_PRIM(SIZEOF_StgMutArrPtrs, WDS(size), 0);

    SET_HDR(arr, stg_MUT_ARR_PTRS_DIRTY_info, W_[CCCS]);
    StgMutArrPtrs_ptrs(arr) = n;
    StgMutArrPtrs_size(arr) = size;

    // Initialise all elements of the array with a pointer to the new array
    p = arr + SIZEOF_StgMutArrPtrs;
  for:
    if (p < arr + SIZEOF_StgMutArrPtrs + WDS(n)) {
        W_[p] = arr;
        p = p + WDS(1);
        goto for;
    }

    return (arr);
}

/* -----------------------------------------------------------------------------
   SmallArray primitives
   -------------------------------------------------------------------------- */

stg_newSmallArrayzh ( W_ n /* words */, gcptr init )
{
    W_ words, size, p;
    gcptr arr;

    again: MAYBE_GC(again);

    words = BYTES_TO_WDS(SIZEOF_StgSmallMutArrPtrs) + n;
    ("ptr" arr) = ccall allocateMightFail(MyCapability() "ptr",words);
    if (arr == NULL) {
        jump stg_raisezh(base_GHCziIOziException_heapOverflow_closure);
    }
    TICK_ALLOC_PRIM(SIZEOF_StgSmallMutArrPtrs, WDS(n), 0);

    SET_HDR(arr, stg_SMALL_MUT_ARR_PTRS_DIRTY_info, CCCS);
    StgSmallMutArrPtrs_ptrs(arr) = n;

    // Initialise all elements of the array with the value init
    p = arr + SIZEOF_StgSmallMutArrPtrs;
  for:
    if (p < arr + SIZEOF_StgSmallMutArrPtrs + WDS(n)) {
        W_[p] = init;
        p = p + WDS(1);
        goto for;
    }

    return (arr);
}

stg_unsafeThawSmallArrayzh ( gcptr arr )
{
    // See stg_unsafeThawArrayzh
    if (StgHeader_info(arr) != stg_SMALL_MUT_ARR_PTRS_FROZEN0_info) {
        SET_INFO(arr, stg_SMALL_MUT_ARR_PTRS_DIRTY_info);
        recordMutable(arr);
        // must be done after SET_INFO, because it ASSERTs closure_MUTABLE()
        return (arr);
    } else {
        SET_INFO(arr, stg_SMALL_MUT_ARR_PTRS_DIRTY_info);
        return (arr);
    }
}

stg_cloneSmallArrayzh ( gcptr src, W_ offset, W_ n )
{
    cloneSmallArray(stg_SMALL_MUT_ARR_PTRS_FROZEN_info, src, offset, n)
}

stg_cloneSmallMutableArrayzh ( gcptr src, W_ offset, W_ n )
{
    cloneSmallArray(stg_SMALL_MUT_ARR_PTRS_DIRTY_info, src, offset, n)
}

// We have to escape the "z" in the name.
stg_freezzeSmallArrayzh ( gcptr src, W_ offset, W_ n )
{
    cloneSmallArray(stg_SMALL_MUT_ARR_PTRS_FROZEN_info, src, offset, n)
}

stg_thawSmallArrayzh ( gcptr src, W_ offset, W_ n )
{
    cloneSmallArray(stg_SMALL_MUT_ARR_PTRS_DIRTY_info, src, offset, n)
}

stg_copySmallArrayzh ( gcptr src, W_ src_off, gcptr dst, W_ dst_off, W_ n)
{
    W_ dst_p, src_p, bytes;

    SET_INFO(dst, stg_SMALL_MUT_ARR_PTRS_DIRTY_info);

    dst_p = dst + SIZEOF_StgSmallMutArrPtrs + WDS(dst_off);
    src_p = src + SIZEOF_StgSmallMutArrPtrs + WDS(src_off);
    bytes = WDS(n);
    prim %memcpy(dst_p, src_p, bytes, SIZEOF_W);

    return ();
}

stg_copySmallMutableArrayzh ( gcptr src, W_ src_off, gcptr dst, W_ dst_off, W_ n)
{
    W_ dst_p, src_p, bytes;

    SET_INFO(dst, stg_SMALL_MUT_ARR_PTRS_DIRTY_info);

    dst_p = dst + SIZEOF_StgSmallMutArrPtrs + WDS(dst_off);
    src_p = src + SIZEOF_StgSmallMutArrPtrs + WDS(src_off);
    bytes = WDS(n);
    if (src == dst) {
        prim %memmove(dst_p, src_p, bytes, SIZEOF_W);
    } else {
        prim %memcpy(dst_p, src_p, bytes, SIZEOF_W);
    }

    return ();
}

// RRN: Uses the ticketed approach; see casMutVar
stg_casSmallArrayzh ( gcptr arr, W_ ind, gcptr old, gcptr new )
/* SmallMutableArray# s a -> Int# -> a -> a -> State# s -> (# State# s, Int#, Any a #) */
{
    gcptr h;
    W_ p, len;

    p = arr + SIZEOF_StgSmallMutArrPtrs + WDS(ind);
    (h) = prim %cmpxchgW(p, old, new);

    if (h != old) {
        // Failure, return what was there instead of 'old':
        return (1,h);
    } else {
        // Compare and Swap Succeeded:
        SET_HDR(arr, stg_SMALL_MUT_ARR_PTRS_DIRTY_info, CCCS);
        return (0,new);
    }
}


/* -----------------------------------------------------------------------------
   MutVar primitives
   -------------------------------------------------------------------------- */

stg_newMutVarzh ( gcptr init )
{
    W_ mv;

    ALLOC_PRIM_P (SIZEOF_StgMutVar, stg_newMutVarzh, init);

    mv = Hp - SIZEOF_StgMutVar + WDS(1);
    SET_HDR(mv,stg_MUT_VAR_DIRTY_info,CCCS);
    StgMutVar_var(mv) = init;

    return (mv);
}

// RRN: To support the "ticketed" approach, we return the NEW rather
// than old value if the CAS is successful.  This is received in an
// opaque form in the Haskell code, preventing the compiler from
// changing its pointer identity.  The ticket can then be safely used
// in future CAS operations.
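//
// A Haskell-side sketch of a retry loop built on this primop
// (illustrative only, not part of this file; casMutVar# returns 0 on
// success, as below):
//
//   casLoop mv f s0 =
//     let (# s1, old #)         = readMutVar# mv s0   -- 'old' is the ticket
//         (# s2, failed, cur #) = casMutVar# mv old (f old) s1
//     in if isTrue# (failed ==# 0#) then (# s2, cur #) else casLoop mv f s2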
stg_casMutVarzh ( gcptr mv, gcptr old, gcptr new )
 /* MutVar# s a -> a -> a -> State# s -> (# State#, Int#, Any a #) */
{
#if defined(THREADED_RTS)
    gcptr h;

    (h) = prim %cmpxchgW(mv + SIZEOF_StgHeader + OFFSET_StgMutVar_var, old, new);
    if (h != old) {
        return (1,h);
    } else {
        if (GET_INFO(mv) == stg_MUT_VAR_CLEAN_info) {
            ccall dirty_MUT_VAR(BaseReg "ptr", mv "ptr");
        }
        return (0,new);
    }
#else
    gcptr prev_val;

    prev_val = StgMutVar_var(mv);
    if (prev_val != old) {
        return (1,prev_val);
    } else {
        StgMutVar_var(mv) = new;
        if (GET_INFO(mv) == stg_MUT_VAR_CLEAN_info) {
            ccall dirty_MUT_VAR(BaseReg "ptr", mv "ptr");
        }
        return (0,new);
    }
#endif
}

stg_atomicModifyMutVarzh ( gcptr mv, gcptr f )
{
    W_ z, x, y, r, h;

    /* If x is the current contents of the MutVar#, then
       we want to make the new contents point to

         (sel_0 (f x))

       and the return value is

         (sel_1 (f x))

       obviously we can share (f x).

         z = [stg_ap_2 f x]  (max (HS + 2) MIN_UPD_SIZE)
         y = [stg_sel_0 z]   (max (HS + 1) MIN_UPD_SIZE)
         r = [stg_sel_1 z]   (max (HS + 1) MIN_UPD_SIZE)
    */
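    /* E.g. atomicModifyMutVar# mv (\x -> (x+1, x)) builds the three
       thunks above: the MutVar# ends up pointing at sel_0 (f x) (the
       incremented value) and the caller receives r = sel_1 (f x) (the
       old value), with z = f x shared so that f runs at most once. */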

#if defined(MIN_UPD_SIZE) && MIN_UPD_SIZE > 1
#define THUNK_1_SIZE (SIZEOF_StgThunkHeader + WDS(MIN_UPD_SIZE))
#define TICK_ALLOC_THUNK_1() TICK_ALLOC_UP_THK(WDS(1),WDS(MIN_UPD_SIZE-1))
#else
#define THUNK_1_SIZE (SIZEOF_StgThunkHeader + WDS(1))
#define TICK_ALLOC_THUNK_1() TICK_ALLOC_UP_THK(WDS(1),0)
#endif

#if defined(MIN_UPD_SIZE) && MIN_UPD_SIZE > 2
#define THUNK_2_SIZE (SIZEOF_StgThunkHeader + WDS(MIN_UPD_SIZE))
#define TICK_ALLOC_THUNK_2() TICK_ALLOC_UP_THK(WDS(2),WDS(MIN_UPD_SIZE-2))
#else
#define THUNK_2_SIZE (SIZEOF_StgThunkHeader + WDS(2))
#define TICK_ALLOC_THUNK_2() TICK_ALLOC_UP_THK(WDS(2),0)
#endif

#define SIZE (THUNK_2_SIZE + THUNK_1_SIZE + THUNK_1_SIZE)

    HP_CHK_GEN_TICKY(SIZE);

    TICK_ALLOC_THUNK_2();
    CCCS_ALLOC(THUNK_2_SIZE);
    z = Hp - THUNK_2_SIZE + WDS(1);
    SET_HDR(z, stg_ap_2_upd_info, CCCS);
    LDV_RECORD_CREATE(z);
    StgThunk_payload(z,0) = f;

    TICK_ALLOC_THUNK_1();
    CCCS_ALLOC(THUNK_1_SIZE);
    y = z - THUNK_1_SIZE;
    SET_HDR(y, stg_sel_0_upd_info, CCCS);
    LDV_RECORD_CREATE(y);
    StgThunk_payload(y,0) = z;

    TICK_ALLOC_THUNK_1();
    CCCS_ALLOC(THUNK_1_SIZE);
    r = y - THUNK_1_SIZE;
    SET_HDR(r, stg_sel_1_upd_info, CCCS);
    LDV_RECORD_CREATE(r);
    StgThunk_payload(r,0) = z;

  retry:
    x = StgMutVar_var(mv);
    StgThunk_payload(z,1) = x;
#if defined(THREADED_RTS)
    (h) = prim %cmpxchgW(mv + SIZEOF_StgHeader + OFFSET_StgMutVar_var, x, y);
    if (h != x) { goto retry; }
#else
    StgMutVar_var(mv) = y;
#endif

    if (GET_INFO(mv) == stg_MUT_VAR_CLEAN_info) {
        ccall dirty_MUT_VAR(BaseReg "ptr", mv "ptr");
    }

    return (r);
}

/* -----------------------------------------------------------------------------
   Weak Pointer Primitives
   -------------------------------------------------------------------------- */

STRING(stg_weak_msg,"New weak pointer at %p\n")

stg_mkWeakzh ( gcptr key,
               gcptr value,
               gcptr finalizer /* or stg_NO_FINALIZER_closure */ )
{
    gcptr w;

    ALLOC_PRIM (SIZEOF_StgWeak)

    w = Hp - SIZEOF_StgWeak + WDS(1);
    SET_HDR(w, stg_WEAK_info, CCCS);

    StgWeak_key(w)         = key;
    StgWeak_value(w)       = value;
    StgWeak_finalizer(w)   = finalizer;
    StgWeak_cfinalizers(w) = stg_NO_FINALIZER_closure;

    StgWeak_link(w) = Capability_weak_ptr_list_hd(MyCapability());
    Capability_weak_ptr_list_hd(MyCapability()) = w;
    if (Capability_weak_ptr_list_tl(MyCapability()) == NULL) {
        Capability_weak_ptr_list_tl(MyCapability()) = w;
    }

    IF_DEBUG(weak, ccall debugBelch(stg_weak_msg,w));

    return (w);
}

stg_mkWeakNoFinalizzerzh ( gcptr key, gcptr value )
{
    jump stg_mkWeakzh (key, value, stg_NO_FINALIZER_closure);
}

STRING(stg_cfinalizer_msg,"Adding a finalizer to %p\n")

stg_addCFinalizzerToWeakzh ( W_ fptr,   // finalizer
                             W_ ptr,
                             W_ flag,   // has environment (0 or 1)
                             W_ eptr,
                             gcptr w )
{
    W_ c, info;

    ALLOC_PRIM (SIZEOF_StgCFinalizerList)

    c = Hp - SIZEOF_StgCFinalizerList + WDS(1);
    SET_HDR(c, stg_C_FINALIZER_LIST_info, CCCS);

    StgCFinalizerList_fptr(c) = fptr;
    StgCFinalizerList_ptr(c) = ptr;
    StgCFinalizerList_eptr(c) = eptr;
    StgCFinalizerList_flag(c) = flag;

    LOCK_CLOSURE(w, info);

    if (info == stg_DEAD_WEAK_info) {
        // Already dead.
        unlockClosure(w, info);
        return (0);
    }

    StgCFinalizerList_link(c) = StgWeak_cfinalizers(w);
    StgWeak_cfinalizers(w) = c;

    unlockClosure(w, info);

    recordMutable(w);

    IF_DEBUG(weak, ccall debugBelch(stg_cfinalizer_msg,w));

    return (1);
}
734

735
stg_finalizzeWeakzh ( gcptr w )
736
{
    gcptr f, list;
    W_ info;

    LOCK_CLOSURE(w, info);

    // already dead?
    if (info == stg_DEAD_WEAK_info) {
        unlockClosure(w, info);
        return (0,stg_NO_FINALIZER_closure);
    }

    f    = StgWeak_finalizer(w);
    list = StgWeak_cfinalizers(w);

    // kill it
#if defined(PROFILING)
    // @LDV profiling
    // A weak pointer is inherently used, so we do not need to call
    // LDV_recordDead_FILL_SLOP_DYNAMIC():
    //    LDV_recordDead_FILL_SLOP_DYNAMIC((StgClosure *)w);
    // or, LDV_recordDead():
    //    LDV_recordDead((StgClosure *)w, sizeofW(StgWeak) - sizeofW(StgProfHeader));
    // Furthermore, when PROFILING is turned on, dead weak pointers are exactly as
    // large as weak pointers, so there is no need to fill the slop, either.
    // See stg_DEAD_WEAK_info in StgMiscClosures.cmm.
#endif

    //
    // Todo: maybe use SET_HDR() and remove LDV_recordCreate()?
    //
    unlockClosure(w, stg_DEAD_WEAK_info);

    LDV_RECORD_CREATE(w);

    if (list != stg_NO_FINALIZER_closure) {
      ccall runCFinalizers(list);
    }

    /* return the finalizer */
    if (f == stg_NO_FINALIZER_closure) {
        return (0,stg_NO_FINALIZER_closure);
    } else {
        return (1,f);
    }
}

stg_deRefWeakzh ( gcptr w )
{
    W_ code, info;
    gcptr val;

    info = GET_INFO(w);

    if (info == stg_WHITEHOLE_info) {
        // w is locked by another thread. Now it's not immediately clear if w is
        // alive or not. We use lockClosure to wait for the info pointer to become
        // something other than stg_WHITEHOLE_info.

        LOCK_CLOSURE(w, info);
        unlockClosure(w, info);
    }

    if (info == stg_WEAK_info) {
        code = 1;
        val = StgWeak_value(w);
    } else {
        code = 0;
        val = w;
    }
    return (code,val);
}

/* -----------------------------------------------------------------------------
   Floating point operations.
   -------------------------------------------------------------------------- */

stg_decodeFloatzuIntzh ( F_ arg )
{
    W_ p;
    W_ tmp, mp_tmp1, mp_tmp_w, r1, r2;

    STK_CHK_GEN_N (WDS(2));

    reserve 2 = tmp {

        mp_tmp1  = tmp + WDS(1);
        mp_tmp_w = tmp;

        /* Perform the operation */
        ccall __decodeFloat_Int(mp_tmp1 "ptr", mp_tmp_w "ptr", arg);

        r1 = W_[mp_tmp1];
        r2 = W_[mp_tmp_w];
    }

    /* returns: (Int# (mantissa), Int# (exponent)) */
    return (r1, r2);
}

stg_decodeDoublezu2Intzh ( D_ arg )
{
    W_ p, tmp;
    W_ mp_tmp1, mp_tmp2, mp_result1, mp_result2;
    W_ r1, r2, r3, r4;

    STK_CHK_GEN_N (WDS(4));

    reserve 4 = tmp {

        mp_tmp1    = tmp + WDS(3);
        mp_tmp2    = tmp + WDS(2);
        mp_result1 = tmp + WDS(1);
        mp_result2 = tmp;

        /* Perform the operation */
        ccall __decodeDouble_2Int(mp_tmp1 "ptr", mp_tmp2 "ptr",
                                  mp_result1 "ptr", mp_result2 "ptr",
                                  arg);

        r1 = W_[mp_tmp1];
        r2 = W_[mp_tmp2];
        r3 = W_[mp_result1];
        r4 = W_[mp_result2];
    }

    /* returns:
       (Int# (mant sign), Word# (mant high), Word# (mant low), Int# (expn)) */
    return (r1, r2, r3, r4);
}

/* Double# -> (# Int64#, Int# #) */
stg_decodeDoublezuInt64zh ( D_ arg )
{
    CInt exp;
    I64  mant;
    W_   mant_ptr;

    STK_CHK_GEN_N (SIZEOF_INT64);
    reserve BYTES_TO_WDS(SIZEOF_INT64) = mant_ptr {
        (exp) = ccall __decodeDouble_Int64(mant_ptr "ptr", arg);
        mant = I64[mant_ptr];
    }

    return (mant, TO_W_(exp));
}

/* -----------------------------------------------------------------------------
 * Concurrency primitives
 * -------------------------------------------------------------------------- */

stg_forkzh ( gcptr closure )
{
    MAYBE_GC_P(stg_forkzh, closure);

    gcptr threadid;

    ("ptr" threadid) = ccall createIOThread( MyCapability() "ptr",
                                  RtsFlags_GcFlags_initialStkSize(RtsFlags),
                                  closure "ptr");

    /* start blocked if the current thread is blocked */
    StgTSO_flags(threadid) = %lobits16(
        TO_W_(StgTSO_flags(threadid)) |
        TO_W_(StgTSO_flags(CurrentTSO)) & (TSO_BLOCKEX | TSO_INTERRUPTIBLE));

    ccall scheduleThread(MyCapability() "ptr", threadid "ptr");

    // context switch soon, but not immediately: we don't want every
    // forkIO to force a context-switch.
    Capability_context_switch(MyCapability()) = 1 :: CInt;

    return (threadid);
}

stg_forkOnzh ( W_ cpu, gcptr closure )
{
again: MAYBE_GC(again);

    gcptr threadid;

    ("ptr" threadid) = ccall createIOThread(
        MyCapability() "ptr",
        RtsFlags_GcFlags_initialStkSize(RtsFlags),
        closure "ptr");

    /* start blocked if the current thread is blocked */
    StgTSO_flags(threadid) = %lobits16(
        TO_W_(StgTSO_flags(threadid)) |
        TO_W_(StgTSO_flags(CurrentTSO)) & (TSO_BLOCKEX | TSO_INTERRUPTIBLE));

    ccall scheduleThreadOn(MyCapability() "ptr", cpu, threadid "ptr");

    // context switch soon, but not immediately: we don't want every
    // forkIO to force a context-switch.
    Capability_context_switch(MyCapability()) = 1 :: CInt;

    return (threadid);
}

stg_yieldzh ()
{
    // when we yield to the scheduler, we have to tell it to put the
    // current thread to the back of the queue by setting the
    // context_switch flag.  If we don't do this, it will run the same
    // thread again.
    Capability_context_switch(MyCapability()) = 1 :: CInt;
    jump stg_yield_noregs();
}

stg_myThreadIdzh ()
{
    return (CurrentTSO);
}

stg_labelThreadzh ( gcptr threadid, W_ addr )
{
#if defined(DEBUG) || defined(TRACING) || defined(DTRACE)
    ccall labelThread(MyCapability() "ptr", threadid "ptr", addr "ptr");
#endif
    return ();
}

stg_isCurrentThreadBoundzh (/* no args */)
{
    W_ r;
    (r) = ccall isThreadBound(CurrentTSO);
    return (r);