/* -*- tab-width: 8 -*- */
/* -----------------------------------------------------------------------------
 *
 * (c) The GHC Team, 1998-2012
 *
 * Out-of-line primitive operations
 *
 * This file contains the implementations of all the primitive
 * operations ("primops") which are not expanded inline.  See
 * ghc/compiler/prelude/primops.txt.pp for a list of all the primops;
 * this file contains code for most of those with the attribute
 * out_of_line=True.
 *
 * Entry convention: the entry convention for a primop is the
 * NativeNodeCall convention, and the return convention is
 * NativeReturn.  (see compiler/cmm/CmmCallConv.hs)
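 *
 * (Illustrative note, an addition: for instance, stg_newByteArrayzh
 * ( W_ n ) below receives its argument n according to NativeNodeCall,
 * and its "return (p)" uses the NativeReturn convention.)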
 *
 * This file is written in a subset of C--, extended with various
 * features specific to GHC.  It is compiled by GHC directly.  For the
 * syntax of .cmm files, see the parser in ghc/compiler/cmm/CmmParse.y.
 *
 * ---------------------------------------------------------------------------*/

#include "Cmm.h"
#include "MachDeps.h"
#include "SMPClosureOps.h"

#if defined(__PIC__)
import pthread_mutex_lock;
import pthread_mutex_unlock;
#endif
import CLOSURE base_ControlziExceptionziBase_nestedAtomically_closure;
import CLOSURE base_GHCziIOziException_heapOverflow_closure;
import EnterCriticalSection;
import LeaveCriticalSection;
import CLOSURE ghczmprim_GHCziTypes_False_closure;
#if defined(USE_MINIINTERPRETER) || !defined(mingw32_HOST_OS)
import CLOSURE sm_mutex;
#endif
#if defined(PROFILING)
import CLOSURE CCS_MAIN;
#endif

/*-----------------------------------------------------------------------------
  Array Primitives

  Basically just new*Array - the others are all inline macros.

  The slow entry point is for returning from a heap check, the saved
  size argument must be re-loaded from the stack.
  -------------------------------------------------------------------------- */

/* for sizes that are not a whole number of words (including objects
 * smaller than a word), make sure we round up to the nearest word for
 * the size of the array.
 */
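/* Illustrative arithmetic (an addition, assuming a 64-bit platform):
 * ROUNDUP_BYTES_TO_WDS(5) = 1 and ROUNDUP_BYTES_TO_WDS(17) = 3, so a
 * 17-byte request below allocates three payload words plus the
 * StgArrBytes header.
 */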

stg_newByteArrayzh ( W_ n )
{
    W_ words, payload_words;
    gcptr p;

    MAYBE_GC_N(stg_newByteArrayzh, n);

    payload_words = ROUNDUP_BYTES_TO_WDS(n);
    words = BYTES_TO_WDS(SIZEOF_StgArrBytes) + payload_words;
    ("ptr" p) = ccall allocateMightFail(MyCapability() "ptr", words);
    if (p == NULL) {
        jump stg_raisezh(base_GHCziIOziException_heapOverflow_closure);
    }
    TICK_ALLOC_PRIM(SIZEOF_StgArrBytes,WDS(payload_words),0);
    SET_HDR(p, stg_ARR_WORDS_info, CCCS);
    StgArrBytes_bytes(p) = n;
    return (p);
}

#define BA_ALIGN 16
#define BA_MASK  (BA_ALIGN-1)

stg_newPinnedByteArrayzh ( W_ n )
{
    W_ words, bytes, payload_words;
    gcptr p;

    MAYBE_GC_N(stg_newPinnedByteArrayzh, n);

    bytes = n;
    /* payload_words is what we will tell the profiler we had to allocate */
    payload_words = ROUNDUP_BYTES_TO_WDS(bytes);
    /* When we actually allocate memory, we need to allow space for the
       header: */
    bytes = bytes + SIZEOF_StgArrBytes;
    /* And we want to align to BA_ALIGN bytes, so we need to allow space
       to shift up to BA_ALIGN - 1 bytes: */
    bytes = bytes + BA_ALIGN - 1;
    /* Now we convert to a number of words: */
    words = ROUNDUP_BYTES_TO_WDS(bytes);

    ("ptr" p) = ccall allocatePinned(MyCapability() "ptr", words);
    if (p == NULL) {
        jump stg_raisezh(base_GHCziIOziException_heapOverflow_closure);
    }
    TICK_ALLOC_PRIM(SIZEOF_StgArrBytes,WDS(payload_words),0);

    /* Now we need to move p forward so that the payload is aligned
       to BA_ALIGN bytes: */
    p = p + ((-p - SIZEOF_StgArrBytes) & BA_MASK);
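    /* Illustrative arithmetic (an addition): assuming a 64-bit build
       where SIZEOF_StgArrBytes = 16, if p ends in 0x8 then
       (-p - 16) & 15 = 8, and p + 8 + 16 is 16-byte aligned.  In
       general this adds the smallest offset that makes
       p + SIZEOF_StgArrBytes a multiple of BA_ALIGN. */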

    SET_HDR(p, stg_ARR_WORDS_info, CCCS);
    StgArrBytes_bytes(p) = n;
    return (p);
}

stg_newAlignedPinnedByteArrayzh ( W_ n, W_ alignment )
{
    W_ words, bytes, payload_words;
    gcptr p;

    again: MAYBE_GC(again);

    /* we always supply at least word-aligned memory, so there's no
       need to allow extra space for alignment if the requirement is less
       than a word.  This also prevents mischief with alignment == 0. */
    if (alignment <= SIZEOF_W) { alignment = 1; }

    bytes = n;

    /* payload_words is what we will tell the profiler we had to allocate */
    payload_words = ROUNDUP_BYTES_TO_WDS(bytes);

    /* When we actually allocate memory, we need to allow space for the
       header: */
    bytes = bytes + SIZEOF_StgArrBytes;
    /* And we want to align to <alignment> bytes, so we need to allow space
       to shift up to <alignment - 1> bytes: */
    bytes = bytes + alignment - 1;
    /* Now we convert to a number of words: */
    words = ROUNDUP_BYTES_TO_WDS(bytes);

    ("ptr" p) = ccall allocatePinned(MyCapability() "ptr", words);
    if (p == NULL) {
        jump stg_raisezh(base_GHCziIOziException_heapOverflow_closure);
    }
    TICK_ALLOC_PRIM(SIZEOF_StgArrBytes,WDS(payload_words),0);

    /* Now we need to move p forward so that the payload is aligned
       to <alignment> bytes. Note that we are assuming that
       <alignment> is a power of 2, which is technically not guaranteed */
    p = p + ((-p - SIZEOF_StgArrBytes) & (alignment - 1));

    SET_HDR(p, stg_ARR_WORDS_info, CCCS);
    StgArrBytes_bytes(p) = n;
    return (p);
}

stg_isByteArrayPinnedzh ( gcptr ba )
// ByteArray# -> Int#
{
    W_ bd, flags;
    bd = Bdescr(ba);
    // Pinned byte arrays live in blocks with the BF_PINNED flag set.
    // We also consider BF_LARGE objects to be immovable. See #13894.
    // See the comment in Storage.c:allocatePinned.
    // We also consider BF_COMPACT objects to be immovable. See #14900.
    flags = TO_W_(bdescr_flags(bd));
    return ((flags & (BF_PINNED | BF_LARGE | BF_COMPACT)) != 0);
}

stg_isMutableByteArrayPinnedzh ( gcptr mba )
// MutableByteArray# s -> Int#
{
    jump stg_isByteArrayPinnedzh(mba);
}

// shrink size of MutableByteArray in-place
stg_shrinkMutableByteArrayzh ( gcptr mba, W_ new_size )
// MutableByteArray# s -> Int# -> State# s -> State# s
{
   ASSERT(new_size >= 0);
   ASSERT(new_size <= StgArrBytes_bytes(mba));

   OVERWRITING_CLOSURE_OFS(mba, (BYTES_TO_WDS(SIZEOF_StgArrBytes) +
                                 ROUNDUP_BYTES_TO_WDS(new_size)));
   StgArrBytes_bytes(mba) = new_size;
   LDV_RECORD_CREATE(mba);

   return ();
}

// resize MutableByteArray
//
// The returned MutableByteArray is either the original
// MutableByteArray resized in-place or, if not possible, a newly
// allocated (unpinned) MutableByteArray (with the original content
// copied over)
stg_resizzeMutableByteArrayzh ( gcptr mba, W_ new_size )
// MutableByteArray# s -> Int# -> State# s -> (# State# s,MutableByteArray# s #)
{
   W_ new_size_wds;

   ASSERT(new_size >= 0);

   new_size_wds = ROUNDUP_BYTES_TO_WDS(new_size);

   if (new_size_wds <= BYTE_ARR_WDS(mba)) {
      OVERWRITING_CLOSURE_OFS(mba, (BYTES_TO_WDS(SIZEOF_StgArrBytes) +
                                    new_size_wds));
      StgArrBytes_bytes(mba) = new_size;
      LDV_RECORD_CREATE(mba);

      return (mba);
   } else {
      (P_ new_mba) = call stg_newByteArrayzh(new_size);

      // maybe at some point in the future we may be able to grow the
      // MBA in-place w/o copying if we know the space after the
      // current MBA is still available, as often we want to grow the
      // MBA shortly after we allocated the original MBA. So maybe no
      // further allocations have occurred by then.

      // copy over old content
      prim %memcpy(BYTE_ARR_CTS(new_mba), BYTE_ARR_CTS(mba),
                   StgArrBytes_bytes(mba), SIZEOF_W);

      return (new_mba);
   }
}

// RRN: This one does not use the "ticketing" approach because it
// deals in unboxed scalars, not heap pointers.
stg_casIntArrayzh( gcptr arr, W_ ind, W_ old, W_ new )
/* MutableByteArray# s -> Int# -> Int# -> Int# -> State# s -> (# State# s, Int# #) */
{
    W_ p, h;

    p = arr + SIZEOF_StgArrBytes + WDS(ind);
    (h) = prim %cmpxchgW(p, old, new);

    return(h);
}


stg_newArrayzh ( W_ n /* words */, gcptr init )
{
    W_ words, size, p;
    gcptr arr;

    again: MAYBE_GC(again);

    // the mark area contains one byte for each 2^MUT_ARR_PTRS_CARD_BITS words
    // in the array, making sure we round up, and then rounding up to a whole
    // number of words.
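    // Illustrative arithmetic (an addition): assuming
    // MUT_ARR_PTRS_CARD_BITS = 7 and a 64-bit word, a 200-element
    // array needs ceiling(200/128) = 2 card bytes, which round up to
    // one extra word, so size = n + 1 = 201.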
    size = n + mutArrPtrsCardWords(n);
    words = BYTES_TO_WDS(SIZEOF_StgMutArrPtrs) + size;
    ("ptr" arr) = ccall allocateMightFail(MyCapability() "ptr",words);
    if (arr == NULL) {
        jump stg_raisezh(base_GHCziIOziException_heapOverflow_closure);
    }
    TICK_ALLOC_PRIM(SIZEOF_StgMutArrPtrs, WDS(size), 0);

    SET_HDR(arr, stg_MUT_ARR_PTRS_DIRTY_info, CCCS);
    StgMutArrPtrs_ptrs(arr) = n;
    StgMutArrPtrs_size(arr) = size;

    // Initialise all elements of the array with the value init
    p = arr + SIZEOF_StgMutArrPtrs;
  for:
    if (p < arr + SIZEOF_StgMutArrPtrs + WDS(n)) (likely: True) {
        W_[p] = init;
        p = p + WDS(1);
        goto for;
    }

    return (arr);
}

stg_unsafeThawArrayzh ( gcptr arr )
{
    // SUBTLETY TO DO WITH THE OLD GEN MUTABLE LIST
    //
    // A MUT_ARR_PTRS lives on the mutable list, but a MUT_ARR_PTRS_FROZEN
    // normally doesn't.  However, when we freeze a MUT_ARR_PTRS, we leave
    // it on the mutable list for the GC to remove (removing something from
    // the mutable list is not easy).
    //
    // So that we can tell whether a MUT_ARR_PTRS_FROZEN is on the mutable list,
    // when we freeze it we set the info ptr to be MUT_ARR_PTRS_FROZEN0
    // to indicate that it is still on the mutable list.
    //
    // So, when we thaw a MUT_ARR_PTRS_FROZEN, we must cope with two cases:
    // either it is on a mut_list, or it isn't.  We adopt the convention that
    // the closure type is MUT_ARR_PTRS_FROZEN0 if it is on the mutable list,
    // and MUT_ARR_PTRS_FROZEN otherwise.  In fact it wouldn't matter if
    // we put it on the mutable list more than once, but it would get scavenged
    // multiple times during GC, which would be unnecessarily slow.
    //
    if (StgHeader_info(arr) != stg_MUT_ARR_PTRS_FROZEN0_info) {
        SET_INFO(arr,stg_MUT_ARR_PTRS_DIRTY_info);
        recordMutable(arr);
        // must be done after SET_INFO, because it ASSERTs closure_MUTABLE()
        return (arr);
    } else {
        SET_INFO(arr,stg_MUT_ARR_PTRS_DIRTY_info);
        return (arr);
    }
}

stg_copyArrayzh ( gcptr src, W_ src_off, gcptr dst, W_ dst_off, W_ n )
{
    copyArray(src, src_off, dst, dst_off, n)
}

stg_copyMutableArrayzh ( gcptr src, W_ src_off, gcptr dst, W_ dst_off, W_ n )
{
    copyMutableArray(src, src_off, dst, dst_off, n)
}

stg_copyArrayArrayzh ( gcptr src, W_ src_off, gcptr dst, W_ dst_off, W_ n )
{
    copyArray(src, src_off, dst, dst_off, n)
}

stg_copyMutableArrayArrayzh ( gcptr src, W_ src_off, gcptr dst, W_ dst_off, W_ n )
{
    copyMutableArray(src, src_off, dst, dst_off, n)
}

stg_cloneArrayzh ( gcptr src, W_ offset, W_ n )
{
    cloneArray(stg_MUT_ARR_PTRS_FROZEN_info, src, offset, n)
}

stg_cloneMutableArrayzh ( gcptr src, W_ offset, W_ n )
{
    cloneArray(stg_MUT_ARR_PTRS_DIRTY_info, src, offset, n)
}

// We have to escape the "z" in the name.
stg_freezzeArrayzh ( gcptr src, W_ offset, W_ n )
{
    cloneArray(stg_MUT_ARR_PTRS_FROZEN_info, src, offset, n)
}

stg_thawArrayzh ( gcptr src, W_ offset, W_ n )
{
    cloneArray(stg_MUT_ARR_PTRS_DIRTY_info, src, offset, n)
}

// RRN: Uses the ticketed approach; see casMutVar
stg_casArrayzh ( gcptr arr, W_ ind, gcptr old, gcptr new )
/* MutableArray# s a -> Int# -> a -> a -> State# s -> (# State# s, Int#, Any a #) */
{
    gcptr h;
    W_ p, len;

    p = arr + SIZEOF_StgMutArrPtrs + WDS(ind);
    (h) = prim %cmpxchgW(p, old, new);

    if (h != old) {
        // Failure, return what was there instead of 'old':
        return (1,h);
    } else {
        // Compare and Swap Succeeded:
        SET_HDR(arr, stg_MUT_ARR_PTRS_DIRTY_info, CCCS);
        len = StgMutArrPtrs_ptrs(arr);
        // The write barrier.  We must write a byte into the mark table:
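        // (Descriptive note, an addition: the mark table starts right
        // after the len element words, and the card for element ind is
        // byte ind >> MUT_ARR_PTRS_CARD_BITS; e.g. assuming 7 card
        // bits, a write at ind = 300 marks card byte 2.)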
        I8[arr + SIZEOF_StgMutArrPtrs + WDS(len) + (ind >> MUT_ARR_PTRS_CARD_BITS )] = 1;
        return (0,new);
    }
}

stg_newArrayArrayzh ( W_ n /* words */ )
{
    W_ words, size, p;
    gcptr arr;

    MAYBE_GC_N(stg_newArrayArrayzh, n);

    // the mark area contains one byte for each 2^MUT_ARR_PTRS_CARD_BITS words
    // in the array, making sure we round up, and then rounding up to a whole
    // number of words.
    size = n + mutArrPtrsCardWords(n);
    words = BYTES_TO_WDS(SIZEOF_StgMutArrPtrs) + size;
    ("ptr" arr) = ccall allocateMightFail(MyCapability() "ptr",words);
    if (arr == NULL) {
        jump stg_raisezh(base_GHCziIOziException_heapOverflow_closure);
    }
    TICK_ALLOC_PRIM(SIZEOF_StgMutArrPtrs, WDS(size), 0);

    SET_HDR(arr, stg_MUT_ARR_PTRS_DIRTY_info, CCCS);
    StgMutArrPtrs_ptrs(arr) = n;
    StgMutArrPtrs_size(arr) = size;

    // Initialise all elements of the array with a pointer to the new array
    p = arr + SIZEOF_StgMutArrPtrs;
  for:
    if (p < arr + SIZEOF_StgMutArrPtrs + WDS(n)) (likely: True) {
        W_[p] = arr;
        p = p + WDS(1);
        goto for;
    }

    return (arr);
}


/* -----------------------------------------------------------------------------
   SmallArray primitives
   -------------------------------------------------------------------------- */

stg_newSmallArrayzh ( W_ n /* words */, gcptr init )
{
    W_ words, size, p;
    gcptr arr;

    again: MAYBE_GC(again);

    words = BYTES_TO_WDS(SIZEOF_StgSmallMutArrPtrs) + n;
    ("ptr" arr) = ccall allocateMightFail(MyCapability() "ptr",words);
    if (arr == NULL) {
        jump stg_raisezh(base_GHCziIOziException_heapOverflow_closure);
    }
    TICK_ALLOC_PRIM(SIZEOF_StgSmallMutArrPtrs, WDS(n), 0);

    SET_HDR(arr, stg_SMALL_MUT_ARR_PTRS_DIRTY_info, CCCS);
    StgSmallMutArrPtrs_ptrs(arr) = n;

    // Initialise all elements of the array with the value init
    p = arr + SIZEOF_StgSmallMutArrPtrs;
  for:
    if (p < arr + SIZEOF_StgSmallMutArrPtrs + WDS(n)) (likely: True) {
        W_[p] = init;
        p = p + WDS(1);
        goto for;
    }

    return (arr);
}

stg_unsafeThawSmallArrayzh ( gcptr arr )
{
    // See stg_unsafeThawArrayzh
    if (StgHeader_info(arr) != stg_SMALL_MUT_ARR_PTRS_FROZEN0_info) {
        SET_INFO(arr, stg_SMALL_MUT_ARR_PTRS_DIRTY_info);
        recordMutable(arr);
        // must be done after SET_INFO, because it ASSERTs closure_MUTABLE()
        return (arr);
    } else {
        SET_INFO(arr, stg_SMALL_MUT_ARR_PTRS_DIRTY_info);
        return (arr);
    }
}

stg_cloneSmallArrayzh ( gcptr src, W_ offset, W_ n )
{
    cloneSmallArray(stg_SMALL_MUT_ARR_PTRS_FROZEN_info, src, offset, n)
}

stg_cloneSmallMutableArrayzh ( gcptr src, W_ offset, W_ n )
{
    cloneSmallArray(stg_SMALL_MUT_ARR_PTRS_DIRTY_info, src, offset, n)
}

// We have to escape the "z" in the name.
stg_freezzeSmallArrayzh ( gcptr src, W_ offset, W_ n )
{
    cloneSmallArray(stg_SMALL_MUT_ARR_PTRS_FROZEN_info, src, offset, n)
}

stg_thawSmallArrayzh ( gcptr src, W_ offset, W_ n )
{
    cloneSmallArray(stg_SMALL_MUT_ARR_PTRS_DIRTY_info, src, offset, n)
}

stg_copySmallArrayzh ( gcptr src, W_ src_off, gcptr dst, W_ dst_off, W_ n)
{
    W_ dst_p, src_p, bytes;

    SET_INFO(dst, stg_SMALL_MUT_ARR_PTRS_DIRTY_info);

    dst_p = dst + SIZEOF_StgSmallMutArrPtrs + WDS(dst_off);
    src_p = src + SIZEOF_StgSmallMutArrPtrs + WDS(src_off);
    bytes = WDS(n);
    prim %memcpy(dst_p, src_p, bytes, SIZEOF_W);

    return ();
}

stg_copySmallMutableArrayzh ( gcptr src, W_ src_off, gcptr dst, W_ dst_off, W_ n)
{
    W_ dst_p, src_p, bytes;

    SET_INFO(dst, stg_SMALL_MUT_ARR_PTRS_DIRTY_info);

    dst_p = dst + SIZEOF_StgSmallMutArrPtrs + WDS(dst_off);
    src_p = src + SIZEOF_StgSmallMutArrPtrs + WDS(src_off);
    bytes = WDS(n);
    if (src == dst) {
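        // (Descriptive note, an addition: when src == dst the source
        // and destination ranges may overlap, so memmove, whose
        // behaviour is defined for overlapping copies, is used instead
        // of memcpy.)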
        prim %memmove(dst_p, src_p, bytes, SIZEOF_W);
    } else {
        prim %memcpy(dst_p, src_p, bytes, SIZEOF_W);
    }

    return ();
}

// RRN: Uses the ticketed approach; see casMutVar
stg_casSmallArrayzh ( gcptr arr, W_ ind, gcptr old, gcptr new )
/* SmallMutableArray# s a -> Int# -> a -> a -> State# s -> (# State# s, Int#, Any a #) */
{
    gcptr h;
    W_ p, len;

    p = arr + SIZEOF_StgSmallMutArrPtrs + WDS(ind);
    (h) = prim %cmpxchgW(p, old, new);

    if (h != old) {
        // Failure, return what was there instead of 'old':
        return (1,h);
    } else {
        // Compare and Swap Succeeded:
        SET_HDR(arr, stg_SMALL_MUT_ARR_PTRS_DIRTY_info, CCCS);
        return (0,new);
    }
}


/* -----------------------------------------------------------------------------
   MutVar primitives
   -------------------------------------------------------------------------- */

stg_newMutVarzh ( gcptr init )
{
    W_ mv;

    ALLOC_PRIM_P (SIZEOF_StgMutVar, stg_newMutVarzh, init);
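    // (Descriptive note, an addition: ALLOC_PRIM_P advances Hp by
    // SIZEOF_StgMutVar, entering the GC on heap-check failure; Hp
    // points at the last word allocated, so the new closure starts at
    // Hp - SIZEOF_StgMutVar + WDS(1) below.)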

    mv = Hp - SIZEOF_StgMutVar + WDS(1);
    SET_HDR(mv,stg_MUT_VAR_DIRTY_info,CCCS);
    StgMutVar_var(mv) = init;

    return (mv);
}

// RRN: To support the "ticketed" approach, we return the NEW rather
// than old value if the CAS is successful.  This is received in an
// opaque form in the Haskell code, preventing the compiler from
// changing its pointer identity.  The ticket can then be safely used
// in future CAS operations.
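//
// A rough Haskell-level sketch of the intended retry loop (an
// illustration only; casLoop is a made-up name, not GHC code):
//
//   casLoop :: MutVar# s a -> a -> (a -> a) -> State# s -> (# State# s, a #)
//   casLoop mv ticket f s =
//     case casMutVar# mv ticket (f ticket) s of
//       (# s', 0#, new #) -> (# s', new #)        -- success: new ticket
//       (# s', _,  cur #) -> casLoop mv cur f s'  -- failure: retry with cur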
stg_casMutVarzh ( gcptr mv, gcptr old, gcptr new )
 /* MutVar# s a -> a -> a -> State# s -> (# State# s, Int#, Any a #) */
{
#if defined(THREADED_RTS)
    gcptr h;

    (h) = prim %cmpxchgW(mv + SIZEOF_StgHeader + OFFSET_StgMutVar_var, old, new);
    if (h != old) {
        return (1,h);
    } else {
        if (GET_INFO(mv) == stg_MUT_VAR_CLEAN_info) {
            ccall dirty_MUT_VAR(BaseReg "ptr", mv "ptr");
        }
        return (0,new);
    }
#else
    gcptr prev_val;

    prev_val = StgMutVar_var(mv);
    if (prev_val != old) {
        return (1,prev_val);
    } else {
        StgMutVar_var(mv) = new;
        if (GET_INFO(mv) == stg_MUT_VAR_CLEAN_info) {
            ccall dirty_MUT_VAR(BaseReg "ptr", mv "ptr");
        }
        return (0,new);
    }
#endif
}

stg_atomicModifyMutVarzh ( gcptr mv, gcptr f )
{
    W_ z, x, y, r, h;

    /* If x is the current contents of the MutVar#, then
       We want to make the new contents point to

         (sel_0 (f x))

       and the return value is

         (sel_1 (f x))

        obviously we can share (f x).

         z = [stg_ap_2 f x]  (max (HS + 2) MIN_UPD_SIZE)
         y = [stg_sel_0 z]   (max (HS + 1) MIN_UPD_SIZE)
         r = [stg_sel_1 z]   (max (HS + 1) MIN_UPD_SIZE)
    */
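    /* Roughly, in Haskell terms (an illustration, not part of the
       original comment):

         z = f x      -- the shared application
         y = fst z    -- installed as the new contents of the MutVar#
         r = snd z    -- lazily returned to the caller
    */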

#if defined(MIN_UPD_SIZE) && MIN_UPD_SIZE > 1
#define THUNK_1_SIZE (SIZEOF_StgThunkHeader + WDS(MIN_UPD_SIZE))
#define TICK_ALLOC_THUNK_1() TICK_ALLOC_UP_THK(WDS(1),WDS(MIN_UPD_SIZE-1))
#else
#define THUNK_1_SIZE (SIZEOF_StgThunkHeader + WDS(1))
#define TICK_ALLOC_THUNK_1() TICK_ALLOC_UP_THK(WDS(1),0)
#endif

#if defined(MIN_UPD_SIZE) && MIN_UPD_SIZE > 2
#define THUNK_2_SIZE (SIZEOF_StgThunkHeader + WDS(MIN_UPD_SIZE))
#define TICK_ALLOC_THUNK_2() TICK_ALLOC_UP_THK(WDS(2),WDS(MIN_UPD_SIZE-2))
#else
#define THUNK_2_SIZE (SIZEOF_StgThunkHeader + WDS(2))
#define TICK_ALLOC_THUNK_2() TICK_ALLOC_UP_THK(WDS(2),0)
#endif

#define SIZE (THUNK_2_SIZE + THUNK_1_SIZE + THUNK_1_SIZE)

    HP_CHK_GEN_TICKY(SIZE);

    TICK_ALLOC_THUNK_2();
    CCCS_ALLOC(THUNK_2_SIZE);
    z = Hp - THUNK_2_SIZE + WDS(1);
    SET_HDR(z, stg_ap_2_upd_info, CCCS);
    LDV_RECORD_CREATE(z);
    StgThunk_payload(z,0) = f;

    TICK_ALLOC_THUNK_1();
    CCCS_ALLOC(THUNK_1_SIZE);
    y = z - THUNK_1_SIZE;
    SET_HDR(y, stg_sel_0_upd_info, CCCS);
    LDV_RECORD_CREATE(y);
    StgThunk_payload(y,0) = z;

    TICK_ALLOC_THUNK_1();
    CCCS_ALLOC(THUNK_1_SIZE);
    r = y - THUNK_1_SIZE;
    SET_HDR(r, stg_sel_1_upd_info, CCCS);
    LDV_RECORD_CREATE(r);
    StgThunk_payload(r,0) = z;

  retry:
    x = StgMutVar_var(mv);
    StgThunk_payload(z,1) = x;
#if defined(THREADED_RTS)
    (h) = prim %cmpxchgW(mv + SIZEOF_StgHeader + OFFSET_StgMutVar_var, x, y);
    if (h != x) { goto retry; }
#else
    StgMutVar_var(mv) = y;
#endif

    if (GET_INFO(mv) == stg_MUT_VAR_CLEAN_info) {
        ccall dirty_MUT_VAR(BaseReg "ptr", mv "ptr");
    }

    return (r);
}

/* -----------------------------------------------------------------------------
   Weak Pointer Primitives
   -------------------------------------------------------------------------- */

STRING(stg_weak_msg,"New weak pointer at %p\n")

stg_mkWeakzh ( gcptr key,
               gcptr value,
               gcptr finalizer /* or stg_NO_FINALIZER_closure */ )
{
    gcptr w;

    ALLOC_PRIM (SIZEOF_StgWeak)

    w = Hp - SIZEOF_StgWeak + WDS(1);
    SET_HDR(w, stg_WEAK_info, CCCS);

    StgWeak_key(w)         = key;
    StgWeak_value(w)       = value;
    StgWeak_finalizer(w)   = finalizer;
    StgWeak_cfinalizers(w) = stg_NO_FINALIZER_closure;

    StgWeak_link(w) = Capability_weak_ptr_list_hd(MyCapability());
    Capability_weak_ptr_list_hd(MyCapability()) = w;
    if (Capability_weak_ptr_list_tl(MyCapability()) == NULL) {
        Capability_weak_ptr_list_tl(MyCapability()) = w;
    }

    IF_DEBUG(weak, ccall debugBelch(stg_weak_msg,w));

    return (w);
}

stg_mkWeakNoFinalizzerzh ( gcptr key, gcptr value )
{
    jump stg_mkWeakzh (key, value, stg_NO_FINALIZER_closure);
}

STRING(stg_cfinalizer_msg,"Adding a finalizer to %p\n")

stg_addCFinalizzerToWeakzh ( W_ fptr,   // finalizer
                             W_ ptr,
                             W_ flag,   // has environment (0 or 1)
                             W_ eptr,
                             gcptr w )
{
    W_ c, info;

    ALLOC_PRIM (SIZEOF_StgCFinalizerList)

    c = Hp - SIZEOF_StgCFinalizerList + WDS(1);
    SET_HDR(c, stg_C_FINALIZER_LIST_info, CCCS);

    StgCFinalizerList_fptr(c) = fptr;
    StgCFinalizerList_ptr(c) = ptr;
    StgCFinalizerList_eptr(c) = eptr;
    StgCFinalizerList_flag(c) = flag;

    LOCK_CLOSURE(w, info);

    if (info == stg_DEAD_WEAK_info) {
        // Already dead.
        unlockClosure(w, info);
        return (0);
    }

    StgCFinalizerList_link(c) = StgWeak_cfinalizers(w);
    StgWeak_cfinalizers(w) = c;

    unlockClosure(w, info);

    recordMutable(w);

    IF_DEBUG(weak, ccall debugBelch(stg_cfinalizer_msg,w));

    return (1);
}

stg_finalizzeWeakzh ( gcptr w )
{
    gcptr f, list;
    W_ info;

    LOCK_CLOSURE(w, info);

    // already dead?
    if (info == stg_DEAD_WEAK_info) {
        unlockClosure(w, info);
        return (0,stg_NO_FINALIZER_closure);
    }

    f    = StgWeak_finalizer(w);
    list = StgWeak_cfinalizers(w);

    // kill it
#if defined(PROFILING)
    // @LDV profiling
    // A weak pointer is inherently used, so we do not need to call
    // LDV_recordDead_FILL_SLOP_DYNAMIC():
    //    LDV_recordDead_FILL_SLOP_DYNAMIC((StgClosure *)w);
    // or, LDV_recordDead():
    //    LDV_recordDead((StgClosure *)w, sizeofW(StgWeak) - sizeofW(StgProfHeader));
    // Furthermore, when PROFILING is turned on, dead weak pointers are exactly as
    // large as weak pointers, so there is no need to fill the slop, either.
    // See stg_DEAD_WEAK_info in StgMiscClosures.cmm.
#endif

    //
    // Todo: maybe use SET_HDR() and remove LDV_recordCreate()?
    //
    unlockClosure(w, stg_DEAD_WEAK_info);

    LDV_RECORD_CREATE(w);

    if (list != stg_NO_FINALIZER_closure) {
      ccall runCFinalizers(list);
    }

    /* return the finalizer */
    if (f == stg_NO_FINALIZER_closure) {
        return (0,stg_NO_FINALIZER_closure);
    } else {
        return (1,f);
    }
}

stg_deRefWeakzh ( gcptr w )
{
    W_ code, info;
    gcptr val;

    info = GET_INFO(w);

    if (info == stg_WHITEHOLE_info) {
        // w is locked by another thread. Now it's not immediately clear if w is
        // alive or not. We use lockClosure to wait for the info pointer to become
        // something other than stg_WHITEHOLE_info.

        LOCK_CLOSURE(w, info);
        unlockClosure(w, info);
    }

    if (info == stg_WEAK_info) {
        code = 1;
        val = StgWeak_value(w);
    } else {
        code = 0;
        val = w;
    }
    return (code,val);
}

/* -----------------------------------------------------------------------------
   Floating point operations.
   -------------------------------------------------------------------------- */

stg_decodeFloatzuIntzh ( F_ arg )
{
    W_ p;
    W_ tmp, mp_tmp1, mp_tmp_w, r1, r2;

    STK_CHK_GEN_N (WDS(2));

    reserve 2 = tmp {

        mp_tmp1  = tmp + WDS(1);
        mp_tmp_w = tmp;

        /* Perform the operation */
        ccall __decodeFloat_Int(mp_tmp1 "ptr", mp_tmp_w "ptr", arg);

        r1 = W_[mp_tmp1];
        r2 = W_[mp_tmp_w];
    }

    /* returns: (Int# (mantissa), Int# (exponent)) */
    return (r1, r2);
}
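/* Illustrative example (an addition): at the Haskell level this
 * implements decodeFloat on Float.  Assuming IEEE single precision,
 * decodeFloat (1.5 :: Float) == (12582912, -23), since
 * 12582912 = 1.5 * 2^23.
 */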

stg_decodeDoublezu2Intzh ( D_ arg )
{
    W_ p, tmp;
    W_ mp_tmp1, mp_tmp2, mp_result1, mp_result2;
    W_ r1, r2, r3, r4;

    STK_CHK_GEN_N (WDS(4));

    reserve 4 = tmp {

        mp_tmp1    = tmp + WDS(3);
        mp_tmp2    = tmp + WDS(2);
        mp_result1 = tmp + WDS(1);
        mp_result2 = tmp;

        /* Perform the operation */
        ccall __decodeDouble_2Int(mp_tmp1 "ptr", mp_tmp2 "ptr",
                                  mp_result1 "ptr", mp_result2 "ptr",
                                  arg);

        r1 = W_[mp_tmp1];
        r2 = W_[mp_tmp2];
        r3 = W_[mp_result1];
        r4 = W_[mp_result2];
    }

    /* returns:
       (Int# (mant sign), Word# (mant high), Word# (mant low), Int# (expn)) */
    return (r1, r2, r3, r4);
}

/* Double# -> (# Int64#, Int# #) */
stg_decodeDoublezuInt64zh ( D_ arg )
{
    CInt exp;
    I64  mant;
    W_   mant_ptr;

    STK_CHK_GEN_N (SIZEOF_INT64);
    reserve BYTES_TO_WDS(SIZEOF_INT64) = mant_ptr {
        (exp) = ccall __decodeDouble_Int64(mant_ptr "ptr", arg);
        mant = I64[mant_ptr];
    }

    return (mant, TO_W_(exp));
}

/* -----------------------------------------------------------------------------
 * Concurrency primitives
 * -------------------------------------------------------------------------- */

stg_forkzh ( gcptr closure )
{
    MAYBE_GC_P(stg_forkzh, closure);

    gcptr threadid;

    ("ptr" threadid) = ccall createIOThread( MyCapability() "ptr",
                                  RtsFlags_GcFlags_initialStkSize(RtsFlags),
                                  closure "ptr");

    /* start blocked if the current thread is blocked */
    StgTSO_flags(threadid) = %lobits16(
        TO_W_(StgTSO_flags(threadid)) |
        TO_W_(StgTSO_flags(CurrentTSO)) & (TSO_BLOCKEX | TSO_INTERRUPTIBLE));

    ccall scheduleThread(MyCapability() "ptr", threadid "ptr");

    // context switch soon, but not immediately: we don't want every
    // forkIO to force a context-switch.
    Capability_context_switch(MyCapability()) = 1 :: CInt;

    return (threadid);
}

stg_forkOnzh ( W_ cpu, gcptr closure )
{
    again: MAYBE_GC(again);

    gcptr threadid;

    ("ptr" threadid) = ccall createIOThread(
        MyCapability() "ptr",
        RtsFlags_GcFlags_initialStkSize(RtsFlags),
        closure "ptr");

    /* start blocked if the current thread is blocked */
    StgTSO_flags(threadid) = %lobits16(
        TO_W_(StgTSO_flags(threadid)) |
        TO_W_(StgTSO_flags(CurrentTSO)) & (TSO_BLOCKEX | TSO_INTERRUPTIBLE));

    ccall scheduleThreadOn(MyCapability() "ptr", cpu, threadid "ptr");

    // context switch soon, but not immediately: we don't want every
    // forkIO to force a context-switch.
    Capability_context_switch(MyCapability()) = 1 :: CInt;

    return (threadid);
}

stg_yieldzh ()
{
    // when we yield to the scheduler, we have to tell it to put the
    // current thread to the back of the queue by setting the
    // context_switch flag.  If we don't do this, it will run the same
    // thread again.
    Capability_context_switch(MyCapability()) = 1 :: CInt;
    jump stg_yield_noregs();
}

stg_labelThreadzh ( gcptr threadid, W_ addr )
{
#if defined(DEBUG) || defined(TRACING) || defined(DTRACE)
    ccall labelThread(MyCapability() "ptr", threadid "ptr", addr "ptr");
#endif
    return ();
}

stg_isCurrentThreadBoundzh (/* no args */)
{
    W_ r;
    (r) = ccall isThreadBound(CurrentTSO);
    return (r);
}