/* -*- tab-width: 8 -*- */

/* -----------------------------------------------------------------------------
 *
 * (c) The GHC Team, 1998-2012
 *
 * Out-of-line primitive operations
 *
 * This file contains the implementations of all the primitive
 * operations ("primops") which are not expanded inline.  See
 * ghc/compiler/prelude/primops.txt.pp for a list of all the primops;
 * this file contains code for most of those with the attribute
 * out_of_line=True.
 *
 * Entry convention: the entry convention for a primop is the
 * NativeNodeCall convention, and the return convention is
 * NativeReturn.  (see compiler/cmm/CmmCallConv.hs)
 *
 * This file is written in a subset of C--, extended with various
 * features specific to GHC.  It is compiled by GHC directly.  For the
 * syntax of .cmm files, see the parser in ghc/compiler/cmm/CmmParse.y.
 *
 * ---------------------------------------------------------------------------*/

#include "Cmm.h"
25
#include "MachDeps.h"
Simon Marlow's avatar
Simon Marlow committed
26
#include "SMPClosureOps.h"
27

28
#ifdef __PIC__
29 30
import pthread_mutex_lock;
import pthread_mutex_unlock;
31
#endif
32
import CLOSURE base_ControlziExceptionziBase_nestedAtomically_closure;
33 34
import EnterCriticalSection;
import LeaveCriticalSection;
35
import CLOSURE ghczmprim_GHCziTypes_False_closure;
36
#if defined(USE_MINIINTERPRETER) || !defined(mingw32_HOST_OS)
37
import CLOSURE sm_mutex;
38
#endif
39 40 41
#ifdef PROFILING
import CLOSURE CCS_MAIN;
#endif
42

43 44 45 46 47 48 49 50 51 52 53 54 55
/*-----------------------------------------------------------------------------
  Array Primitives

  Basically just new*Array - the others are all inline macros.

  The slow entry point is for returning from a heap check, the saved
  size argument must be re-loaded from the stack.
  -------------------------------------------------------------------------- */

/* for objects that are *less* than the size of a word, make sure we
 * round up to the nearest word for the size of the array.
 */

56
/* Allocate a new MutableByteArray# of n bytes (payload rounded up to
   whole words).  May trigger GC before allocating. */
stg_newByteArrayzh ( W_ n )
{
    W_ words, payload_words;
    gcptr p;

    MAYBE_GC_N(stg_newByteArrayzh, n);

    // n is in bytes; the payload occupies a whole number of words.
    payload_words = ROUNDUP_BYTES_TO_WDS(n);
    words = BYTES_TO_WDS(SIZEOF_StgArrBytes) + payload_words;
    ("ptr" p) = ccall allocate(MyCapability() "ptr",words);
    TICK_ALLOC_PRIM(SIZEOF_StgArrBytes,WDS(payload_words),0);
    SET_HDR(p, stg_ARR_WORDS_info, CCCS);
    StgArrBytes_bytes(p) = n;
    return (p);
}

Simon Marlow's avatar
Simon Marlow committed
72 73 74
#define BA_ALIGN 16
#define BA_MASK  (BA_ALIGN-1)

75
/* Allocate a pinned (non-moving) MutableByteArray# of n bytes whose
   payload is aligned to BA_ALIGN bytes. */
stg_newPinnedByteArrayzh ( W_ n )
{
    W_ words, bytes, payload_words;
    gcptr p;

    MAYBE_GC_N(stg_newPinnedByteArrayzh, n);

    bytes = n;
    /* payload_words is what we will tell the profiler we had to allocate */
    payload_words = ROUNDUP_BYTES_TO_WDS(bytes);
    /* When we actually allocate memory, we need to allow space for the
       header: */
    bytes = bytes + SIZEOF_StgArrBytes;
    /* And we want to align to BA_ALIGN bytes, so we need to allow space
       to shift up to BA_ALIGN - 1 bytes: */
    bytes = bytes + BA_ALIGN - 1;
    /* Now we convert to a number of words: */
    words = ROUNDUP_BYTES_TO_WDS(bytes);

    ("ptr" p) = ccall allocatePinned(MyCapability() "ptr", words);
    TICK_ALLOC_PRIM(SIZEOF_StgArrBytes,WDS(payload_words),0);

    /* Now we need to move p forward so that the payload is aligned
       to BA_ALIGN bytes: */
    p = p + ((-p - SIZEOF_StgArrBytes) & BA_MASK);

    SET_HDR(p, stg_ARR_WORDS_info, CCCS);
    StgArrBytes_bytes(p) = n;
    return (p);
}

106
/* Allocate a pinned MutableByteArray# of n bytes whose payload is
   aligned to <alignment> bytes (assumed to be a power of 2). */
stg_newAlignedPinnedByteArrayzh ( W_ n, W_ alignment )
{
    W_ words, bytes, payload_words;
    gcptr p;

    again: MAYBE_GC(again);

    /* we always supply at least word-aligned memory, so there's no
       need to allow extra space for alignment if the requirement is less
       than a word.  This also prevents mischief with alignment == 0. */
    if (alignment <= SIZEOF_W) { alignment = 1; }

    bytes = n;

    /* payload_words is what we will tell the profiler we had to allocate */
    payload_words = ROUNDUP_BYTES_TO_WDS(bytes);

    /* When we actually allocate memory, we need to allow space for the
       header: */
    bytes = bytes + SIZEOF_StgArrBytes;
    /* And we want to align to <alignment> bytes, so we need to allow space
       to shift up to <alignment - 1> bytes: */
    bytes = bytes + alignment - 1;
    /* Now we convert to a number of words: */
    words = ROUNDUP_BYTES_TO_WDS(bytes);

    ("ptr" p) = ccall allocatePinned(MyCapability() "ptr", words);
    TICK_ALLOC_PRIM(SIZEOF_StgArrBytes,WDS(payload_words),0);

    /* Now we need to move p forward so that the payload is aligned
       to <alignment> bytes. Note that we are assuming that
       <alignment> is a power of 2, which is technically not guaranteed */
    p = p + ((-p - SIZEOF_StgArrBytes) & (alignment - 1));

    SET_HDR(p, stg_ARR_WORDS_info, CCCS);
    StgArrBytes_bytes(p) = n;
    return (p);
}

145 146
stg_isByteArrayPinnedzh ( gcptr ba )
// ByteArray# s -> Int#
{
    W_ bd, flags;
    bd = Bdescr(ba);
    // pinned byte arrays live in blocks with the BF_PINNED flag set.
    // See the comment in Storage.c:allocatePinned.
    flags = TO_W_(bdescr_flags(bd));
    // Parenthesised explicitly: C-- operator precedence is not the same as
    // C's, so don't rely on the relative binding of '&' and '!='.
    return ((flags & BF_PINNED) != 0);
}

156 157 158 159 160 161
stg_isMutableByteArrayPinnedzh ( gcptr mba )
// MutableByteArray# s -> Int#
// Same representation as an immutable ByteArray#, so just delegate.
{
    jump stg_isByteArrayPinnedzh(mba);
}

162 163 164 165 166
// shrink size of MutableByteArray in-place
stg_shrinkMutableByteArrayzh ( gcptr mba, W_ new_size )
// MutableByteArray# s -> Int# -> State# s -> State# s
{
   ASSERT(new_size >= 0);
   ASSERT(new_size <= StgArrBytes_bytes(mba));

   // Mark the no-longer-used tail of the closure as slop before
   // shrinking the recorded byte count.
   OVERWRITING_CLOSURE_OFS(mba, (BYTES_TO_WDS(SIZEOF_StgArrBytes) +
                                 ROUNDUP_BYTES_TO_WDS(new_size)));
   StgArrBytes_bytes(mba) = new_size;
   LDV_RECORD_CREATE(mba);

   return ();
}

// resize MutableByteArray
//
// The returned MutableByteArray is either the original
// MutableByteArray resized in-place or, if not possible, a newly
// allocated (unpinned) MutableByteArray (with the original content
// copied over)
stg_resizzeMutableByteArrayzh ( gcptr mba, W_ new_size )
// MutableByteArray# s -> Int# -> State# s -> (# State# s,MutableByteArray# s #)
{
   W_ new_size_wds;

   ASSERT(new_size >= 0);

   new_size_wds = ROUNDUP_BYTES_TO_WDS(new_size);

   if (new_size_wds <= BYTE_ARR_WDS(mba)) {
      // Shrinking (or staying the same size): resize in place.
      OVERWRITING_CLOSURE_OFS(mba, (BYTES_TO_WDS(SIZEOF_StgArrBytes) +
                                    new_size_wds));
      StgArrBytes_bytes(mba) = new_size;
      LDV_RECORD_CREATE(mba);

      return (mba);
   } else {
      (P_ new_mba) = call stg_newByteArrayzh(new_size);

      // maybe at some point in the future we may be able to grow the
      // MBA in-place w/o copying if we know the space after the
      // current MBA is still available, as often we want to grow the
      // MBA shortly after we allocated the original MBA. So maybe no
      // further allocations have occurred by then.

      // copy over old content
      prim %memcpy(BYTE_ARR_CTS(new_mba), BYTE_ARR_CTS(mba),
                   StgArrBytes_bytes(mba), SIZEOF_W);

      return (new_mba);
   }
}

216 217 218 219 220
// RRN: This one does not use the "ticketing" approach because it
// deals in unboxed scalars, not heap pointers.
stg_casIntArrayzh( gcptr arr, W_ ind, W_ old, W_ new )
/* MutableByteArray# s -> Int# -> Int# -> Int# -> State# s -> (# State# s, Int# #) */
{
    W_ p, h;

    p = arr + SIZEOF_StgArrBytes + WDS(ind);
    // Atomically compare-and-swap; h receives the value previously at p.
    (h) = prim %cmpxchgW(p, old, new);

    return(h);
}

229

230
/* Allocate a MutableArray# of n elements, each initialised to 'init'.
   The closure carries a card table after the payload for the GC's
   write barrier. */
stg_newArrayzh ( W_ n /* words */, gcptr init )
{
    W_ words, size, p;
    gcptr arr;

    again: MAYBE_GC(again);

    // the mark area contains one byte for each 2^MUT_ARR_PTRS_CARD_BITS words
    // in the array, making sure we round up, and then rounding up to a whole
    // number of words.
    size = n + mutArrPtrsCardWords(n);
    words = BYTES_TO_WDS(SIZEOF_StgMutArrPtrs) + size;
    ("ptr" arr) = ccall allocate(MyCapability() "ptr",words);
    TICK_ALLOC_PRIM(SIZEOF_StgMutArrPtrs, WDS(size), 0);

    SET_HDR(arr, stg_MUT_ARR_PTRS_DIRTY_info, CCCS);
    StgMutArrPtrs_ptrs(arr) = n;
    StgMutArrPtrs_size(arr) = size;

    // Initialise all elements of the array with the value 'init'
    p = arr + SIZEOF_StgMutArrPtrs;
  for:
    if (p < arr + SIZEOF_StgMutArrPtrs + WDS(n)) {
        W_[p] = init;
        p = p + WDS(1);
        goto for;
    }

    return (arr);
}

261
stg_unsafeThawArrayzh ( gcptr arr )
{
    // SUBTLETY TO DO WITH THE OLD GEN MUTABLE LIST
    //
    // A MUT_ARR_PTRS lives on the mutable list, but a MUT_ARR_PTRS_FROZEN
    // normally doesn't.  However, when we freeze a MUT_ARR_PTRS, we leave
    // it on the mutable list for the GC to remove (removing something from
    // the mutable list is not easy).
    //
    // So that we can tell whether a MUT_ARR_PTRS_FROZEN is on the mutable list,
    // when we freeze it we set the info ptr to be MUT_ARR_PTRS_FROZEN0
    // to indicate that it is still on the mutable list.
    //
    // So, when we thaw a MUT_ARR_PTRS_FROZEN, we must cope with two cases:
    // either it is on a mut_list, or it isn't.  We adopt the convention that
    // the closure type is MUT_ARR_PTRS_FROZEN0 if it is on the mutable list,
    // and MUT_ARR_PTRS_FROZEN otherwise.  In fact it wouldn't matter if
    // we put it on the mutable list more than once, but it would get scavenged
    // multiple times during GC, which would be unnecessarily slow.
    //
    if (StgHeader_info(arr) != stg_MUT_ARR_PTRS_FROZEN0_info) {
        SET_INFO(arr,stg_MUT_ARR_PTRS_DIRTY_info);
        recordMutable(arr);
        // must be done after SET_INFO, because it ASSERTs closure_MUTABLE()
        return (arr);
    } else {
        // Already on the mutable list (FROZEN0): just flip the info table.
        SET_INFO(arr,stg_MUT_ARR_PTRS_DIRTY_info);
        return (arr);
    }
}

292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311
// Copy n elements from 'src' (at src_off) into 'dst' (at dst_off);
// delegates to the copyArray macro (defined elsewhere).
stg_copyArrayzh ( gcptr src, W_ src_off, gcptr dst, W_ dst_off, W_ n )
{
    copyArray(src, src_off, dst, dst_off, n)
}

// As stg_copyArrayzh, but the source is mutable; delegates to the
// copyMutableArray macro (defined elsewhere).
stg_copyMutableArrayzh ( gcptr src, W_ src_off, gcptr dst, W_ dst_off, W_ n )
{
    copyMutableArray(src, src_off, dst, dst_off, n)
}

// ArrayArray# variant: identical at runtime to stg_copyArrayzh.
stg_copyArrayArrayzh ( gcptr src, W_ src_off, gcptr dst, W_ dst_off, W_ n )
{
    copyArray(src, src_off, dst, dst_off, n)
}

// MutableArrayArray# variant: identical at runtime to stg_copyMutableArrayzh.
stg_copyMutableArrayArrayzh ( gcptr src, W_ src_off, gcptr dst, W_ dst_off, W_ n )
{
    copyMutableArray(src, src_off, dst, dst_off, n)
}

312 313
// Clone a slice of an immutable array; the result is frozen.
stg_cloneArrayzh ( gcptr src, W_ offset, W_ n )
{
    cloneArray(stg_MUT_ARR_PTRS_FROZEN_info, src, offset, n)
}

// Clone a slice of a mutable array; the result is mutable (DIRTY).
stg_cloneMutableArrayzh ( gcptr src, W_ offset, W_ n )
{
    cloneArray(stg_MUT_ARR_PTRS_DIRTY_info, src, offset, n)
}

// We have to escape the "z" in the name.
// Freeze by cloning: the copy gets the FROZEN info table.
stg_freezzeArrayzh ( gcptr src, W_ offset, W_ n )
{
    cloneArray(stg_MUT_ARR_PTRS_FROZEN_info, src, offset, n)
}

// Thaw by cloning: the copy gets the mutable (DIRTY) info table.
stg_thawArrayzh ( gcptr src, W_ offset, W_ n )
{
    cloneArray(stg_MUT_ARR_PTRS_DIRTY_info, src, offset, n)
}

333
// RRN: Uses the ticketed approach; see casMutVar
stg_casArrayzh ( gcptr arr, W_ ind, gcptr old, gcptr new )
/* MutableArray# s a -> Int# -> a -> a -> State# s -> (# State# s, Int#, Any a #) */
{
    gcptr h;
    W_ p, len;

    p = arr + SIZEOF_StgMutArrPtrs + WDS(ind);
    (h) = prim %cmpxchgW(p, old, new);

    if (h != old) {
        // Failure, return what was there instead of 'old':
        return (1,h);
    } else {
        // Compare and Swap Succeeded:
        SET_HDR(arr, stg_MUT_ARR_PTRS_DIRTY_info, CCCS);
        len = StgMutArrPtrs_ptrs(arr);
        // The write barrier.  We must write a byte into the mark table:
        I8[arr + SIZEOF_StgMutArrPtrs + WDS(len) + (ind >> MUT_ARR_PTRS_CARD_BITS )] = 1;
        return (0,new);
    }
}

356
/* Allocate a MutableArrayArray# of n elements, each element initially
   pointing at the array itself. */
stg_newArrayArrayzh ( W_ n /* words */ )
{
    W_ words, size, p;
    gcptr arr;

    MAYBE_GC_N(stg_newArrayArrayzh, n);

    // the mark area contains one byte for each 2^MUT_ARR_PTRS_CARD_BITS words
    // in the array, making sure we round up, and then rounding up to a whole
    // number of words.
    size = n + mutArrPtrsCardWords(n);
    words = BYTES_TO_WDS(SIZEOF_StgMutArrPtrs) + size;
    ("ptr" arr) = ccall allocate(MyCapability() "ptr",words);
    TICK_ALLOC_PRIM(SIZEOF_StgMutArrPtrs, WDS(size), 0);

    // NOTE(review): sibling allocators pass CCCS directly rather than
    // W_[CCCS] — confirm which form is intended here.
    SET_HDR(arr, stg_MUT_ARR_PTRS_DIRTY_info, W_[CCCS]);
    StgMutArrPtrs_ptrs(arr) = n;
    StgMutArrPtrs_size(arr) = size;

    // Initialise all elements of the array with a pointer to the new array
    p = arr + SIZEOF_StgMutArrPtrs;
  for:
    if (p < arr + SIZEOF_StgMutArrPtrs + WDS(n)) {
        W_[p] = arr;
        p = p + WDS(1);
        goto for;
    }

    return (arr);
}

pumpkin's avatar
pumpkin committed
387

388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461
/* -----------------------------------------------------------------------------
   SmallArray primitives
   -------------------------------------------------------------------------- */

/* Allocate a SmallMutableArray# of n elements, each initialised to
   'init'.  Unlike stg_newArrayzh, no card-table area is allocated. */
stg_newSmallArrayzh ( W_ n /* words */, gcptr init )
{
    W_ words, p;
    gcptr arr;

    again: MAYBE_GC(again);

    words = BYTES_TO_WDS(SIZEOF_StgSmallMutArrPtrs) + n;
    ("ptr" arr) = ccall allocate(MyCapability() "ptr",words);
    TICK_ALLOC_PRIM(SIZEOF_StgSmallMutArrPtrs, WDS(n), 0);

    SET_HDR(arr, stg_SMALL_MUT_ARR_PTRS_DIRTY_info, CCCS);
    StgSmallMutArrPtrs_ptrs(arr) = n;

    // Initialise all elements of the array with the value 'init'
    p = arr + SIZEOF_StgSmallMutArrPtrs;
  for:
    if (p < arr + SIZEOF_StgSmallMutArrPtrs + WDS(n)) {
        W_[p] = init;
        p = p + WDS(1);
        goto for;
    }

    return (arr);
}

stg_unsafeThawSmallArrayzh ( gcptr arr )
{
    // See stg_unsafeThawArrayzh: FROZEN0 marks an array that is already
    // on a mutable list, so it must not be recorded a second time.
    if (StgHeader_info(arr) != stg_SMALL_MUT_ARR_PTRS_FROZEN0_info) {
        SET_INFO(arr, stg_SMALL_MUT_ARR_PTRS_DIRTY_info);
        recordMutable(arr);
        // must be done after SET_INFO, because it ASSERTs closure_MUTABLE()
        return (arr);
    } else {
        SET_INFO(arr, stg_SMALL_MUT_ARR_PTRS_DIRTY_info);
        return (arr);
    }
}

// Clone a slice of an immutable small array; the result is frozen.
stg_cloneSmallArrayzh ( gcptr src, W_ offset, W_ n )
{
    cloneSmallArray(stg_SMALL_MUT_ARR_PTRS_FROZEN_info, src, offset, n)
}

// Clone a slice of a mutable small array; the result is mutable (DIRTY).
stg_cloneSmallMutableArrayzh ( gcptr src, W_ offset, W_ n )
{
    cloneSmallArray(stg_SMALL_MUT_ARR_PTRS_DIRTY_info, src, offset, n)
}

// We have to escape the "z" in the name.
// Freeze by cloning: the copy gets the FROZEN info table.
stg_freezzeSmallArrayzh ( gcptr src, W_ offset, W_ n )
{
    cloneSmallArray(stg_SMALL_MUT_ARR_PTRS_FROZEN_info, src, offset, n)
}

// Thaw by cloning: the copy gets the mutable (DIRTY) info table.
stg_thawSmallArrayzh ( gcptr src, W_ offset, W_ n )
{
    cloneSmallArray(stg_SMALL_MUT_ARR_PTRS_DIRTY_info, src, offset, n)
}

// Copy from an immutable small array into a mutable one.  Source and
// destination cannot alias (the source is frozen), so memcpy is safe.
stg_copySmallArrayzh ( gcptr src, W_ src_off, gcptr dst, W_ dst_off, W_ n)
{
    W_ dst_p, src_p, bytes;

    SET_INFO(dst, stg_SMALL_MUT_ARR_PTRS_DIRTY_info);

    dst_p = dst + SIZEOF_StgSmallMutArrPtrs + WDS(dst_off);
    src_p = src + SIZEOF_StgSmallMutArrPtrs + WDS(src_off);
    bytes = WDS(n);
    prim %memcpy(dst_p, src_p, bytes, SIZEOF_W);

    return ();
}

// Copy between mutable small arrays.  When source and destination are
// the same array the ranges may overlap, so use memmove in that case.
stg_copySmallMutableArrayzh ( gcptr src, W_ src_off, gcptr dst, W_ dst_off, W_ n)
{
    W_ dst_p, src_p, bytes;

    SET_INFO(dst, stg_SMALL_MUT_ARR_PTRS_DIRTY_info);

    dst_p = dst + SIZEOF_StgSmallMutArrPtrs + WDS(dst_off);
    src_p = src + SIZEOF_StgSmallMutArrPtrs + WDS(src_off);
    bytes = WDS(n);
    if (src == dst) {
        prim %memmove(dst_p, src_p, bytes, SIZEOF_W);
    } else {
        prim %memcpy(dst_p, src_p, bytes, SIZEOF_W);
    }

    return ();
}

// RRN: Uses the ticketed approach; see casMutVar
stg_casSmallArrayzh ( gcptr arr, W_ ind, gcptr old, gcptr new )
/* SmallMutableArray# s a -> Int# -> a -> a -> State# s -> (# State# s, Int#, Any a #) */
{
    gcptr h;
    W_ p;

    p = arr + SIZEOF_StgSmallMutArrPtrs + WDS(ind);
    (h) = prim %cmpxchgW(p, old, new);

    if (h != old) {
        // Failure, return what was there instead of 'old':
        return (1,h);
    } else {
        // Compare and Swap Succeeded:
        // (small arrays have no card table, so no mark byte to write)
        SET_HDR(arr, stg_SMALL_MUT_ARR_PTRS_DIRTY_info, CCCS);
        return (0,new);
    }
}


506 507 508 509
/* -----------------------------------------------------------------------------
   MutVar primitives
   -------------------------------------------------------------------------- */

510
/* Allocate a new MutVar# holding 'init'.  Created DIRTY so it is on
   the mutable list from the start. */
stg_newMutVarzh ( gcptr init )
{
    W_ mv;

    ALLOC_PRIM_P (SIZEOF_StgMutVar, stg_newMutVarzh, init);

    mv = Hp - SIZEOF_StgMutVar + WDS(1);
    SET_HDR(mv,stg_MUT_VAR_DIRTY_info,CCCS);
    StgMutVar_var(mv) = init;

    return (mv);
}

523 524 525 526 527
// RRN: To support the "ticketed" approach, we return the NEW rather
// than old value if the CAS is successful.  This is received in an
// opaque form in the Haskell code, preventing the compiler from
// changing its pointer identity.  The ticket can then be safely used
// in future CAS operations.
stg_casMutVarzh ( gcptr mv, gcptr old, gcptr new )
 /* MutVar# s a -> a -> a -> State# s -> (# State#, Int#, Any a #) */
{
    gcptr h;

    (h) = prim %cmpxchgW(mv + SIZEOF_StgHeader + OFFSET_StgMutVar_var, old, new);
    if (h != old) {
        // Failure: report 1 and the value actually found.
        return (1,h);
    } else {
        // Success: dirty the MutVar if it was clean, then return the ticket.
        if (GET_INFO(mv) == stg_MUT_VAR_CLEAN_info) {
            ccall dirty_MUT_VAR(BaseReg "ptr", mv "ptr");
        }
        return (0,new);
    }
}

544
stg_atomicModifyMutVarzh ( gcptr mv, gcptr f )
{
    W_ z, x, y, r, h;

    /* If x is the current contents of the MutVar#, then
       We want to make the new contents point to

         (sel_0 (f x))

       and the return value is

         (sel_1 (f x))

        obviously we can share (f x).

         z = [stg_ap_2 f x]  (max (HS + 2) MIN_UPD_SIZE)
         y = [stg_sel_0 z]   (max (HS + 1) MIN_UPD_SIZE)
         r = [stg_sel_1 z]   (max (HS + 1) MIN_UPD_SIZE)
    */

#if MIN_UPD_SIZE > 1
#define THUNK_1_SIZE (SIZEOF_StgThunkHeader + WDS(MIN_UPD_SIZE))
#define TICK_ALLOC_THUNK_1() TICK_ALLOC_UP_THK(WDS(1),WDS(MIN_UPD_SIZE-1))
#else
#define THUNK_1_SIZE (SIZEOF_StgThunkHeader + WDS(1))
#define TICK_ALLOC_THUNK_1() TICK_ALLOC_UP_THK(WDS(1),0)
#endif

#if MIN_UPD_SIZE > 2
#define THUNK_2_SIZE (SIZEOF_StgThunkHeader + WDS(MIN_UPD_SIZE))
#define TICK_ALLOC_THUNK_2() TICK_ALLOC_UP_THK(WDS(2),WDS(MIN_UPD_SIZE-2))
#else
#define THUNK_2_SIZE (SIZEOF_StgThunkHeader + WDS(2))
#define TICK_ALLOC_THUNK_2() TICK_ALLOC_UP_THK(WDS(2),0)
#endif

#define SIZE (THUNK_2_SIZE + THUNK_1_SIZE + THUNK_1_SIZE)

    HP_CHK_GEN_TICKY(SIZE);

    // z = f `ap` x  (application thunk; x filled in at the retry point)
    TICK_ALLOC_THUNK_2();
    CCCS_ALLOC(THUNK_2_SIZE);
    z = Hp - THUNK_2_SIZE + WDS(1);
    SET_HDR(z, stg_ap_2_upd_info, CCCS);
    LDV_RECORD_CREATE(z);
    StgThunk_payload(z,0) = f;

    // y = sel_0 z  (the new contents of the MutVar#)
    TICK_ALLOC_THUNK_1();
    CCCS_ALLOC(THUNK_1_SIZE);
    y = z - THUNK_1_SIZE;
    SET_HDR(y, stg_sel_0_upd_info, CCCS);
    LDV_RECORD_CREATE(y);
    StgThunk_payload(y,0) = z;

    // r = sel_1 z  (the value returned to the caller)
    TICK_ALLOC_THUNK_1();
    CCCS_ALLOC(THUNK_1_SIZE);
    r = y - THUNK_1_SIZE;
    SET_HDR(r, stg_sel_1_upd_info, CCCS);
    LDV_RECORD_CREATE(r);
    StgThunk_payload(r,0) = z;

  retry:
    x = StgMutVar_var(mv);
    StgThunk_payload(z,1) = x;
#ifdef THREADED_RTS
    (h) = prim %cmpxchgW(mv + SIZEOF_StgHeader + OFFSET_StgMutVar_var, x, y);
    if (h != x) { goto retry; }
#else
    StgMutVar_var(mv) = y;
#endif

    if (GET_INFO(mv) == stg_MUT_VAR_CLEAN_info) {
        ccall dirty_MUT_VAR(BaseReg "ptr", mv "ptr");
    }

    return (r);
}

/* -----------------------------------------------------------------------------
   Weak Pointer Primitives
   -------------------------------------------------------------------------- */

STRING(stg_weak_msg,"New weak pointer at %p\n")

628 629 630
/* Allocate a Weak# with the given key, value and (Haskell) finalizer,
   and link it onto this capability's weak pointer list. */
stg_mkWeakzh ( gcptr key,
               gcptr value,
               gcptr finalizer /* or stg_NO_FINALIZER_closure */ )
{
    gcptr w;

    ALLOC_PRIM (SIZEOF_StgWeak)

    w = Hp - SIZEOF_StgWeak + WDS(1);
    SET_HDR(w, stg_WEAK_info, CCCS);

    StgWeak_key(w)         = key;
    StgWeak_value(w)       = value;
    StgWeak_finalizer(w)   = finalizer;
    StgWeak_cfinalizers(w) = stg_NO_FINALIZER_closure;

    StgWeak_link(w) = Capability_weak_ptr_list_hd(MyCapability());
    Capability_weak_ptr_list_hd(MyCapability()) = w;
    if (Capability_weak_ptr_list_tl(MyCapability()) == NULL) {
        Capability_weak_ptr_list_tl(MyCapability()) = w;
    }

    IF_DEBUG(weak, ccall debugBelch(stg_weak_msg,w));

    return (w);
}

655
// Convenience wrapper: a Weak# with no finalizer.
stg_mkWeakNoFinalizzerzh ( gcptr key, gcptr value )
{
    jump stg_mkWeakzh (key, value, stg_NO_FINALIZER_closure);
}

660 661 662 663 664 665 666
STRING(stg_cfinalizer_msg,"Adding a finalizer to %p\n")

/* Attach a C finalizer to an existing Weak#.  Returns 1 on success,
   0 if the weak pointer was already dead. */
stg_addCFinalizzerToWeakzh ( W_ fptr,   // finalizer
                             W_ ptr,
                             W_ flag,   // has environment (0 or 1)
                             W_ eptr,
                             gcptr w )
{
    W_ c, info;

    ALLOC_PRIM (SIZEOF_StgCFinalizerList)

    c = Hp - SIZEOF_StgCFinalizerList + WDS(1);
    SET_HDR(c, stg_C_FINALIZER_LIST_info, CCCS);

    StgCFinalizerList_fptr(c) = fptr;
    StgCFinalizerList_ptr(c) = ptr;
    StgCFinalizerList_eptr(c) = eptr;
    StgCFinalizerList_flag(c) = flag;

    LOCK_CLOSURE(w, info);

    if (info == stg_DEAD_WEAK_info) {
        // Already dead.
        unlockClosure(w, info);
        return (0);
    }

    // Push the new finalizer onto the front of the list.
    StgCFinalizerList_link(c) = StgWeak_cfinalizers(w);
    StgWeak_cfinalizers(w) = c;

    unlockClosure(w, info);

    recordMutable(w);

    IF_DEBUG(weak, ccall debugBelch(stg_cfinalizer_msg,w));

    return (1);
}
699

700
/* Kill a Weak#: run its C finalizers, and return its Haskell finalizer
   (1,f) if there is one, or (0,NO_FINALIZER) otherwise. */
stg_finalizzeWeakzh ( gcptr w )
{
    gcptr f, list;
    W_ info;

    LOCK_CLOSURE(w, info);

    // already dead?
    if (info == stg_DEAD_WEAK_info) {
        unlockClosure(w, info);
        return (0,stg_NO_FINALIZER_closure);
    }

    f    = StgWeak_finalizer(w);
    list = StgWeak_cfinalizers(w);

    // kill it
#ifdef PROFILING
    // @LDV profiling
    // A weak pointer is inherently used, so we do not need to call
    // LDV_recordDead_FILL_SLOP_DYNAMIC():
    //    LDV_recordDead_FILL_SLOP_DYNAMIC((StgClosure *)w);
    // or, LDV_recordDead():
    //    LDV_recordDead((StgClosure *)w, sizeofW(StgWeak) - sizeofW(StgProfHeader));
    // Furthermore, when PROFILING is turned on, dead weak pointers are exactly as
    // large as weak pointers, so there is no need to fill the slop, either.
    // See stg_DEAD_WEAK_info in StgMiscClosures.cmm.
#endif

    //
    // Todo: maybe use SET_HDR() and remove LDV_recordCreate()?
    //
    unlockClosure(w, stg_DEAD_WEAK_info);

    LDV_RECORD_CREATE(w);

    if (list != stg_NO_FINALIZER_closure) {
      ccall runCFinalizers(list);
    }

    /* return the finalizer */
    if (f == stg_NO_FINALIZER_closure) {
        return (0,stg_NO_FINALIZER_closure);
    } else {
        return (1,f);
    }
}

748
/* Dereference a Weak#: returns (1,value) if still alive, (0,w) if dead. */
stg_deRefWeakzh ( gcptr w )
{
    W_ code, info;
    gcptr val;

    info = GET_INFO(w);

    if (info == stg_WHITEHOLE_info) {
        // w is locked by another thread. Now it's not immediately clear if w is
        // alive or not. We use lockClosure to wait for the info pointer to become
        // something other than stg_WHITEHOLE_info.

        LOCK_CLOSURE(w, info);
        unlockClosure(w, info);
    }

    if (info == stg_WEAK_info) {
        code = 1;
        val = StgWeak_value(w);
    } else {
        code = 0;
        val = w;
    }
    return (code,val);
}

/* -----------------------------------------------------------------------------
775
   Floating point operations.
776 777
   -------------------------------------------------------------------------- */

778
/* Decode a Float# into (mantissa, exponent) via the C helper, using
   two words of reserved stack as scratch space. */
stg_decodeFloatzuIntzh ( F_ arg )
{
    W_ tmp, mp_tmp1, mp_tmp_w, r1, r2;

    STK_CHK_GEN_N (WDS(2));

    reserve 2 = tmp {

        mp_tmp1  = tmp + WDS(1);
        mp_tmp_w = tmp;

        /* Perform the operation */
        ccall __decodeFloat_Int(mp_tmp1 "ptr", mp_tmp_w "ptr", arg);

        r1 = W_[mp_tmp1];
        r2 = W_[mp_tmp_w];
    }

    /* returns: (Int# (mantissa), Int# (exponent)) */
    return (r1, r2);
}

801
/* Decode a Double# into sign/high/low mantissa words and exponent via
   the C helper, using four words of reserved stack as scratch space. */
stg_decodeDoublezu2Intzh ( D_ arg )
{
    W_ tmp;
    W_ mp_tmp1, mp_tmp2, mp_result1, mp_result2;
    W_ r1, r2, r3, r4;

    STK_CHK_GEN_N (WDS(4));

    reserve 4 = tmp {

        mp_tmp1    = tmp + WDS(3);
        mp_tmp2    = tmp + WDS(2);
        mp_result1 = tmp + WDS(1);
        mp_result2 = tmp;

        /* Perform the operation */
        ccall __decodeDouble_2Int(mp_tmp1 "ptr", mp_tmp2 "ptr",
                                  mp_result1 "ptr", mp_result2 "ptr",
                                  arg);

        r1 = W_[mp_tmp1];
        r2 = W_[mp_tmp2];
        r3 = W_[mp_result1];
        r4 = W_[mp_result2];
    }

    /* returns:
       (Int# (mant sign), Word# (mant high), Word# (mant low), Int# (expn)) */
    return (r1, r2, r3, r4);
}

832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847
/* Double# -> (# Int64#, Int# #) */
/* Decode a Double# into a 64-bit mantissa and an exponent via the C
   helper, using reserved stack space to receive the mantissa. */
stg_decodeDoublezuInt64zh ( D_ arg )
{
    CInt exp;
    I64  mant;
    W_   mant_ptr;

    STK_CHK_GEN_N (SIZEOF_INT64);
    reserve BYTES_TO_WDS(SIZEOF_INT64) = mant_ptr {
        (exp) = ccall __decodeDouble_Int64(mant_ptr "ptr", arg);
        mant = I64[mant_ptr];
    }

    return (mant, TO_W_(exp));
}

848 849 850 851
/* -----------------------------------------------------------------------------
 * Concurrency primitives
 * -------------------------------------------------------------------------- */

852
/* fork#: create a new IO thread running 'closure' and schedule it on
   the current capability. */
stg_forkzh ( gcptr closure )
{
    MAYBE_GC_P(stg_forkzh, closure);

    gcptr threadid;

    ("ptr" threadid) = ccall createIOThread( MyCapability() "ptr",
                                  RtsFlags_GcFlags_initialStkSize(RtsFlags),
                                  closure "ptr");

    /* start blocked if the current thread is blocked */
    StgTSO_flags(threadid) = %lobits16(
        TO_W_(StgTSO_flags(threadid)) |
        TO_W_(StgTSO_flags(CurrentTSO)) & (TSO_BLOCKEX | TSO_INTERRUPTIBLE));

    ccall scheduleThread(MyCapability() "ptr", threadid "ptr");

    // context switch soon, but not immediately: we don't want every
    // forkIO to force a context-switch.
    Capability_context_switch(MyCapability()) = 1 :: CInt;

    return (threadid);
}

876
/* forkOn#: like fork#, but pin the new thread to the given capability. */
stg_forkOnzh ( W_ cpu, gcptr closure )
{
again: MAYBE_GC(again);

    gcptr threadid;

    ("ptr" threadid) = ccall createIOThread(
        MyCapability() "ptr",
        RtsFlags_GcFlags_initialStkSize(RtsFlags),
        closure "ptr");

    /* start blocked if the current thread is blocked */
    StgTSO_flags(threadid) = %lobits16(
        TO_W_(StgTSO_flags(threadid)) |
        TO_W_(StgTSO_flags(CurrentTSO)) & (TSO_BLOCKEX | TSO_INTERRUPTIBLE));

    ccall scheduleThreadOn(MyCapability() "ptr", cpu, threadid "ptr");

    // context switch soon, but not immediately: we don't want every
    // forkIO to force a context-switch.
    Capability_context_switch(MyCapability()) = 1 :: CInt;

    return (threadid);
}

901
stg_yieldzh ()
{
    // when we yield to the scheduler, we have to tell it to put the
    // current thread to the back of the queue by setting the
    // context_switch flag.  If we don't do this, it will run the same
    // thread again.
    Capability_context_switch(MyCapability()) = 1 :: CInt;
    jump stg_yield_noregs();
}

911
// Return the current thread's TSO as its ThreadId#.
stg_myThreadIdzh ()
{
    return (CurrentTSO);
}

916
// Attach a debug/trace label to a thread; a no-op unless the RTS was
// built with DEBUG, TRACING or DTRACE.
stg_labelThreadzh ( gcptr threadid, W_ addr )
{
#if defined(DEBUG) || defined(TRACING) || defined(DTRACE)
    ccall labelThread(MyCapability() "ptr", threadid "ptr", addr "ptr");
#endif
    return ();
}

/* isCurrentThreadBound# primop: 1 if the calling thread is bound to an
 * OS thread (per isThreadBound in the RTS), 0 otherwise.
 */
stg_isCurrentThreadBoundzh (/* no args */)
{
    W_ r;
    (r) = ccall isThreadBound(CurrentTSO);
    return (r);
}

/* threadStatus# primop: report (status, capability number, locked-flag)
 * for 'tso'.  Status is either the TSO's why_blocked value or one of the
 * magic codes 16 (ThreadComplete) / 17 (ThreadKilled), matching the
 * decoding in GHC.Conc.threadStatus.
 */
stg_threadStatuszh ( gcptr tso )
{
    W_ why_blocked;
    W_ what_next;
    W_ ret, cap, locked;

    what_next   = TO_W_(StgTSO_what_next(tso));
    why_blocked = TO_W_(StgTSO_why_blocked(tso));
    // Note: these two reads are not atomic, so they might end up
    // being inconsistent.  It doesn't matter, since we
    // only return one or the other.  If we wanted to return the
    // contents of block_info too, then we'd have to do some synchronisation.

    if (what_next == ThreadComplete) {
        ret = 16;  // NB. magic, matches up with GHC.Conc.threadStatus
    } else {
        if (what_next == ThreadKilled) {
            ret = 17;
        } else {
            ret = why_blocked;
        }
    }

    cap = TO_W_(Capability_no(StgTSO_cap(tso)));

    // locked = 1 iff the thread is pinned to its capability (forkOn)
    if ((TO_W_(StgTSO_flags(tso)) & TSO_LOCKED) != 0) {
        locked = 1;
    } else {
        locked = 0;
    }

    return (ret,cap,locked);
}

/* -----------------------------------------------------------------------------
 * TVar primitives
 * -------------------------------------------------------------------------- */

// Catch retry frame -----------------------------------------------------------

/* Field layout of a catch-retry stack frame, shared between the frame's
 * info table definition and the code that pushes one.  'w_'/'p_' are the
 * word/pointer field macros; p1/p2 are the profiling header slots.
 */
#define CATCH_RETRY_FRAME_FIELDS(w_,p_,info_ptr,        \
                                 p1, p2,                \
                                 running_alt_code,      \
                                 first_code,            \
                                 alt_code)              \
  w_ info_ptr,                                          \
  PROF_HDR_FIELDS(w_,p1,p2)                             \
  w_ running_alt_code,                                  \
  p_ first_code,                                        \
  p_ alt_code