/* -*- tab-width: 8 -*- */
/* -----------------------------------------------------------------------------
 *
 * (c) The GHC Team, 1998-2012
 *
 * Out-of-line primitive operations
 *
 * This file contains the implementations of all the primitive
 * operations ("primops") which are not expanded inline.  See
 * ghc/compiler/prelude/primops.txt.pp for a list of all the primops;
 * this file contains code for most of those with the attribute
 * out_of_line=True.
 *
 * Entry convention: the entry convention for a primop is the
 * NativeNodeCall convention, and the return convention is
 * NativeReturn.  (see compiler/cmm/CmmCallConv.hs)
 *
 * This file is written in a subset of C--, extended with various
 * features specific to GHC.  It is compiled by GHC directly.  For the
 * syntax of .cmm files, see the parser in ghc/compiler/cmm/CmmParse.y.
 *
 * ---------------------------------------------------------------------------*/

#include "Cmm.h"

#ifdef __PIC__
import pthread_mutex_lock;
import pthread_mutex_unlock;
#endif
import base_ControlziExceptionziBase_nestedAtomically_closure;
import EnterCriticalSection;
import LeaveCriticalSection;
import ghczmprim_GHCziTypes_False_closure;
#if defined(USE_MINIINTERPRETER) || !defined(mingw32_HOST_OS)
import sm_mutex;
#endif

/*-----------------------------------------------------------------------------
  Array Primitives

  Basically just new*Array - the others are all inline macros.

  The slow entry point is for returning from a heap check; the saved
  size argument must be re-loaded from the stack.
  -------------------------------------------------------------------------- */

/* for objects that are *less* than the size of a word, make sure we
 * round up to the nearest word for the size of the array.
 */
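/* e.g. on a 64-bit target (SIZEOF_W = 8), a request for n = 13 bytes is
 * rounded up to ROUNDUP_BYTES_TO_WDS(13) = 2 payload words, i.e. 16 bytes.
 */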

stg_newByteArrayzh ( W_ n )
{
    W_ words, payload_words;
    gcptr p;

    MAYBE_GC_N(stg_newByteArrayzh, n);

    payload_words = ROUNDUP_BYTES_TO_WDS(n);
    words = BYTES_TO_WDS(SIZEOF_StgArrWords) + payload_words;
    ("ptr" p) = ccall allocate(MyCapability() "ptr",words);
    TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0);
    SET_HDR(p, stg_ARR_WORDS_info, CCCS);
    StgArrWords_bytes(p) = n;
    return (p);
}

#define BA_ALIGN 16
#define BA_MASK  (BA_ALIGN-1)

stg_newPinnedByteArrayzh ( W_ n )
{
    W_ words, bytes, payload_words;
    gcptr p;

    MAYBE_GC_N(stg_newPinnedByteArrayzh, n);

    bytes = n;
    /* payload_words is what we will tell the profiler we had to allocate */
    payload_words = ROUNDUP_BYTES_TO_WDS(bytes);
    /* When we actually allocate memory, we need to allow space for the
       header: */
    bytes = bytes + SIZEOF_StgArrWords;
    /* And we want to align to BA_ALIGN bytes, so we need to allow space
       to shift up to BA_ALIGN - 1 bytes: */
    bytes = bytes + BA_ALIGN - 1;
    /* Now we convert to a number of words: */
    words = ROUNDUP_BYTES_TO_WDS(bytes);

    ("ptr" p) = ccall allocatePinned(MyCapability() "ptr", words);
    TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0);

    /* Now we need to move p forward so that the payload is aligned
       to BA_ALIGN bytes: */
    p = p + ((-p - SIZEOF_StgArrWords) & BA_MASK);
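    /* Worked example (assuming a 64-bit build where SIZEOF_StgArrWords = 16):
       if allocatePinned returned p = 0x1008, then (-p - 16) & 15 = 8, so p
       moves to 0x1010 and the payload at p + 16 = 0x1020 is 16-byte aligned. */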

    SET_HDR(p, stg_ARR_WORDS_info, CCCS);
    StgArrWords_bytes(p) = n;
    return (p);
}

stg_newAlignedPinnedByteArrayzh ( W_ n, W_ alignment )
{
    W_ words, bytes, payload_words;
    gcptr p;

    again: MAYBE_GC(again);

    /* we always supply at least word-aligned memory, so there's no
       need to allow extra space for alignment if the requirement is less
       than a word.  This also prevents mischief with alignment == 0. */
    if (alignment <= SIZEOF_W) { alignment = 1; }

    bytes = n;

    /* payload_words is what we will tell the profiler we had to allocate */
    payload_words = ROUNDUP_BYTES_TO_WDS(bytes);

    /* When we actually allocate memory, we need to allow space for the
       header: */
    bytes = bytes + SIZEOF_StgArrWords;
    /* And we want to align to <alignment> bytes, so we need to allow space
       to shift up to <alignment - 1> bytes: */
    bytes = bytes + alignment - 1;
    /* Now we convert to a number of words: */
    words = ROUNDUP_BYTES_TO_WDS(bytes);

    ("ptr" p) = ccall allocatePinned(MyCapability() "ptr", words);
    TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0);

    /* Now we need to move p forward so that the payload is aligned
       to <alignment> bytes. Note that we are assuming that
       <alignment> is a power of 2, which is technically not guaranteed */
    p = p + ((-p - SIZEOF_StgArrWords) & (alignment - 1));

    SET_HDR(p, stg_ARR_WORDS_info, CCCS);
    StgArrWords_bytes(p) = n;
    return (p);
}

// RRN: This one does not use the "ticketing" approach because it
// deals in unboxed scalars, not heap pointers.
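// An illustrative Haskell-side retry loop over this primop (via GHC.Exts;
// 'casLoop' itself is hypothetical, not a library function).  The caller
// learns whether it won by comparing the returned word with the expected one:
//
//   casLoop :: MutableByteArray# s -> Int# -> (Int# -> Int#)
//           -> State# s -> State# s
//   casLoop mba i f s0 =
//     case readIntArray# mba i s0 of
//       (# s1, old #) -> case casIntArray# mba i old (f old) s1 of
//         (# s2, prev #) ->
//           if isTrue# (prev ==# old) then s2 else casLoop mba i f s2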
stg_casIntArrayzh( gcptr arr, W_ ind, W_ old, W_ new )
/* MutableByteArray# s -> Int# -> Int# -> Int# -> State# s -> (# State# s, Int# #) */
{
    W_ p, h;

    p = arr + SIZEOF_StgArrWords + WDS(ind);
    (h) = ccall cas(p, old, new);

    return(h);
}

stg_newArrayzh ( W_ n /* words */, gcptr init )
{
    W_ words, size, p;
    gcptr arr;

    again: MAYBE_GC(again);

    // the mark area contains one byte for each 2^MUT_ARR_PTRS_CARD_BITS words
    // in the array, making sure we round up, and then rounding up to a whole
    // number of words.
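    // e.g. assuming MUT_ARR_PTRS_CARD_BITS = 7 (128-word cards) on a 64-bit
    // target, n = 1000 needs ceiling(1000/128) = 8 card bytes, rounded up to
    // one whole word, so size = 1001.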
    size = n + mutArrPtrsCardWords(n);
    words = BYTES_TO_WDS(SIZEOF_StgMutArrPtrs) + size;
    ("ptr" arr) = ccall allocate(MyCapability() "ptr",words);
    TICK_ALLOC_PRIM(SIZEOF_StgMutArrPtrs, WDS(size), 0);

    SET_HDR(arr, stg_MUT_ARR_PTRS_DIRTY_info, CCCS);
    StgMutArrPtrs_ptrs(arr) = n;
    StgMutArrPtrs_size(arr) = size;

    // Initialise all elements of the array with the value init
    p = arr + SIZEOF_StgMutArrPtrs;
  for:
    if (p < arr + SIZEOF_StgMutArrPtrs + WDS(n)) {
        W_[p] = init;
        p = p + WDS(1);
        goto for;
    }

    return (arr);
}

stg_unsafeThawArrayzh ( gcptr arr )
{
    // SUBTLETY TO DO WITH THE OLD GEN MUTABLE LIST
    //
    // A MUT_ARR_PTRS lives on the mutable list, but a MUT_ARR_PTRS_FROZEN
    // normally doesn't.  However, when we freeze a MUT_ARR_PTRS, we leave
    // it on the mutable list for the GC to remove (removing something from
    // the mutable list is not easy).
    //
    // So that we can tell whether a MUT_ARR_PTRS_FROZEN is on the mutable list,
    // when we freeze it we set the info ptr to be MUT_ARR_PTRS_FROZEN0
    // to indicate that it is still on the mutable list.
    //
    // So, when we thaw a MUT_ARR_PTRS_FROZEN, we must cope with two cases:
    // either it is on a mut_list, or it isn't.  We adopt the convention that
    // the closure type is MUT_ARR_PTRS_FROZEN0 if it is on the mutable list,
    // and MUT_ARR_PTRS_FROZEN otherwise.  In fact it wouldn't matter if
    // we put it on the mutable list more than once, but it would get scavenged
    // multiple times during GC, which would be unnecessarily slow.
    //
    if (StgHeader_info(arr) != stg_MUT_ARR_PTRS_FROZEN0_info) {
        SET_INFO(arr,stg_MUT_ARR_PTRS_DIRTY_info);
        recordMutable(arr);
        // must be done after SET_INFO, because it ASSERTs closure_MUTABLE()
        return (arr);
    } else {
        SET_INFO(arr,stg_MUT_ARR_PTRS_DIRTY_info);
        return (arr);
    }
}

stg_copyArrayzh ( gcptr src, W_ src_off, gcptr dst, W_ dst_off, W_ n )
{
    copyArray(src, src_off, dst, dst_off, n)
}

stg_copyMutableArrayzh ( gcptr src, W_ src_off, gcptr dst, W_ dst_off, W_ n )
{
    copyMutableArray(src, src_off, dst, dst_off, n)
}

stg_copyArrayArrayzh ( gcptr src, W_ src_off, gcptr dst, W_ dst_off, W_ n )
{
    copyArray(src, src_off, dst, dst_off, n)
}

stg_copyMutableArrayArrayzh ( gcptr src, W_ src_off, gcptr dst, W_ dst_off, W_ n )
{
    copyMutableArray(src, src_off, dst, dst_off, n)
}

stg_cloneArrayzh ( gcptr src, W_ offset, W_ n )
{
    cloneArray(stg_MUT_ARR_PTRS_FROZEN_info, src, offset, n)
}

stg_cloneMutableArrayzh ( gcptr src, W_ offset, W_ n )
{
    cloneArray(stg_MUT_ARR_PTRS_DIRTY_info, src, offset, n)
}

// We have to escape the "z" in the name.
stg_freezzeArrayzh ( gcptr src, W_ offset, W_ n )
{
    cloneArray(stg_MUT_ARR_PTRS_FROZEN_info, src, offset, n)
}

stg_thawArrayzh ( gcptr src, W_ offset, W_ n )
{
    cloneArray(stg_MUT_ARR_PTRS_DIRTY_info, src, offset, n)
}

// RRN: Uses the ticketed approach; see casMutVar
stg_casArrayzh ( gcptr arr, W_ ind, gcptr old, gcptr new )
/* MutableArray# s a -> Int# -> a -> a -> State# s -> (# State# s, Int#, Any a #) */
{
    gcptr h;
    W_ p, len;

    p = arr + SIZEOF_StgMutArrPtrs + WDS(ind);
    (h) = ccall cas(p, old, new);
    
    if (h != old) {
        // Failure, return what was there instead of 'old':
        return (1,h);
    } else {
        // Compare and Swap Succeeded:
        SET_HDR(arr, stg_MUT_ARR_PTRS_DIRTY_info, CCCS);
        len = StgMutArrPtrs_ptrs(arr);
        // The write barrier.  We must write a byte into the mark table:
        I8[arr + SIZEOF_StgMutArrPtrs + WDS(len) + (ind >> MUT_ARR_PTRS_CARD_BITS )] = 1;
        return (0,new);
    }
}

stg_newArrayArrayzh ( W_ n /* words */ )
{
    W_ words, size, p;
    gcptr arr;

    MAYBE_GC_N(stg_newArrayArrayzh, n);

    // the mark area contains one byte for each 2^MUT_ARR_PTRS_CARD_BITS words
    // in the array, making sure we round up, and then rounding up to a whole
    // number of words.
    size = n + mutArrPtrsCardWords(n);
    words = BYTES_TO_WDS(SIZEOF_StgMutArrPtrs) + size;
    ("ptr" arr) = ccall allocate(MyCapability() "ptr",words);
    TICK_ALLOC_PRIM(SIZEOF_StgMutArrPtrs, WDS(size), 0);

    SET_HDR(arr, stg_MUT_ARR_PTRS_DIRTY_info, CCCS);
    StgMutArrPtrs_ptrs(arr) = n;
    StgMutArrPtrs_size(arr) = size;

    // Initialise all elements of the array with a pointer to the new array
    p = arr + SIZEOF_StgMutArrPtrs;
  for:
    if (p < arr + SIZEOF_StgMutArrPtrs + WDS(n)) {
        W_[p] = arr;
        p = p + WDS(1);
        goto for;
    }

    return (arr);
}

/* -----------------------------------------------------------------------------
   SmallArray primitives
   -------------------------------------------------------------------------- */

stg_newSmallArrayzh ( W_ n /* words */, gcptr init )
{
    W_ words, size, p;
    gcptr arr;

    again: MAYBE_GC(again);

    words = BYTES_TO_WDS(SIZEOF_StgSmallMutArrPtrs) + n;
    ("ptr" arr) = ccall allocate(MyCapability() "ptr",words);
    TICK_ALLOC_PRIM(SIZEOF_StgSmallMutArrPtrs, WDS(n), 0);

    SET_HDR(arr, stg_SMALL_MUT_ARR_PTRS_DIRTY_info, CCCS);
    StgSmallMutArrPtrs_ptrs(arr) = n;

    // Initialise all elements of the array with the value init
    p = arr + SIZEOF_StgSmallMutArrPtrs;
  for:
    if (p < arr + SIZEOF_StgSmallMutArrPtrs + WDS(n)) {
        W_[p] = init;
        p = p + WDS(1);
        goto for;
    }

    return (arr);
}

stg_unsafeThawSmallArrayzh ( gcptr arr )
{
    // See stg_unsafeThawArrayzh
    if (StgHeader_info(arr) != stg_SMALL_MUT_ARR_PTRS_FROZEN0_info) {
        SET_INFO(arr, stg_SMALL_MUT_ARR_PTRS_DIRTY_info);
        recordMutable(arr);
        // must be done after SET_INFO, because it ASSERTs closure_MUTABLE()
        return (arr);
    } else {
        SET_INFO(arr, stg_SMALL_MUT_ARR_PTRS_DIRTY_info);
        return (arr);
    }
}

stg_cloneSmallArrayzh ( gcptr src, W_ offset, W_ n )
{
    cloneSmallArray(stg_SMALL_MUT_ARR_PTRS_FROZEN_info, src, offset, n)
}

stg_cloneSmallMutableArrayzh ( gcptr src, W_ offset, W_ n )
{
    cloneSmallArray(stg_SMALL_MUT_ARR_PTRS_DIRTY_info, src, offset, n)
}

// We have to escape the "z" in the name.
stg_freezzeSmallArrayzh ( gcptr src, W_ offset, W_ n )
{
    cloneSmallArray(stg_SMALL_MUT_ARR_PTRS_FROZEN_info, src, offset, n)
}

stg_thawSmallArrayzh ( gcptr src, W_ offset, W_ n )
{
    cloneSmallArray(stg_SMALL_MUT_ARR_PTRS_DIRTY_info, src, offset, n)
}

stg_copySmallArrayzh ( gcptr src, W_ src_off, gcptr dst, W_ dst_off, W_ n)
{
    W_ dst_p, src_p, bytes;

    SET_INFO(dst, stg_SMALL_MUT_ARR_PTRS_DIRTY_info);

    dst_p = dst + SIZEOF_StgSmallMutArrPtrs + WDS(dst_off);
    src_p = src + SIZEOF_StgSmallMutArrPtrs + WDS(src_off);
    bytes = WDS(n);
    prim %memcpy(dst_p, src_p, bytes, WDS(1));

    return ();
}

stg_copySmallMutableArrayzh ( gcptr src, W_ src_off, gcptr dst, W_ dst_off, W_ n)
{
    W_ dst_p, src_p, bytes;

    SET_INFO(dst, stg_SMALL_MUT_ARR_PTRS_DIRTY_info);

    dst_p = dst + SIZEOF_StgSmallMutArrPtrs + WDS(dst_off);
    src_p = src + SIZEOF_StgSmallMutArrPtrs + WDS(src_off);
    bytes = WDS(n);
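    // The ranges can only overlap when source and destination are the same
    // array, so memmove (which tolerates overlap) is needed only in that case: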
    if (src == dst) {
        prim %memmove(dst_p, src_p, bytes, WDS(1));
    } else {
        prim %memcpy(dst_p, src_p, bytes, WDS(1));
    }

    return ();
}

// RRN: Uses the ticketed approach; see casMutVar
stg_casSmallArrayzh ( gcptr arr, W_ ind, gcptr old, gcptr new )
/* SmallMutableArray# s a -> Int# -> a -> a -> State# s -> (# State# s, Int#, Any a #) */
{
    gcptr h;
    W_ p, len;

    p = arr + SIZEOF_StgSmallMutArrPtrs + WDS(ind);
    (h) = ccall cas(p, old, new);

    if (h != old) {
        // Failure, return what was there instead of 'old':
        return (1,h);
    } else {
        // Compare and Swap Succeeded:
        SET_HDR(arr, stg_SMALL_MUT_ARR_PTRS_DIRTY_info, CCCS);
        return (0,new);
    }
}


/* -----------------------------------------------------------------------------
   MutVar primitives
   -------------------------------------------------------------------------- */
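/* Illustratively, these primops sit underneath Data.IORef (a rough mapping,
 * not the exact glue -- see GHC.IORef and GHC.STRef):
 *
 *   newIORef          :: a -> IO (IORef a)                -- stg_newMutVarzh
 *   atomicModifyIORef :: IORef a -> (a -> (a, b)) -> IO b -- stg_atomicModifyMutVarzh
 */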

stg_newMutVarzh ( gcptr init )
{
    W_ mv;

    ALLOC_PRIM_P (SIZEOF_StgMutVar, stg_newMutVarzh, init);

    mv = Hp - SIZEOF_StgMutVar + WDS(1);
    SET_HDR(mv,stg_MUT_VAR_DIRTY_info,CCCS);
    StgMutVar_var(mv) = init;

    return (mv);
}

// RRN: To support the "ticketed" approach, we return the NEW rather
// than old value if the CAS is successful.  This is received in an
// opaque form in the Haskell code, preventing the compiler from
// changing its pointer identity.  The ticket can then be safely used
// in future CAS operations.
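// A hedged sketch of the resulting usage pattern from Haskell (GHC.Exts;
// 'spinModify' is hypothetical): a failed CAS hands back the value it found,
// which serves as the next ticket, so no separate re-read is needed.
//
//   spinModify :: MutVar# s a -> (a -> a) -> a -> State# s -> (# State# s, a #)
//   spinModify mv f ticket s0 =
//     case casMutVar# mv ticket (f ticket) s0 of
//       (# s1, 0#, new  #) -> (# s1, new #)            -- success
//       (# s1, _,  seen #) -> spinModify mv f seen s1  -- failure: retry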
stg_casMutVarzh ( gcptr mv, gcptr old, gcptr new )
 /* MutVar# s a -> a -> a -> State# s -> (# State#, Int#, Any a #) */
{
    gcptr h;

    (h) = ccall cas(mv + SIZEOF_StgHeader + OFFSET_StgMutVar_var, old, new);
    if (h != old) {
        return (1,h);
    } else {
        if (GET_INFO(mv) == stg_MUT_VAR_CLEAN_info) {
            ccall dirty_MUT_VAR(BaseReg "ptr", mv "ptr");
        }
        return (0,new);
    }
}

stg_atomicModifyMutVarzh ( gcptr mv, gcptr f )
{
    W_ z, x, y, r, h;

    /* If x is the current contents of the MutVar#, then
       we want to make the new contents point to

         (sel_0 (f x))

       and the return value is

         (sel_1 (f x))

        obviously we can share (f x).

         z = [stg_ap_2 f x]  (max (HS + 2) MIN_UPD_SIZE)
         y = [stg_sel_0 z]   (max (HS + 1) MIN_UPD_SIZE)
         r = [stg_sel_1 z]   (max (HS + 1) MIN_UPD_SIZE)
    */
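    /* A pure model of the three thunks, in Haskell terms:

         z = f x          -- the stg_ap_2 thunk
         y = fst z        -- the stg_sel_0 thunk: the new contents
         r = snd z        -- the stg_sel_1 thunk: the value returned

       so the primop behaves like atomicModifyIORef: f is applied lazily,
       and at most once per successful CAS. */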

#if MIN_UPD_SIZE > 1
#define THUNK_1_SIZE (SIZEOF_StgThunkHeader + WDS(MIN_UPD_SIZE))
#define TICK_ALLOC_THUNK_1() TICK_ALLOC_UP_THK(WDS(1),WDS(MIN_UPD_SIZE-1))
#else
#define THUNK_1_SIZE (SIZEOF_StgThunkHeader + WDS(1))
#define TICK_ALLOC_THUNK_1() TICK_ALLOC_UP_THK(WDS(1),0)
#endif

#if MIN_UPD_SIZE > 2
#define THUNK_2_SIZE (SIZEOF_StgThunkHeader + WDS(MIN_UPD_SIZE))
#define TICK_ALLOC_THUNK_2() TICK_ALLOC_UP_THK(WDS(2),WDS(MIN_UPD_SIZE-2))
#else
#define THUNK_2_SIZE (SIZEOF_StgThunkHeader + WDS(2))
#define TICK_ALLOC_THUNK_2() TICK_ALLOC_UP_THK(WDS(2),0)
#endif

#define SIZE (THUNK_2_SIZE + THUNK_1_SIZE + THUNK_1_SIZE)

    HP_CHK_GEN_TICKY(SIZE);

    TICK_ALLOC_THUNK_2();
    CCCS_ALLOC(THUNK_2_SIZE);
    z = Hp - THUNK_2_SIZE + WDS(1);
    SET_HDR(z, stg_ap_2_upd_info, CCCS);
    LDV_RECORD_CREATE(z);
    StgThunk_payload(z,0) = f;

    TICK_ALLOC_THUNK_1();
    CCCS_ALLOC(THUNK_1_SIZE);
    y = z - THUNK_1_SIZE;
    SET_HDR(y, stg_sel_0_upd_info, CCCS);
    LDV_RECORD_CREATE(y);
    StgThunk_payload(y,0) = z;

    TICK_ALLOC_THUNK_1();
    CCCS_ALLOC(THUNK_1_SIZE);
    r = y - THUNK_1_SIZE;
    SET_HDR(r, stg_sel_1_upd_info, CCCS);
    LDV_RECORD_CREATE(r);
    StgThunk_payload(r,0) = z;

  retry:
    x = StgMutVar_var(mv);
    StgThunk_payload(z,1) = x;
#ifdef THREADED_RTS
    (h) = ccall cas(mv + SIZEOF_StgHeader + OFFSET_StgMutVar_var, x, y);
    if (h != x) { goto retry; }
#else
    StgMutVar_var(mv) = y;
#endif

    if (GET_INFO(mv) == stg_MUT_VAR_CLEAN_info) {
        ccall dirty_MUT_VAR(BaseReg "ptr", mv "ptr");
    }

    return (r);
}

/* -----------------------------------------------------------------------------
   Weak Pointer Primitives
   -------------------------------------------------------------------------- */

STRING(stg_weak_msg,"New weak pointer at %p\n")
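/* For orientation: the Haskell-level API over these primops is
 * System.Mem.Weak (a rough correspondence, not the exact glue):
 *
 *   mkWeak    :: k -> v -> Maybe (IO ()) -> IO (Weak v)   -- stg_mkWeakzh
 *   deRefWeak :: Weak v -> IO (Maybe v)                   -- stg_deRefWeakzh
 *   finalize  :: Weak v -> IO ()                          -- stg_finalizzeWeakzh
 *
 * When no Haskell finalizer is supplied, stg_NO_FINALIZER_closure stands in.
 */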

stg_mkWeakzh ( gcptr key,
               gcptr value,
               gcptr finalizer /* or stg_NO_FINALIZER_closure */ )
{
    gcptr w;

    ALLOC_PRIM (SIZEOF_StgWeak)

    w = Hp - SIZEOF_StgWeak + WDS(1);
    SET_HDR(w, stg_WEAK_info, CCCS);

    StgWeak_key(w)         = key;
    StgWeak_value(w)       = value;
    StgWeak_finalizer(w)   = finalizer;
    StgWeak_cfinalizers(w) = stg_NO_FINALIZER_closure;

    StgWeak_link(w) = Capability_weak_ptr_list_hd(MyCapability());
    Capability_weak_ptr_list_hd(MyCapability()) = w;
    if (Capability_weak_ptr_list_tl(MyCapability()) == NULL) {
        Capability_weak_ptr_list_tl(MyCapability()) = w;
    }

    IF_DEBUG(weak, ccall debugBelch(stg_weak_msg,w));

    return (w);
}

stg_mkWeakNoFinalizzerzh ( gcptr key, gcptr value )
{
    jump stg_mkWeakzh (key, value, stg_NO_FINALIZER_closure);
}

STRING(stg_cfinalizer_msg,"Adding a finalizer to %p\n")

stg_addCFinalizzerToWeakzh ( W_ fptr,   // finalizer
                             W_ ptr,
                             W_ flag,   // has environment (0 or 1)
                             W_ eptr,
                             gcptr w )
{
    W_ c, info;

    ALLOC_PRIM (SIZEOF_StgCFinalizerList)

    c = Hp - SIZEOF_StgCFinalizerList + WDS(1);
    SET_HDR(c, stg_C_FINALIZER_LIST_info, CCCS);

    StgCFinalizerList_fptr(c) = fptr;
    StgCFinalizerList_ptr(c) = ptr;
    StgCFinalizerList_eptr(c) = eptr;
    StgCFinalizerList_flag(c) = flag;

    LOCK_CLOSURE(w, info);

    if (info == stg_DEAD_WEAK_info) {
        // Already dead.
        unlockClosure(w, info);
        return (0);
    }

    StgCFinalizerList_link(c) = StgWeak_cfinalizers(w);
    StgWeak_cfinalizers(w) = c;

    unlockClosure(w, info);

    recordMutable(w);

    IF_DEBUG(weak, ccall debugBelch(stg_cfinalizer_msg,w));

    return (1);
}

stg_finalizzeWeakzh ( gcptr w )
{
    gcptr f, list;
    W_ info;

    LOCK_CLOSURE(w, info);

    // already dead?
    if (info == stg_DEAD_WEAK_info) {
        unlockClosure(w, info);
        return (0,stg_NO_FINALIZER_closure);
    }

    f    = StgWeak_finalizer(w);
    list = StgWeak_cfinalizers(w);

    // kill it
#ifdef PROFILING
    // @LDV profiling
    // A weak pointer is inherently used, so we do not need to call
    // LDV_recordDead_FILL_SLOP_DYNAMIC():
    //    LDV_recordDead_FILL_SLOP_DYNAMIC((StgClosure *)w);
    // or, LDV_recordDead():
    //    LDV_recordDead((StgClosure *)w, sizeofW(StgWeak) - sizeofW(StgProfHeader));
    // Furthermore, when PROFILING is turned on, dead weak pointers are exactly as
    // large as weak pointers, so there is no need to fill the slop, either.
    // See stg_DEAD_WEAK_info in StgMiscClosures.hc.
#endif

    //
    // Todo: maybe use SET_HDR() and remove LDV_recordCreate()?
    //
    unlockClosure(w, stg_DEAD_WEAK_info);

    LDV_RECORD_CREATE(w);

    if (list != stg_NO_FINALIZER_closure) {
      ccall runCFinalizers(list);
    }

    /* return the finalizer */
    if (f == stg_NO_FINALIZER_closure) {
        return (0,stg_NO_FINALIZER_closure);
    } else {
        return (1,f);
    }
}

stg_deRefWeakzh ( gcptr w )
{
    W_ code, info;
    gcptr val;

    info = GET_INFO(w);

    if (info == stg_WHITEHOLE_info) {
        // w is locked by another thread. Now it's not immediately clear if w is
        // alive or not. We use lockClosure to wait for the info pointer to become
        // something other than stg_WHITEHOLE_info.

        LOCK_CLOSURE(w, info);
        unlockClosure(w, info);
    }

    if (info == stg_WEAK_info) {
        code = 1;
        val = StgWeak_value(w);
    } else {
        code = 0;
        val = w;
    }
    return (code,val);
}

/* -----------------------------------------------------------------------------
   Floating point operations.
   -------------------------------------------------------------------------- */

stg_decodeFloatzuIntzh ( F_ arg )
{
    W_ tmp, mp_tmp1, mp_tmp_w, r1, r2;

    STK_CHK_GEN_N (WDS(2));

    reserve 2 = tmp {

        mp_tmp1  = tmp + WDS(1);
        mp_tmp_w = tmp;

        /* Perform the operation */
        ccall __decodeFloat_Int(mp_tmp1 "ptr", mp_tmp_w "ptr", arg);

        r1 = W_[mp_tmp1];
        r2 = W_[mp_tmp_w];
    }

    /* returns: (Int# (mantissa), Int# (exponent)) */
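    /* e.g. decodeFloat (0.5 :: Float) == (8388608, -24), since
       0.5 == 2^23 * 2^-24; GHC.Float assembles that pair from the
       two words returned here. */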
    return (r1, r2);
}

stg_decodeDoublezu2Intzh ( D_ arg )
{
    W_ tmp;
    W_ mp_tmp1, mp_tmp2, mp_result1, mp_result2;
    W_ r1, r2, r3, r4;

    STK_CHK_GEN_N (WDS(4));

    reserve 4 = tmp {

        mp_tmp1    = tmp + WDS(3);
        mp_tmp2    = tmp + WDS(2);
        mp_result1 = tmp + WDS(1);
        mp_result2 = tmp;
  
        /* Perform the operation */
        ccall __decodeDouble_2Int(mp_tmp1 "ptr", mp_tmp2 "ptr",
                                  mp_result1 "ptr", mp_result2 "ptr",
                                  arg);

        r1 = W_[mp_tmp1];
        r2 = W_[mp_tmp2];
        r3 = W_[mp_result1];
        r4 = W_[mp_result2];
    }

    /* returns:
       (Int# (mant sign), Word# (mant high), Word# (mant low), Int# (expn)) */
    return (r1, r2, r3, r4);
}

/* -----------------------------------------------------------------------------
 * Concurrency primitives
 * -------------------------------------------------------------------------- */
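/* Illustratively, the Control.Concurrent surface over these primops
 * (a rough mapping; the real glue lives in GHC.Conc.Sync):
 *
 *   forkIO     :: IO () -> IO ThreadId         -- stg_forkzh
 *   forkOn     :: Int -> IO () -> IO ThreadId  -- stg_forkOnzh
 *   yield      :: IO ()                        -- stg_yieldzh
 *   myThreadId :: IO ThreadId                  -- stg_myThreadIdzh
 */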

stg_forkzh ( gcptr closure )
{
    MAYBE_GC_P(stg_forkzh, closure);

    gcptr threadid;

    ("ptr" threadid) = ccall createIOThread( MyCapability() "ptr",
                                  RtsFlags_GcFlags_initialStkSize(RtsFlags),
                                  closure "ptr");

    /* start blocked if the current thread is blocked */
    StgTSO_flags(threadid) = %lobits16(
        TO_W_(StgTSO_flags(threadid)) |
        TO_W_(StgTSO_flags(CurrentTSO)) & (TSO_BLOCKEX | TSO_INTERRUPTIBLE));

    ccall scheduleThread(MyCapability() "ptr", threadid "ptr");

    // context switch soon, but not immediately: we don't want every
    // forkIO to force a context-switch.
    Capability_context_switch(MyCapability()) = 1 :: CInt;

    return (threadid);
}

stg_forkOnzh ( W_ cpu, gcptr closure )
{
    again: MAYBE_GC(again);

    gcptr threadid;

    ("ptr" threadid) = ccall createIOThread(
        MyCapability() "ptr",
        RtsFlags_GcFlags_initialStkSize(RtsFlags),
        closure "ptr");

    /* start blocked if the current thread is blocked */
    StgTSO_flags(threadid) = %lobits16(
        TO_W_(StgTSO_flags(threadid)) |
        TO_W_(StgTSO_flags(CurrentTSO)) & (TSO_BLOCKEX | TSO_INTERRUPTIBLE));

    ccall scheduleThreadOn(MyCapability() "ptr", cpu, threadid "ptr");

    // context switch soon, but not immediately: we don't want every
    // forkIO to force a context-switch.
    Capability_context_switch(MyCapability()) = 1 :: CInt;

    return (threadid);
}

stg_yieldzh ()
{
    // when we yield to the scheduler, we have to tell it to put the
    // current thread to the back of the queue by setting the
    // context_switch flag.  If we don't do this, it will run the same
    // thread again.
    Capability_context_switch(MyCapability()) = 1 :: CInt;
    jump stg_yield_noregs();
}

stg_myThreadIdzh ()
{
    return (CurrentTSO);
}

stg_labelThreadzh ( gcptr threadid, W_ addr )
{
#if defined(DEBUG) || defined(TRACING) || defined(DTRACE)
    ccall labelThread(MyCapability() "ptr", threadid "ptr", addr "ptr");
#endif
    return ();
}

stg_isCurrentThreadBoundzh (/* no args */)
{
    W_ r;
    (r) = ccall isThreadBound(CurrentTSO);
    return (r);
}

stg_threadStatuszh ( gcptr tso )
{
    W_ why_blocked;
    W_ what_next;
    W_ ret, cap, locked;

    what_next   = TO_W_(StgTSO_what_next(tso));
    why_blocked = TO_W_(StgTSO_why_blocked(tso));
    // Note: these two reads are not atomic, so they might end up
    // being inconsistent.  It doesn't matter, since we
    // only return one or the other.  If we wanted to return the
    // contents of block_info too, then we'd have to do some synchronisation.

    if (what_next == ThreadComplete) {
        ret = 16;  // NB. magic, matches up with GHC.Conc.threadStatus
    } else {
        if (what_next == ThreadKilled) {
            ret = 17;
        } else {
            ret = why_blocked;
        }
    }

    cap = TO_W_(Capability_no(StgTSO_cap(tso)));

    if ((TO_W_(StgTSO_flags(tso)) & TSO_LOCKED) != 0) {
        locked = 1;
    } else {
        locked = 0;
    }

    return (ret,cap,locked);
}

/* -----------------------------------------------------------------------------
 * TVar primitives
 * -------------------------------------------------------------------------- */
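/* The frames below implement the user-level STM API (a rough mapping;
 * the C side of the machinery lives in rts/STM.c):
 *
 *   atomically :: STM a -> IO a            -- pushes stg_atomically_frame
 *   orElse     :: STM a -> STM a -> STM a  -- pushes stg_catch_retry_frame
 *   retry      :: STM a                    -- abandons and retries the tx
 */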

// Catch retry frame -----------------------------------------------------------

#define CATCH_RETRY_FRAME_FIELDS(w_,p_,info_ptr,        \
                                 p1, p2,                \
                                 running_alt_code,      \
                                 first_code,            \
                                 alt_code)              \
  w_ info_ptr,                                          \
  PROF_HDR_FIELDS(w_,p1,p2)                             \
  w_ running_alt_code,                                  \
  p_ first_code,                                        \
  p_ alt_code


INFO_TABLE_RET(stg_catch_retry_frame, CATCH_RETRY_FRAME,
               CATCH_RETRY_FRAME_FIELDS(W_,P_,
                                        info_ptr, p1, p2,
                                        running_alt_code,
                                        first_code,
                                        alt_code))
    return (P_ ret)
{
    W_ r;
    gcptr trec, outer, arg;

    trec = StgTSO_trec(CurrentTSO);
    outer  = StgTRecHeader_enclosing_trec(trec);
    (r) = ccall stmCommitNestedTransaction(MyCapability() "ptr", trec "ptr");
    if (r != 0) {
        // Succeeded (either first branch or second branch)
        StgTSO_trec(CurrentTSO) = outer;
        return (ret);
    } else {
        // Did not commit: re-execute
        P_ new_trec;
        ("ptr" new_trec) = ccall stmStartTransaction(MyCapability() "ptr",
                                                           outer "ptr");
        StgTSO_trec(CurrentTSO) = new_trec;
        if (running_alt_code != 0) {
            jump stg_ap_v_fast
                (CATCH_RETRY_FRAME_FIELDS(,,info_ptr, p1, p2,
                                          running_alt_code,
                                          first_code,
                                          alt_code))
                (alt_code);
        } else {
            jump stg_ap_v_fast
                (CATCH_RETRY_FRAME_FIELDS(,,info_ptr, p1, p2,
                                          running_alt_code,
                                          first_code,
                                          alt_code))
                (first_code);
        }
    }
}

// Atomically frame ------------------------------------------------------------

// This must match StgAtomicallyFrame in Closures.h
#define ATOMICALLY_FRAME_FIELDS(w_,p_,info_ptr,p1,p2,code,next,result)  \
    w_ info_ptr,                                                        \
    PROF_HDR_FIELDS(w_,p1,p2)                                           \
    p_ code,                                                            \
    p_ next,                                                            \
    p_ result


INFO_TABLE_RET(stg_atomically_frame, ATOMICALLY_FRAME,
               // layout of the frame, and bind the field names
               ATOMICALLY_FRAME_FIELDS(W_,P_,
                                       info_ptr, p1, p2,
                                       code,
                                       next_invariant,
                                       frame_result))
    return (P_ result) // value returned to the frame
{
    W_ valid;
    gcptr trec, outer, next_invariant, q;

    trec   = StgTSO_trec(CurrentTSO);
    outer  = StgTRecHeader_enclosing_trec(trec);

    if (outer == NO_TREC) {
        /* First time back at the atomically frame -- pick up invariants */
        ("ptr" next_invariant) =
            ccall stmGetInvariantsToCheck(MyCapability() "ptr", trec "ptr");
        frame_result = result;

    } else {
        /* Second/subsequent time back at the atomically frame -- abort the
         * tx that's checking the invariant and move on to the next one */
        StgTSO_trec(CurrentTSO) = outer;
        StgInvariantCheckQueue_my_execution(next_invariant) = trec;
        ccall stmAbortTransaction(MyCapability() "ptr", trec "ptr");
        /* Don't free trec -- it's linked from q and will be stashed in the
         * invariant if we eventually commit. */
        next_invariant =
           StgInvariantCheckQueue_next_queue_entry(next_invariant);
        trec = outer;
    }

    if (next_invariant != END_INVARIANT_CHECK_QUEUE) {
        /* We can't commit yet: another invariant to check */
        ("ptr" trec) = ccall stmStartTransaction(MyCapability() "ptr", trec "ptr");
        StgTSO_trec(CurrentTSO) = trec;
        q = StgInvariantCheckQueue_invariant(next_invariant);
        jump stg_ap_v_fast
            (ATOMICALLY_FRAME_FIELDS(,,info_ptr,p1,p2,
                                     code,next_invariant,frame_result))
            (StgAtomicInvariant_code(q));

    } else {

        /* We've got no more invariants to check, try to commit */
        (valid) = ccall stmCommitTransaction(MyCapability() "ptr", trec "ptr");
        if (valid != 0) {
            /* Transaction was valid: commit succeeded */
            StgTSO_trec(CurrentTSO) = NO_TREC;
            return (frame_result);
        } else {
            /* Transaction was not valid: try again */
            ("ptr" trec) = ccall stmStartTransaction(MyCapability() "ptr",
                                                     NO_TREC "ptr");
            StgTSO_trec(CurrentTSO) = trec;
            next_invariant = END_INVARIANT_CHECK_QUEUE;

            jump stg_ap_v_fast
                // push the StgAtomicallyFrame again: the code generator is
                // clever enough to only assign the fields that have changed.
                (ATOMICALLY_FRAME_FIELDS(,,info_ptr,p1,p2,
                                         code,next_invariant,frame_result))
                (code);
        }
    }
}


INFO_TABLE_RET(stg_atomically_waiting_frame, ATOMICALLY_FRAME,
               // layout of the frame, and bind the field names
               ATOMICALLY_FRAME_FIELDS(W_,P_,
                                       info_ptr, p1, p2,