/* -*- tab-width: 8 -*- */
/* -----------------------------------------------------------------------------
 *
 * (c) The GHC Team, 1998-2012
 *
 * Out-of-line primitive operations
 *
 * This file contains the implementations of all the primitive
 * operations ("primops") which are not expanded inline.  See
 * ghc/compiler/prelude/primops.txt.pp for a list of all the primops;
 * this file contains code for most of those with the attribute
 * out_of_line=True.
 *
 * Entry convention: the entry convention for a primop is the
 * NativeNodeCall convention, and the return convention is
 * NativeReturn.  (see compiler/cmm/CmmCallConv.hs)
 *
 * This file is written in a subset of C--, extended with various
 * features specific to GHC.  It is compiled by GHC directly.  For the
 * syntax of .cmm files, see the parser in ghc/compiler/cmm/CmmParse.y.
 *
 * ---------------------------------------------------------------------------*/

#include "Cmm.h"

#ifdef __PIC__
import pthread_mutex_lock;
import pthread_mutex_unlock;
#endif
import base_ControlziExceptionziBase_nestedAtomically_closure;
import EnterCriticalSection;
import LeaveCriticalSection;
import ghczmprim_GHCziTypes_False_closure;
#if defined(USE_MINIINTERPRETER) || !defined(mingw32_HOST_OS)
import sm_mutex;
#endif

/*-----------------------------------------------------------------------------
  Array Primitives

  Basically just new*Array - the others are all inline macros.

  The slow entry point is for returning from a heap check; the saved
  size argument must be re-loaded from the stack.
  -------------------------------------------------------------------------- */

/* for objects that are *less* than the size of a word, make sure we
 * round up to the nearest word for the size of the array.
 */

stg_newByteArrayzh ( W_ n )
{
    W_ words, payload_words;
    gcptr p;

    MAYBE_GC_N(stg_newByteArrayzh, n);

    payload_words = ROUNDUP_BYTES_TO_WDS(n);
    words = BYTES_TO_WDS(SIZEOF_StgArrWords) + payload_words;
    ("ptr" p) = ccall allocate(MyCapability() "ptr",words);
    TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0);
    SET_HDR(p, stg_ARR_WORDS_info, CCCS);
    StgArrWords_bytes(p) = n;
    return (p);
}

#define BA_ALIGN 16
#define BA_MASK  (BA_ALIGN-1)

stg_newPinnedByteArrayzh ( W_ n )
{
    W_ words, bytes, payload_words;
    gcptr p;

    MAYBE_GC_N(stg_newPinnedByteArrayzh, n);

    bytes = n;
    /* payload_words is what we will tell the profiler we had to allocate */
    payload_words = ROUNDUP_BYTES_TO_WDS(bytes);
    /* When we actually allocate memory, we need to allow space for the
       header: */
    bytes = bytes + SIZEOF_StgArrWords;
    /* And we want to align to BA_ALIGN bytes, so we need to allow space
       to shift up to BA_ALIGN - 1 bytes: */
    bytes = bytes + BA_ALIGN - 1;
    /* Now we convert to a number of words: */
    words = ROUNDUP_BYTES_TO_WDS(bytes);

    ("ptr" p) = ccall allocatePinned(MyCapability() "ptr", words);
    TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0);

    /* Now we need to move p forward so that the payload is aligned
       to BA_ALIGN bytes: */
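    /* A worked example (a sketch, assuming a 64-bit build where
       SIZEOF_StgArrWords = 16 and BA_ALIGN = 16): if allocatePinned
       returned p = 0x1008, then (-p - 16) & 15 = 8, so p becomes
       0x1010 and the payload at p + SIZEOF_StgArrWords = 0x1020 is
       16-byte aligned. */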
    p = p + ((-p - SIZEOF_StgArrWords) & BA_MASK);

    SET_HDR(p, stg_ARR_WORDS_info, CCCS);
    StgArrWords_bytes(p) = n;
    return (p);
}

stg_newAlignedPinnedByteArrayzh ( W_ n, W_ alignment )
{
    W_ words, bytes, payload_words;
    gcptr p;

    again: MAYBE_GC(again);

    /* we always supply at least word-aligned memory, so there's no
       need to allow extra space for alignment if the requirement is less
       than a word.  This also prevents mischief with alignment == 0. */
    if (alignment <= SIZEOF_W) { alignment = 1; }

    bytes = n;

    /* payload_words is what we will tell the profiler we had to allocate */
    payload_words = ROUNDUP_BYTES_TO_WDS(bytes);

    /* When we actually allocate memory, we need to allow space for the
       header: */
    bytes = bytes + SIZEOF_StgArrWords;
    /* And we want to align to <alignment> bytes, so we need to allow space
       to shift up to <alignment - 1> bytes: */
    bytes = bytes + alignment - 1;
    /* Now we convert to a number of words: */
    words = ROUNDUP_BYTES_TO_WDS(bytes);

    ("ptr" p) = ccall allocatePinned(MyCapability() "ptr", words);
    TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0);

    /* Now we need to move p forward so that the payload is aligned
       to <alignment> bytes. Note that we are assuming that
       <alignment> is a power of 2, which is technically not guaranteed */
    p = p + ((-p - SIZEOF_StgArrWords) & (alignment - 1));

    SET_HDR(p, stg_ARR_WORDS_info, CCCS);
    StgArrWords_bytes(p) = n;
    return (p);
}

// shrink size of MutableByteArray in-place
stg_shrinkMutableByteArrayzh ( gcptr mba, W_ new_size )
// MutableByteArray# s -> Int# -> State# s -> State# s
{
   ASSERT(new_size >= 0);
   ASSERT(new_size <= StgArrWords_bytes(mba));

   OVERWRITING_CLOSURE_OFS(mba, (BYTES_TO_WDS(SIZEOF_StgArrWords) +
                                 ROUNDUP_BYTES_TO_WDS(new_size)));
   StgArrWords_bytes(mba) = new_size;
   LDV_RECORD_CREATE(mba);

   return ();
}

// resize MutableByteArray
//
// The returned MutableByteArray is either the original
// MutableByteArray resized in-place or, if not possible, a newly
// allocated (unpinned) MutableByteArray (with the original content
// copied over)
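//
// A Haskell-level usage sketch ('MBA' and 'ensureSize' are hypothetical
// names, not part of this file): because the result may be a different
// array, callers must continue with the returned MutableByteArray#:
//
//   ensureSize (MBA mba) n s0 =
//     case resizeMutableByteArray# mba n s0 of
//       (# s1, mba' #) -> (# s1, MBA mba' #)   -- use mba', never mba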
stg_resizeMutableByteArrayzh ( gcptr mba, W_ new_size )
// MutableByteArray# s -> Int# -> State# s -> (# State# s,MutableByteArray# s #)
{
   W_ new_size_wds;

   ASSERT(new_size >= 0);

   new_size_wds = ROUNDUP_BYTES_TO_WDS(new_size);

   if (new_size_wds <= BYTE_ARR_WDS(mba)) {
      OVERWRITING_CLOSURE_OFS(mba, (BYTES_TO_WDS(SIZEOF_StgArrWords) +
                                    new_size_wds));
      StgArrWords_bytes(mba) = new_size;
      LDV_RECORD_CREATE(mba);

      return (mba);
   } else {
      (P_ new_mba) = call stg_newByteArrayzh(new_size);

      // At some point in the future we may be able to grow the
      // MBA in-place w/o copying if we know the space after the
      // current MBA is still available: we often want to grow the
      // MBA shortly after allocating the original MBA, so it is
      // likely that no further allocations have occurred by then.

      // copy over old content
      prim %memcpy(BYTE_ARR_CTS(new_mba), BYTE_ARR_CTS(mba),
                   StgArrWords_bytes(mba), WDS(1));

      return (new_mba);
   }
}

// RRN: This one does not use the "ticketing" approach because it
// deals in unboxed scalars, not heap pointers.
stg_casIntArrayzh( gcptr arr, W_ ind, W_ old, W_ new )
/* MutableByteArray# s -> Int# -> Int# -> Int# -> State# s -> (# State# s, Int# #) */
{
    W_ p, h;

    p = arr + SIZEOF_StgArrWords + WDS(ind);
    (h) = ccall cas(p, old, new);

    return(h);
}

stg_newArrayzh ( W_ n /* words */, gcptr init )
{
    W_ words, size, p;
    gcptr arr;

    again: MAYBE_GC(again);

    // the mark area contains one byte for each 2^MUT_ARR_PTRS_CARD_BITS words
    // in the array, making sure we round up, and then rounding up to a whole
    // number of words.
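    // For example (assuming MUT_ARR_PTRS_CARD_BITS = 7, i.e. one card
    // byte per 128 elements, on a 64-bit machine): n = 200 needs
    // ceil(200/128) = 2 card bytes, which round up to one whole word,
    // so size = 200 + 1 = 201 words.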
    size = n + mutArrPtrsCardWords(n);
    words = BYTES_TO_WDS(SIZEOF_StgMutArrPtrs) + size;
    ("ptr" arr) = ccall allocate(MyCapability() "ptr",words);
    TICK_ALLOC_PRIM(SIZEOF_StgMutArrPtrs, WDS(size), 0);

    SET_HDR(arr, stg_MUT_ARR_PTRS_DIRTY_info, CCCS);
    StgMutArrPtrs_ptrs(arr) = n;
    StgMutArrPtrs_size(arr) = size;

    // Initialise all elements of the array with the value 'init'
    p = arr + SIZEOF_StgMutArrPtrs;
  for:
    if (p < arr + SIZEOF_StgMutArrPtrs + WDS(n)) {
        W_[p] = init;
        p = p + WDS(1);
        goto for;
    }

    return (arr);
}

stg_unsafeThawArrayzh ( gcptr arr )
{
    // SUBTLETY TO DO WITH THE OLD GEN MUTABLE LIST
    //
    // A MUT_ARR_PTRS lives on the mutable list, but a MUT_ARR_PTRS_FROZEN
    // normally doesn't.  However, when we freeze a MUT_ARR_PTRS, we leave
    // it on the mutable list for the GC to remove (removing something from
    // the mutable list is not easy).
    //
    // So that we can tell whether a MUT_ARR_PTRS_FROZEN is on the mutable list,
    // when we freeze it we set the info ptr to be MUT_ARR_PTRS_FROZEN0
    // to indicate that it is still on the mutable list.
    //
    // So, when we thaw a MUT_ARR_PTRS_FROZEN, we must cope with two cases:
    // either it is on a mut_list, or it isn't.  We adopt the convention that
    // the closure type is MUT_ARR_PTRS_FROZEN0 if it is on the mutable list,
    // and MUT_ARR_PTRS_FROZEN otherwise.  In fact it wouldn't matter if
    // we put it on the mutable list more than once, but it would get scavenged
    // multiple times during GC, which would be unnecessarily slow.
    //
    if (StgHeader_info(arr) != stg_MUT_ARR_PTRS_FROZEN0_info) {
        SET_INFO(arr,stg_MUT_ARR_PTRS_DIRTY_info);
        recordMutable(arr);
        // must be done after SET_INFO, because it ASSERTs closure_MUTABLE()
        return (arr);
    } else {
        SET_INFO(arr,stg_MUT_ARR_PTRS_DIRTY_info);
        return (arr);
    }
}

stg_copyArrayzh ( gcptr src, W_ src_off, gcptr dst, W_ dst_off, W_ n )
{
    copyArray(src, src_off, dst, dst_off, n)
}

stg_copyMutableArrayzh ( gcptr src, W_ src_off, gcptr dst, W_ dst_off, W_ n )
{
    copyMutableArray(src, src_off, dst, dst_off, n)
}

stg_copyArrayArrayzh ( gcptr src, W_ src_off, gcptr dst, W_ dst_off, W_ n )
{
    copyArray(src, src_off, dst, dst_off, n)
}

stg_copyMutableArrayArrayzh ( gcptr src, W_ src_off, gcptr dst, W_ dst_off, W_ n )
{
    copyMutableArray(src, src_off, dst, dst_off, n)
}

stg_cloneArrayzh ( gcptr src, W_ offset, W_ n )
{
    cloneArray(stg_MUT_ARR_PTRS_FROZEN_info, src, offset, n)
}

stg_cloneMutableArrayzh ( gcptr src, W_ offset, W_ n )
{
    cloneArray(stg_MUT_ARR_PTRS_DIRTY_info, src, offset, n)
}

// We have to escape the "z" in the name.
stg_freezzeArrayzh ( gcptr src, W_ offset, W_ n )
{
    cloneArray(stg_MUT_ARR_PTRS_FROZEN_info, src, offset, n)
}

stg_thawArrayzh ( gcptr src, W_ offset, W_ n )
{
    cloneArray(stg_MUT_ARR_PTRS_DIRTY_info, src, offset, n)
}

// RRN: Uses the ticketed approach; see casMutVar
stg_casArrayzh ( gcptr arr, W_ ind, gcptr old, gcptr new )
/* MutableArray# s a -> Int# -> a -> a -> State# s -> (# State# s, Int#, Any a #) */
{
    gcptr h;
    W_ p, len;

    p = arr + SIZEOF_StgMutArrPtrs + WDS(ind);
    (h) = ccall cas(p, old, new);
    
    if (h != old) {
        // Failure, return what was there instead of 'old':
        return (1,h);
    } else {
        // Compare and Swap Succeeded:
        SET_HDR(arr, stg_MUT_ARR_PTRS_DIRTY_info, CCCS);
        len = StgMutArrPtrs_ptrs(arr);
        // The write barrier.  We must write a byte into the mark table:
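        // (For example, assuming MUT_ARR_PTRS_CARD_BITS = 7, a CAS at
        // ind = 300 marks the card byte at index 300 >> 7 = 2.)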
        I8[arr + SIZEOF_StgMutArrPtrs + WDS(len) + (ind >> MUT_ARR_PTRS_CARD_BITS )] = 1;
        return (0,new);
    }
}

stg_newArrayArrayzh ( W_ n /* words */ )
{
    W_ words, size, p;
    gcptr arr;

    MAYBE_GC_N(stg_newArrayArrayzh, n);

    // the mark area contains one byte for each 2^MUT_ARR_PTRS_CARD_BITS words
    // in the array, making sure we round up, and then rounding up to a whole
    // number of words.
    size = n + mutArrPtrsCardWords(n);
    words = BYTES_TO_WDS(SIZEOF_StgMutArrPtrs) + size;
    ("ptr" arr) = ccall allocate(MyCapability() "ptr",words);
    TICK_ALLOC_PRIM(SIZEOF_StgMutArrPtrs, WDS(size), 0);

    SET_HDR(arr, stg_MUT_ARR_PTRS_DIRTY_info, W_[CCCS]);
    StgMutArrPtrs_ptrs(arr) = n;
    StgMutArrPtrs_size(arr) = size;

    // Initialise all elements of the array with a pointer to the new array
    p = arr + SIZEOF_StgMutArrPtrs;
  for:
    if (p < arr + SIZEOF_StgMutArrPtrs + WDS(n)) {
        W_[p] = arr;
        p = p + WDS(1);
        goto for;
    }

    return (arr);
}

/* -----------------------------------------------------------------------------
   SmallArray primitives
   -------------------------------------------------------------------------- */

stg_newSmallArrayzh ( W_ n /* words */, gcptr init )
{
    W_ words, size, p;
    gcptr arr;

    again: MAYBE_GC(again);

    words = BYTES_TO_WDS(SIZEOF_StgSmallMutArrPtrs) + n;
    ("ptr" arr) = ccall allocate(MyCapability() "ptr",words);
    TICK_ALLOC_PRIM(SIZEOF_StgSmallMutArrPtrs, WDS(n), 0);

    SET_HDR(arr, stg_SMALL_MUT_ARR_PTRS_DIRTY_info, CCCS);
    StgSmallMutArrPtrs_ptrs(arr) = n;

    // Initialise all elements of the array with the value 'init'
    p = arr + SIZEOF_StgSmallMutArrPtrs;
  for:
    if (p < arr + SIZEOF_StgSmallMutArrPtrs + WDS(n)) {
        W_[p] = init;
        p = p + WDS(1);
        goto for;
    }

    return (arr);
}

stg_unsafeThawSmallArrayzh ( gcptr arr )
{
    // See stg_unsafeThawArrayzh
    if (StgHeader_info(arr) != stg_SMALL_MUT_ARR_PTRS_FROZEN0_info) {
        SET_INFO(arr, stg_SMALL_MUT_ARR_PTRS_DIRTY_info);
        recordMutable(arr);
        // must be done after SET_INFO, because it ASSERTs closure_MUTABLE()
        return (arr);
    } else {
        SET_INFO(arr, stg_SMALL_MUT_ARR_PTRS_DIRTY_info);
        return (arr);
    }
}

stg_cloneSmallArrayzh ( gcptr src, W_ offset, W_ n )
{
    cloneSmallArray(stg_SMALL_MUT_ARR_PTRS_FROZEN_info, src, offset, n)
}

stg_cloneSmallMutableArrayzh ( gcptr src, W_ offset, W_ n )
{
    cloneSmallArray(stg_SMALL_MUT_ARR_PTRS_DIRTY_info, src, offset, n)
}

// We have to escape the "z" in the name.
stg_freezzeSmallArrayzh ( gcptr src, W_ offset, W_ n )
{
    cloneSmallArray(stg_SMALL_MUT_ARR_PTRS_FROZEN_info, src, offset, n)
}

stg_thawSmallArrayzh ( gcptr src, W_ offset, W_ n )
{
    cloneSmallArray(stg_SMALL_MUT_ARR_PTRS_DIRTY_info, src, offset, n)
}

stg_copySmallArrayzh ( gcptr src, W_ src_off, gcptr dst, W_ dst_off, W_ n)
{
    W_ dst_p, src_p, bytes;

    SET_INFO(dst, stg_SMALL_MUT_ARR_PTRS_DIRTY_info);

    dst_p = dst + SIZEOF_StgSmallMutArrPtrs + WDS(dst_off);
    src_p = src + SIZEOF_StgSmallMutArrPtrs + WDS(src_off);
    bytes = WDS(n);
    prim %memcpy(dst_p, src_p, bytes, WDS(1));

    return ();
}

stg_copySmallMutableArrayzh ( gcptr src, W_ src_off, gcptr dst, W_ dst_off, W_ n)
{
    W_ dst_p, src_p, bytes;

    SET_INFO(dst, stg_SMALL_MUT_ARR_PTRS_DIRTY_info);

    dst_p = dst + SIZEOF_StgSmallMutArrPtrs + WDS(dst_off);
    src_p = src + SIZEOF_StgSmallMutArrPtrs + WDS(src_off);
    bytes = WDS(n);
    if (src == dst) {
        prim %memmove(dst_p, src_p, bytes, WDS(1));
    } else {
        prim %memcpy(dst_p, src_p, bytes, WDS(1));
    }

    return ();
}

// RRN: Uses the ticketed approach; see casMutVar
stg_casSmallArrayzh ( gcptr arr, W_ ind, gcptr old, gcptr new )
/* SmallMutableArray# s a -> Int# -> a -> a -> State# s -> (# State# s, Int#, Any a #) */
{
    gcptr h;
    W_ p, len;

    p = arr + SIZEOF_StgSmallMutArrPtrs + WDS(ind);
    (h) = ccall cas(p, old, new);

    if (h != old) {
        // Failure, return what was there instead of 'old':
        return (1,h);
    } else {
        // Compare and Swap Succeeded:
        SET_HDR(arr, stg_SMALL_MUT_ARR_PTRS_DIRTY_info, CCCS);
        return (0,new);
    }
}


/* -----------------------------------------------------------------------------
   MutVar primitives
   -------------------------------------------------------------------------- */

stg_newMutVarzh ( gcptr init )
{
    W_ mv;

    ALLOC_PRIM_P (SIZEOF_StgMutVar, stg_newMutVarzh, init);

    mv = Hp - SIZEOF_StgMutVar + WDS(1);
    SET_HDR(mv,stg_MUT_VAR_DIRTY_info,CCCS);
    StgMutVar_var(mv) = init;

    return (mv);
}

// RRN: To support the "ticketed" approach, we return the NEW rather
// than old value if the CAS is successful.  This is received in an
// opaque form in the Haskell code, preventing the compiler from
// changing its pointer identity.  The ticket can then be safely used
// in future CAS operations.
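//
// A Haskell-level sketch of the retry loop this enables (hypothetical
// wrapper; a production version lives in the atomic-primops package):
//
//   casLoop ref f = readForCAS ref >>= go
//     where go old = do
//             (ok, next) <- casIORef ref old (f (peekTicket old))
//             unless ok (go next)   -- retry with the returned ticket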
stg_casMutVarzh ( gcptr mv, gcptr old, gcptr new )
 /* MutVar# s a -> a -> a -> State# s -> (# State# s, Int#, Any a #) */
{
    gcptr h;

    (h) = ccall cas(mv + SIZEOF_StgHeader + OFFSET_StgMutVar_var, old, new);
    if (h != old) {
        return (1,h);
    } else {
        if (GET_INFO(mv) == stg_MUT_VAR_CLEAN_info) {
            ccall dirty_MUT_VAR(BaseReg "ptr", mv "ptr");
        }
        return (0,new);
    }
}

stg_atomicModifyMutVarzh ( gcptr mv, gcptr f )
{
    W_ z, x, y, r, h;

    /* If x is the current contents of the MutVar#, then
       we want to make the new contents point to

         (sel_0 (f x))

       and the return value is

         (sel_1 (f x))

        obviously we can share (f x).

         z = [stg_ap_2 f x]  (max (HS + 2) MIN_UPD_SIZE)
         y = [stg_sel_0 z]   (max (HS + 1) MIN_UPD_SIZE)
         r = [stg_sel_1 z]   (max (HS + 1) MIN_UPD_SIZE)
    */
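    /* A Haskell-level sketch of what the three thunks implement
       (ignoring the CAS retry loop below and the sharing of z):

         atomicModifyMutVar mv f = do
           x <- readMutVar mv
           let z = f x               -- shared thunk
           writeMutVar mv (fst z)    -- y = sel_0 z
           return (snd z)            -- r = sel_1 z
    */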

#if MIN_UPD_SIZE > 1
#define THUNK_1_SIZE (SIZEOF_StgThunkHeader + WDS(MIN_UPD_SIZE))
#define TICK_ALLOC_THUNK_1() TICK_ALLOC_UP_THK(WDS(1),WDS(MIN_UPD_SIZE-1))
#else
#define THUNK_1_SIZE (SIZEOF_StgThunkHeader + WDS(1))
#define TICK_ALLOC_THUNK_1() TICK_ALLOC_UP_THK(WDS(1),0)
#endif

#if MIN_UPD_SIZE > 2
#define THUNK_2_SIZE (SIZEOF_StgThunkHeader + WDS(MIN_UPD_SIZE))
#define TICK_ALLOC_THUNK_2() TICK_ALLOC_UP_THK(WDS(2),WDS(MIN_UPD_SIZE-2))
#else
#define THUNK_2_SIZE (SIZEOF_StgThunkHeader + WDS(2))
#define TICK_ALLOC_THUNK_2() TICK_ALLOC_UP_THK(WDS(2),0)
#endif

#define SIZE (THUNK_2_SIZE + THUNK_1_SIZE + THUNK_1_SIZE)

    HP_CHK_GEN_TICKY(SIZE);

    TICK_ALLOC_THUNK_2();
    CCCS_ALLOC(THUNK_2_SIZE);
    z = Hp - THUNK_2_SIZE + WDS(1);
    SET_HDR(z, stg_ap_2_upd_info, CCCS);
    LDV_RECORD_CREATE(z);
    StgThunk_payload(z,0) = f;

    TICK_ALLOC_THUNK_1();
    CCCS_ALLOC(THUNK_1_SIZE);
    y = z - THUNK_1_SIZE;
    SET_HDR(y, stg_sel_0_upd_info, CCCS);
    LDV_RECORD_CREATE(y);
    StgThunk_payload(y,0) = z;

    TICK_ALLOC_THUNK_1();
    CCCS_ALLOC(THUNK_1_SIZE);
    r = y - THUNK_1_SIZE;
    SET_HDR(r, stg_sel_1_upd_info, CCCS);
    LDV_RECORD_CREATE(r);
    StgThunk_payload(r,0) = z;

  retry:
    x = StgMutVar_var(mv);
    StgThunk_payload(z,1) = x;
#ifdef THREADED_RTS
    (h) = ccall cas(mv + SIZEOF_StgHeader + OFFSET_StgMutVar_var, x, y);
    if (h != x) { goto retry; }
#else
    StgMutVar_var(mv) = y;
#endif

    if (GET_INFO(mv) == stg_MUT_VAR_CLEAN_info) {
        ccall dirty_MUT_VAR(BaseReg "ptr", mv "ptr");
    }

    return (r);
}

/* -----------------------------------------------------------------------------
   Weak Pointer Primitives
   -------------------------------------------------------------------------- */

STRING(stg_weak_msg,"New weak pointer at %p\n")

stg_mkWeakzh ( gcptr key,
               gcptr value,
               gcptr finalizer /* or stg_NO_FINALIZER_closure */ )
{
    gcptr w;

    ALLOC_PRIM (SIZEOF_StgWeak)

    w = Hp - SIZEOF_StgWeak + WDS(1);
    SET_HDR(w, stg_WEAK_info, CCCS);

    StgWeak_key(w)         = key;
    StgWeak_value(w)       = value;
    StgWeak_finalizer(w)   = finalizer;
    StgWeak_cfinalizers(w) = stg_NO_FINALIZER_closure;

    StgWeak_link(w) = Capability_weak_ptr_list_hd(MyCapability());
    Capability_weak_ptr_list_hd(MyCapability()) = w;
    if (Capability_weak_ptr_list_tl(MyCapability()) == NULL) {
        Capability_weak_ptr_list_tl(MyCapability()) = w;
    }

    IF_DEBUG(weak, ccall debugBelch(stg_weak_msg,w));

    return (w);
}

stg_mkWeakNoFinalizzerzh ( gcptr key, gcptr value )
{
    jump stg_mkWeakzh (key, value, stg_NO_FINALIZER_closure);
}

STRING(stg_cfinalizer_msg,"Adding a finalizer to %p\n")

stg_addCFinalizzerToWeakzh ( W_ fptr,   // finalizer
                             W_ ptr,
                             W_ flag,   // has environment (0 or 1)
                             W_ eptr,
                             gcptr w )
{
    W_ c, info;

    ALLOC_PRIM (SIZEOF_StgCFinalizerList)

    c = Hp - SIZEOF_StgCFinalizerList + WDS(1);
    SET_HDR(c, stg_C_FINALIZER_LIST_info, CCCS);

    StgCFinalizerList_fptr(c) = fptr;
    StgCFinalizerList_ptr(c) = ptr;
    StgCFinalizerList_eptr(c) = eptr;
    StgCFinalizerList_flag(c) = flag;

    LOCK_CLOSURE(w, info);

    if (info == stg_DEAD_WEAK_info) {
        // Already dead.
        unlockClosure(w, info);
        return (0);
    }

    StgCFinalizerList_link(c) = StgWeak_cfinalizers(w);
    StgWeak_cfinalizers(w) = c;

    unlockClosure(w, info);

    recordMutable(w);

    IF_DEBUG(weak, ccall debugBelch(stg_cfinalizer_msg,w));

    return (1);
}

stg_finalizzeWeakzh ( gcptr w )
{
    gcptr f, list;
    W_ info;

    LOCK_CLOSURE(w, info);

    // already dead?
    if (info == stg_DEAD_WEAK_info) {
        unlockClosure(w, info);
        return (0,stg_NO_FINALIZER_closure);
    }

    f    = StgWeak_finalizer(w);
    list = StgWeak_cfinalizers(w);

    // kill it
#ifdef PROFILING
    // @LDV profiling
    // A weak pointer is inherently used, so we do not need to call
    // LDV_recordDead_FILL_SLOP_DYNAMIC():
    //    LDV_recordDead_FILL_SLOP_DYNAMIC((StgClosure *)w);
    // or, LDV_recordDead():
    //    LDV_recordDead((StgClosure *)w, sizeofW(StgWeak) - sizeofW(StgProfHeader));
    // Furthermore, when PROFILING is turned on, dead weak pointers are exactly as
    // large as weak pointers, so there is no need to fill the slop, either.
    // See stg_DEAD_WEAK_info in StgMiscClosures.hc.
#endif

    //
    // Todo: maybe use SET_HDR() and remove LDV_recordCreate()?
    //
    unlockClosure(w, stg_DEAD_WEAK_info);

    LDV_RECORD_CREATE(w);

    if (list != stg_NO_FINALIZER_closure) {
      ccall runCFinalizers(list);
    }

    /* return the finalizer */
    if (f == stg_NO_FINALIZER_closure) {
        return (0,stg_NO_FINALIZER_closure);
    } else {
        return (1,f);
    }
}

stg_deRefWeakzh ( gcptr w )
{
    W_ code, info;
    gcptr val;

    info = GET_INFO(w);

    if (info == stg_WHITEHOLE_info) {
        // w is locked by another thread. Now it's not immediately clear if w is
        // alive or not. We use lockClosure to wait for the info pointer to become
        // something other than stg_WHITEHOLE_info.

        LOCK_CLOSURE(w, info);
        unlockClosure(w, info);
    }

    if (info == stg_WEAK_info) {
        code = 1;
        val = StgWeak_value(w);
    } else {
        code = 0;
        val = w;
    }
    return (code,val);
}

/* -----------------------------------------------------------------------------
   Floating point operations.
   -------------------------------------------------------------------------- */

stg_decodeFloatzuIntzh ( F_ arg )
{
    W_ p;
    W_ tmp, mp_tmp1, mp_tmp_w, r1, r2;

    STK_CHK_GEN_N (WDS(2));

    reserve 2 = tmp {

        mp_tmp1  = tmp + WDS(1);
        mp_tmp_w = tmp;

        /* Perform the operation */
        ccall __decodeFloat_Int(mp_tmp1 "ptr", mp_tmp_w "ptr", arg);

        r1 = W_[mp_tmp1];
        r2 = W_[mp_tmp_w];
    }

    /* returns: (Int# (mantissa), Int# (exponent)) */
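    /* e.g. decodeFloat_Int# 0.5# gives (8388608, -24), since
       8388608 * 2^-24 == 0.5 (values assume the IEEE binary32 format
       with a 24-bit significand). */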
    return (r1, r2);
}

stg_decodeDoublezu2Intzh ( D_ arg )
{
    W_ p, tmp;
    W_ mp_tmp1, mp_tmp2, mp_result1, mp_result2;
    W_ r1, r2, r3, r4;

    STK_CHK_GEN_N (WDS(4));

    reserve 4 = tmp {

        mp_tmp1    = tmp + WDS(3);
        mp_tmp2    = tmp + WDS(2);
        mp_result1 = tmp + WDS(1);
        mp_result2 = tmp;
  
        /* Perform the operation */
        ccall __decodeDouble_2Int(mp_tmp1 "ptr", mp_tmp2 "ptr",
                                  mp_result1 "ptr", mp_result2 "ptr",
                                  arg);

        r1 = W_[mp_tmp1];
        r2 = W_[mp_tmp2];
        r3 = W_[mp_result1];
        r4 = W_[mp_result2];
    }

    /* returns:
       (Int# (mant sign), Word# (mant high), Word# (mant low), Int# (expn)) */
    return (r1, r2, r3, r4);
}

/* -----------------------------------------------------------------------------
 * Concurrency primitives
 * -------------------------------------------------------------------------- */

stg_forkzh ( gcptr closure )
{
    MAYBE_GC_P(stg_forkzh, closure);

    gcptr threadid;

    ("ptr" threadid) = ccall createIOThread( MyCapability() "ptr",
                                  RtsFlags_GcFlags_initialStkSize(RtsFlags),
                                  closure "ptr");

    /* start blocked if the current thread is blocked */
    StgTSO_flags(threadid) = %lobits16(
        TO_W_(StgTSO_flags(threadid)) |
        TO_W_(StgTSO_flags(CurrentTSO)) & (TSO_BLOCKEX | TSO_INTERRUPTIBLE));

    ccall scheduleThread(MyCapability() "ptr", threadid "ptr");

    // context switch soon, but not immediately: we don't want every
    // forkIO to force a context-switch.
    Capability_context_switch(MyCapability()) = 1 :: CInt;

    return (threadid);
}

stg_forkOnzh ( W_ cpu, gcptr closure )
{
again: MAYBE_GC(again);

    gcptr threadid;

    ("ptr" threadid) = ccall createIOThread(
        MyCapability() "ptr",
        RtsFlags_GcFlags_initialStkSize(RtsFlags),
        closure "ptr");

    /* start blocked if the current thread is blocked */
    StgTSO_flags(threadid) = %lobits16(
        TO_W_(StgTSO_flags(threadid)) |
        TO_W_(StgTSO_flags(CurrentTSO)) & (TSO_BLOCKEX | TSO_INTERRUPTIBLE));

    ccall scheduleThreadOn(MyCapability() "ptr", cpu, threadid "ptr");

    // context switch soon, but not immediately: we don't want every
    // forkIO to force a context-switch.
    Capability_context_switch(MyCapability()) = 1 :: CInt;

    return (threadid);
}

stg_yieldzh ()
{
    // when we yield to the scheduler, we have to tell it to put the
    // current thread to the back of the queue by setting the
    // context_switch flag.  If we don't do this, it will run the same
    // thread again.
    Capability_context_switch(MyCapability()) = 1 :: CInt;
    jump stg_yield_noregs();
}

stg_myThreadIdzh ()
{
    return (CurrentTSO);
}

stg_labelThreadzh ( gcptr threadid, W_ addr )
{
#if defined(DEBUG) || defined(TRACING) || defined(DTRACE)
    ccall labelThread(MyCapability() "ptr", threadid "ptr", addr "ptr");
#endif
    return ();
}

stg_isCurrentThreadBoundzh (/* no args */)
{
    W_ r;
    (r) = ccall isThreadBound(CurrentTSO);
    return (r);
}

stg_threadStatuszh ( gcptr tso )
{
    W_ why_blocked;
    W_ what_next;
    W_ ret, cap, locked;

    what_next   = TO_W_(StgTSO_what_next(tso));
    why_blocked = TO_W_(StgTSO_why_blocked(tso));
    // Note: these two reads are not atomic, so they might end up
    // being inconsistent.  It doesn't matter, since we
    // only return one or the other.  If we wanted to return the
    // contents of block_info too, then we'd have to do some synchronisation.

    if (what_next == ThreadComplete) {
        ret = 16;  // NB. magic, matches up with GHC.Conc.threadStatus
    } else {
        if (what_next == ThreadKilled) {
            ret = 17;
        } else {
            ret = why_blocked;
        }
    }

    cap = TO_W_(Capability_no(StgTSO_cap(tso)));

    if ((TO_W_(StgTSO_flags(tso)) & TSO_LOCKED) != 0) {
        locked = 1;
    } else {
        locked = 0;
    }

924
    return (ret,cap,locked);
925
}
926 927 928 929 930

/* -----------------------------------------------------------------------------
 * TVar primitives
 * -------------------------------------------------------------------------- */

// Catch retry frame -----------------------------------------------------------

#define CATCH_RETRY_FRAME_FIELDS(w_,p_,info_ptr,        \
                                 p1, p2,                \
                                 running_alt_code,      \
                                 first_code,            \
                                 alt_code)              \
  w_ info_ptr,                                          \
  PROF_HDR_FIELDS(w_,p1,p2)                             \
  w_ running_alt_code,                                  \
  p_ first_code,                                        \
  p_ alt_code


INFO_TABLE_RET(stg_catch_retry_frame, CATCH_RETRY_FRAME,
               CATCH_RETRY_FRAME_FIELDS(W_,P_,
                                        info_ptr, p1, p2,
                                        running_alt_code,
                                        first_code,
                                        alt_code))
    return (P_ ret)
{
    W_ r;
    gcptr trec, outer, arg;

    trec = StgTSO_trec(CurrentTSO);
    outer  = StgTRecHeader_enclosing_trec(trec);
    (r) = ccall stmCommitNestedTransaction(MyCapability() "ptr", trec "ptr");
    if (r != 0) {
        // Succeeded (either first branch or second branch)
        StgTSO_trec(CurrentTSO) = outer;
        return (ret);
    } else {
        // Did not commit: re-execute
        P_ new_trec;
        ("ptr" new_trec) = ccall stmStartTransaction(MyCapability() "ptr",
                                                           outer "ptr");
        StgTSO_trec(CurrentTSO) = new_trec;
        if (running_alt_code != 0) {
            jump stg_ap_v_fast
                (CATCH_RETRY_FRAME_FIELDS(,,info_ptr, p1, p2,
                                          running_alt_code,
                                          first_code,
                                          alt_code))
                (alt_code);
        } else {
            jump stg_ap_v_fast
                (CATCH_RETRY_FRAME_FIELDS(,,info_ptr, p1, p2,
                                          running_alt_code,
                                          first_code,
                                          alt_code))
                (first_code);
        }
    }
}

// Atomically frame ------------------------------------------------------------

// This must match StgAtomicallyFrame in Closures.h
#define ATOMICALLY_FRAME_FIELDS(w_,p_,info_ptr,p1,p2,code,next,result)  \
    w_ info_ptr,                                                        \
    PROF_HDR_FIELDS(w_,p1,p2)                                           \
    p_ code,                                                            \
    p_ next,                                                            \
    p_ result


INFO_TABLE_RET(stg_atomically_frame, ATOMICALLY_FRAME,
               // layout of the frame, and bind the field names
               ATOMICALLY_FRAME_FIELDS(W_,P_,
                                       info_ptr, p1, p2,
                                       code,
                                       next_invariant,
                                       frame_result))
    return (P_ result) // value returned to the frame
{
    W_ valid;
    gcptr trec, outer, next_invariant, q;

    trec   = StgTSO_trec(CurrentTSO);
    outer  = StgTRecHeader_enclosing_trec(trec);

    if (outer == NO_TREC) {
        /* First time back at the atomically frame -- pick up invariants */
        ("ptr" next_invariant) =
            ccall stmGetInvariantsToCheck(MyCapability() "ptr", trec "ptr");
        frame_result = result;

    } else {
        /* Second/subsequent time back at the atomically frame -- abort the
         * tx that's checking the invariant and move on to the next one */
        StgTSO_trec(CurrentTSO) = outer;
        StgInvariantCheckQueue_my_execution(next_invariant) = trec;
        ccall stmAbortTransaction(MyCapability() "ptr", trec "ptr");
        /* Don't free trec -- it's linked from q and will be stashed in the
         * invariant if we eventually commit. */
        next_invariant =
           StgInvariantCheckQueue_next_queue_entry(next_invariant);
        trec = outer;
    }

    if (next_invariant != END_INVARIANT_CHECK_QUEUE) {
        /* We can't commit yet: another invariant to check */
        ("ptr" trec) = ccall stmStartTransaction(MyCapability() "ptr", trec "ptr");
        StgTSO_trec(CurrentTSO) = trec;
        q = StgInvariantCheckQueue_invariant(next_invariant);
        jump stg_ap_v_fast
            (ATOMICALLY_FRAME_FIELDS(,,info_ptr,p1,p2,
                                     code,next_invariant,frame_result))
            (StgAtomicInvariant_code(q));