/* -----------------------------------------------------------------------------
 *
 * (c) The GHC Team, 1998-2012
 *
 * Out-of-line primitive operations
 *
 * This file contains the implementations of all the primitive
 * operations ("primops") which are not expanded inline.  See
 * ghc/compiler/prelude/primops.txt.pp for a list of all the primops;
 * this file contains code for most of those with the attribute
 * out_of_line=True.
 *
 * Entry convention: the entry convention for a primop is the
 * NativeNodeCall convention, and the return convention is
 * NativeReturn.  (see compiler/cmm/CmmCallConv.hs)
 *
 * This file is written in a subset of C--, extended with various
 * features specific to GHC.  It is compiled by GHC directly.  For the
 * syntax of .cmm files, see the parser in ghc/compiler/cmm/CmmParse.y.
 *
 * ---------------------------------------------------------------------------*/

#include "Cmm.h"

#ifdef __PIC__
import pthread_mutex_lock;
import pthread_mutex_unlock;
#endif

import base_ControlziExceptionziBase_nestedAtomically_closure;

import EnterCriticalSection;
import LeaveCriticalSection;

import ghczmprim_GHCziTypes_False_closure;

#if defined(USE_MINIINTERPRETER) || !defined(mingw32_HOST_OS)
import sm_mutex;
#endif

/*-----------------------------------------------------------------------------
  Array Primitives

  Basically just new*Array - the others are all inline macros.

  The slow entry point is for returning from a heap check, the saved
  size argument must be re-loaded from the stack.
  -------------------------------------------------------------------------- */

/* for objects that are *less* than the size of a word, make sure we
 * round up to the nearest word for the size of the array.
 */

stg_newByteArrayzh ( W_ n )
{
    W_ words, payload_words;
    gcptr p;

    MAYBE_GC_N(stg_newByteArrayzh, n);

    payload_words = ROUNDUP_BYTES_TO_WDS(n);
    words = BYTES_TO_WDS(SIZEOF_StgArrWords) + payload_words;
    ("ptr" p) = ccall allocate(MyCapability() "ptr",words);
    TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0);
    SET_HDR(p, stg_ARR_WORDS_info, CCCS);
    StgArrWords_bytes(p) = n;
    return (p);
}
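
/* Illustrative usage from Haskell (not part of the RTS): this primop is
 * reached via GHC.Exts.newByteArray#.  A minimal sketch, assuming a GHC
 * with MagicHash and UnboxedTuples:
 *
 *   {-# LANGUAGE MagicHash, UnboxedTuples #-}
 *   import GHC.Exts
 *   import GHC.IO (IO(..))
 *
 *   -- Allocate an n-byte mutable byte array and throw it away.
 *   allocBytes :: Int -> IO ()
 *   allocBytes (I# n) = IO (\s ->
 *       case newByteArray# n s of
 *           (# s', _mba #) -> (# s', () #))
 */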

#define BA_ALIGN 16
#define BA_MASK  (BA_ALIGN-1)

stg_newPinnedByteArrayzh ( W_ n )
{
    W_ words, bytes, payload_words;
    gcptr p;

    MAYBE_GC_N(stg_newPinnedByteArrayzh, n);

    bytes = n;
    /* payload_words is what we will tell the profiler we had to allocate */
    payload_words = ROUNDUP_BYTES_TO_WDS(bytes);
    /* When we actually allocate memory, we need to allow space for the
       header: */
    bytes = bytes + SIZEOF_StgArrWords;
    /* And we want to align to BA_ALIGN bytes, so we need to allow space
       to shift up to BA_ALIGN - 1 bytes: */
    bytes = bytes + BA_ALIGN - 1;
    /* Now we convert to a number of words: */
    words = ROUNDUP_BYTES_TO_WDS(bytes);

    ("ptr" p) = ccall allocatePinned(MyCapability() "ptr", words);
    TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0);

    /* Now we need to move p forward so that the payload is aligned
       to BA_ALIGN bytes: */
    p = p + ((-p - SIZEOF_StgArrWords) & BA_MASK);

    SET_HDR(p, stg_ARR_WORDS_info, CCCS);
    StgArrWords_bytes(p) = n;
    return (p);
}
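
/* Worked example of the align-up step above (assuming, for illustration,
 * a 64-bit target without profiling: SIZEOF_StgArrWords = 16, BA_ALIGN = 16,
 * BA_MASK = 15).  If allocatePinned returns p = 0x1008, then
 * (-p - 16) & 15 = 8, so p becomes 0x1010 and the payload starting at
 * p + SIZEOF_StgArrWords = 0x1020 is 16-byte aligned.
 */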

stg_newAlignedPinnedByteArrayzh ( W_ n, W_ alignment )
{
    W_ words, bytes, payload_words;
    gcptr p;

    again: MAYBE_GC(again);

    /* we always supply at least word-aligned memory, so there's no
       need to allow extra space for alignment if the requirement is less
       than a word.  This also prevents mischief with alignment == 0. */
    if (alignment <= SIZEOF_W) { alignment = 1; }

    bytes = n;

    /* payload_words is what we will tell the profiler we had to allocate */
    payload_words = ROUNDUP_BYTES_TO_WDS(bytes);

    /* When we actually allocate memory, we need to allow space for the
       header: */
    bytes = bytes + SIZEOF_StgArrWords;
    /* And we want to align to <alignment> bytes, so we need to allow space
       to shift up to <alignment - 1> bytes: */
    bytes = bytes + alignment - 1;
    /* Now we convert to a number of words: */
    words = ROUNDUP_BYTES_TO_WDS(bytes);

    ("ptr" p) = ccall allocatePinned(MyCapability() "ptr", words);
    TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0);

    /* Now we need to move p forward so that the payload is aligned
       to <alignment> bytes. Note that we are assuming that
       <alignment> is a power of 2, which is technically not guaranteed */
    p = p + ((-p - SIZEOF_StgArrWords) & (alignment - 1));

    SET_HDR(p, stg_ARR_WORDS_info, CCCS);
    StgArrWords_bytes(p) = n;
    return (p);
}

stg_newArrayzh ( W_ n /* words */, gcptr init )
{
    W_ words, size;
    gcptr p, arr;

    again: MAYBE_GC(again);

    // the mark area contains one byte for each 2^MUT_ARR_PTRS_CARD_BITS words
    // in the array, making sure we round up, and then rounding up to a whole
    // number of words.
    size = n + mutArrPtrsCardWords(n);
    words = BYTES_TO_WDS(SIZEOF_StgMutArrPtrs) + size;
    ("ptr" arr) = ccall allocate(MyCapability() "ptr",words);
    TICK_ALLOC_PRIM(SIZEOF_StgMutArrPtrs, WDS(n), 0);

    SET_HDR(arr, stg_MUT_ARR_PTRS_DIRTY_info, CCCS);
    StgMutArrPtrs_ptrs(arr) = n;
    StgMutArrPtrs_size(arr) = size;

    // Initialise the n elements of the array with the value init
    p = arr + SIZEOF_StgMutArrPtrs;
  for:
    if (p < arr + SIZEOF_StgMutArrPtrs + WDS(n)) {
        W_[p] = init;
        p = p + WDS(1);
        goto for;
    }
    // Initialise the mark bits (the remaining size - n words) with 0
  for2:
    if (p < arr + SIZEOF_StgMutArrPtrs + WDS(size)) {
        W_[p] = 0;
        p = p + WDS(1);
        goto for2;
    }

    return (arr);
}
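
/* Illustrative usage from Haskell (not part of the RTS): this primop is
 * reached via GHC.Exts.newArray#.  A minimal sketch, assuming MagicHash
 * and UnboxedTuples:
 *
 *   {-# LANGUAGE MagicHash, UnboxedTuples #-}
 *   import GHC.Exts
 *   import GHC.IO (IO(..))
 *
 *   -- Allocate an n-element array with every slot pointing at x.
 *   newFilled :: Int -> a -> IO ()
 *   newFilled (I# n) x = IO (\s ->
 *       case newArray# n x s of
 *           (# s', _marr #) -> (# s', () #))
 */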

stg_unsafeThawArrayzh ( gcptr arr )
{
  // SUBTLETY TO DO WITH THE OLD GEN MUTABLE LIST
  //
  // A MUT_ARR_PTRS lives on the mutable list, but a MUT_ARR_PTRS_FROZEN
  // normally doesn't.  However, when we freeze a MUT_ARR_PTRS, we leave
  // it on the mutable list for the GC to remove (removing something from
  // the mutable list is not easy).
  //
  // So that we can tell whether a MUT_ARR_PTRS_FROZEN is on the mutable list,
  // when we freeze it we set the info ptr to be MUT_ARR_PTRS_FROZEN0
  // to indicate that it is still on the mutable list.
  //
  // So, when we thaw a MUT_ARR_PTRS_FROZEN, we must cope with two cases:
  // either it is on a mut_list, or it isn't.  We adopt the convention that
  // the closure type is MUT_ARR_PTRS_FROZEN0 if it is on the mutable list,
  // and MUT_ARR_PTRS_FROZEN otherwise.  In fact it wouldn't matter if
  // we put it on the mutable list more than once, but it would get scavenged
  // multiple times during GC, which would be unnecessarily slow.
  //
  if (StgHeader_info(arr) != stg_MUT_ARR_PTRS_FROZEN0_info) {
        SET_INFO(arr,stg_MUT_ARR_PTRS_DIRTY_info);
        // recordMutable must be done after SET_INFO, because it ASSERTs
        // closure_MUTABLE()
        recordMutable(arr);
        return (arr);
  } else {
        SET_INFO(arr,stg_MUT_ARR_PTRS_DIRTY_info);
        return (arr);
  }
}
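
/* Illustrative usage from Haskell (not part of the RTS): this primop is
 * reached via GHC.Exts.unsafeThawArray#; it is only safe if the frozen
 * array is not shared.  A minimal sketch:
 *
 *   {-# LANGUAGE MagicHash, UnboxedTuples #-}
 *   import GHC.Exts
 *   import GHC.IO (IO(..))
 *
 *   thawIt :: Array# a -> IO ()
 *   thawIt arr = IO (\s ->
 *       case unsafeThawArray# arr s of
 *           (# s', _marr #) -> (# s', () #))
 */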

stg_newArrayArrayzh ( W_ n /* words */ )
{
    W_ words, size;
    gcptr p, arr;

    MAYBE_GC_N(stg_newArrayArrayzh, n);

    // the mark area contains one byte for each 2^MUT_ARR_PTRS_CARD_BITS words
    // in the array, making sure we round up, and then rounding up to a whole
    // number of words.
    size = n + mutArrPtrsCardWords(n);
    words = BYTES_TO_WDS(SIZEOF_StgMutArrPtrs) + size;
    ("ptr" arr) = ccall allocate(MyCapability() "ptr",words);
    TICK_ALLOC_PRIM(SIZEOF_StgMutArrPtrs, WDS(n), 0);

    SET_HDR(arr, stg_MUT_ARR_PTRS_DIRTY_info, CCCS);
    StgMutArrPtrs_ptrs(arr) = n;
    StgMutArrPtrs_size(arr) = size;

    // Initialise the n elements of the array with a pointer to the new array
    p = arr + SIZEOF_StgMutArrPtrs;
  for:
    if (p < arr + SIZEOF_StgMutArrPtrs + WDS(n)) {
        W_[p] = arr;
        p = p + WDS(1);
        goto for;
    }
    // Initialise the mark bits (the remaining size - n words) with 0
  for2:
    if (p < arr + SIZEOF_StgMutArrPtrs + WDS(size)) {
        W_[p] = 0;
        p = p + WDS(1);
        goto for2;
    }

    return (arr);
}

/* -----------------------------------------------------------------------------
   MutVar primitives
   -------------------------------------------------------------------------- */

stg_newMutVarzh ( gcptr init )
{
    W_ mv;

    ALLOC_PRIM (SIZEOF_StgMutVar);

    mv = Hp - SIZEOF_StgMutVar + WDS(1);
    SET_HDR(mv,stg_MUT_VAR_DIRTY_info,CCCS);
    StgMutVar_var(mv) = init;

    return (mv);
}

stg_casMutVarzh ( gcptr mv, gcptr old, gcptr new )
 /* MutVar# s a -> a -> a -> State# s -> (# State# s, Int#, a #) */
{
    gcptr h;

    (h) = ccall cas(mv + SIZEOF_StgHeader + OFFSET_StgMutVar_var,
                    old, new);
    if (h != old) {
        return (1,h);
    } else {
        if (GET_INFO(mv) == stg_MUT_VAR_CLEAN_info) {
            ccall dirty_MUT_VAR(BaseReg "ptr", mv "ptr");
        }
        return (0,h);
    }
}
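
/* Illustrative usage from Haskell (not part of the RTS): a classic
 * compare-and-swap retry loop over casMutVar#.  A minimal sketch, assuming
 * MagicHash and UnboxedTuples; note the convention above, where 0 means
 * the swap succeeded:
 *
 *   {-# LANGUAGE MagicHash, UnboxedTuples #-}
 *   import GHC.Exts
 *   import GHC.IO (IO(..))
 *
 *   casModify :: MutVar# RealWorld a -> (a -> a) -> IO ()
 *   casModify mv f = IO loop
 *     where
 *       loop s = case readMutVar# mv s of
 *         (# s1, x #) -> case casMutVar# mv x (f x) s1 of
 *           (# s2, 0#, _ #) -> (# s2, () #)    -- swapped
 *           (# s2, _, _ #)  -> loop s2         -- lost the race: retry
 */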

stg_atomicModifyMutVarzh ( gcptr mv, gcptr f )
{
    W_ z, x, y, r, h;

    /* If x is the current contents of the MutVar#, then
       we want to make the new contents point to

         (sel_0 (f x))

       and the return value is

         (sel_1 (f x))

       Obviously we can share (f x).

         z = [stg_ap_2 f x]  (max (HS + 2) MIN_UPD_SIZE)
         y = [stg_sel_0 z]   (max (HS + 1) MIN_UPD_SIZE)
         r = [stg_sel_1 z]   (max (HS + 1) MIN_UPD_SIZE)
    */
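
/* In Haskell terms, the three thunks z, y, r built below correspond to
 *
 *   z = f x    -- the application, shared by y and r; evaluates to a pair
 *   y = fst z  -- becomes the new contents of the MutVar#
 *   r = snd z  -- returned to the caller of atomicModifyMutVar#
 */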

#if MIN_UPD_SIZE > 1
#define THUNK_1_SIZE (SIZEOF_StgThunkHeader + WDS(MIN_UPD_SIZE))
#define TICK_ALLOC_THUNK_1() TICK_ALLOC_UP_THK(WDS(1),WDS(MIN_UPD_SIZE-1))
#else
#define THUNK_1_SIZE (SIZEOF_StgThunkHeader + WDS(1))
#define TICK_ALLOC_THUNK_1() TICK_ALLOC_UP_THK(WDS(1),0)
#endif

#if MIN_UPD_SIZE > 2
#define THUNK_2_SIZE (SIZEOF_StgThunkHeader + WDS(MIN_UPD_SIZE))
#define TICK_ALLOC_THUNK_2() TICK_ALLOC_UP_THK(WDS(2),WDS(MIN_UPD_SIZE-2))
#else
#define THUNK_2_SIZE (SIZEOF_StgThunkHeader + WDS(2))
#define TICK_ALLOC_THUNK_2() TICK_ALLOC_UP_THK(WDS(2),0)
#endif

#define SIZE (THUNK_2_SIZE + THUNK_1_SIZE + THUNK_1_SIZE)

   HP_CHK_GEN_TICKY(SIZE);

   TICK_ALLOC_THUNK_2();
   CCCS_ALLOC(THUNK_2_SIZE);
   z = Hp - THUNK_2_SIZE + WDS(1);
   SET_HDR(z, stg_ap_2_upd_info, CCCS);
   LDV_RECORD_CREATE(z);
   StgThunk_payload(z,0) = f;

   TICK_ALLOC_THUNK_1();
   CCCS_ALLOC(THUNK_1_SIZE);
   y = z - THUNK_1_SIZE;
   SET_HDR(y, stg_sel_0_upd_info, CCCS);
   LDV_RECORD_CREATE(y);
   StgThunk_payload(y,0) = z;

   TICK_ALLOC_THUNK_1();
   CCCS_ALLOC(THUNK_1_SIZE);
   r = y - THUNK_1_SIZE;
   SET_HDR(r, stg_sel_1_upd_info, CCCS);
   LDV_RECORD_CREATE(r);
   StgThunk_payload(r,0) = z;

 retry:
   x = StgMutVar_var(mv);
   StgThunk_payload(z,1) = x;
#ifdef THREADED_RTS
   (h) = ccall cas(mv + SIZEOF_StgHeader + OFFSET_StgMutVar_var, x, y);
   if (h != x) { goto retry; }
#else
   StgMutVar_var(mv) = y;
#endif

   if (GET_INFO(mv) == stg_MUT_VAR_CLEAN_info) {
     ccall dirty_MUT_VAR(BaseReg "ptr", mv "ptr");
   }

   return (r);
}

/* -----------------------------------------------------------------------------
   Weak Pointer Primitives
   -------------------------------------------------------------------------- */

STRING(stg_weak_msg,"New weak pointer at %p\n")

stg_mkWeakzh ( gcptr key,
               gcptr value,
               gcptr finalizer /* or stg_NO_FINALIZER_closure */ )
{
  gcptr w;

  ALLOC_PRIM (SIZEOF_StgWeak)

  w = Hp - SIZEOF_StgWeak + WDS(1);
  SET_HDR(w, stg_WEAK_info, CCCS);

  // We don't care about cfinalizer here.
  // Should StgWeak_cfinalizer(w) be stg_NO_FINALIZER_closure or
  // something else?

  StgWeak_key(w)        = key;
  StgWeak_value(w)      = value;
  StgWeak_finalizer(w)  = finalizer;
  StgWeak_cfinalizer(w) = stg_NO_FINALIZER_closure;

  ACQUIRE_LOCK(sm_mutex);
  StgWeak_link(w)   = W_[weak_ptr_list];
  W_[weak_ptr_list] = w;
  RELEASE_LOCK(sm_mutex);

  IF_DEBUG(weak, ccall debugBelch(stg_weak_msg,w));

  return (w);
}
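
/* Illustrative usage from Haskell (not part of the RTS): user code normally
 * reaches this through System.Mem.Weak rather than mkWeak# directly:
 *
 *   import System.Mem.Weak
 *
 *   -- The finalizer runs some time after the key becomes unreachable.
 *   watch :: [Int] -> IO (Weak [Int])
 *   watch xs = mkWeakPtr xs (Just (putStrLn "list is dead"))
 */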

stg_mkWeakNoFinalizzerzh ( gcptr key, gcptr value )
{
  jump stg_mkWeakzh (key, value, stg_NO_FINALIZER_closure);
}

stg_mkWeakForeignEnvzh ( gcptr key,
                         gcptr val,
                         W_ fptr,   // finalizer
                         W_ ptr,
                         W_ flag,   // has environment (0 or 1)
                         W_ eptr )
{
  W_ payload_words, words;
  gcptr w, p;

  ALLOC_PRIM (SIZEOF_StgWeak);

  w = Hp - SIZEOF_StgWeak + WDS(1);
  SET_HDR(w, stg_WEAK_info, CCCS);

  payload_words = 4;
  words         = BYTES_TO_WDS(SIZEOF_StgArrWords) + payload_words;
  ("ptr" p)     = ccall allocate(MyCapability() "ptr", words);

  TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0);
  SET_HDR(p, stg_ARR_WORDS_info, CCCS);

  StgArrWords_bytes(p)     = WDS(payload_words);
  StgArrWords_payload(p,0) = fptr;
  StgArrWords_payload(p,1) = ptr;
  StgArrWords_payload(p,2) = eptr;
  StgArrWords_payload(p,3) = flag;

  // There is no Haskell-level finalizer here, only the C finalizer stashed
  // in the cfinalizer array above, so the finalizer field gets a dummy value.

  StgWeak_key(w)        = key;
  StgWeak_value(w)      = val;
  StgWeak_finalizer(w)  = stg_NO_FINALIZER_closure;
  StgWeak_cfinalizer(w) = p;

  ACQUIRE_LOCK(sm_mutex);
  StgWeak_link(w)   = W_[weak_ptr_list];
  W_[weak_ptr_list] = w;
  RELEASE_LOCK(sm_mutex);

  IF_DEBUG(weak, ccall debugBelch(stg_weak_msg,w));

  return (w);
}

stg_finalizzeWeakzh ( gcptr w )
{
  gcptr f, arr;

  // already dead?
  if (GET_INFO(w) == stg_DEAD_WEAK_info) {
      return (0,stg_NO_FINALIZER_closure);
  }

  // kill it
#ifdef PROFILING
  // @LDV profiling
  // A weak pointer is inherently used, so we do not need to call
  // LDV_recordDead_FILL_SLOP_DYNAMIC():
  //    LDV_recordDead_FILL_SLOP_DYNAMIC((StgClosure *)w);
  // or, LDV_recordDead():
  //    LDV_recordDead((StgClosure *)w, sizeofW(StgWeak) - sizeofW(StgProfHeader));
  // Furthermore, when PROFILING is turned on, dead weak pointers are exactly as
  // large as weak pointers, so there is no need to fill the slop, either.
  // See stg_DEAD_WEAK_info in StgMiscClosures.cmm.
#endif

  //
  // Todo: maybe use SET_HDR() and remove LDV_recordCreate()?
  //
  SET_INFO(w,stg_DEAD_WEAK_info);
  LDV_RECORD_CREATE(w);

  f   = StgWeak_finalizer(w);
  arr = StgWeak_cfinalizer(w);

  StgDeadWeak_link(w) = StgWeak_link(w);

  if (arr != stg_NO_FINALIZER_closure) {
    ccall runCFinalizer(StgArrWords_payload(arr,0),
                        StgArrWords_payload(arr,1),
                        StgArrWords_payload(arr,2),
                        StgArrWords_payload(arr,3));
  }

  /* return the finalizer */
  if (f == stg_NO_FINALIZER_closure) {
      return (0,stg_NO_FINALIZER_closure);
  } else {
      return (1,f);
  }
}

stg_deRefWeakzh ( gcptr w )
{
  W_ code;
  gcptr val;

  if (GET_INFO(w) == stg_WEAK_info) {
    code = 1;
    val = StgWeak_value(w);
  } else {
    code = 0;
    val = w;
  }
  return (code,val);
}
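
/* Illustrative usage from Haskell (not part of the RTS): deRefWeak#
 * surfaces as System.Mem.Weak.deRefWeak; the 1/0 code above becomes
 * Just/Nothing:
 *
 *   import System.Mem.Weak
 *
 *   probe :: Weak v -> IO Bool
 *   probe wk = do
 *     mv <- deRefWeak wk
 *     case mv of
 *       Just _  -> return True    -- key still alive
 *       Nothing -> return False   -- weak pointer already died
 */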

/* -----------------------------------------------------------------------------
   Floating point operations.
   -------------------------------------------------------------------------- */

stg_decodeFloatzuIntzh ( F_ arg )
{
    W_ mp_tmp1;
    W_ mp_tmp_w;

    STK_CHK_GEN_N (WDS(2));

    mp_tmp1  = Sp - WDS(1);
    mp_tmp_w = Sp - WDS(2);

    /* Perform the operation */
    ccall __decodeFloat_Int(mp_tmp1 "ptr", mp_tmp_w "ptr", arg);

    /* returns: (Int# (mantissa), Int# (exponent)) */
    return (W_[mp_tmp1], W_[mp_tmp_w]);
}

stg_decodeDoublezu2Intzh ( D_ arg )
{
    W_ mp_tmp1;
    W_ mp_tmp2;
    W_ mp_result1;
    W_ mp_result2;

    STK_CHK_GEN_N (WDS(4));

    mp_tmp1    = Sp - WDS(1);
    mp_tmp2    = Sp - WDS(2);
    mp_result1 = Sp - WDS(3);
    mp_result2 = Sp - WDS(4);

    /* Perform the operation */
    ccall __decodeDouble_2Int(mp_tmp1 "ptr", mp_tmp2 "ptr",
                              mp_result1 "ptr", mp_result2 "ptr",
                              arg);

    /* returns:
       (Int# (mant sign), Word# (mant high), Word# (mant low), Int# (expn)) */
    return (W_[mp_tmp1], W_[mp_tmp2], W_[mp_result1], W_[mp_result2]);
}
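
/* Illustrative property (not part of the RTS): from Haskell this primop is
 * reached via GHC.Exts.decodeFloat_Int#, and the mantissa/exponent pair
 * satisfies fromIntegral m * 2 ^^ e == value:
 *
 *   {-# LANGUAGE MagicHash, UnboxedTuples #-}
 *   import GHC.Exts
 *
 *   decode :: Float -> (Int, Int)
 *   decode (F# f) = case decodeFloat_Int# f of
 *       (# m, e #) -> (I# m, I# e)
 *
 *   -- decode 0.5 == (8388608, -24), and 8388608 * 2 ^^ (-24) == 0.5
 */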

/* -----------------------------------------------------------------------------
 * Concurrency primitives
 * -------------------------------------------------------------------------- */

stg_forkzh ( gcptr closure )
{
  MAYBE_GC_P(stg_forkzh, closure);

  gcptr threadid;

  ("ptr" threadid) = ccall createIOThread( MyCapability() "ptr",
                                RtsFlags_GcFlags_initialStkSize(RtsFlags),
                                closure "ptr");

  /* start blocked if the current thread is blocked */
  StgTSO_flags(threadid) = %lobits16(
     TO_W_(StgTSO_flags(threadid)) |
     TO_W_(StgTSO_flags(CurrentTSO)) & (TSO_BLOCKEX | TSO_INTERRUPTIBLE));

  ccall scheduleThread(MyCapability() "ptr", threadid "ptr");

  // context switch soon, but not immediately: we don't want every
  // forkIO to force a context-switch.
  Capability_context_switch(MyCapability()) = 1 :: CInt;

  return (threadid);
}
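
/* Illustrative usage from Haskell (not part of the RTS): fork# underlies
 * Control.Concurrent.forkIO, roughly (ignoring the exception handler that
 * the real forkIO wraps around the action):
 *
 *   {-# LANGUAGE MagicHash, UnboxedTuples #-}
 *   import GHC.Exts
 *   import GHC.IO (IO(..))
 *   import GHC.Conc.Sync (ThreadId(..))
 *
 *   forkSimple :: IO () -> IO ThreadId
 *   forkSimple action = IO (\s ->
 *       case fork# action s of
 *           (# s', tid #) -> (# s', ThreadId tid #))
 */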

stg_forkOnzh ( W_ cpu, gcptr closure )
{
again: MAYBE_GC(again);

  gcptr threadid;

  ("ptr" threadid) = ccall createIOThread( MyCapability() "ptr",
                                RtsFlags_GcFlags_initialStkSize(RtsFlags),
                                closure "ptr");

  /* start blocked if the current thread is blocked */
  StgTSO_flags(threadid) = %lobits16(
     TO_W_(StgTSO_flags(threadid)) |
     TO_W_(StgTSO_flags(CurrentTSO)) & (TSO_BLOCKEX | TSO_INTERRUPTIBLE));

  ccall scheduleThreadOn(MyCapability() "ptr", cpu, threadid "ptr");

  // context switch soon, but not immediately: we don't want every
  // forkIO to force a context-switch.
  Capability_context_switch(MyCapability()) = 1 :: CInt;

  return (threadid);
}

stg_yieldzh ()
{
  // when we yield to the scheduler, we have to tell it to put the
  // current thread to the back of the queue by setting the
  // context_switch flag.  If we don't do this, it will run the same
  // thread again.
  Capability_context_switch(MyCapability()) = 1 :: CInt;
  jump stg_yield_noregs();
}

stg_myThreadIdzh ()
{
  return (CurrentTSO);
}

stg_labelThreadzh ( gcptr threadid, W_ addr )
{
#if defined(DEBUG) || defined(TRACING) || defined(DTRACE)
  ccall labelThread(MyCapability() "ptr", threadid "ptr", addr "ptr");
#endif
  return ();
}

stg_isCurrentThreadBoundzh (/* no args */)
{
  W_ r;
  (r) = ccall isThreadBound(CurrentTSO);
  return (r);
}

stg_threadStatuszh ( gcptr tso )
{
    W_ why_blocked;
    W_ what_next;
    W_ ret, cap, locked;

    what_next   = TO_W_(StgTSO_what_next(tso));
    why_blocked = TO_W_(StgTSO_why_blocked(tso));
    // Note: these two reads are not atomic, so they might end up
    // being inconsistent.  It doesn't matter, since we
    // only return one or the other.  If we wanted to return the
    // contents of block_info too, then we'd have to do some synchronisation.

    if (what_next == ThreadComplete) {
        ret = 16;  // NB. magic, matches up with GHC.Conc.threadStatus
    } else {
        if (what_next == ThreadKilled) {
            ret = 17;
        } else {
            ret = why_blocked;
        }
    }

    cap = TO_W_(Capability_no(StgTSO_cap(tso)));

    if ((TO_W_(StgTSO_flags(tso)) & TSO_LOCKED) != 0) {
        locked = 1;
    } else {
        locked = 0;
    }

    return (ret,cap,locked);
}

/* -----------------------------------------------------------------------------
 * TVar primitives
 * -------------------------------------------------------------------------- */

// Catch retry frame -----------------------------------------------------------

#define CATCH_RETRY_FRAME_FIELDS(w_,p_,info_ptr,        \
                                 running_alt_code,      \
                                 first_code,            \
                                 alt_code)              \
  w_ info_ptr,                                          \
  PROF_HDR_FIELDS(w_)                                   \
  w_ running_alt_code,                                  \
  p_ first_code,                                        \
  p_ alt_code


INFO_TABLE_RET(stg_catch_retry_frame, CATCH_RETRY_FRAME,
               CATCH_RETRY_FRAME_FIELDS(W_,P_,
                                        info_ptr,
                                        running_alt_code,
                                        first_code,
                                        alt_code))
    return (P_ ret)
{
    W_ r;
    gcptr trec, outer;

    trec  = StgTSO_trec(CurrentTSO);
    outer = StgTRecHeader_enclosing_trec(trec);
    (r) = ccall stmCommitNestedTransaction(MyCapability() "ptr", trec "ptr");
    if (r != 0) {
        // Succeeded (either first branch or second branch)
        StgTSO_trec(CurrentTSO) = outer;
        return (ret);
    } else {
        // Did not commit: re-execute
        P_ new_trec;
        ("ptr" new_trec) = ccall stmStartTransaction(MyCapability() "ptr",
                                                     outer "ptr");
        StgTSO_trec(CurrentTSO) = new_trec;
        if (running_alt_code != 0) {
            jump stg_ap_v_fast
                (CATCH_RETRY_FRAME_FIELDS(,,info_ptr,
                                          running_alt_code,
                                          first_code,
                                          alt_code))
                (alt_code);
        } else {
            jump stg_ap_v_fast
                (CATCH_RETRY_FRAME_FIELDS(,,info_ptr,
                                          running_alt_code,
                                          first_code,
                                          alt_code))
                (first_code);
        }
    }
}

// Atomically frame ------------------------------------------------------------

// This must match StgAtomicallyFrame in Closures.h
#define ATOMICALLY_FRAME_FIELDS(w_,p_,info_ptr,code,next,result)        \
  w_ info_ptr,                                                          \
  PROF_HDR_FIELDS(w_)                                                   \
  p_ code,                                                              \
  p_ next,                                                              \
  p_ result

INFO_TABLE_RET(stg_atomically_frame, ATOMICALLY_FRAME,
               // layout of the frame, and bind the field names
               ATOMICALLY_FRAME_FIELDS(W_,P_,
                                       info_ptr,
                                       code,
                                       next_invariant,
                                       frame_result))
    return (P_ result) // value returned to the frame
{
  W_ valid;
  gcptr trec, outer, next_invariant, q;

  trec   = StgTSO_trec(CurrentTSO);
  outer  = StgTRecHeader_enclosing_trec(trec);

  if (outer == NO_TREC) {
    /* First time back at the atomically frame -- pick up invariants */
    ("ptr" next_invariant) =
        ccall stmGetInvariantsToCheck(MyCapability() "ptr", trec "ptr");
    frame_result = result;

  } else {
    /* Second/subsequent time back at the atomically frame -- abort the
     * tx that's checking the invariant and move on to the next one */
    StgTSO_trec(CurrentTSO) = outer;
    StgInvariantCheckQueue_my_execution(next_invariant) = trec;
    ccall stmAbortTransaction(MyCapability() "ptr", trec "ptr");
    /* Don't free trec -- it's linked from q and will be stashed in the
     * invariant if we eventually commit. */
    next_invariant =
       StgInvariantCheckQueue_next_queue_entry(next_invariant);
    trec = outer;
  }

  if (next_invariant != END_INVARIANT_CHECK_QUEUE) {
    /* We can't commit yet: another invariant to check */
    ("ptr" trec) = ccall stmStartTransaction(MyCapability() "ptr", trec "ptr");
    StgTSO_trec(CurrentTSO) = trec;
    q = StgInvariantCheckQueue_invariant(next_invariant);
    jump stg_ap_v_fast
        (ATOMICALLY_FRAME_FIELDS(,,info_ptr,code,next_invariant,frame_result))
        (StgAtomicInvariant_code(q));

  } else {

    /* We've got no more invariants to check, try to commit */
    (valid) = ccall stmCommitTransaction(MyCapability() "ptr", trec "ptr");
    if (valid != 0) {
      /* Transaction was valid: commit succeeded */
      StgTSO_trec(CurrentTSO) = NO_TREC;
      return (frame_result);
    } else {
      /* Transaction was not valid: try again */
      ("ptr" trec) = ccall stmStartTransaction(MyCapability() "ptr", NO_TREC "ptr");
      StgTSO_trec(CurrentTSO) = trec;
      next_invariant = END_INVARIANT_CHECK_QUEUE;

      jump stg_ap_v_fast
          // push the StgAtomicallyFrame again: the code generator is
          // clever enough to only assign the fields that have changed.
          (ATOMICALLY_FRAME_FIELDS(,,info_ptr,code,next_invariant,frame_result))
          (code);
    }
  }
}

INFO_TABLE_RET(stg_atomically_waiting_frame, ATOMICALLY_FRAME,
               // layout of the frame, and bind the field names
               ATOMICALLY_FRAME_FIELDS(W_,P_,
                                       info_ptr,
                                       code,
                                       next_invariant,
                                       frame_result))
    return (/* no return values */)
{
  W_ trec, valid;

  /* The TSO is currently waiting: should we stop waiting? */
  (valid) = ccall stmReWait(MyCapability() "ptr", CurrentTSO "ptr");
  if (valid != 0) {
      /* Previous attempt is still valid: no point trying again yet */
      jump stg_block_noregs
          (ATOMICALLY_FRAME_FIELDS(,,info_ptr,
                                   code,next_invariant,frame_result))
          ();
  } else {
    /* Previous attempt is no longer valid: try again */
    ("ptr" trec) = ccall stmStartTransaction(MyCapability() "ptr", NO_TREC "ptr");
    StgTSO_trec(CurrentTSO) = trec;

    // change the frame header to stg_atomically_frame_info
    jump stg_ap_v_fast
        (ATOMICALLY_FRAME_FIELDS(,,stg_atomically_frame_info,
                                 code,next_invariant,frame_result))
        (code);
  }
}

// STM catch frame -------------------------------------------------------------

/* Catch frames are very similar to update frames, but when entering
 * one we just pop the frame off the stack and perform the correct
 * kind of return to the activation record underneath us on the stack.
 */

#define CATCH_STM_FRAME_FIELDS(w_,p_,info_ptr,code,handler)     \
  w_ info_ptr,                                                  \
  PROF_HDR_FIELDS(w_)                                           \
  p_ code,                                                      \
  p_ handler

INFO_TABLE_RET(stg_catch_stm_frame, CATCH_STM_FRAME,
               // layout of the frame, and bind the field names
               CATCH_STM_FRAME_FIELDS(W_,P_,info_ptr,code,handler))
    return (P_ ret)
{
    W_ r, trec, outer;

    trec  = StgTSO_trec(CurrentTSO);
    outer = StgTRecHeader_enclosing_trec(trec);
    (r) = ccall stmCommitNestedTransaction(MyCapability() "ptr", trec "ptr");
    if (r != 0) {
        /* Commit succeeded */
        StgTSO_trec(CurrentTSO) = outer;
        return (ret);
    } else {
        /* Commit failed: restart the transaction and re-run the body */
        W_ new_trec;
        ("ptr" new_trec) = ccall stmStartTransaction(MyCapability() "ptr", outer "ptr");
        StgTSO_trec(CurrentTSO) = new_trec;

        jump stg_ap_v_fast
            (CATCH_STM_FRAME_FIELDS(,,info_ptr,code,handler))
            (code);
    }
}


// Primop definition -----------------------------------------------------------

stg_atomicallyzh (P_ stm)
{
  W_ old_trec;
  W_ new_trec;
  W_ code, next_invariant, frame_result;

  // stmStartTransaction may allocate
  MAYBE_GC_P(stg_atomicallyzh, stm);

  STK_CHK_GEN();

  old_trec = StgTSO_trec(CurrentTSO);

  /* Nested transactions are not allowed; raise an exception */
  if (old_trec != NO_TREC) {
     jump stg_raisezh(base_ControlziExceptionziBase_nestedAtomically_closure);
  }

  code = stm;
  next_invariant = END_INVARIANT_CHECK_QUEUE;
  frame_result = NO_TREC;

  /* Start the memory transaction */
  ("ptr" new_trec) = ccall stmStartTransaction(MyCapability() "ptr", old_trec "ptr");
  StgTSO_trec(CurrentTSO) = new_trec;

  jump stg_ap_v_fast
      (ATOMICALLY_FRAME_FIELDS(,,stg_atomically_frame_info,
                               code,next_invariant,frame_result))
      (stm);
}
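
/* Illustrative usage from Haskell (not part of the RTS): the frames above
 * implement Control.Monad.STM.atomically; retry is handled by the
 * catch-retry and waiting machinery:
 *
 *   import Control.Concurrent.STM
 *
 *   transfer :: TVar Int -> TVar Int -> Int -> IO ()
 *   transfer from to n = atomically $ do
 *     b <- readTVar from
 *     if b < n
 *        then retry                       -- block until 'from' changes
 *        else do writeTVar from (b - n)
 *                t <- readTVar to
 *                writeTVar to (t + n)
 */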

// A closure representing "atomically x".  This is used when a thread
// inside a transaction receives an asynchronous exception; see #5866.
// It is somewhat similar to the stg_raise closure.
//
INFO_TABLE(stg_atomically,1,0,THUNK_1_0,"atomically","atomically")
    (P_ thunk)
{
    jump stg_atomicallyzh(StgThunk_payload(thunk,0));
}


stg_catchSTMzh (P_ code    /* :: STM a */,
                P_ handler /* :: Exception -> STM a */)
{
    STK_CHK_GEN();

    /* Start a nested transaction to run the body of the try block in */
    W_ cur_trec;
    W_ new_trec;
    cur_trec = StgTSO_trec(CurrentTSO);
    ("ptr" new_trec) = ccall stmStartTransaction(MyCapability() "ptr",
                                                 cur_trec "ptr");
    StgTSO_trec(CurrentTSO) = new_trec;

    jump stg_ap_v_fast
        (CATCH_STM_FRAME_FIELDS(,,stg_catch_stm_frame_info, code, handler))
        (code);
}
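
/* Illustrative usage from Haskell (not part of the RTS): catchSTM# backs
 * Control.Monad.STM.catchSTM; effects of the nested transaction are rolled
 * back before the handler runs:
 *
 *   import Control.Concurrent.STM
 *   import Control.Exception (SomeException)
 *
 *   orDefault :: STM a -> a -> STM a
 *   orDefault act dflt =
 *     catchSTM act (\e -> let _ = (e :: SomeException) in return dflt)
 */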


stg_catchRetryzh (P_ first_code, /* :: STM a */
                  P_ alt_code    /* :: STM a */)
{
  W_ new_trec;
