/* -----------------------------------------------------------------------------
 *
3
 * (c) The GHC Team, 1998-2011
4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29
 *
 * Out-of-line primitive operations
 *
 * This file contains the implementations of all the primitive
 * operations ("primops") which are not expanded inline.  See
 * ghc/compiler/prelude/primops.txt.pp for a list of all the primops;
 * this file contains code for most of those with the attribute
 * out_of_line=True.
 *
 * Entry convention: the entry convention for a primop is that all the
 * args are in Stg registers (R1, R2, etc.).  This is to make writing
 * the primops easier.  (see compiler/codeGen/CgCallConv.hs).
 *
 * Return convention: results from a primop are generally returned
 * using the ordinary unboxed tuple return convention.  The C-- parser
 * implements the RET_xxxx() macros to perform unboxed-tuple returns
 * based on the prevailing return convention.
 *
 * This file is written in a subset of C--, extended with various
 * features specific to GHC.  It is compiled by GHC directly.  For the
 * syntax of .cmm files, see the parser in ghc/compiler/cmm/CmmParse.y.
 *
 * ---------------------------------------------------------------------------*/

#include "Cmm.h"

30
#ifdef __PIC__
31 32
import pthread_mutex_lock;
import pthread_mutex_unlock;
33
#endif
34
import base_ControlziExceptionziBase_nestedAtomically_closure;
35 36
import EnterCriticalSection;
import LeaveCriticalSection;
import ghczmprim_GHCziTypes_False_closure;
38
#if !defined(mingw32_HOST_OS)
39
import sm_mutex;
40
#endif
41

42 43 44 45 46 47 48 49 50 51 52 53 54 55 56
/*-----------------------------------------------------------------------------
  Array Primitives

  Basically just new*Array - the others are all inline macros.

  The size arg is always passed in R1, and the result returned in R1.

  The slow entry point is for returning from a heap check, the saved
  size argument must be re-loaded from the stack.
  -------------------------------------------------------------------------- */

/* for objects that are *less* than the size of a word, make sure we
 * round up to the nearest word for the size of the array.
 */

57
/* Allocate a new (unpinned, mutable) byte array.
 * Args:    R1 = requested size in bytes.
 * Returns: pointer to a fresh ARR_WORDS closure.
 */
stg_newByteArrayzh
{
    W_ words, payload_words, n, p;
    MAYBE_GC(NO_PTRS,stg_newByteArrayzh);
    n = R1;
    // round the payload up to a whole number of words
    payload_words = ROUNDUP_BYTES_TO_WDS(n);
    words = BYTES_TO_WDS(SIZEOF_StgArrWords) + payload_words;
    ("ptr" p) = foreign "C" allocate(MyCapability() "ptr",words) [];
    TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0);
    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
    // store the *requested* byte count, not the rounded-up allocation size
    StgArrWords_bytes(p) = n;
    RET_P(p);
}

#define BA_ALIGN 16
#define BA_MASK  (BA_ALIGN-1)

74
/* Allocate a new pinned byte array whose payload is aligned to BA_ALIGN bytes.
 * Args:    R1 = requested size in bytes.
 * Returns: pointer to a fresh ARR_WORDS closure (pinned; the GC won't move it).
 */
stg_newPinnedByteArrayzh
{
    W_ words, n, bytes, payload_words, p;

    MAYBE_GC(NO_PTRS,stg_newPinnedByteArrayzh);
    n = R1;
    bytes = n;
    /* payload_words is what we will tell the profiler we had to allocate */
    payload_words = ROUNDUP_BYTES_TO_WDS(bytes);
    /* When we actually allocate memory, we need to allow space for the
       header: */
    bytes = bytes + SIZEOF_StgArrWords;
    /* And we want to align to BA_ALIGN bytes, so we need to allow space
       to shift up to BA_ALIGN - 1 bytes: */
    bytes = bytes + BA_ALIGN - 1;
    /* Now we convert to a number of words: */
    words = ROUNDUP_BYTES_TO_WDS(bytes);

    ("ptr" p) = foreign "C" allocatePinned(MyCapability() "ptr", words) [];
    TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0);

    /* Now we need to move p forward so that the payload is aligned
       to BA_ALIGN bytes: */
    p = p + ((-p - SIZEOF_StgArrWords) & BA_MASK);

    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
    StgArrWords_bytes(p) = n;
    RET_P(p);
}

104
/* Allocate a new pinned byte array with a caller-specified payload alignment.
 * Args:    R1 = requested size in bytes, R2 = alignment in bytes.
 * Returns: pointer to a fresh ARR_WORDS closure whose payload is aligned.
 */
stg_newAlignedPinnedByteArrayzh
{
    W_ words, n, bytes, payload_words, p, alignment;

    MAYBE_GC(NO_PTRS,stg_newAlignedPinnedByteArrayzh);
    n = R1;
    alignment = R2;

    /* we always supply at least word-aligned memory, so there's no
       need to allow extra space for alignment if the requirement is less
       than a word.  This also prevents mischief with alignment == 0. */
    if (alignment <= SIZEOF_W) { alignment = 1; }

    bytes = n;

    /* payload_words is what we will tell the profiler we had to allocate */
    payload_words = ROUNDUP_BYTES_TO_WDS(bytes);

    /* When we actually allocate memory, we need to allow space for the
       header: */
    bytes = bytes + SIZEOF_StgArrWords;
    /* And we want to align to <alignment> bytes, so we need to allow space
       to shift up to <alignment - 1> bytes: */
    bytes = bytes + alignment - 1;
    /* Now we convert to a number of words: */
    words = ROUNDUP_BYTES_TO_WDS(bytes);

    ("ptr" p) = foreign "C" allocatePinned(MyCapability() "ptr", words) [];
    TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0);

    /* Now we need to move p forward so that the payload is aligned
       to <alignment> bytes. Note that we are assuming that
       <alignment> is a power of 2, which is technically not guaranteed */
    p = p + ((-p - SIZEOF_StgArrWords) & (alignment - 1));

    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
    StgArrWords_bytes(p) = n;
    RET_P(p);
}

144
/* Allocate a new MUT_ARR_PTRS (boxed array), every element initialised to R2.
 * Args:    R1 = number of elements (words), R2 = initialisation value.
 * Returns: pointer to a fresh MUT_ARR_PTRS_DIRTY closure.
 */
stg_newArrayzh
{
    W_ words, n, init, arr, p, size;
    /* Args: R1 = words, R2 = initialisation value */

    n = R1;
    MAYBE_GC(R2_PTR,stg_newArrayzh);

    // the mark area contains one byte for each 2^MUT_ARR_PTRS_CARD_BITS words
    // in the array, making sure we round up, and then rounding up to a whole
    // number of words.
    size = n + mutArrPtrsCardWords(n);
    words = BYTES_TO_WDS(SIZEOF_StgMutArrPtrs) + size;
    ("ptr" arr) = foreign "C" allocate(MyCapability() "ptr",words) [R2];
    TICK_ALLOC_PRIM(SIZEOF_StgMutArrPtrs, WDS(n), 0);

    SET_HDR(arr, stg_MUT_ARR_PTRS_DIRTY_info, W_[CCCS]);
    StgMutArrPtrs_ptrs(arr) = n;
    StgMutArrPtrs_size(arr) = size;

    // Initialise all elements of the array with the value in R2
    init = R2;
    p = arr + SIZEOF_StgMutArrPtrs;
  for:
    if (p < arr + WDS(words)) {
	W_[p] = init;
	p = p + WDS(1);
	goto for;
    }
    // Initialise the mark bits with 0
    // NOTE(review): this loop looks unreachable -- after the loop above,
    // p == arr + WDS(words) == arr + SIZEOF_StgMutArrPtrs + WDS(size),
    // which is already >= arr + WDS(size), so the card bytes are left
    // holding `init` rather than 0.  Presumably harmless only because the
    // header is MUT_ARR_PTRS_DIRTY (all cards treated as dirty) -- confirm.
  for2:
    if (p < arr + WDS(size)) {
	W_[p] = 0;
	p = p + WDS(1);
	goto for2;
    }

    RET_P(arr);
}

184
/* Turn a frozen array (MUT_ARR_PTRS_FROZEN / _FROZEN0) back into a mutable
 * MUT_ARR_PTRS_DIRTY in place.
 * Args:    R1 = the array closure.
 * Returns: the same closure, now mutable.
 */
stg_unsafeThawArrayzh
{
  // SUBTLETY TO DO WITH THE OLD GEN MUTABLE LIST
  //
  // A MUT_ARR_PTRS lives on the mutable list, but a MUT_ARR_PTRS_FROZEN 
  // normally doesn't.  However, when we freeze a MUT_ARR_PTRS, we leave
  // it on the mutable list for the GC to remove (removing something from
  // the mutable list is not easy).
  // 
  // So that we can tell whether a MUT_ARR_PTRS_FROZEN is on the mutable list,
  // when we freeze it we set the info ptr to be MUT_ARR_PTRS_FROZEN0
  // to indicate that it is still on the mutable list.
  //
  // So, when we thaw a MUT_ARR_PTRS_FROZEN, we must cope with two cases:
  // either it is on a mut_list, or it isn't.  We adopt the convention that
  // the closure type is MUT_ARR_PTRS_FROZEN0 if it is on the mutable list,
  // and MUT_ARR_PTRS_FROZEN otherwise.  In fact it wouldn't matter if
  // we put it on the mutable list more than once, but it would get scavenged
  // multiple times during GC, which would be unnecessarily slow.
  //
  if (StgHeader_info(R1) != stg_MUT_ARR_PTRS_FROZEN0_info) {
	// Not currently on the mutable list: record it as mutable again.
	SET_INFO(R1,stg_MUT_ARR_PTRS_DIRTY_info);
	recordMutable(R1, R1);
	// must be done after SET_INFO, because it ASSERTs closure_MUTABLE()
	RET_P(R1);
  } else {
	// Already on the mutable list: just flip the info pointer back.
	SET_INFO(R1,stg_MUT_ARR_PTRS_DIRTY_info);
	RET_P(R1);
  }
}

/* -----------------------------------------------------------------------------
   MutVar primitives
   -------------------------------------------------------------------------- */

219
/* Allocate a new MutVar# holding the given initial value.
 * Args:    R1 = initialisation value.
 * Returns: pointer to a fresh MUT_VAR_DIRTY closure.
 */
stg_newMutVarzh
{
    W_ mv;
    /* Args: R1 = initialisation value */

    ALLOC_PRIM( SIZEOF_StgMutVar, R1_PTR, stg_newMutVarzh);

    mv = Hp - SIZEOF_StgMutVar + WDS(1);
    SET_HDR(mv,stg_MUT_VAR_DIRTY_info,W_[CCCS]);
    StgMutVar_var(mv) = R1;
    
    RET_P(mv);
}

233
/* Atomically modify a MutVar# with a pure function f :: a -> (a,b).
 * Builds the thunks for (f x), (sel_0 (f x)) and (sel_1 (f x)) up front,
 * then CASes the new contents in (THREADED_RTS) or stores directly.
 * Args:    R1 = MutVar#, R2 = f.
 * Returns: the thunk for (sel_1 (f x)).
 */
stg_atomicModifyMutVarzh
{
    W_ mv, f, z, x, y, r, h;
    /* Args: R1 :: MutVar#,  R2 :: a -> (a,b) */

    /* If x is the current contents of the MutVar#, then 
       We want to make the new contents point to

         (sel_0 (f x))
 
       and the return value is
	 
	 (sel_1 (f x))

        obviously we can share (f x).

         z = [stg_ap_2 f x]  (max (HS + 2) MIN_UPD_SIZE)
	 y = [stg_sel_0 z]   (max (HS + 1) MIN_UPD_SIZE)
         r = [stg_sel_1 z]   (max (HS + 1) MIN_UPD_SIZE)
    */

#if MIN_UPD_SIZE > 1
#define THUNK_1_SIZE (SIZEOF_StgThunkHeader + WDS(MIN_UPD_SIZE))
#define TICK_ALLOC_THUNK_1() TICK_ALLOC_UP_THK(WDS(1),WDS(MIN_UPD_SIZE-1))
#else
#define THUNK_1_SIZE (SIZEOF_StgThunkHeader + WDS(1))
#define TICK_ALLOC_THUNK_1() TICK_ALLOC_UP_THK(WDS(1),0)
#endif

#if MIN_UPD_SIZE > 2
#define THUNK_2_SIZE (SIZEOF_StgThunkHeader + WDS(MIN_UPD_SIZE))
#define TICK_ALLOC_THUNK_2() TICK_ALLOC_UP_THK(WDS(2),WDS(MIN_UPD_SIZE-2))
#else
#define THUNK_2_SIZE (SIZEOF_StgThunkHeader + WDS(2))
#define TICK_ALLOC_THUNK_2() TICK_ALLOC_UP_THK(WDS(2),0)
#endif

#define SIZE (THUNK_2_SIZE + THUNK_1_SIZE + THUNK_1_SIZE)

   HP_CHK_GEN_TICKY(SIZE, R1_PTR & R2_PTR, stg_atomicModifyMutVarzh);

   mv = R1;
   f = R2;

   // z = [stg_ap_2 f x]: the shared application thunk (x filled in below)
   TICK_ALLOC_THUNK_2();
   CCCS_ALLOC(THUNK_2_SIZE);
   z = Hp - THUNK_2_SIZE + WDS(1);
   SET_HDR(z, stg_ap_2_upd_info, W_[CCCS]);
   LDV_RECORD_CREATE(z);
   StgThunk_payload(z,0) = f;

   // y = [stg_sel_0 z]: the new contents of the MutVar#
   TICK_ALLOC_THUNK_1();
   CCCS_ALLOC(THUNK_1_SIZE);
   y = z - THUNK_1_SIZE;
   SET_HDR(y, stg_sel_0_upd_info, W_[CCCS]);
   LDV_RECORD_CREATE(y);
   StgThunk_payload(y,0) = z;

   // r = [stg_sel_1 z]: the value returned to the caller
   TICK_ALLOC_THUNK_1();
   CCCS_ALLOC(THUNK_1_SIZE);
   r = y - THUNK_1_SIZE;
   SET_HDR(r, stg_sel_1_upd_info, W_[CCCS]);
   LDV_RECORD_CREATE(r);
   StgThunk_payload(r,0) = z;

 retry:
   // Read the current contents, wire it into z, then try to swing the
   // MutVar# to y; loop if another thread raced us (THREADED_RTS only).
   x = StgMutVar_var(mv);
   StgThunk_payload(z,1) = x;
#ifdef THREADED_RTS
   (h) = foreign "C" cas(mv + SIZEOF_StgHeader + OFFSET_StgMutVar_var, x, y) [];
   if (h != x) { goto retry; }
#else
   StgMutVar_var(mv) = y;
#endif

   // Tell the GC the MutVar# now points into (possibly) younger generations.
   if (GET_INFO(mv) == stg_MUT_VAR_CLEAN_info) {
     foreign "C" dirty_MUT_VAR(BaseReg "ptr", mv "ptr") [];
   }

   RET_P(r);
}

/* -----------------------------------------------------------------------------
   Weak Pointer Primitives
   -------------------------------------------------------------------------- */

STRING(stg_weak_msg,"New weak pointer at %p\n")

321
/* Create a new Weak# with key R1, value R2 and (Haskell) finalizer R3,
 * and link it onto the global weak_ptr_list under sm_mutex.
 * Returns: the new Weak# closure.
 */
stg_mkWeakzh
{
  /* R1 = key
     R2 = value
     R3 = finalizer (or NULL)
  */
  W_ w;

  if (R3 == NULL) {
    R3 = stg_NO_FINALIZER_closure;
  }

  ALLOC_PRIM( SIZEOF_StgWeak, R1_PTR & R2_PTR & R3_PTR, stg_mkWeakzh );

  w = Hp - SIZEOF_StgWeak + WDS(1);
  SET_HDR(w, stg_WEAK_info, W_[CCCS]);

  // We don't care about cfinalizer here.
  // Should StgWeak_cfinalizer(w) be stg_NO_FINALIZER_closure or
  // something else?

  StgWeak_key(w)        = R1;
  StgWeak_value(w)      = R2;
  StgWeak_finalizer(w)  = R3;
  StgWeak_cfinalizer(w) = stg_NO_FINALIZER_closure;

  ACQUIRE_LOCK(sm_mutex);
  StgWeak_link(w)	= W_[weak_ptr_list];
  W_[weak_ptr_list] 	= w;
  RELEASE_LOCK(sm_mutex);

  IF_DEBUG(weak, foreign "C" debugBelch(stg_weak_msg,w) []);

  RET_P(w);
}

357
/* Create a new Weak# with a C (foreign) finalizer.  The C function pointer,
 * its argument, the optional environment pointer and the has-env flag are
 * packed into a 4-word ARR_WORDS stored in the cfinalizer field, and the
 * weak pointer is linked onto weak_ptr_list under sm_mutex.
 * Returns: the new Weak# closure.
 */
stg_mkWeakForeignEnvzh
{
  /* R1 = key
     R2 = value
     R3 = finalizer
     R4 = pointer
     R5 = has environment (0 or 1)
     R6 = environment
  */
  W_ w, payload_words, words, p;

  W_ key, val, fptr, ptr, flag, eptr;

  key  = R1;
  val  = R2;
  fptr = R3;
  ptr  = R4;
  flag = R5;
  eptr = R6;

  ALLOC_PRIM( SIZEOF_StgWeak, R1_PTR & R2_PTR, stg_mkWeakForeignEnvzh );

  w = Hp - SIZEOF_StgWeak + WDS(1);
  SET_HDR(w, stg_WEAK_info, W_[CCCS]);

  payload_words = 4;
  words         = BYTES_TO_WDS(SIZEOF_StgArrWords) + payload_words;
  ("ptr" p)     = foreign "C" allocate(MyCapability() "ptr", words) [];

  TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0);
  SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);

  StgArrWords_bytes(p)     = WDS(payload_words);
  StgArrWords_payload(p,0) = fptr;
  StgArrWords_payload(p,1) = ptr;
  StgArrWords_payload(p,2) = eptr;
  StgArrWords_payload(p,3) = flag;

  // We don't care about the value here.
  // Should StgWeak_value(w) be stg_NO_FINALIZER_closure or something else?

  StgWeak_key(w)        = key;
  StgWeak_value(w)      = val;
  StgWeak_finalizer(w)  = stg_NO_FINALIZER_closure;
  StgWeak_cfinalizer(w) = p;

  ACQUIRE_LOCK(sm_mutex);
  StgWeak_link(w)   = W_[weak_ptr_list];
  W_[weak_ptr_list] = w;
  RELEASE_LOCK(sm_mutex);

  IF_DEBUG(weak, foreign "C" debugBelch(stg_weak_msg,w) []);

  RET_P(w);
}
412

413
/* Finalize a Weak#: mark it dead, run its C finalizer (if any), and hand
 * the Haskell finalizer back to the caller.
 * Args:    R1 = weak ptr.
 * Returns: (0, NO_FINALIZER) if already dead or no Haskell finalizer,
 *          (1, finalizer) otherwise.
 */
stg_finalizzeWeakzh
{
  /* R1 = weak ptr
   */
  W_ w, f, arr;

  w = R1;

  // already dead?
  if (GET_INFO(w) == stg_DEAD_WEAK_info) {
      RET_NP(0,stg_NO_FINALIZER_closure);
  }

  // kill it
#ifdef PROFILING
  // @LDV profiling
  // A weak pointer is inherently used, so we do not need to call
  // LDV_recordDead_FILL_SLOP_DYNAMIC():
  //    LDV_recordDead_FILL_SLOP_DYNAMIC((StgClosure *)w);
  // or, LDV_recordDead():
  //    LDV_recordDead((StgClosure *)w, sizeofW(StgWeak) - sizeofW(StgProfHeader));
  // Furthermore, when PROFILING is turned on, dead weak pointers are exactly as 
  // large as weak pointers, so there is no need to fill the slop, either.
  // See stg_DEAD_WEAK_info in StgMiscClosures.hc.
#endif

  //
  // Todo: maybe use SET_HDR() and remove LDV_recordCreate()?
  //
  SET_INFO(w,stg_DEAD_WEAK_info);
  LDV_RECORD_CREATE(w);

  f   = StgWeak_finalizer(w);
  arr = StgWeak_cfinalizer(w);

  StgDeadWeak_link(w) = StgWeak_link(w);

  // Run the C finalizer now, if one was attached (see stg_mkWeakForeignEnvzh
  // for the layout of the 4-word payload: fptr, ptr, eptr, flag).
  if (arr != stg_NO_FINALIZER_closure) {
    foreign "C" runCFinalizer(StgArrWords_payload(arr,0),
                              StgArrWords_payload(arr,1),
                              StgArrWords_payload(arr,2),
                              StgArrWords_payload(arr,3)) [];
  }

  /* return the finalizer */
  if (f == stg_NO_FINALIZER_closure) {
      RET_NP(0,stg_NO_FINALIZER_closure);
  } else {
      RET_NP(1,f);
  }
}

465
/* Dereference a Weak#.
 * Args:    R1 = weak ptr.
 * Returns: (1, value) while the weak pointer is alive,
 *          (0, the Weak# itself) once it is dead.
 */
stg_deRefWeakzh
{
  /* R1 = weak ptr */
  W_ w, flag, val;

  w = R1;
  // A weak pointer is alive exactly while its info table is stg_WEAK_info.
  if (GET_INFO(w) != stg_WEAK_info) {
    // Dead: there is no value to return, so hand back the Weak# closure.
    flag = 0;
    val  = w;
  } else {
    flag = 1;
    val  = StgWeak_value(w);
  }
  RET_NP(flag,val);
}

/* -----------------------------------------------------------------------------
482
   Floating point operations.
483 484
   -------------------------------------------------------------------------- */

485
/* Decode a Float# into its integer mantissa and exponent via the C helper,
 * using two words of stack as scratch space for the out-parameters.
 * Args:    F1 = Float#.
 * Returns: (Int# mantissa, Int# exponent).
 */
stg_decodeFloatzuIntzh
{ 
    W_ p;
    F_ arg;
    W_ mp_tmp1;
    W_ mp_tmp_w;

    STK_CHK_GEN( WDS(2), NO_PTRS, stg_decodeFloatzuIntzh );

    // scratch slots just below Sp for the C out-parameters
    mp_tmp1  = Sp - WDS(1);
    mp_tmp_w = Sp - WDS(2);
    
    /* arguments: F1 = Float# */
    arg = F1;
    
    /* Perform the operation */
    foreign "C" __decodeFloat_Int(mp_tmp1 "ptr", mp_tmp_w "ptr", arg) [];
    
    /* returns: (Int# (mantissa), Int# (exponent)) */
    RET_NN(W_[mp_tmp1], W_[mp_tmp_w]);
}

507
/* Decode a Double# into sign, high/low mantissa words and exponent via the
 * C helper, using four words of stack as scratch for the out-parameters.
 * Args:    D1 = Double#.
 * Returns: (Int# mant sign, Word# mant high, Word# mant low, Int# expn).
 */
stg_decodeDoublezu2Intzh
{ 
    D_ arg;
    W_ p;
    W_ mp_tmp1;
    W_ mp_tmp2;
    W_ mp_result1;
    W_ mp_result2;

    STK_CHK_GEN( WDS(4), NO_PTRS, stg_decodeDoublezu2Intzh );

    // scratch slots just below Sp for the C out-parameters
    mp_tmp1    = Sp - WDS(1);
    mp_tmp2    = Sp - WDS(2);
    mp_result1 = Sp - WDS(3);
    mp_result2 = Sp - WDS(4);

    /* arguments: D1 = Double# */
    arg = D1;

    /* Perform the operation */
    foreign "C" __decodeDouble_2Int(mp_tmp1 "ptr", mp_tmp2 "ptr",
                                    mp_result1 "ptr", mp_result2 "ptr",
                                    arg) [];

    /* returns:
       (Int# (mant sign), Word# (mant high), Word# (mant low), Int# (expn)) */
    RET_NNNN(W_[mp_tmp1], W_[mp_tmp2], W_[mp_result1], W_[mp_result2]);
}

536 537 538 539
/* -----------------------------------------------------------------------------
 * Concurrency primitives
 * -------------------------------------------------------------------------- */

540
/* forkIO#: create a new IO thread running the given closure and schedule it
 * on the current capability.
 * Args:    R1 = closure to run.
 * Returns: the new ThreadId#.
 */
stg_forkzh
{
  /* args: R1 = closure to spark */

  MAYBE_GC(R1_PTR, stg_forkzh);

  W_ closure;
  W_ threadid;
  closure = R1;

  ("ptr" threadid) = foreign "C" createIOThread( MyCapability() "ptr", 
				RtsFlags_GcFlags_initialStkSize(RtsFlags), 
				closure "ptr") [];

  /* start blocked if the current thread is blocked */
  StgTSO_flags(threadid) = %lobits16(
     TO_W_(StgTSO_flags(threadid)) | 
     TO_W_(StgTSO_flags(CurrentTSO)) & (TSO_BLOCKEX | TSO_INTERRUPTIBLE));

  foreign "C" scheduleThread(MyCapability() "ptr", threadid "ptr") [];

  // context switch soon, but not immediately: we don't want every
  // forkIO to force a context-switch.
  Capability_context_switch(MyCapability()) = 1 :: CInt;
  
  RET_P(threadid);
}

568
/* forkOn#: create a new IO thread pinned to a specific capability.
 * Args:    R1 = capability number, R2 = closure to run.
 * Returns: the new ThreadId#.
 */
stg_forkOnzh
{
  /* args: R1 = cpu, R2 = closure to spark */

  MAYBE_GC(R2_PTR, stg_forkOnzh);

  W_ cpu;
  W_ closure;
  W_ threadid;
  cpu = R1;
  closure = R2;

  ("ptr" threadid) = foreign "C" createIOThread( MyCapability() "ptr", 
				RtsFlags_GcFlags_initialStkSize(RtsFlags), 
				closure "ptr") [];

  /* start blocked if the current thread is blocked */
  StgTSO_flags(threadid) = %lobits16(
     TO_W_(StgTSO_flags(threadid)) | 
     TO_W_(StgTSO_flags(CurrentTSO)) & (TSO_BLOCKEX | TSO_INTERRUPTIBLE));

  foreign "C" scheduleThreadOn(MyCapability() "ptr", cpu, threadid "ptr") [];

  // context switch soon, but not immediately: we don't want every
  // forkIO to force a context-switch.
  Capability_context_switch(MyCapability()) = 1 :: CInt;
  
  RET_P(threadid);
}

598
/* yield#: voluntarily give up the current thread's timeslice. */
stg_yieldzh
{
  jump stg_yield_noregs;
}

603
/* myThreadId#: return the current thread's TSO as a ThreadId#. */
stg_myThreadIdzh
{
  /* no args. */
  RET_P(CurrentTSO);
}

609
/* labelThread#: attach a debug label to a thread.
 * Only has an effect in the DEBUG RTS; otherwise a no-op.
 */
stg_labelThreadzh
{
  /* args: 
	R1 = ThreadId#
	R2 = Addr# */
#ifdef DEBUG
  foreign "C" labelThread(R1 "ptr", R2 "ptr") [];
#endif
  jump %ENTRY_CODE(Sp(0));
}

620
/* isCurrentThreadBound#: ask the RTS whether the current thread is a bound
 * (OS-thread-affine) thread.  Returns an Int# (0 or 1 from isThreadBound).
 */
stg_isCurrentThreadBoundzh
{
  /* no args */
  W_ r;
  (r) = foreign "C" isThreadBound(CurrentTSO) [];
  RET_N(r);
}

628
/* threadStatus#: report a thread's scheduling status.
 * Args:    R1 = ThreadId#.
 * Returns: (status code, capability number, locked flag) -- status codes
 *          match GHC.Conc.threadStatus (16 = complete, 17 = killed,
 *          otherwise the why_blocked value).
 */
stg_threadStatuszh
{
    /* args: R1 :: ThreadId# */
    W_ tso;
    W_ why_blocked;
    W_ what_next;
    W_ ret, cap, locked;

    tso = R1;

    what_next   = TO_W_(StgTSO_what_next(tso));
    why_blocked = TO_W_(StgTSO_why_blocked(tso));
    // Note: these two reads are not atomic, so they might end up
    // being inconsistent.  It doesn't matter, since we
    // only return one or the other.  If we wanted to return the
    // contents of block_info too, then we'd have to do some synchronisation.

    if (what_next == ThreadComplete) {
        ret = 16;  // NB. magic, matches up with GHC.Conc.threadStatus
    } else {
        if (what_next == ThreadKilled) {
            ret = 17;
        } else {
            ret = why_blocked;
        }
    }

    cap = TO_W_(Capability_no(StgTSO_cap(tso)));

    // locked = 1 when the thread is pinned to its capability (forkOn)
    if ((TO_W_(StgTSO_flags(tso)) & TSO_LOCKED) != 0) {
        locked = 1;
    } else {
        locked = 0;
    }

    RET_NNN(ret,cap,locked);
}
665 666 667 668 669 670 671 672 673

/* -----------------------------------------------------------------------------
 * TVar primitives
 * -------------------------------------------------------------------------- */

#define SP_OFF 0

// Catch retry frame ------------------------------------------------------------

674
/* Return frame for catchRetry#: commits the nested transaction on normal
 * return; if the commit fails, starts a fresh nested transaction and
 * re-executes whichever branch (first or alternative) was running.
 */
INFO_TABLE_RET(stg_catch_retry_frame, CATCH_RETRY_FRAME,
#if defined(PROFILING)
  W_ unused1, W_ unused2,
#endif
  W_ unused3, P_ unused4, P_ unused5)
{
   W_ r, frame, trec, outer;

   frame = Sp;
   trec = StgTSO_trec(CurrentTSO);
   outer  = StgTRecHeader_enclosing_trec(trec);
   (r) = foreign "C" stmCommitNestedTransaction(MyCapability() "ptr", trec "ptr") [];
   if (r != 0) {
     /* Succeeded (either first branch or second branch) */
     StgTSO_trec(CurrentTSO) = outer;
     Sp = Sp + SIZEOF_StgCatchRetryFrame;
     jump %ENTRY_CODE(Sp(SP_OFF));
   } else {
     /* Did not commit: re-execute */
     W_ new_trec;
     ("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") [];
     StgTSO_trec(CurrentTSO) = new_trec;
     if (StgCatchRetryFrame_running_alt_code(frame) != 0::I32) {
       R1 = StgCatchRetryFrame_alt_code(frame);
     } else {
       R1 = StgCatchRetryFrame_first_code(frame);
     }
     jump stg_ap_v_fast;
   }
}
704 705


Simon Marlow's avatar
Simon Marlow committed
706
// Atomically frame ------------------------------------------------------------
707

708
/* Return frame for atomically#: on each return, either picks up / advances
 * the queue of invariants still to check, or -- once the queue is empty --
 * attempts to commit the transaction, retrying from scratch if the commit
 * fails validation.
 */
INFO_TABLE_RET(stg_atomically_frame, ATOMICALLY_FRAME,
#if defined(PROFILING)
  W_ unused1, W_ unused2,
#endif
  P_ code, P_ next_invariant_to_check, P_ result)
{
  W_ frame, trec, valid, next_invariant, q, outer;

  frame  = Sp;
  trec   = StgTSO_trec(CurrentTSO);
  result = R1;
  outer  = StgTRecHeader_enclosing_trec(trec);

  if (outer == NO_TREC) {
    /* First time back at the atomically frame -- pick up invariants */
    ("ptr" q) = foreign "C" stmGetInvariantsToCheck(MyCapability() "ptr", trec "ptr") [];
    StgAtomicallyFrame_next_invariant_to_check(frame) = q;
    StgAtomicallyFrame_result(frame) = result;

  } else {
    /* Second/subsequent time back at the atomically frame -- abort the
     * tx that's checking the invariant and move on to the next one */
    StgTSO_trec(CurrentTSO) = outer;
    q = StgAtomicallyFrame_next_invariant_to_check(frame);
    StgInvariantCheckQueue_my_execution(q) = trec;
    foreign "C" stmAbortTransaction(MyCapability() "ptr", trec "ptr") [];
    /* Don't free trec -- it's linked from q and will be stashed in the
     * invariant if we eventually commit. */
    q = StgInvariantCheckQueue_next_queue_entry(q);
    StgAtomicallyFrame_next_invariant_to_check(frame) = q;
    trec = outer;
  }

  q = StgAtomicallyFrame_next_invariant_to_check(frame);

  if (q != END_INVARIANT_CHECK_QUEUE) {
    /* We can't commit yet: another invariant to check */
    ("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", trec "ptr") [];
    StgTSO_trec(CurrentTSO) = trec;

    next_invariant = StgInvariantCheckQueue_invariant(q);
    R1 = StgAtomicInvariant_code(next_invariant);
    jump stg_ap_v_fast;

  } else {

    /* We've got no more invariants to check, try to commit */
    (valid) = foreign "C" stmCommitTransaction(MyCapability() "ptr", trec "ptr") [];
    if (valid != 0) {
      /* Transaction was valid: commit succeeded */
      StgTSO_trec(CurrentTSO) = NO_TREC;
      R1 = StgAtomicallyFrame_result(frame);
      Sp = Sp + SIZEOF_StgAtomicallyFrame;
      jump %ENTRY_CODE(Sp(SP_OFF));
    } else {
      /* Transaction was not valid: try again */
      ("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", NO_TREC "ptr") [];
      StgTSO_trec(CurrentTSO) = trec;
      StgAtomicallyFrame_next_invariant_to_check(frame) = END_INVARIANT_CHECK_QUEUE;
      R1 = StgAtomicallyFrame_code(frame);
      jump stg_ap_v_fast;
    }
  }
}

773 774 775 776
/* Variant of the atomically frame used while the TSO is blocked on retry#:
 * when woken, decides (via stmReWait) whether to keep blocking or to flip
 * the frame back to stg_atomically_frame and re-run the transaction.
 */
INFO_TABLE_RET(stg_atomically_waiting_frame, ATOMICALLY_FRAME,
#if defined(PROFILING)
  W_ unused1, W_ unused2,
#endif
  P_ code, P_ next_invariant_to_check, P_ result)
{
  W_ frame, trec, valid;

  frame = Sp;

  /* The TSO is currently waiting: should we stop waiting? */
  (valid) = foreign "C" stmReWait(MyCapability() "ptr", CurrentTSO "ptr") [];
  if (valid != 0) {
    /* Previous attempt is still valid: no point trying again yet */
    jump stg_block_noregs;
  } else {
    /* Previous attempt is no longer valid: try again */
    ("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", NO_TREC "ptr") [];
    StgTSO_trec(CurrentTSO) = trec;
    StgHeader_info(frame) = stg_atomically_frame_info;
    R1 = StgAtomicallyFrame_code(frame);
    jump stg_ap_v_fast;
  }
}
797 798 799 800 801 802 803 804 805 806

// STM catch frame --------------------------------------------------------------

#define SP_OFF 0

/* Catch frames are very similar to update frames, but when entering
 * one we just pop the frame off the stack and perform the correct
 * kind of return to the activation record underneath us on the stack.
 */

807 808 809 810
/* Return frame for catchSTM#: commits the nested transaction on normal
 * return; if the commit fails, starts a fresh nested transaction and
 * re-runs the protected code.
 */
INFO_TABLE_RET(stg_catch_stm_frame, CATCH_STM_FRAME,
#if defined(PROFILING)
  W_ unused1, W_ unused2,
#endif
  P_ unused3, P_ unused4)
   {
      W_ r, frame, trec, outer;
      frame = Sp;
      trec = StgTSO_trec(CurrentTSO);
      outer  = StgTRecHeader_enclosing_trec(trec);
      (r) = foreign "C" stmCommitNestedTransaction(MyCapability() "ptr", trec "ptr") [];
      if (r != 0) {
        /* Commit succeeded */
        StgTSO_trec(CurrentTSO) = outer;
        Sp = Sp + SIZEOF_StgCatchSTMFrame;
        jump Sp(SP_OFF);
      } else {
        /* Commit failed */
        W_ new_trec;
        ("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") [];
        StgTSO_trec(CurrentTSO) = new_trec;
        R1 = StgCatchSTMFrame_code(frame);
        jump stg_ap_v_fast;
      }
   }
832 833 834 835


// Primop definition ------------------------------------------------------------

836
stg_atomicallyzh
837 838 839 840 841
{
  W_ frame;
  W_ old_trec;
  W_ new_trec;
  
842
  // stmStartTransaction may allocate
843
  MAYBE_GC (R1_PTR, stg_atomicallyzh); 
844

845
  /* Args: R1 = m :: STM a */
846
  STK_CHK_GEN(SIZEOF_StgAtomicallyFrame + WDS(1), R1_PTR, stg_atomicallyzh);
847

848 849 850 851
  old_trec = StgTSO_trec(CurrentTSO);

  /* Nested transactions are not allowed; raise an exception */
  if (old_trec != NO_TREC) {
852
     R1 = base_ControlziExceptionziBase_nestedAtomically_closure;
853
     jump stg_raisezh;
854 855
  }

856 857 858 859
  /* Set up the atomically frame */
  Sp = Sp - SIZEOF_StgAtomicallyFrame;
  frame = Sp;

860
  SET_HDR(frame,stg_atomically_frame_info, W_[CCCS]);
861
  StgAtomicallyFrame_code(frame) = R1;
862
  StgAtomicallyFrame_result(frame) = NO_TREC;
tharris@microsoft.com's avatar
tharris@microsoft.com committed
863
  StgAtomicallyFrame_next_invariant_to_check(frame) = END_INVARIANT_CHECK_QUEUE;
864 865

  /* Start the memory transcation */
866
  ("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", old_trec "ptr") [R1];
867 868 869
  StgTSO_trec(CurrentTSO) = new_trec;

  /* Apply R1 to the realworld token */
870
  jump stg_ap_v_fast;
871 872 873
}


874
stg_catchSTMzh
875 876 877 878 879
{
  W_ frame;
  
  /* Args: R1 :: STM a */
  /* Args: R2 :: Exception -> STM a */
880
  STK_CHK_GEN(SIZEOF_StgCatchSTMFrame + WDS(1), R1_PTR & R2_PTR, stg_catchSTMzh);
881 882 883 884 885

  /* Set up the catch frame */
  Sp = Sp - SIZEOF_StgCatchSTMFrame;
  frame = Sp;

886
  SET_HDR(frame, stg_catch_stm_frame_info, W_[CCCS]);
887
  StgCatchSTMFrame_handler(frame) = R2;
tharris@microsoft.com's avatar
tharris@microsoft.com committed
888 889 890 891 892 893
  StgCatchSTMFrame_code(frame) = R1;

  /* Start a nested transaction to run the body of the try block in */
  W_ cur_trec;  
  W_ new_trec;
  cur_trec = StgTSO_trec(CurrentTSO);
894
  ("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", cur_trec "ptr");
tharris@microsoft.com's avatar
tharris@microsoft.com committed
895
  StgTSO_trec(CurrentTSO) = new_trec;
896 897

  /* Apply R1 to the realworld token */
898
  jump stg_ap_v_fast;
899 900 901
}


902
stg_catchRetryzh
903 904 905 906 907
{
  W_ frame;
  W_ new_trec;
  W_ trec;

908
  // stmStartTransaction may allocate
909
  MAYBE_GC (R1_PTR & R2_PTR, stg_catchRetryzh); 
910

911 912
  /* Args: R1 :: STM a */
  /* Args: R2 :: STM a */
913
  STK_CHK_GEN(SIZEOF_StgCatchRetryFrame + WDS(1), R1_PTR & R2_PTR, stg_catchRetryzh);
914 915 916

  /* Start a nested transaction within which to run the first code */
  trec = StgTSO_trec(CurrentTSO);
917
  ("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", trec "ptr") [R1,R2];
918 919 920 921 922 923
  StgTSO_trec(CurrentTSO) = new_trec;

  /* Set up the catch-retry frame */
  Sp = Sp - SIZEOF_StgCatchRetryFrame;
  frame = Sp;
  
924
  SET_HDR(frame, stg_catch_retry_frame_info, W_[CCCS]);
925
  StgCatchRetryFrame_running_alt_code(frame) = 0 :: CInt; // false;
926 927 928 929
  StgCatchRetryFrame_first_code(frame) = R1;
  StgCatchRetryFrame_alt_code(frame) = R2;

  /* Apply R1 to the realworld token */
930
  jump stg_ap_v_fast;
931 932 933
}


934
stg_retryzh
935 936 937 938 939 940 941
{
  W_ frame_type;
  W_ frame;
  W_ trec;
  W_ outer;
  W_ r;

942
  MAYBE_GC (NO_PTRS, stg_retryzh); // STM operations may allocate
943 944 945

  // Find the enclosing ATOMICALLY_FRAME or CATCH_RETRY_FRAME
retry_pop_stack:
946 947 948
  SAVE_THREAD_STATE();
  (frame_type) = foreign "C" findRetryFrameHelper(MyCapability(), CurrentTSO "ptr") [];
  LOAD_THREAD_STATE();
949
  frame = Sp;
tharris@microsoft.com's avatar
tharris@microsoft.com committed
950
  trec = StgTSO_trec(CurrentTSO);
951
  outer  = StgTRecHeader_enclosing_trec(trec);
952 953 954 955

  if (frame_type == CATCH_RETRY_FRAME) {
    // The retry reaches a CATCH_RETRY_FRAME before the atomic frame
    ASSERT(outer != NO_TREC);
tharris@microsoft.com's avatar
tharris@microsoft.com committed
956 957 958
    // Abort the transaction attempting the current branch
    foreign "C" stmAbortTransaction(MyCapability() "ptr", trec "ptr") [];
    foreign "C" stmFreeAbortedTRec(MyCapability() "ptr", trec "ptr") [];
Simon Marlow's avatar
Simon Marlow committed
959
    if (!StgCatchRetryFrame_running_alt_code(frame) != 0::I32) {
tharris@microsoft.com's avatar
tharris@microsoft.com committed
960
      // Retry in the first branch: try the alternative
961
      ("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") [];
962
      StgTSO_trec(CurrentTSO) = trec;
963
      StgCatchRetryFrame_running_alt_code(frame) = 1 :: CInt; // true;
964
      R1 = StgCatchRetryFrame_alt_code(frame);
965
      jump stg_ap_v_fast;
966
    } else {
tharris@microsoft.com's avatar
tharris@microsoft.com committed
967 968 969 970
      // Retry in the alternative code: propagate the retry
      StgTSO_trec(CurrentTSO) = outer;
      Sp = Sp + SIZEOF_StgCatchRetryFrame;
      goto retry_pop_stack;
971 972 973 974 975
    }
  }

  // We've reached the ATOMICALLY_FRAME: attempt to wait 
  ASSERT(frame_type == ATOMICALLY_FRAME);
tharris@microsoft.com's avatar
tharris@microsoft.com committed
976 977 978 979 980 981 982
  if (outer != NO_TREC) {
    // We called retry while checking invariants, so abort the current
    // invariant check (merging its TVar accesses into the parents read
    // set so we'll wait on them)
    foreign "C" stmAbortTransaction(MyCapability() "ptr", trec "ptr") [];
    foreign "C" stmFreeAbortedTRec(MyCapability() "ptr", trec "ptr") [];
    trec = outer;
983
    StgTSO_trec(CurrentTSO) = trec;
984
    outer  = StgTRecHeader_enclosing_trec(trec);
tharris@microsoft.com's avatar
tharris@microsoft.com committed
985
  }
986
  ASSERT(outer == NO_TREC);
tharris@microsoft.com's avatar
tharris@microsoft.com committed
987

988
  (r) = foreign "C" stmWait(MyCapability() "ptr", CurrentTSO "ptr", trec "ptr") [];
Simon Marlow's avatar
Simon Marlow committed
989
  if (r != 0) {
990
    // Transaction was valid: stmWait put us on the TVars' queues, we now block
991
    StgHeader_info(frame) = stg_atomically_waiting_frame_info;
992
    Sp = frame;
993
    // Fix up the stack in the unregisterised case: the return convention is different.
994 995
    R3 = trec; // passing to stmWaitUnblock()
    jump stg_block_stmwait;
996 997
  } else {
    // Transaction was not valid: retry immediately
998
    ("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") [];
999 1000 1001
    StgTSO_trec(CurrentTSO) = trec;
    R1 = StgAtomicallyFrame_code(frame);
    Sp = frame;
1002
    jump stg_ap_v_fast;
1003 1004 1005 1006
  }
}


1007
stg_checkzh
tharris@microsoft.com's avatar
tharris@microsoft.com committed
1008 1009 1010 1011
{
  W_ trec, closure;

  /* Args: R1 = invariant closure */
1012
  MAYBE_GC (R1_PTR, stg_checkzh); 
tharris@microsoft.com's avatar
tharris@microsoft.com committed
1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023

  trec = StgTSO_trec(CurrentTSO);
  closure = R1;
  foreign "C" stmAddInvariantToCheck(MyCapability() "ptr", 
                                     trec "ptr",
                                     closure "ptr") [];

  jump %ENTRY_CODE(Sp(0));
}


1024
stg_newTVarzh
1025 1026
{
  W_ tv;
1027
  W_ new_value;
1028 1029 1030

  /* Args: R1 = initialisation value */

1031
  MAYBE_GC (R1_PTR, stg_newTVarzh); 
1032
  new_value = R1;
1033
  ("ptr" tv) = foreign "C" stmNewTVar(MyCapability() "ptr", new_value "ptr") [];
1034 1035 1036 1037
  RET_P(tv);
}


1038
stg_readTVarzh
1039 1040 1041 1042 1043 1044 1045
{
  W_ trec;
  W_ tvar;
  W_ result;

  /* Args: R1 = TVar closure */

1046
  MAYBE_GC (R1_PTR, stg_readTVarzh); // Call to stmReadTVar may allocate