/* -----------------------------------------------------------------------------
 *
 * (c) The GHC Team, 1998-2011
 *
 * Out-of-line primitive operations
 *
 * This file contains the implementations of all the primitive
 * operations ("primops") which are not expanded inline.  See
 * ghc/compiler/prelude/primops.txt.pp for a list of all the primops;
 * this file contains code for most of those with the attribute
 * out_of_line=True.
 *
 * Entry convention: the entry convention for a primop is that all the
 * args are in Stg registers (R1, R2, etc.).  This is to make writing
 * the primops easier.  (see compiler/codeGen/CgCallConv.hs).
 *
 * Return convention: results from a primop are generally returned
 * using the ordinary unboxed tuple return convention.  The C-- parser
 * implements the RET_xxxx() macros to perform unboxed-tuple returns
 * based on the prevailing return convention.
 *
 * This file is written in a subset of C--, extended with various
 * features specific to GHC.  It is compiled by GHC directly.  For the
 * syntax of .cmm files, see the parser in ghc/compiler/cmm/CmmParse.y.
 *
 * ---------------------------------------------------------------------------*/

#include "Cmm.h"

#ifdef __PIC__
import pthread_mutex_lock;
import pthread_mutex_unlock;
#endif
import base_ControlziExceptionziBase_nestedAtomically_closure;
import EnterCriticalSection;
import LeaveCriticalSection;
import ghczmprim_GHCziTypes_False_closure;
#if !defined(mingw32_HOST_OS)
import sm_mutex;
#endif

/*-----------------------------------------------------------------------------
  Array Primitives

  Basically just new*Array - the others are all inline macros.

  The size arg is always passed in R1, and the result returned in R1.

  The slow entry point is for returning from a heap check; the saved
  size argument must be re-loaded from the stack.
  -------------------------------------------------------------------------- */

/* for objects that are *less* than the size of a word, make sure we
 * round up to the nearest word for the size of the array.
 */

stg_newByteArrayzh
{
    W_ words, payload_words, n, p;
    MAYBE_GC(NO_PTRS,stg_newByteArrayzh);
    n = R1;
    payload_words = ROUNDUP_BYTES_TO_WDS(n);
    words = BYTES_TO_WDS(SIZEOF_StgArrWords) + payload_words;
    ("ptr" p) = foreign "C" allocate(MyCapability() "ptr",words) [];
    TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0);
    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
    StgArrWords_bytes(p) = n;
    RET_P(p);
}
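
/* A rough Haskell-level sketch of how this primop is usually reached
 * (GHC.Exts / GHC.IO names assumed; needs MagicHash and UnboxedTuples):
 *
 *   import GHC.Exts (Int (I#), newByteArray#)
 *   import GHC.IO (IO (IO))
 *
 *   newBA :: Int -> IO ()
 *   newBA (I# n) = IO (\s -> case newByteArray# n s of
 *                              (# s', _mba #) -> (# s', () #))
 */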

#define BA_ALIGN 16
#define BA_MASK  (BA_ALIGN-1)

stg_newPinnedByteArrayzh
{
    W_ words, n, bytes, payload_words, p;

    MAYBE_GC(NO_PTRS,stg_newPinnedByteArrayzh);
    n = R1;
    bytes = n;
    /* payload_words is what we will tell the profiler we had to allocate */
    payload_words = ROUNDUP_BYTES_TO_WDS(bytes);
    /* When we actually allocate memory, we need to allow space for the
       header: */
    bytes = bytes + SIZEOF_StgArrWords;
    /* And we want to align to BA_ALIGN bytes, so we need to allow space
       to shift up to BA_ALIGN - 1 bytes: */
    bytes = bytes + BA_ALIGN - 1;
    /* Now we convert to a number of words: */
    words = ROUNDUP_BYTES_TO_WDS(bytes);

    ("ptr" p) = foreign "C" allocatePinned(MyCapability() "ptr", words) [];
    TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0);

    /* Now we need to move p forward so that the payload is aligned
       to BA_ALIGN bytes: */
    p = p + ((-p - SIZEOF_StgArrWords) & BA_MASK);

    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
    StgArrWords_bytes(p) = n;
    RET_P(p);
}
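
/* A worked instance of the alignment arithmetic above, written in Haskell
 * for concreteness (BA_ALIGN = 16 and a 16-byte StgArrWords header are
 * assumed here purely for illustration):
 *
 *   import Data.Bits ((.&.))
 *
 *   -- advance p so that (p + hdr) is 16-byte aligned
 *   align :: Int -> Int -> Int
 *   align p hdr = p + ((negate p - hdr) .&. 15)
 *
 *   -- align 40 16 == 48, and 48 + 16 == 64 is a multiple of 16
 */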

stg_newAlignedPinnedByteArrayzh
{
    W_ words, n, bytes, payload_words, p, alignment;

    MAYBE_GC(NO_PTRS,stg_newAlignedPinnedByteArrayzh);
    n = R1;
    alignment = R2;

    /* we always supply at least word-aligned memory, so there's no
       need to allow extra space for alignment if the requirement is less
       than a word.  This also prevents mischief with alignment == 0. */
    if (alignment <= SIZEOF_W) { alignment = 1; }

    bytes = n;

    /* payload_words is what we will tell the profiler we had to allocate */
    payload_words = ROUNDUP_BYTES_TO_WDS(bytes);

    /* When we actually allocate memory, we need to allow space for the
       header: */
    bytes = bytes + SIZEOF_StgArrWords;
    /* And we want to align to <alignment> bytes, so we need to allow space
       to shift up to <alignment - 1> bytes: */
    bytes = bytes + alignment - 1;
    /* Now we convert to a number of words: */
    words = ROUNDUP_BYTES_TO_WDS(bytes);

    ("ptr" p) = foreign "C" allocatePinned(MyCapability() "ptr", words) [];
    TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0);

    /* Now we need to move p forward so that the payload is aligned
       to <alignment> bytes. Note that we are assuming that
       <alignment> is a power of 2, which is technically not guaranteed */
    p = p + ((-p - SIZEOF_StgArrWords) & (alignment - 1));

    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
    StgArrWords_bytes(p) = n;
    RET_P(p);
}
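
/* The (alignment - 1) mask above behaves as intended only for power-of-two
 * alignments; a quick Haskell check of that precondition (sketch only):
 *
 *   import Data.Bits ((.&.))
 *
 *   isPow2 :: Int -> Bool
 *   isPow2 a = a > 0 && a .&. (a - 1) == 0
 *
 *   -- isPow2 16 == True, isPow2 12 == False: a 12-byte alignment request
 *   -- would not be honoured exactly by the mask arithmetic
 */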

stg_newArrayzh
{
    W_ words, n, init, arr, p, size;
    /* Args: R1 = words, R2 = initialisation value */

    n = R1;
    MAYBE_GC(R2_PTR,stg_newArrayzh);

    // the mark area contains one byte for each 2^MUT_ARR_PTRS_CARD_BITS words
    // in the array, making sure we round up, and then rounding up to a whole
    // number of words.
    size = n + mutArrPtrsCardWords(n);
    words = BYTES_TO_WDS(SIZEOF_StgMutArrPtrs) + size;
    ("ptr" arr) = foreign "C" allocate(MyCapability() "ptr",words) [R2];
    TICK_ALLOC_PRIM(SIZEOF_StgMutArrPtrs, WDS(n), 0);

    SET_HDR(arr, stg_MUT_ARR_PTRS_DIRTY_info, W_[CCCS]);
    StgMutArrPtrs_ptrs(arr) = n;
    StgMutArrPtrs_size(arr) = size;

    // Initialise all elements of the array with the value in R2
    init = R2;
    p = arr + SIZEOF_StgMutArrPtrs;
  for:
    if (p < arr + WDS(words)) {
	W_[p] = init;
	p = p + WDS(1);
	goto for;
    }
    // Initialise the mark bits with 0
  for2:
    if (p < arr + WDS(size)) {
	W_[p] = 0;
	p = p + WDS(1);
	goto for2;
    }

    RET_P(arr);
}
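
/* A worked sizing example for the card area above, assuming
 * MUT_ARR_PTRS_CARD_BITS = 7 and an 8-byte word (Haskell sketch):
 *
 *   import Data.Bits (shiftR)
 *
 *   cardWords :: Int -> Int
 *   cardWords n = bytesToWords ((n + 127) `shiftR` 7)  -- one byte per 128 elements
 *     where bytesToWords b = (b + 7) `div` 8
 *
 *   -- cardWords 100 == 1, so a 100-element array carries one extra word of
 *   -- mark bytes after its payload
 */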

stg_unsafeThawArrayzh
{
  // SUBTLETY TO DO WITH THE OLD GEN MUTABLE LIST
  //
  // A MUT_ARR_PTRS lives on the mutable list, but a MUT_ARR_PTRS_FROZEN 
  // normally doesn't.  However, when we freeze a MUT_ARR_PTRS, we leave
  // it on the mutable list for the GC to remove (removing something from
  // the mutable list is not easy).
  // 
  // So that we can tell whether a MUT_ARR_PTRS_FROZEN is on the mutable list,
  // when we freeze it we set the info ptr to be MUT_ARR_PTRS_FROZEN0
  // to indicate that it is still on the mutable list.
  //
  // So, when we thaw a MUT_ARR_PTRS_FROZEN, we must cope with two cases:
  // either it is on a mut_list, or it isn't.  We adopt the convention that
  // the closure type is MUT_ARR_PTRS_FROZEN0 if it is on the mutable list,
  // and MUT_ARR_PTRS_FROZEN otherwise.  In fact it wouldn't matter if
  // we put it on the mutable list more than once, but it would get scavenged
  // multiple times during GC, which would be unnecessarily slow.
  //
  if (StgHeader_info(R1) != stg_MUT_ARR_PTRS_FROZEN0_info) {
	SET_INFO(R1,stg_MUT_ARR_PTRS_DIRTY_info);
	recordMutable(R1, R1);
	// must be done after SET_INFO, because it ASSERTs closure_MUTABLE()
	RET_P(R1);
  } else {
	SET_INFO(R1,stg_MUT_ARR_PTRS_DIRTY_info);
	RET_P(R1);
  }
}

/* -----------------------------------------------------------------------------
   MutVar primitives
   -------------------------------------------------------------------------- */

stg_newMutVarzh
{
    W_ mv;
    /* Args: R1 = initialisation value */

    ALLOC_PRIM( SIZEOF_StgMutVar, R1_PTR, stg_newMutVarzh);

    mv = Hp - SIZEOF_StgMutVar + WDS(1);
    SET_HDR(mv,stg_MUT_VAR_DIRTY_info,W_[CCCS]);
    StgMutVar_var(mv) = R1;
    
    RET_P(mv);
}

stg_casMutVarzh
 /* MutVar# s a -> a -> a -> State# s -> (# State#, Int#, a #) */
{
    W_ mv, old, new, h;

    mv  = R1;
    old = R2;
    new = R3;

    (h) = foreign "C" cas(mv + SIZEOF_StgHeader + OFFSET_StgMutVar_var,
                          old, new) [];
    if (h != old) {
        RET_NP(1,h);
    } else {
        RET_NP(0,h);
    }
}
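
/* A rough Haskell-level sketch of spinning on this primop until the swap
 * takes effect (GHC.Exts names assumed; 0# below means the CAS succeeded):
 *
 *   {-# LANGUAGE MagicHash, UnboxedTuples #-}
 *   import GHC.Exts (MutVar#, RealWorld, State#, casMutVar#)
 *
 *   casLoop :: MutVar# RealWorld a -> a -> a
 *           -> State# RealWorld -> State# RealWorld
 *   casLoop mv old new s =
 *     case casMutVar# mv old new s of
 *       (# s', 0#, _ #) -> s'                    -- swapped
 *       (# s', _,  h #) -> casLoop mv h new s'   -- lost the race, retry
 */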


stg_atomicModifyMutVarzh
{
    W_ mv, f, z, x, y, r, h;
    /* Args: R1 :: MutVar#,  R2 :: a -> (a,b) */

    /* If x is the current contents of the MutVar#, then
       we want to make the new contents point to

         (sel_0 (f x))
 
       and the return value is
	 
	 (sel_1 (f x))

        obviously we can share (f x).

         z = [stg_ap_2 f x]  (max (HS + 2) MIN_UPD_SIZE)
	 y = [stg_sel_0 z]   (max (HS + 1) MIN_UPD_SIZE)
         r = [stg_sel_1 z]   (max (HS + 1) MIN_UPD_SIZE)
    */

#if MIN_UPD_SIZE > 1
#define THUNK_1_SIZE (SIZEOF_StgThunkHeader + WDS(MIN_UPD_SIZE))
#define TICK_ALLOC_THUNK_1() TICK_ALLOC_UP_THK(WDS(1),WDS(MIN_UPD_SIZE-1))
#else
#define THUNK_1_SIZE (SIZEOF_StgThunkHeader + WDS(1))
#define TICK_ALLOC_THUNK_1() TICK_ALLOC_UP_THK(WDS(1),0)
#endif

#if MIN_UPD_SIZE > 2
#define THUNK_2_SIZE (SIZEOF_StgThunkHeader + WDS(MIN_UPD_SIZE))
#define TICK_ALLOC_THUNK_2() TICK_ALLOC_UP_THK(WDS(2),WDS(MIN_UPD_SIZE-2))
#else
#define THUNK_2_SIZE (SIZEOF_StgThunkHeader + WDS(2))
#define TICK_ALLOC_THUNK_2() TICK_ALLOC_UP_THK(WDS(2),0)
#endif

#define SIZE (THUNK_2_SIZE + THUNK_1_SIZE + THUNK_1_SIZE)

   HP_CHK_GEN_TICKY(SIZE, R1_PTR & R2_PTR, stg_atomicModifyMutVarzh);

   mv = R1;
   f = R2;

   TICK_ALLOC_THUNK_2();
   CCCS_ALLOC(THUNK_2_SIZE);
   z = Hp - THUNK_2_SIZE + WDS(1);
   SET_HDR(z, stg_ap_2_upd_info, W_[CCCS]);
   LDV_RECORD_CREATE(z);
   StgThunk_payload(z,0) = f;

   TICK_ALLOC_THUNK_1();
   CCCS_ALLOC(THUNK_1_SIZE);
   y = z - THUNK_1_SIZE;
   SET_HDR(y, stg_sel_0_upd_info, W_[CCCS]);
   LDV_RECORD_CREATE(y);
   StgThunk_payload(y,0) = z;

   TICK_ALLOC_THUNK_1();
   CCCS_ALLOC(THUNK_1_SIZE);
   r = y - THUNK_1_SIZE;
   SET_HDR(r, stg_sel_1_upd_info, W_[CCCS]);
   LDV_RECORD_CREATE(r);
   StgThunk_payload(r,0) = z;

 retry:
   x = StgMutVar_var(mv);
   StgThunk_payload(z,1) = x;
#ifdef THREADED_RTS
   (h) = foreign "C" cas(mv + SIZEOF_StgHeader + OFFSET_StgMutVar_var, x, y) [];
   if (h != x) { goto retry; }
#else
   StgMutVar_var(mv) = y;
#endif

   if (GET_INFO(mv) == stg_MUT_VAR_CLEAN_info) {
     foreign "C" dirty_MUT_VAR(BaseReg "ptr", mv "ptr") [];
   }

   RET_P(r);
}
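
/* The thunk structure built above corresponds, at the Haskell level, to the
 * atomicModifyIORef pattern: the new contents are (sel_0 (f x)) and the
 * result is (sel_1 (f x)), with (f x) shared between the two (sketch):
 *
 *   import Data.IORef (IORef, atomicModifyIORef)
 *
 *   -- returns the old value while atomically bumping the contents
 *   bump :: IORef Int -> IO Int
 *   bump ref = atomicModifyIORef ref (\x -> (x + 1, x))
 */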

/* -----------------------------------------------------------------------------
   Weak Pointer Primitives
   -------------------------------------------------------------------------- */

STRING(stg_weak_msg,"New weak pointer at %p\n")

stg_mkWeakzh
{
  /* R1 = key
     R2 = value
     R3 = finalizer (or NULL)
  */
  W_ w;

  if (R3 == NULL) {
    R3 = stg_NO_FINALIZER_closure;
  }

  ALLOC_PRIM( SIZEOF_StgWeak, R1_PTR & R2_PTR & R3_PTR, stg_mkWeakzh );

  w = Hp - SIZEOF_StgWeak + WDS(1);
  SET_HDR(w, stg_WEAK_info, W_[CCCS]);

  // We don't care about cfinalizer here.
  // Should StgWeak_cfinalizer(w) be stg_NO_FINALIZER_closure or
  // something else?

  StgWeak_key(w)        = R1;
  StgWeak_value(w)      = R2;
  StgWeak_finalizer(w)  = R3;
  StgWeak_cfinalizer(w) = stg_NO_FINALIZER_closure;

  ACQUIRE_LOCK(sm_mutex);
  StgWeak_link(w)	= W_[weak_ptr_list];
  W_[weak_ptr_list] 	= w;
  RELEASE_LOCK(sm_mutex);

  IF_DEBUG(weak, foreign "C" debugBelch(stg_weak_msg,w) []);

  RET_P(w);
}
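
/* The usual Haskell-level route into this primop is System.Mem.Weak
 * (sketch; the finalizer message is purely illustrative):
 *
 *   import System.Mem.Weak (mkWeak, deRefWeak)
 *
 *   demo :: IO ()
 *   demo = do
 *     let key = [1, 2, 3 :: Int]
 *     w <- mkWeak key () (Just (putStrLn "finalised"))
 *     deRefWeak w >>= print      -- Just () while key is still reachable
 */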

stg_mkWeakForeignEnvzh
{
  /* R1 = key
     R2 = value
     R3 = finalizer
     R4 = pointer
     R5 = has environment (0 or 1)
     R6 = environment
  */
  W_ w, payload_words, words, p;

  W_ key, val, fptr, ptr, flag, eptr;

  key  = R1;
  val  = R2;
  fptr = R3;
  ptr  = R4;
  flag = R5;
  eptr = R6;

  ALLOC_PRIM( SIZEOF_StgWeak, R1_PTR & R2_PTR, stg_mkWeakForeignEnvzh );

  w = Hp - SIZEOF_StgWeak + WDS(1);
  SET_HDR(w, stg_WEAK_info, W_[CCCS]);

  payload_words = 4;
  words         = BYTES_TO_WDS(SIZEOF_StgArrWords) + payload_words;
  ("ptr" p)     = foreign "C" allocate(MyCapability() "ptr", words) [];

  TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0);
  SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);

  StgArrWords_bytes(p)     = WDS(payload_words);
  StgArrWords_payload(p,0) = fptr;
  StgArrWords_payload(p,1) = ptr;
  StgArrWords_payload(p,2) = eptr;
  StgArrWords_payload(p,3) = flag;

  // We don't care about the value here.
  // Should StgWeak_value(w) be stg_NO_FINALIZER_closure or something else?

  StgWeak_key(w)        = key;
  StgWeak_value(w)      = val;
  StgWeak_finalizer(w)  = stg_NO_FINALIZER_closure;
  StgWeak_cfinalizer(w) = p;

  ACQUIRE_LOCK(sm_mutex);
  StgWeak_link(w)   = W_[weak_ptr_list];
  W_[weak_ptr_list] = w;
  RELEASE_LOCK(sm_mutex);

  IF_DEBUG(weak, foreign "C" debugBelch(stg_weak_msg,w) []);

  RET_P(w);
}

stg_finalizzeWeakzh
{
  /* R1 = weak ptr
   */
  W_ w, f, arr;

  w = R1;

  // already dead?
  if (GET_INFO(w) == stg_DEAD_WEAK_info) {
      RET_NP(0,stg_NO_FINALIZER_closure);
  }

  // kill it
#ifdef PROFILING
  // @LDV profiling
  // A weak pointer is inherently used, so we do not need to call
  // LDV_recordDead_FILL_SLOP_DYNAMIC():
  //    LDV_recordDead_FILL_SLOP_DYNAMIC((StgClosure *)w);
  // or, LDV_recordDead():
  //    LDV_recordDead((StgClosure *)w, sizeofW(StgWeak) - sizeofW(StgProfHeader));
  // Furthermore, when PROFILING is turned on, dead weak pointers are exactly as 
  // large as weak pointers, so there is no need to fill the slop, either.
  // See stg_DEAD_WEAK_info in StgMiscClosures.hc.
#endif

  //
  // Todo: maybe use SET_HDR() and remove LDV_recordCreate()?
  //
  SET_INFO(w,stg_DEAD_WEAK_info);
  LDV_RECORD_CREATE(w);

  f   = StgWeak_finalizer(w);
  arr = StgWeak_cfinalizer(w);

  StgDeadWeak_link(w) = StgWeak_link(w);

  if (arr != stg_NO_FINALIZER_closure) {
    foreign "C" runCFinalizer(StgArrWords_payload(arr,0),
                              StgArrWords_payload(arr,1),
                              StgArrWords_payload(arr,2),
                              StgArrWords_payload(arr,3)) [];
  }

  /* return the finalizer */
  if (f == stg_NO_FINALIZER_closure) {
      RET_NP(0,stg_NO_FINALIZER_closure);
  } else {
      RET_NP(1,f);
  }
}

stg_deRefWeakzh
{
  /* R1 = weak ptr */
  W_ w, code, val;

  w = R1;
  if (GET_INFO(w) == stg_WEAK_info) {
    code = 1;
    val = StgWeak_value(w);
  } else {
    code = 0;
    val = w;
  }
  RET_NP(code,val);
}

/* -----------------------------------------------------------------------------
   Floating point operations.
   -------------------------------------------------------------------------- */

stg_decodeFloatzuIntzh
{ 
    W_ p;
    F_ arg;
    W_ mp_tmp1;
    W_ mp_tmp_w;

    STK_CHK_GEN( WDS(2), NO_PTRS, stg_decodeFloatzuIntzh );

    mp_tmp1  = Sp - WDS(1);
    mp_tmp_w = Sp - WDS(2);
    
    /* arguments: F1 = Float# */
    arg = F1;
    
    /* Perform the operation */
    foreign "C" __decodeFloat_Int(mp_tmp1 "ptr", mp_tmp_w "ptr", arg) [];
    
    /* returns: (Int# (mantissa), Int# (exponent)) */
    RET_NN(W_[mp_tmp1], W_[mp_tmp_w]);
}
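
/* The Haskell-level counterpart is decodeFloat from the RealFloat class; a
 * quick check of the (mantissa, exponent) pair produced here (sketch):
 *
 *   main :: IO ()
 *   main = print (decodeFloat (0.5 :: Float))
 *   -- prints (8388608,-24), and indeed 8388608 * 2^(-24) == 0.5
 */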

stg_decodeDoublezu2Intzh
{ 
    D_ arg;
    W_ p;
    W_ mp_tmp1;
    W_ mp_tmp2;
    W_ mp_result1;
    W_ mp_result2;

    STK_CHK_GEN( WDS(4), NO_PTRS, stg_decodeDoublezu2Intzh );

    mp_tmp1    = Sp - WDS(1);
    mp_tmp2    = Sp - WDS(2);
    mp_result1 = Sp - WDS(3);
    mp_result2 = Sp - WDS(4);

    /* arguments: D1 = Double# */
    arg = D1;

    /* Perform the operation */
    foreign "C" __decodeDouble_2Int(mp_tmp1 "ptr", mp_tmp2 "ptr",
                                    mp_result1 "ptr", mp_result2 "ptr",
                                    arg) [];

    /* returns:
       (Int# (mant sign), Word# (mant high), Word# (mant low), Int# (expn)) */
    RET_NNNN(W_[mp_tmp1], W_[mp_tmp2], W_[mp_result1], W_[mp_result2]);
}

/* -----------------------------------------------------------------------------
 * Concurrency primitives
 * -------------------------------------------------------------------------- */

stg_forkzh
{
  /* args: R1 = closure to spark */

  MAYBE_GC(R1_PTR, stg_forkzh);

  W_ closure;
  W_ threadid;
  closure = R1;

  ("ptr" threadid) = foreign "C" createIOThread( MyCapability() "ptr", 
				RtsFlags_GcFlags_initialStkSize(RtsFlags), 
				closure "ptr") [];

  /* start blocked if the current thread is blocked */
  StgTSO_flags(threadid) = %lobits16(
     TO_W_(StgTSO_flags(threadid)) | 
     TO_W_(StgTSO_flags(CurrentTSO)) & (TSO_BLOCKEX | TSO_INTERRUPTIBLE));

  foreign "C" scheduleThread(MyCapability() "ptr", threadid "ptr") [];

  // context switch soon, but not immediately: we don't want every
  // forkIO to force a context-switch.
  Capability_context_switch(MyCapability()) = 1 :: CInt;
  
  RET_P(threadid);
}
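
/* Copying TSO_BLOCKEX/TSO_INTERRUPTIBLE above is what makes a forkIO'd
 * thread start with its parent's masking state (Haskell sketch):
 *
 *   import Control.Concurrent (forkIO, threadDelay)
 *   import Control.Exception (mask_, getMaskingState)
 *
 *   main :: IO ()
 *   main = mask_ $ do
 *     _ <- forkIO (getMaskingState >>= print)   -- MaskedInterruptible
 *     threadDelay 100000                        -- let the child run
 */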

stg_forkOnzh
{
  /* args: R1 = cpu, R2 = closure to spark */

  MAYBE_GC(R2_PTR, stg_forkOnzh);

  W_ cpu;
  W_ closure;
  W_ threadid;
  cpu = R1;
  closure = R2;

  ("ptr" threadid) = foreign "C" createIOThread( MyCapability() "ptr", 
				RtsFlags_GcFlags_initialStkSize(RtsFlags), 
				closure "ptr") [];

  /* start blocked if the current thread is blocked */
  StgTSO_flags(threadid) = %lobits16(
     TO_W_(StgTSO_flags(threadid)) | 
     TO_W_(StgTSO_flags(CurrentTSO)) & (TSO_BLOCKEX | TSO_INTERRUPTIBLE));

  foreign "C" scheduleThreadOn(MyCapability() "ptr", cpu, threadid "ptr") [];

  // context switch soon, but not immediately: we don't want every
  // forkIO to force a context-switch.
  Capability_context_switch(MyCapability()) = 1 :: CInt;
  
  RET_P(threadid);
}

stg_yieldzh
{
  jump stg_yield_noregs;
}

stg_myThreadIdzh
{
  /* no args. */
  RET_P(CurrentTSO);
}

stg_labelThreadzh
{
  /* args: 
	R1 = ThreadId#
	R2 = Addr# */
#if defined(DEBUG) || defined(TRACING) || defined(DTRACE)
  foreign "C" labelThread(MyCapability() "ptr", R1 "ptr", R2 "ptr") [];
#endif
  jump %ENTRY_CODE(Sp(0));
}

stg_isCurrentThreadBoundzh
{
  /* no args */
  W_ r;
  (r) = foreign "C" isThreadBound(CurrentTSO) [];
  RET_N(r);
}

stg_threadStatuszh
{
    /* args: R1 :: ThreadId# */
    W_ tso;
    W_ why_blocked;
    W_ what_next;
    W_ ret, cap, locked;

    tso = R1;

    what_next   = TO_W_(StgTSO_what_next(tso));
    why_blocked = TO_W_(StgTSO_why_blocked(tso));
    // Note: these two reads are not atomic, so they might end up
    // being inconsistent.  It doesn't matter, since we
    // only return one or the other.  If we wanted to return the
    // contents of block_info too, then we'd have to do some synchronisation.

    if (what_next == ThreadComplete) {
        ret = 16;  // NB. magic, matches up with GHC.Conc.threadStatus
    } else {
        if (what_next == ThreadKilled) {
            ret = 17;
        } else {
            ret = why_blocked;
        }
    }

    cap = TO_W_(Capability_no(StgTSO_cap(tso)));

    if ((TO_W_(StgTSO_flags(tso)) & TSO_LOCKED) != 0) {
        locked = 1;
    } else {
        locked = 0;
    }

    RET_NNN(ret,cap,locked);
}
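
/* The 16/17 values above are decoded by GHC.Conc.threadStatus on the
 * Haskell side (sketch; the printed status depends on timing):
 *
 *   import Control.Concurrent (forkIO, threadDelay)
 *   import GHC.Conc (threadStatus)
 *
 *   main :: IO ()
 *   main = do
 *     t <- forkIO (threadDelay 1000000)
 *     threadDelay 1000
 *     threadStatus t >>= print   -- e.g. ThreadBlocked BlockedOnMVar
 */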

/* -----------------------------------------------------------------------------
 * TVar primitives
 * -------------------------------------------------------------------------- */

#define SP_OFF 0

// Catch retry frame ------------------------------------------------------------

INFO_TABLE_RET(stg_catch_retry_frame, CATCH_RETRY_FRAME,
#if defined(PROFILING)
  W_ unused1, W_ unused2,
#endif
  W_ unused3, P_ unused4, P_ unused5)
{
   W_ r, frame, trec, outer;

   frame = Sp;
   trec = StgTSO_trec(CurrentTSO);
   outer  = StgTRecHeader_enclosing_trec(trec);
   (r) = foreign "C" stmCommitNestedTransaction(MyCapability() "ptr", trec "ptr") [];
   if (r != 0) {
     /* Succeeded (either first branch or second branch) */
     StgTSO_trec(CurrentTSO) = outer;
     Sp = Sp + SIZEOF_StgCatchRetryFrame;
     jump %ENTRY_CODE(Sp(SP_OFF));
   } else {
     /* Did not commit: re-execute */
     W_ new_trec;
     ("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") [];
     StgTSO_trec(CurrentTSO) = new_trec;
     if (StgCatchRetryFrame_running_alt_code(frame) != 0::I32) {
       R1 = StgCatchRetryFrame_alt_code(frame);
     } else {
       R1 = StgCatchRetryFrame_first_code(frame);
     }
     jump stg_ap_v_fast;
   }
}


// Atomically frame ------------------------------------------------------------

INFO_TABLE_RET(stg_atomically_frame, ATOMICALLY_FRAME,
#if defined(PROFILING)
  W_ unused1, W_ unused2,
#endif
  P_ code, P_ next_invariant_to_check, P_ result)
{
  W_ frame, trec, valid, next_invariant, q, outer;

  frame  = Sp;
  trec   = StgTSO_trec(CurrentTSO);
  result = R1;
  outer  = StgTRecHeader_enclosing_trec(trec);

  if (outer == NO_TREC) {
    /* First time back at the atomically frame -- pick up invariants */
    ("ptr" q) = foreign "C" stmGetInvariantsToCheck(MyCapability() "ptr", trec "ptr") [];
    StgAtomicallyFrame_next_invariant_to_check(frame) = q;
    StgAtomicallyFrame_result(frame) = result;

  } else {
    /* Second/subsequent time back at the atomically frame -- abort the
     * tx that's checking the invariant and move on to the next one */
    StgTSO_trec(CurrentTSO) = outer;
    q = StgAtomicallyFrame_next_invariant_to_check(frame);
    StgInvariantCheckQueue_my_execution(q) = trec;
    foreign "C" stmAbortTransaction(MyCapability() "ptr", trec "ptr") [];
    /* Don't free trec -- it's linked from q and will be stashed in the
     * invariant if we eventually commit. */
    q = StgInvariantCheckQueue_next_queue_entry(q);
    StgAtomicallyFrame_next_invariant_to_check(frame) = q;
    trec = outer;
  }

  q = StgAtomicallyFrame_next_invariant_to_check(frame);

  if (q != END_INVARIANT_CHECK_QUEUE) {
    /* We can't commit yet: another invariant to check */
    ("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", trec "ptr") [];
    StgTSO_trec(CurrentTSO) = trec;

    next_invariant = StgInvariantCheckQueue_invariant(q);
    R1 = StgAtomicInvariant_code(next_invariant);
    jump stg_ap_v_fast;

  } else {

    /* We've got no more invariants to check, try to commit */
    (valid) = foreign "C" stmCommitTransaction(MyCapability() "ptr", trec "ptr") [];
    if (valid != 0) {
      /* Transaction was valid: commit succeeded */
      StgTSO_trec(CurrentTSO) = NO_TREC;
      R1 = StgAtomicallyFrame_result(frame);
      Sp = Sp + SIZEOF_StgAtomicallyFrame;
      jump %ENTRY_CODE(Sp(SP_OFF));
    } else {
      /* Transaction was not valid: try again */
      ("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", NO_TREC "ptr") [];
      StgTSO_trec(CurrentTSO) = trec;
      StgAtomicallyFrame_next_invariant_to_check(frame) = END_INVARIANT_CHECK_QUEUE;
      R1 = StgAtomicallyFrame_code(frame);
      jump stg_ap_v_fast;
    }
  }
}

INFO_TABLE_RET(stg_atomically_waiting_frame, ATOMICALLY_FRAME,
#if defined(PROFILING)
  W_ unused1, W_ unused2,
#endif
  P_ code, P_ next_invariant_to_check, P_ result)
{
  W_ frame, trec, valid;

  frame = Sp;

  /* The TSO is currently waiting: should we stop waiting? */
  (valid) = foreign "C" stmReWait(MyCapability() "ptr", CurrentTSO "ptr") [];
  if (valid != 0) {
    /* Previous attempt is still valid: no point trying again yet */
    jump stg_block_noregs;
  } else {
    /* Previous attempt is no longer valid: try again */
    ("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", NO_TREC "ptr") [];
    StgTSO_trec(CurrentTSO) = trec;
    StgHeader_info(frame) = stg_atomically_frame_info;
    R1 = StgAtomicallyFrame_code(frame);
    jump stg_ap_v_fast;
  }
}

// STM catch frame --------------------------------------------------------------

#define SP_OFF 0

/* Catch frames are very similar to update frames, but when entering
 * one we just pop the frame off the stack and perform the correct
 * kind of return to the activation record underneath us on the stack.
 */

INFO_TABLE_RET(stg_catch_stm_frame, CATCH_STM_FRAME,
#if defined(PROFILING)
  W_ unused1, W_ unused2,
#endif
  P_ unused3, P_ unused4)
   {
      W_ r, frame, trec, outer;
      frame = Sp;
      trec = StgTSO_trec(CurrentTSO);
      outer  = StgTRecHeader_enclosing_trec(trec);
      (r) = foreign "C" stmCommitNestedTransaction(MyCapability() "ptr", trec "ptr") [];
      if (r != 0) {
        /* Commit succeeded */
        StgTSO_trec(CurrentTSO) = outer;
        Sp = Sp + SIZEOF_StgCatchSTMFrame;
        jump Sp(SP_OFF);
      } else {
        /* Commit failed */
        W_ new_trec;
        ("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") [];
        StgTSO_trec(CurrentTSO) = new_trec;
        R1 = StgCatchSTMFrame_code(frame);
        jump stg_ap_v_fast;
      }
   }


// Primop definition ------------------------------------------------------------

stg_atomicallyzh
{
  W_ frame;
  W_ old_trec;
  W_ new_trec;
  
  // stmStartTransaction may allocate
  MAYBE_GC (R1_PTR, stg_atomicallyzh); 

  /* Args: R1 = m :: STM a */
  STK_CHK_GEN(SIZEOF_StgAtomicallyFrame + WDS(1), R1_PTR, stg_atomicallyzh);

  old_trec = StgTSO_trec(CurrentTSO);

  /* Nested transactions are not allowed; raise an exception */
  if (old_trec != NO_TREC) {
     R1 = base_ControlziExceptionziBase_nestedAtomically_closure;
     jump stg_raisezh;
  }

  /* Set up the atomically frame */
  Sp = Sp - SIZEOF_StgAtomicallyFrame;
  frame = Sp;

  SET_HDR(frame,stg_atomically_frame_info, W_[CCCS]);
  StgAtomicallyFrame_code(frame) = R1;
  StgAtomicallyFrame_result(frame) = NO_TREC;
  StgAtomicallyFrame_next_invariant_to_check(frame) = END_INVARIANT_CHECK_QUEUE;

  /* Start the memory transaction */
  ("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", old_trec "ptr") [R1];
  StgTSO_trec(CurrentTSO) = new_trec;

  /* Apply R1 to the realworld token */
  jump stg_ap_v_fast;
}
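
/* Haskell-level view: 'atomically' from Control.Concurrent.STM enters here,
 * and calling it again from inside a transaction raises the
 * nestedAtomically exception installed above (sketch):
 *
 *   import Control.Concurrent.STM
 *
 *   main :: IO ()
 *   main = do
 *     v <- atomically (newTVar (0 :: Int))
 *     atomically (readTVar v >>= writeTVar v . (+ 1))
 *     atomically (readTVar v) >>= print   -- 1
 */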


stg_catchSTMzh
{
  W_ frame;
  
  /* Args: R1 :: STM a */
  /* Args: R2 :: Exception -> STM a */
  STK_CHK_GEN(SIZEOF_StgCatchSTMFrame + WDS(1), R1_PTR & R2_PTR, stg_catchSTMzh);

  /* Set up the catch frame */
  Sp = Sp - SIZEOF_StgCatchSTMFrame;
  frame = Sp;

  SET_HDR(frame, stg_catch_stm_frame_info, W_[CCCS]);
  StgCatchSTMFrame_handler(frame) = R2;
  StgCatchSTMFrame_code(frame) = R1;

  /* Start a nested transaction to run the body of the try block in */
  W_ cur_trec;  
  W_ new_trec;
  cur_trec = StgTSO_trec(CurrentTSO);
  ("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", cur_trec "ptr");
  StgTSO_trec(CurrentTSO) = new_trec;

  /* Apply R1 to the realworld token */
  jump stg_ap_v_fast;
}


stg_catchRetryzh
{
  W_ frame;
  W_ new_trec;
  W_ trec;

  // stmStartTransaction may allocate
  MAYBE_GC (R1_PTR & R2_PTR, stg_catchRetryzh); 

  /* Args: R1 :: STM a */
  /* Args: R2 :: STM a */
  STK_CHK_GEN(SIZEOF_StgCatchRetryFrame + WDS(1), R1_PTR & R2_PTR, stg_catchRetryzh);

  /* Start a nested transaction within which to run the first code */
  trec = StgTSO_trec(CurrentTSO);
  ("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", trec "ptr") [R1,R2];
  StgTSO_trec(CurrentTSO) = new_trec;

  /* Set up the catch-retry frame */
  Sp = Sp - SIZEOF_StgCatchRetryFrame;
  frame = Sp;
  
  SET_HDR(frame, stg_catch_retry_frame_info, W_[CCCS]);
  StgCatchRetryFrame_running_alt_code(frame) = 0 :: CInt; // false;
  StgCatchRetryFrame_first_code(frame) = R1;
  StgCatchRetryFrame_alt_code(frame) = R2;

  /* Apply R1 to the realworld token */
  jump stg_ap_v_fast;
}
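
/* This primop is what STM's 'orElse' runs on: try the first action, and if
 * it calls 'retry', abandon that nested transaction and run the second
 * (Haskell sketch):
 *
 *   import Control.Concurrent.STM
 *
 *   takeEither :: TMVar a -> TMVar a -> STM a
 *   takeEither a b = takeTMVar a `orElse` takeTMVar b
 */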


stg_retryzh
{
  W_ frame_type;
  W_ frame;
  W_ trec;
  W_ outer;
  W_ r;

  MAYBE_GC (NO_PTRS, stg_retryzh); // STM operations may allocate

  // Find the enclosing ATOMICALLY_FRAME or CATCH_RETRY_FRAME
retry_pop_stack:
  SAVE_THREAD_STATE();
  (frame_type) = foreign "C" findRetryFrameHelper(MyCapability(), CurrentTSO "ptr") [];
  LOAD_THREAD_STATE();
  frame = Sp;
  trec = StgTSO_trec(CurrentTSO);
  outer  = StgTRecHeader_enclosing_trec(trec);

  if (frame_type == CATCH_RETRY_FRAME) {
    // The retry reaches a CATCH_RETRY_FRAME before the atomic frame
    ASSERT(outer != NO_TREC);
    // Abort the transaction attempting the current branch
    foreign "C" stmAbortTransaction(MyCapability() "ptr", trec "ptr") [];
    foreign "C" stmFreeAbortedTRec(MyCapability() "ptr", trec "ptr") [];
    if (!StgCatchRetryFrame_running_alt_code(frame) != 0::I32) {
      // Retry in the first branch: try the alternative
      ("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") [];
      StgTSO_trec(CurrentTSO) = trec;
      StgCatchRetryFrame_running_alt_code(frame) = 1 :: CInt; // true;
      R1 = StgCatchRetryFrame_alt_code(frame);
      jump stg_ap_v_fast;
    } else {
      // Retry in the alternative code: propagate the retry
      StgTSO_trec(CurrentTSO) = outer;
      Sp = Sp + SIZEOF_StgCatchRetryFrame;
      goto retry_pop_stack;
    }
  }

  // We've reached the ATOMICALLY_FRAME: attempt to wait 
  ASSERT(frame_type == ATOMICALLY_FRAME);
  if (outer != NO_TREC) {
    // We called retry while checking invariants, so abort the current
    // invariant check (merging its TVar accesses into the parent's read
    // set so we'll wait on them)
    foreign "C" stmAbortTransaction(MyCapability() "ptr", trec "ptr") [];
    foreign "C" stmFreeAbortedTRec(MyCapability() "ptr", trec "ptr") [];
    trec = outer;
    StgTSO_trec(CurrentTSO) = trec;
    outer  = StgTRecHeader_enclosing_trec(trec);
  }
  ASSERT(outer == NO_TREC);

  (r) = foreign "C" stmWait(MyCapability() "ptr", CurrentTSO "ptr", trec "ptr") [];
  if (r != 0) {
    // Transaction was valid: stmWait put us on the TVars' queues, we now block
    StgHeader_info(frame) = stg_atomically_waiting_frame_info;
    Sp = frame;
    // Fix up the stack in the unregisterised case: the return convention is different.
    R3 = trec; // passing to stmWaitUnblock()
    jump stg_block_stmwait;
  } else {
    // Transaction was not valid: retry immediately
    ("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") [];
    StgTSO_trec(CurrentTSO) = trec;
    R1 = StgAtomicallyFrame_code(frame);
    Sp = frame;
    jump stg_ap_v_fast;
  }
}


stg_checkzh
{
  W_ trec, closure;

  /* Args: R1 = invariant closure */
  MAYBE_GC (R1_PTR, stg_checkzh); 

  trec = StgTSO_trec(CurrentTSO);
  closure = R1;
  foreign "C" stmAddInvariantToCheck(MyCapability() "ptr", 
                                     trec "ptr",
                                     closure "ptr") [];

  jump %ENTRY_CODE(Sp(0));
}


stg_newTVarzh
{
  W_ tv;
  W_ new_value;

  /* Args: R1 = initialisation value */

  MAYBE_GC (R1_PTR, stg_newTVarzh); 
  new_value = R1;