/* -----------------------------------------------------------------------------
 *
 * (c) The GHC Team, 1998-2004
 *
 * Canned Heap-Check and Stack-Check sequences.
 *
 * This file is written in a subset of C--, extended with various
 * features specific to GHC.  It is compiled by GHC directly.  For the
 * syntax of .cmm files, see the parser in ghc/compiler/GHC/Cmm/Parser.y.
 *
 * ---------------------------------------------------------------------------*/

#include "Cmm.h"
#include "Updates.h"
#include "SMPClosureOps.h"

#if defined(__PIC__)
import pthread_mutex_unlock;
#endif
import EnterCriticalSection;
import LeaveCriticalSection;

/* Stack/Heap Check Failure
 * ------------------------
 *
 * Both heap and stack check failures end up in the same place, so
 * that we can share the code for the failure case when a proc needs
 * both a stack check and a heap check (a common case).
 *
 * So when we get here, we have to tell the difference between a stack
 * check failure and a heap check failure.  The code for the checks
 * looks like this:

        if (Sp - 16 < SpLim) goto c1Tf;
        Hp = Hp + 16;
        if (Hp > HpLim) goto c1Th;
        ...
    c1Th:
        HpAlloc = 16;
        goto c1Tf;
    c1Tf: jump stg_gc_enter_1 ();

 * Note that Sp is not decremented by the check, whereas Hp is.  The
 * reasons for this seem to be largely historic; I can't think of a
 * good reason not to decrement Sp at the check too. (--SDM)
 *
 * Note that HpLim may be set to zero arbitrarily by the timer signal
 * or another processor to trigger a context switch via heap check
 * failure.
 *
 * The job of these fragments (stg_gc_enter_1 and friends) is to
 *   1. Leave no slop in the heap, so Hp must be retreated if it was
 *      incremented by the check.  No-slop is a requirement for LDV
 *      profiling, at least.
 *   2. If a heap check failed, try to grab another heap block from
 *      the nursery and continue.
 *   3. Otherwise, return to the scheduler with StackOverflow,
 *      HeapOverflow, or ThreadYielding as appropriate.
 *
 * We can tell whether Hp was incremented, because HpAlloc is
 * non-zero: HpAlloc is required to be zero at all times unless a
 * heap-check just failed, which is why the stack-check failure case
 * does not set HpAlloc (see code fragment above).  So that covers (1).
 * HpAlloc is zeroed in LOAD_THREAD_STATE().
 *
 * If Hp > HpLim, then either (a) we have reached the end of the
 * current heap block, or (b) HpLim == 0 and we should yield.  Hence
 * check Hp > HpLim first, and then HpLim == 0 to decide whether to
 * return ThreadYielding or try to grab another heap block from the
 * nursery.
 *
 * If Hp <= HpLim, then this must be a StackOverflow.  The scheduler
 * will either increase the size of our stack, or raise an exception if
 * the stack is already too big.
 */

#define PRE_RETURN(why,what_next)                       \
  StgTSO_what_next(CurrentTSO) = what_next::I16;        \
  StgRegTable_rRet(BaseReg) = why;                      \
  R1 = BaseReg;

/* Remember that the return address is *removed* when returning to a
 * ThreadRunGHC thread.
 */

stg_gc_noregs
{
    W_ ret;

    DEBUG_ONLY(foreign "C" heapCheckFail());
    if (Hp > HpLim) {
        Hp = Hp - HpAlloc/*in bytes*/;
        if (HpLim == 0) {
                ret = ThreadYielding;
                goto sched;
        }
        if (HpAlloc <= BLOCK_SIZE
            && bdescr_link(CurrentNursery) != NULL) {
            HpAlloc = 0;
            CLOSE_NURSERY();
            Capability_total_allocated(MyCapability()) =
              Capability_total_allocated(MyCapability()) +
              BYTES_TO_WDS(bdescr_free(CurrentNursery) -
                           bdescr_start(CurrentNursery));
            CurrentNursery = bdescr_link(CurrentNursery);
            bdescr_free(CurrentNursery) = bdescr_start(CurrentNursery);
            OPEN_NURSERY();
            if (Capability_context_switch(MyCapability()) != 0 :: CInt ||
                Capability_interrupt(MyCapability())      != 0 :: CInt ||
                (StgTSO_alloc_limit(CurrentTSO) `lt` (0::I64) &&
                 (TO_W_(StgTSO_flags(CurrentTSO)) & TSO_ALLOC_LIMIT) != 0)) {
                ret = ThreadYielding;
                goto sched;
            } else {
                jump %ENTRY_CODE(Sp(0)) [];
            }
        } else {
            ret = HeapOverflow;
            goto sched;
        }
    } else {
        if (CHECK_GC()) {
            ret = HeapOverflow;
        } else {
            ret = StackOverflow;
        }
    }
  sched:
    PRE_RETURN(ret,ThreadRunGHC);
    jump stg_returnToSched [R1];
}

#define HP_GENERIC                              \
    PRE_RETURN(HeapOverflow, ThreadRunGHC)      \
    jump stg_returnToSched [R1];

#define BLOCK_GENERIC                           \
    PRE_RETURN(ThreadBlocked,  ThreadRunGHC)    \
    jump stg_returnToSched [R1];

#define YIELD_GENERIC                           \
    PRE_RETURN(ThreadYielding, ThreadRunGHC)    \
    jump stg_returnToSched [R1];

#define BLOCK_BUT_FIRST(c)                      \
    PRE_RETURN(ThreadBlocked, ThreadRunGHC)     \
    R2 = c;                                     \
    jump stg_returnToSchedButFirst [R1,R2,R3];

#define YIELD_TO_INTERPRETER                    \
    PRE_RETURN(ThreadYielding, ThreadInterpret) \
    jump stg_returnToSchedNotPaused [R1];

/* -----------------------------------------------------------------------------
   Heap checks in thunks/functions.

   In these cases, node always points to the function closure.  This gives
   us an easy way to return to the function: just leave R1 on the top of
   the stack, and have the scheduler enter it to return.

   There are canned sequences for 'n' pointer values in registers.
   -------------------------------------------------------------------------- */

INFO_TABLE_RET ( stg_enter, RET_SMALL, W_ info_ptr, P_ closure )
    return (/* no return values */)
{
    ENTER(closure);
}

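/* As an illustration (a sketch only; the label and constant are illustrative,
 * not real compiler output), the heap/stack-check failure branch of a thunk's
 * or function's entry code ends up here with the closure in R1:
 *
 *     cXXX:                            // check-failure branch
 *         R1 = node;                   // the thunk/function closure
 *         jump __stg_gc_enter_1 [R1];  // run the GC, then re-enter R1
 */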
__stg_gc_enter_1 (P_ node)
{
    jump stg_gc_noregs (stg_enter_info, node) ();
}

/* -----------------------------------------------------------------------------
   Canned heap checks for primitives.

   We can't use stg_gc_fun because primitives are not functions, so
   these fragments let us save some boilerplate heap-check-failure
   code in a few common cases.
   -------------------------------------------------------------------------- */

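/* A typical use (a sketch; the primop name is hypothetical): a primop with
 * one pointer argument that fails its heap check simply restarts itself from
 * scratch once the GC has run, e.g.
 *
 *     jump stg_gc_prim_p (the_arg, stg_myPrimopzh);
 */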
stg_gc_prim (W_ fun)
{
    call stg_gc_noregs ();
    jump fun();
}

stg_gc_prim_p (P_ arg, W_ fun)
{
    call stg_gc_noregs ();
    jump fun(arg);
}

stg_gc_prim_pp (P_ arg1, P_ arg2, W_ fun)
{
    call stg_gc_noregs ();
    jump fun(arg1,arg2);
}

stg_gc_prim_n (W_ arg, W_ fun)
{
    call stg_gc_noregs ();
    jump fun(arg);
}

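/* Low-level (explicit-stack) variant of stg_gc_prim_p: the argument and the
 * continuation are saved in a stg_gc_prim_p_ll frame across the GC, and
 * reloaded from that frame before tail-calling the continuation again.
 */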
INFO_TABLE_RET(stg_gc_prim_p_ll, RET_SMALL, W_ info, P_ arg, W_ fun)
    /* explicit stack */
{
    W_ fun;
    P_ arg;
    fun = Sp(2);
    arg = Sp(1);
    Sp_adj(3);
    R1 = arg;
    jump fun [R1];
}

stg_gc_prim_p_ll
{
    W_ fun;
    P_ arg;
    fun = R2;
    arg = R1;
    Sp_adj(-3);
    Sp(2) = fun;
    Sp(1) = arg;
    Sp(0) = stg_gc_prim_p_ll_info;
    jump stg_gc_noregs [];
}

/* -----------------------------------------------------------------------------
   Info tables for returning values of various types.  These are used
   when we want to push a frame on the stack that will return a value
   to the frame underneath it.
   -------------------------------------------------------------------------- */

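/* For example, stg_gc_unpt_r1 below pushes a stg_ret_p frame holding its
 * pointer before entering the GC; when the scheduler resumes the thread,
 * that frame simply returns the saved pointer to the frame underneath it.
 */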
INFO_TABLE_RET ( stg_ret_v, RET_SMALL, W_ info_ptr )
    return (/* no return values */)
{
    return ();
}

INFO_TABLE_RET ( stg_ret_p, RET_SMALL, W_ info_ptr, P_ ptr )
    return (/* no return values */)
{
    return (ptr);
}

INFO_TABLE_RET ( stg_ret_n, RET_SMALL, W_ info_ptr, W_ nptr )
    return (/* no return values */)
{
    return (nptr);
}

INFO_TABLE_RET ( stg_ret_f, RET_SMALL, W_ info_ptr, F_ f )
    return (/* no return values */)
{
    return (f);
}

INFO_TABLE_RET ( stg_ret_d, RET_SMALL, W_ info_ptr, D_ d )
    return (/* no return values */)
{
    return (d);
}

INFO_TABLE_RET ( stg_ret_l, RET_SMALL, W_ info_ptr, L_ l )
    return (/* no return values */)
{
    return (l);
}

/* -----------------------------------------------------------------------------
   Canned heap-check failures for case alts, where we have some values
   in registers or on the stack according to the NativeReturn
   convention.
   -------------------------------------------------------------------------- */


/*-- void return ------------------------------------------------------------ */

/*-- R1 is a GC pointer, but we don't enter it ----------------------- */

stg_gc_unpt_r1 return (P_ ptr) /* NB. return convention */
{
    jump stg_gc_noregs (stg_ret_p_info, ptr) ();
}

/*-- R1 is unboxed -------------------------------------------------- */

stg_gc_unbx_r1 return (W_ nptr) /* NB. return convention */
{
    jump stg_gc_noregs (stg_ret_n_info, nptr) ();
}

/*-- F1 contains a float ------------------------------------------------- */

stg_gc_f1 return (F_ f)
{
    jump stg_gc_noregs (stg_ret_f_info, f) ();
}

/*-- D1 contains a double ------------------------------------------------- */

stg_gc_d1 return (D_ d)
{
    jump stg_gc_noregs (stg_ret_d_info, d) ();
}


/*-- L1 contains an int64 ------------------------------------------------- */

stg_gc_l1 return (L_ l)
{
    jump stg_gc_noregs (stg_ret_l_info, l) ();
}

/*-- Unboxed tuples with multiple pointers -------------------------------- */

stg_gc_pp return (P_ arg1, P_ arg2)
{
    call stg_gc_noregs();
    return (arg1,arg2);
}

stg_gc_ppp return (P_ arg1, P_ arg2, P_ arg3)
{
    call stg_gc_noregs();
    return (arg1,arg2,arg3);
}

stg_gc_pppp return (P_ arg1, P_ arg2, P_ arg3, P_ arg4)
{
    call stg_gc_noregs();
    return (arg1,arg2,arg3,arg4);
}

/* -----------------------------------------------------------------------------
   Generic function entry heap check code.

   At a function entry point, the arguments are as per the calling convention,
   i.e. some in regs and some on the stack.  There may or may not be
   a pointer to the function closure in R1 - if there isn't, then the heap
   check failure code in the function will arrange to load it.

   The function's argument types are described in its info table, so we
   can just jump to this bit of generic code to save away all the
   registers and return to the scheduler.

   This code arranges the stack like this:

         |        ....         |
         |        args         |
         +---------------------+
         |      f_closure      |
         +---------------------+
         |        size         |
         +---------------------+
         |   stg_gc_fun_info   |
         +---------------------+

   The size is the number of words of arguments on the stack, and is cached
   in the frame in order to simplify stack walking: otherwise the size of
   this stack frame would have to be calculated by looking at f's info table.

   -------------------------------------------------------------------------- */

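/* A worked example (illustrative numbers only): for a call whose stack-passed
 * arguments occupy two words, the frame built below is
 *
 *     Sp(0) = stg_gc_fun_info
 *     Sp(1) = 2              // size: words of arguments above the frame
 *     Sp(2) = f_closure      // pointer to the function closure (from R1)
 *     Sp(3), Sp(4)           // the two argument words
 */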
__stg_gc_fun /* explicit stack */
{
    W_ size;
    W_ info;
    W_ type;

    info = %GET_FUN_INFO(UNTAG(R1));

    // cache the size
    type = TO_W_(StgFunInfoExtra_fun_type(info));
    if (type == ARG_GEN) {
        size = BITMAP_SIZE(StgFunInfoExtra_bitmap(info));
    } else {
        if (type == ARG_GEN_BIG) {
#if defined(TABLES_NEXT_TO_CODE)
            // bitmap field holds an offset
            size = StgLargeBitmap_size(
                      TO_W_(StgFunInfoExtraRev_bitmap_offset(info))
                      + %GET_ENTRY(UNTAG(R1)) /* ### */ );
#else
            size = StgLargeBitmap_size( StgFunInfoExtra_bitmap(info) );
#endif
        } else {
            size = BITMAP_SIZE(W_[stg_arg_bitmaps + WDS(type)]);
        }
    }

#if defined(NO_ARG_REGS)
    // we don't have to save any registers away
    Sp_adj(-3);
    Sp(2) = R1;
    Sp(1) = size;
    Sp(0) = stg_gc_fun_info;
    jump stg_gc_noregs [];
#else
    W_ type;
    type = TO_W_(StgFunInfoExtra_fun_type(info));
    // cache the size
    if (type == ARG_GEN || type == ARG_GEN_BIG) {
        // regs already saved by the heap check code
        Sp_adj(-3);
        Sp(2) = R1;
        Sp(1) = size;
        Sp(0) = stg_gc_fun_info;
        // DEBUG_ONLY(foreign "C" debugBelch("stg_fun_gc_gen(ARG_GEN)"););
        jump stg_gc_noregs [];
    } else {
        jump W_[stg_stack_save_entries + WDS(type)] [*]; // all regs live
            // jumps to stg_gc_noregs after saving stuff
    }
#endif /* !NO_ARG_REGS */
}


/* -----------------------------------------------------------------------------
   Generic Apply (return point)

   The dual to stg_fun_gc_gen (above): this fragment returns to the
   function, passing arguments in the stack and in registers
   appropriately.  The stack layout is given above.
   -------------------------------------------------------------------------- */

INFO_TABLE_RET ( stg_gc_fun, RET_FUN )
    /* explicit stack */
{
    R1 = Sp(2);
    Sp_adj(3);
#if defined(NO_ARG_REGS)
    // Minor optimisation: there are no argument registers to load up,
    // so we can just jump straight to the function's entry point.
    jump %GET_ENTRY(UNTAG(R1)) [R1];
#else
    W_ info;
    W_ type;

    info = %GET_FUN_INFO(UNTAG(R1));
    type = TO_W_(StgFunInfoExtra_fun_type(info));
    if (type == ARG_GEN || type == ARG_GEN_BIG) {
        jump StgFunInfoExtra_slow_apply(info) [R1];
    } else {
        if (type == ARG_BCO) {
            // cover this case just to be on the safe side
            Sp_adj(-2);
            Sp(1) = R1;
            Sp(0) = stg_apply_interp_info;
            jump stg_yield_to_interpreter [];
        } else {
            jump W_[stg_ap_stack_entries + WDS(type)] [R1];
        }
    }
#endif
}

/* -----------------------------------------------------------------------------
   Yields
   -------------------------------------------------------------------------- */

stg_yield_noregs
{
    YIELD_GENERIC;
}

/* -----------------------------------------------------------------------------
   Yielding to the interpreter... top of stack says what to do next.
   -------------------------------------------------------------------------- */

stg_yield_to_interpreter
{
    YIELD_TO_INTERPRETER;
}

/* -----------------------------------------------------------------------------
   Blocks
   -------------------------------------------------------------------------- */

stg_block_noregs
{
    BLOCK_GENERIC;
}

/* -----------------------------------------------------------------------------
 * takeMVar/putMVar-specific blocks
 *
 * Stack layout for a thread blocked in takeMVar/readMVar:
 *
 *       ret. addr
 *       ptr to MVar   (R1)
 *       stg_block_takemvar_info (or stg_block_readmvar_info)
 *
 * Stack layout for a thread blocked in putMVar:
 *
 *       ret. addr
 *       ptr to Value  (R2)
 *       ptr to MVar   (R1)
 *       stg_block_putmvar_info
 *
 * See PrimOps.cmm for a description of the workings of take/putMVar.
 *
 * -------------------------------------------------------------------------- */

INFO_TABLE_RET ( stg_block_takemvar, RET_SMALL, W_ info_ptr, P_ mvar )
    return ()
{
    jump stg_takeMVarzh(mvar);
}

// code fragment executed just before we return to the scheduler
stg_block_takemvar_finally
{
    W_ r1, r3;
    r1 = R1;
    r3 = R3;
    unlockClosure(R3, stg_MVAR_DIRTY_info);
    R1 = r1;
    R3 = r3;
    jump StgReturn [R1];
}

// Stack usage covered by RESERVED_STACK_WORDS
stg_block_takemvar /* mvar passed in R1 */
{
    Sp_adj(-2);
    Sp(1) = R1;
    Sp(0) = stg_block_takemvar_info;
    R3 = R1; // mvar communicated to stg_block_takemvar_finally in R3
    BLOCK_BUT_FIRST(stg_block_takemvar_finally);
}

INFO_TABLE_RET ( stg_block_readmvar, RET_SMALL, W_ info_ptr, P_ mvar )
    return ()
{
    jump stg_readMVarzh(mvar);
}

// code fragment executed just before we return to the scheduler
stg_block_readmvar_finally
{
    W_ r1, r3;
    r1 = R1;
    r3 = R3;
    unlockClosure(R3, stg_MVAR_DIRTY_info);
    R1 = r1;
    R3 = r3;
    jump StgReturn [R1];
}

stg_block_readmvar /* mvar passed in R1 */
{
    Sp_adj(-2);
    Sp(1) = R1;
    Sp(0) = stg_block_readmvar_info;
    R3 = R1; // mvar communicated to stg_block_readmvar_finally in R3
    BLOCK_BUT_FIRST(stg_block_readmvar_finally);
}

INFO_TABLE_RET( stg_block_putmvar, RET_SMALL, W_ info_ptr,
                P_ mvar, P_ val )
    return ()
{
    jump stg_putMVarzh(mvar, val);
}

// code fragment executed just before we return to the scheduler
stg_block_putmvar_finally
{
    W_ r1, r3;
    r1 = R1;
    r3 = R3;
    unlockClosure(R3, stg_MVAR_DIRTY_info);
    R1 = r1;
    R3 = r3;
    jump StgReturn [R1];
}

stg_block_putmvar (P_ mvar, P_ val)
{
    push (stg_block_putmvar_info, mvar, val) {
      R3 = R1; // mvar communicated to stg_block_putmvar_finally in R3
      BLOCK_BUT_FIRST(stg_block_putmvar_finally);
   }
}

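// Block on a BLACKHOLE: push an stg_enter frame for node, so that when the
// thread is eventually woken up it simply re-enters node.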
stg_block_blackhole (P_ node)
{
    Sp_adj(-2);
    Sp(1) = node;
    Sp(0) = stg_enter_info;
    BLOCK_GENERIC;
}

INFO_TABLE_RET ( stg_block_throwto, RET_SMALL, W_ info_ptr,
                 P_ tso, P_ exception )
    return ()
{
    jump stg_killThreadzh(tso, exception);
}

stg_block_throwto_finally
{
    // unlock the throwto message, but only if it wasn't already
    // unlocked.  It may have been unlocked if we revoked the message
    // due to an exception being raised during threadPaused().
    if (StgHeader_info(StgTSO_block_info(CurrentTSO)) == stg_WHITEHOLE_info) {
        W_ r1;
        r1 = R1;
        unlockClosure(StgTSO_block_info(CurrentTSO), stg_MSG_THROWTO_info);
        R1 = r1;
    }
    jump StgReturn [R1];
}

stg_block_throwto (P_ tso, P_ exception)
{
    push (stg_block_throwto_info, tso, exception) {
       BLOCK_BUT_FIRST(stg_block_throwto_finally);
    }
}

#if defined(mingw32_HOST_OS)
INFO_TABLE_RET ( stg_block_async, RET_SMALL, W_ info_ptr, W_ ares )
    return ()
{
    W_ len, errC;

    len = TO_W_(StgAsyncIOResult_len(ares));
    errC = TO_W_(StgAsyncIOResult_errCode(ares));
    ccall free(ares "ptr");
    return (len, errC);
}

stg_block_async
{
    Sp_adj(-2);
    Sp(0) = stg_block_async_info;
    BLOCK_GENERIC;
}

/* Used by threadDelay implementation; it would be desirable to get rid of
 * this free()'ing void return continuation.
 */
INFO_TABLE_RET ( stg_block_async_void, RET_SMALL, W_ info_ptr, W_ ares )
    return ()
{
    ccall free(ares "ptr");
    return ();
}

stg_block_async_void
{
    Sp_adj(-2);
    Sp(0) = stg_block_async_void_info;
    BLOCK_GENERIC;
}

#endif


/* -----------------------------------------------------------------------------
   STM-specific waiting
   -------------------------------------------------------------------------- */

stg_block_stmwait
{
    // When blocking on an MVar we have to be careful to only release
    // the lock on the MVar at the very last moment (using
    // BLOCK_BUT_FIRST()), since when we release the lock another
    // Capability can wake up the thread, which modifies its stack and
    // other state.  This is not a problem for STM, because STM
    // wakeups are non-destructive; the waker simply calls
    // tryWakeupThread() which sends a message to the owner
    // Capability.  So the moment we release this lock we might start
    // getting wakeup messages, but that's perfectly harmless.
    //
    // Furthermore, we *must* release these locks, just in case an
    // exception is raised in this thread by
    // maybePerformBlockedException() while exiting to the scheduler,
    // which will abort the transaction, which needs to obtain a lock
    // on all the TVars to remove the thread from the queues.
    //
    ccall stmWaitUnlock(MyCapability() "ptr", R3 "ptr");
    BLOCK_GENERIC;
}