Moves it into IOManager.c where we can follow the new pattern of switching on the selected I/O manager. Uses a new IOManager API: syncDelay, following the naming convention of sync* for thread-synchronous I/O & timer/delay operations. As part of porting from cmm to C, we maintain the rule that the why_blocked gets accessed using load acquire and store release atomic memory operations. There was one exception to this rule: in the delay# primop cmm code on posix (not win32), the why_blocked was being updated using a store relaxed, not a store release. I've no idea why. In this conversion I'm playing it safe and using store release consistently.
PrimOps.cmm
/* -*- tab-width: 8 -*- */
/* -----------------------------------------------------------------------------
*
* (c) The GHC Team, 1998-2012
*
* Out-of-line primitive operations
*
* This file contains the implementations of all the primitive
* operations ("primops") which are not expanded inline. See
* ghc/compiler/GHC/Builtin/primops.txt.pp for a list of all the primops;
* this file contains code for most of those with the attribute
* out_of_line=True.
*
* Entry convention: the entry convention for a primop is the
* NativeNodeCall convention, and the return convention is
* NativeReturn. (see compiler/GHC/Cmm/CallConv.hs)
*
* This file is written in a subset of C--, extended with various
* features specific to GHC. It is compiled by GHC directly. For the
* syntax of .cmm files, see the parser in ghc/compiler/GHC/Cmm/Parser.y.
*
* ---------------------------------------------------------------------------*/
#include "Cmm.h"
#include "MachDeps.h"
#include "SMPClosureOps.h"
#if defined(__PIC__)
import pthread_mutex_lock;
import pthread_mutex_unlock;
#endif
import CLOSURE ghczminternal_GHCziInternalziControlziExceptionziBase_nestedAtomically_closure;
import CLOSURE ghczminternal_GHCziInternalziIOziException_heapOverflow_closure;
import CLOSURE ghczminternal_GHCziInternalziIOziException_blockedIndefinitelyOnMVar_closure;
import CLOSURE ghczminternal_GHCziInternalziIOPort_doubleReadException_closure;
import AcquireSRWLockExclusive;
import ReleaseSRWLockExclusive;
import CLOSURE ghczmprim_GHCziTypes_False_closure;
#if defined(PROFILING)
import CLOSURE CCS_MAIN;
#endif
#if !defined(UnregisterisedCompiler)
import CLOSURE ALLOC_RTS_ctr;
import CLOSURE ALLOC_RTS_tot;
import CLOSURE CCS_SYSTEM;
import CLOSURE HEAP_CHK_ctr;
import CLOSURE RtsFlags;
import CLOSURE STK_CHK_ctr;
import CLOSURE blocked_queue_hd;
import CLOSURE blocked_queue_tl;
import CLOSURE g0;
import CLOSURE large_alloc_lim;
import CLOSURE n_capabilities;
import CLOSURE sleeping_queue;
import CLOSURE stable_name_table;
import CLOSURE stable_ptr_table;
import CLOSURE stg_AP_STACK_info;
import CLOSURE stg_AP_info;
import CLOSURE stg_ARR_WORDS_info;
import CLOSURE stg_BCO_info;
import CLOSURE stg_C_FINALIZER_LIST_info;
import CLOSURE stg_DEAD_WEAK_info;
import CLOSURE stg_END_STM_WATCH_QUEUE_closure;
import CLOSURE stg_END_TSO_QUEUE_closure;
import CLOSURE stg_IND_info;
import CLOSURE stg_MSG_NULL_info;
import CLOSURE stg_MUT_ARR_PTRS_DIRTY_info;
import CLOSURE stg_MUT_ARR_PTRS_FROZEN_CLEAN_info;
import CLOSURE stg_MUT_ARR_PTRS_FROZEN_DIRTY_info;
import CLOSURE stg_MUT_VAR_CLEAN_info;
import CLOSURE stg_MUT_VAR_DIRTY_info;
import CLOSURE stg_MVAR_CLEAN_info;
import CLOSURE stg_MVAR_DIRTY_info;
import CLOSURE stg_MVAR_TSO_QUEUE_info;
import CLOSURE stg_NO_FINALIZER_closure;
import CLOSURE stg_NO_TREC_closure;
import CLOSURE stg_SMALL_MUT_ARR_PTRS_DIRTY_info;
import CLOSURE stg_SMALL_MUT_ARR_PTRS_FROZEN_CLEAN_info;
import CLOSURE stg_SMALL_MUT_ARR_PTRS_FROZEN_DIRTY_info;
import CLOSURE stg_STABLE_NAME_info;
import CLOSURE stg_TREC_HEADER_info;
import CLOSURE stg_TVAR_DIRTY_info;
import CLOSURE stg_WEAK_info;
import CLOSURE stg_WHITEHOLE_info;
import CLOSURE stg_ap_2_upd_info;
import CLOSURE stg_atomically_frame_info;
import CLOSURE stg_atomically_waiting_frame_info;
import CLOSURE stg_catch_retry_frame_info;
import CLOSURE stg_catch_stm_frame_info;
import CLOSURE stg_keepAlive_frame_info;
import CLOSURE stg_noDuplicate_info;
import CLOSURE stg_ret_p_info;
import CLOSURE stg_sel_0_upd_info;
#endif
#if defined(DEBUG)
#define ASSERT_IN_BOUNDS(ind, sz) \
if (ind >= sz) { ccall rtsOutOfBoundsAccess(); }
#else
#define ASSERT_IN_BOUNDS(ind, sz)
#endif
/*-----------------------------------------------------------------------------
Array Primitives
Basically just new*Array - the others are all inline macros.
The slow entry point is for returning from a heap check; the saved
size argument must be re-loaded from the stack.
-------------------------------------------------------------------------- */
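/* A Haskell-level usage sketch (illustrative only, not part of this file):
 * the byte-array allocation primops below are exposed through GHC.Exts.
 * This assumes a module compiled with MagicHash and UnboxedTuples, and that
 * n is at least one word so the writeWordArray# stays in bounds:
 *
 *   {-# LANGUAGE MagicHash, UnboxedTuples #-}
 *   import GHC.Exts
 *   import GHC.IO (IO(..))
 *
 *   -- Allocate n bytes (stg_newByteArrayzh) and write a word into cell 0.
 *   newWordArray :: Int -> IO ()
 *   newWordArray (I# n) = IO $ \s0 ->
 *     case newByteArray# n s0 of
 *       (# s1, mba #) ->
 *         case writeWordArray# mba 0# 42## s1 of
 *           s2 -> (# s2, () #)
 */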
/* for objects that are *less* than the size of a word, make sure we
* round up to the nearest word for the size of the array.
*/
stg_newByteArrayzh ( W_ n )
{
W_ words, payload_words;
gcptr p;
MAYBE_GC_N(stg_newByteArrayzh, n);
payload_words = ROUNDUP_BYTES_TO_WDS(n);
words = BYTES_TO_WDS(SIZEOF_StgArrBytes) + payload_words;
("ptr" p) = ccall allocateMightFail(MyCapability() "ptr", words);
if (p == NULL) {
jump stg_raisezh(ghczminternal_GHCziInternalziIOziException_heapOverflow_closure);
}
TICK_ALLOC_PRIM(SIZEOF_StgArrBytes,WDS(payload_words),0);
SET_HDR(p, stg_ARR_WORDS_info, CCCS);
StgArrBytes_bytes(p) = n;
return (p);
}
#define BA_ALIGN 16
#define BA_MASK (BA_ALIGN-1)
stg_newPinnedByteArrayzh ( W_ n )
{
W_ words, bytes, payload_words;
gcptr p;
MAYBE_GC_N(stg_newPinnedByteArrayzh, n);
bytes = n;
/* payload_words is what we will tell the profiler we had to allocate */
payload_words = ROUNDUP_BYTES_TO_WDS(bytes);
/* When we actually allocate memory, we need to allow space for the
header: */
bytes = bytes + SIZEOF_StgArrBytes;
/* Now we convert to a number of words: */
words = ROUNDUP_BYTES_TO_WDS(bytes);
("ptr" p) = ccall allocatePinned(MyCapability() "ptr", words, BA_ALIGN, SIZEOF_StgArrBytes);
if (p == NULL) {
jump stg_raisezh(ghczminternal_GHCziInternalziIOziException_heapOverflow_closure);
}
TICK_ALLOC_PRIM(SIZEOF_StgArrBytes,WDS(payload_words),0);
/* No write barrier needed since this is a new allocation. */
SET_HDR(p, stg_ARR_WORDS_info, CCCS);
StgArrBytes_bytes(p) = n;
return (p);
}
stg_newAlignedPinnedByteArrayzh ( W_ n, W_ alignment )
{
W_ words, bytes, payload_words;
gcptr p;
again: MAYBE_GC(again);
/* we always supply at least word-aligned memory, so there's no
need to allow extra space for alignment if the requirement is less
than a word. This also prevents mischief with alignment == 0. */
if (alignment <= SIZEOF_W) { alignment = SIZEOF_W; }
bytes = n;
/* payload_words is what we will tell the profiler we had to allocate */
payload_words = ROUNDUP_BYTES_TO_WDS(bytes);
/* When we actually allocate memory, we need to allow space for the
header: */
bytes = bytes + SIZEOF_StgArrBytes;
/* Now we convert to a number of words: */
words = ROUNDUP_BYTES_TO_WDS(bytes);
("ptr" p) = ccall allocatePinned(MyCapability() "ptr", words, alignment, SIZEOF_StgArrBytes);
if (p == NULL) {
jump stg_raisezh(ghczminternal_GHCziInternalziIOziException_heapOverflow_closure);
}
TICK_ALLOC_PRIM(SIZEOF_StgArrBytes,WDS(payload_words),0);
/* No write barrier needed since this is a new allocation. */
SET_HDR(p, stg_ARR_WORDS_info, CCCS);
StgArrBytes_bytes(p) = n;
return (p);
}
stg_isByteArrayPinnedzh ( gcptr ba )
// ByteArray# s -> Int#
{
W_ bd, flags;
bd = Bdescr(ba);
// Pinned byte arrays live in blocks with the BF_PINNED flag set.
// We also consider BF_LARGE objects to be immovable. See #13894.
// See the comment in Storage.c:allocatePinned.
// We also consider BF_COMPACT objects to be immovable. See #14900.
flags = TO_W_(bdescr_flags(bd));
// We used to also consider BF_LARGE pinned, but stopped doing so
// because it interacted badly with compact regions. See #22255
return (flags & BF_PINNED != 0);
}
stg_isMutableByteArrayPinnedzh ( gcptr mba )
// MutableByteArray# s -> Int#
{
jump stg_isByteArrayPinnedzh(mba);
}
/* Note [LDV profiling and resizing arrays]
* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
* As far as the LDV profiler is concerned arrays are "inherently used" which
* means we don't track their time of use and eventual destruction. We just
* assume they get used.
*
* Thus it is not necessary to call PROF_HEADER_CREATE when resizing them as we
* used to as the LDV profiler will essentially ignore arrays anyways.
*/
// shrink size of MutableByteArray in-place
stg_shrinkMutableByteArrayzh ( gcptr mba, W_ new_size )
// MutableByteArray# s -> Int# -> State# s -> State# s
{
ASSERT(new_size <= StgArrBytes_bytes(mba));
OVERWRITING_CLOSURE_MUTABLE(mba, (BYTES_TO_WDS(SIZEOF_StgArrBytes) +
ROUNDUP_BYTES_TO_WDS(new_size)));
StgArrBytes_bytes(mba) = new_size;
// No need to call PROF_HEADER_CREATE. See Note [LDV profiling and resizing arrays]
return ();
}
// resize MutableByteArray
//
// The returned MutableByteArray is either the original
// MutableByteArray resized in-place or, if not possible, a newly
// allocated (unpinned) MutableByteArray (with the original content
// copied over)
stg_resizzeMutableByteArrayzh ( gcptr mba, W_ new_size )
// MutableByteArray# s -> Int# -> State# s -> (# State# s,MutableByteArray# s #)
{
W_ new_size_wds;
ASSERT(new_size >= 0);
new_size_wds = ROUNDUP_BYTES_TO_WDS(new_size);
if (new_size_wds <= BYTE_ARR_WDS(mba)) {
OVERWRITING_CLOSURE_MUTABLE(mba, (BYTES_TO_WDS(SIZEOF_StgArrBytes) +
new_size_wds));
StgArrBytes_bytes(mba) = new_size;
// No need to call PROF_HEADER_CREATE. See Note [LDV profiling and resizing arrays]
return (mba);
} else {
(P_ new_mba) = call stg_newByteArrayzh(new_size);
// maybe at some point in the future we may be able to grow the
// MBA in-place w/o copying if we know the space after the
// current MBA is still available, as often we want to grow the
// MBA shortly after we allocated the original MBA. So maybe no
// further allocations have occurred by then.
// copy over old content
prim %memcpy(BYTE_ARR_CTS(new_mba), BYTE_ARR_CTS(mba),
StgArrBytes_bytes(mba), SIZEOF_W);
return (new_mba);
}
}
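/* A Haskell-level usage sketch of the resize primop above (illustrative
 * only): resizeMutableByteArray# may return either the original array or a
 * fresh one, so callers must always continue with the array it returns. The
 * MBA wrapper below is just a local box for the unlifted array.
 *
 *   {-# LANGUAGE MagicHash, UnboxedTuples #-}
 *   import GHC.Exts
 *   import GHC.IO (IO(..))
 *
 *   data MBA = MBA (MutableByteArray# RealWorld)
 *
 *   resize :: MBA -> Int -> IO MBA
 *   resize (MBA mba) (I# n) = IO $ \s0 ->
 *     case resizeMutableByteArray# mba n s0 of
 *       (# s1, mba' #) -> (# s1, MBA mba' #)
 */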
// shrink size of SmallMutableArray in-place
stg_shrinkSmallMutableArrayzh ( gcptr mba, W_ new_size )
// SmallMutableArray# s -> Int# -> State# s -> State# s
{
ASSERT(new_size <= StgSmallMutArrPtrs_ptrs(mba));
IF_NONMOVING_WRITE_BARRIER_ENABLED {
// Ensure that the elements we are about to shrink out of existence
// remain visible to the non-moving collector.
W_ p, end;
p = mba + SIZEOF_StgSmallMutArrPtrs + WDS(new_size);
end = mba + SIZEOF_StgSmallMutArrPtrs + WDS(StgSmallMutArrPtrs_ptrs(mba));
again:
if (p < end) {
ccall updateRemembSetPushClosure_(BaseReg "ptr",
W_[p] "ptr");
p = p + SIZEOF_W;
goto again;
}
}
OVERWRITING_CLOSURE_MUTABLE(mba, (BYTES_TO_WDS(SIZEOF_StgSmallMutArrPtrs) +
new_size));
StgSmallMutArrPtrs_ptrs(mba) = new_size;
// No need to call PROF_HEADER_CREATE. See Note [LDV profiling and resizing arrays]
return ();
}
// RRN: This one does not use the "ticketing" approach because it
// deals in unboxed scalars, not heap pointers.
stg_casIntArrayzh( gcptr arr, W_ ind, W_ old, W_ new )
/* MutableByteArray# s -> Int# -> Int# -> Int# -> State# s -> (# State# s, Int# #) */
{
W_ p, h;
ASSERT_IN_BOUNDS(ind + WDS(1) - 1, StgArrBytes_bytes(arr));
p = arr + SIZEOF_StgArrBytes + WDS(ind);
(h) = prim %cmpxchgW(p, old, new);
return(h);
}
stg_casInt8Arrayzh( gcptr arr, W_ ind, I8 old, I8 new )
/* MutableByteArray# s -> Int# -> Int8# -> Int8# -> State# s -> (# State# s, Int8# #) */
{
W_ p;
I8 h;
ASSERT_IN_BOUNDS(ind, StgArrBytes_bytes(arr));
p = arr + SIZEOF_StgArrBytes + ind;
(h) = prim %cmpxchg8(p, old, new);
return(h);
}
stg_casInt16Arrayzh( gcptr arr, W_ ind, I16 old, I16 new )
/* MutableByteArray# s -> Int# -> Int16# -> Int16# -> State# s -> (# State# s, Int16# #) */
{
W_ p;
I16 h;
ASSERT_IN_BOUNDS(ind + 1, StgArrBytes_bytes(arr));
p = arr + SIZEOF_StgArrBytes + ind*2;
(h) = prim %cmpxchg16(p, old, new);
return(h);
}
stg_casInt32Arrayzh( gcptr arr, W_ ind, I32 old, I32 new )
/* MutableByteArray# s -> Int# -> Int32# -> Int32# -> State# s -> (# State# s, Int32# #) */
{
W_ p;
I32 h;
ASSERT_IN_BOUNDS(ind + 3, StgArrBytes_bytes(arr));
p = arr + SIZEOF_StgArrBytes + ind*4;
(h) = prim %cmpxchg32(p, old, new);
return(h);
}
stg_casInt64Arrayzh( gcptr arr, W_ ind, I64 old, I64 new )
/* MutableByteArray# s -> Int# -> Int64# -> Int64# -> State# s -> (# State# s, Int64# #) */
{
W_ p;
I64 h;
ASSERT_IN_BOUNDS(ind + 7, StgArrBytes_bytes(arr));
p = arr + SIZEOF_StgArrBytes + ind*8;
(h) = prim %cmpxchg64(p, old, new);
return(h);
}
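/* A Haskell-level usage sketch of the unboxed-scalar CAS primops above
 * (illustrative only): casIntArray# returns the value it found, so a typical
 * use is a read-modify-CAS retry loop such as this fetch-and-add. Indices
 * are in Int-sized elements, matching the WDS(ind) addressing above.
 *
 *   {-# LANGUAGE MagicHash, UnboxedTuples #-}
 *   import GHC.Exts
 *   import GHC.IO (IO(..))
 *
 *   casAdd :: MutableByteArray# RealWorld -> Int# -> Int# -> IO Int
 *   casAdd mba i delta = IO go
 *     where
 *       go s0 =
 *         case readIntArray# mba i s0 of
 *           (# s1, old #) ->
 *             case casIntArray# mba i old (old +# delta) s1 of
 *               (# s2, seen #)
 *                 | isTrue# (seen ==# old) -> (# s2, I# old #)  -- CAS took
 *                 | otherwise              -> go s2             -- raced; retry
 */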
stg_newArrayzh ( W_ n /* words */, gcptr init )
{
W_ words, size, p;
gcptr arr;
again: MAYBE_GC(again);
// the mark area contains one byte for each 2^MUT_ARR_PTRS_CARD_BITS words
// in the array, making sure we round up, and then rounding up to a whole
// number of words.
size = n + mutArrPtrsCardWords(n);
words = BYTES_TO_WDS(SIZEOF_StgMutArrPtrs) + size;
("ptr" arr) = ccall allocateMightFail(MyCapability() "ptr",words);
if (arr == NULL) {
jump stg_raisezh(ghczminternal_GHCziInternalziIOziException_heapOverflow_closure);
}
TICK_ALLOC_PRIM(SIZEOF_StgMutArrPtrs, WDS(size), 0);
/* No write barrier needed since this is a new allocation. */
SET_HDR(arr, stg_MUT_ARR_PTRS_DIRTY_info, CCCS);
StgMutArrPtrs_ptrs(arr) = n;
StgMutArrPtrs_size(arr) = size;
/* Ensure that the card array is initialized */
if (n != 0) {
setCardsValue(arr, 0, n, 0);
}
// Initialise all elements of the array with the value in R2
p = arr + SIZEOF_StgMutArrPtrs;
for:
if (p < arr + SIZEOF_StgMutArrPtrs + WDS(n)) (likely: True) {
W_[p] = init;
p = p + WDS(1);
goto for;
}
return (arr);
}
stg_unsafeThawArrayzh ( gcptr arr )
{
// A MUT_ARR_PTRS always lives on a mut_list, but a MUT_ARR_PTRS_FROZEN
// doesn't. To decide whether to add the thawed array to a mut_list we check
// the info table. MUT_ARR_PTRS_FROZEN_DIRTY means it's already on a
// mut_list so no need to add it again. MUT_ARR_PTRS_FROZEN_CLEAN means it's
// not and we should add it to a mut_list.
if (StgHeader_info(arr) != stg_MUT_ARR_PTRS_FROZEN_DIRTY_info) {
SET_INFO(arr,stg_MUT_ARR_PTRS_DIRTY_info);
// must be done after SET_INFO, because it ASSERTs closure_MUTABLE():
recordMutable(arr);
return (arr);
} else {
SET_INFO(arr,stg_MUT_ARR_PTRS_DIRTY_info);
return (arr);
}
}
stg_copyArrayzh ( gcptr src, W_ src_off, gcptr dst, W_ dst_off, W_ n )
{
copyArray(src, src_off, dst, dst_off, n)
}
stg_copyMutableArrayzh ( gcptr src, W_ src_off, gcptr dst, W_ dst_off, W_ n )
{
copyMutableArray(src, src_off, dst, dst_off, n)
}
stg_cloneArrayzh ( gcptr src, W_ offset, W_ n )
{
cloneArray(stg_MUT_ARR_PTRS_FROZEN_CLEAN_info, src, offset, n)
}
stg_cloneMutableArrayzh ( gcptr src, W_ offset, W_ n )
{
cloneArray(stg_MUT_ARR_PTRS_DIRTY_info, src, offset, n)
}
// We have to escape the "z" in the name.
stg_freezzeArrayzh ( gcptr src, W_ offset, W_ n )
{
cloneArray(stg_MUT_ARR_PTRS_FROZEN_CLEAN_info, src, offset, n)
}
stg_thawArrayzh ( gcptr src, W_ offset, W_ n )
{
cloneArray(stg_MUT_ARR_PTRS_DIRTY_info, src, offset, n)
}
// RRN: Uses the ticketed approach; see casMutVar
stg_casArrayzh ( gcptr arr, W_ ind, gcptr old, gcptr new )
/* MutableArray# s a -> Int# -> a -> a -> State# s -> (# State# s, Int#, Any a #) */
{
gcptr h;
W_ p, len;
ASSERT_IN_BOUNDS(ind, StgMutArrPtrs_ptrs(arr));
p = arr + SIZEOF_StgMutArrPtrs + WDS(ind);
(h) = prim %cmpxchgW(p, old, new);
if (h != old) {
// Failure, return what was there instead of 'old':
return (1,h);
} else {
// Compare and Swap Succeeded:
SET_HDR(arr, stg_MUT_ARR_PTRS_DIRTY_info, CCCS);
len = StgMutArrPtrs_ptrs(arr);
// The write barrier. We must write a byte into the mark table:
I8[arr + SIZEOF_StgMutArrPtrs + WDS(len) + (ind >> MUT_ARR_PTRS_CARD_BITS )] = 1;
// Concurrent GC write barrier
updateRemembSetPushPtr(old);
return (0,new);
}
}
/* -----------------------------------------------------------------------------
SmallArray primitives
-------------------------------------------------------------------------- */
stg_newSmallArrayzh ( W_ n /* words */, gcptr init )
{
W_ words, size, p;
gcptr arr;
again: MAYBE_GC(again);
words = BYTES_TO_WDS(SIZEOF_StgSmallMutArrPtrs) + n;
("ptr" arr) = ccall allocateMightFail(MyCapability() "ptr",words);
if (arr == NULL) (likely: False) {
jump stg_raisezh(ghczminternal_GHCziInternalziIOziException_heapOverflow_closure);
}
TICK_ALLOC_PRIM(SIZEOF_StgSmallMutArrPtrs, WDS(n), 0);
/* No write barrier needed since this is a new allocation. */
SET_HDR(arr, stg_SMALL_MUT_ARR_PTRS_DIRTY_info, CCCS);
StgSmallMutArrPtrs_ptrs(arr) = n;
// Initialise all elements of the array with the value in R2
p = arr + SIZEOF_StgSmallMutArrPtrs;
// Avoid the shift for `WDS(n)` in the inner loop
W_ limit;
limit = arr + SIZEOF_StgSmallMutArrPtrs + WDS(n);
for:
if (p < limit) (likely: True) {
W_[p] = init;
p = p + WDS(1);
goto for;
}
return (arr);
}
stg_unsafeThawSmallArrayzh ( gcptr arr )
{
// See stg_unsafeThawArrayzh
if (StgHeader_info(arr) != stg_SMALL_MUT_ARR_PTRS_FROZEN_DIRTY_info) {
SET_INFO(arr, stg_SMALL_MUT_ARR_PTRS_DIRTY_info);
recordMutable(arr);
// must be done after SET_INFO, because it ASSERTs closure_MUTABLE()
return (arr);
} else {
SET_INFO(arr, stg_SMALL_MUT_ARR_PTRS_DIRTY_info);
return (arr);
}
}
stg_cloneSmallArrayzh ( gcptr src, W_ offset, W_ n )
{
cloneSmallArray(stg_SMALL_MUT_ARR_PTRS_FROZEN_CLEAN_info, src, offset, n)
}
stg_cloneSmallMutableArrayzh ( gcptr src, W_ offset, W_ n )
{
cloneSmallArray(stg_SMALL_MUT_ARR_PTRS_DIRTY_info, src, offset, n)
}
// We have to escape the "z" in the name.
stg_freezzeSmallArrayzh ( gcptr src, W_ offset, W_ n )
{
cloneSmallArray(stg_SMALL_MUT_ARR_PTRS_FROZEN_CLEAN_info, src, offset, n)
}
stg_thawSmallArrayzh ( gcptr src, W_ offset, W_ n )
{
cloneSmallArray(stg_SMALL_MUT_ARR_PTRS_DIRTY_info, src, offset, n)
}
// Concurrent GC write barrier for pointer array copies
//
// hdr_size in bytes. dst_off in words, n in words.
stg_copyArray_barrier ( W_ hdr_size, gcptr dst, W_ dst_off, W_ n)
{
W_ end, p;
ASSERT(n > 0); // Assumes n==0 is handled by caller
p = dst + hdr_size + WDS(dst_off);
end = p + WDS(n);
again:
ccall updateRemembSetPushClosure_(BaseReg "ptr", W_[p] "ptr");
p = p + WDS(1);
if (p < end) {
goto again;
}
return ();
}
stg_copySmallArrayzh ( gcptr src, W_ src_off, gcptr dst, W_ dst_off, W_ n)
{
W_ dst_p, src_p, bytes;
if (n > 0) {
IF_NONMOVING_WRITE_BARRIER_ENABLED {
call stg_copyArray_barrier(SIZEOF_StgSmallMutArrPtrs,
dst, dst_off, n);
}
SET_INFO(dst, stg_SMALL_MUT_ARR_PTRS_DIRTY_info);
ASSERT_IN_BOUNDS(dst_off + n - 1, StgSmallMutArrPtrs_ptrs(dst));
ASSERT_IN_BOUNDS(src_off + n - 1, StgSmallMutArrPtrs_ptrs(src));
dst_p = dst + SIZEOF_StgSmallMutArrPtrs + WDS(dst_off);
src_p = src + SIZEOF_StgSmallMutArrPtrs + WDS(src_off);
bytes = WDS(n);
prim %memcpy(dst_p, src_p, bytes, SIZEOF_W);
}
return ();
}
stg_copySmallMutableArrayzh ( gcptr src, W_ src_off, gcptr dst, W_ dst_off, W_ n)
{
W_ dst_p, src_p, bytes;
if (n > 0) {
IF_NONMOVING_WRITE_BARRIER_ENABLED {
call stg_copyArray_barrier(SIZEOF_StgSmallMutArrPtrs,
dst, dst_off, n);
}
SET_INFO(dst, stg_SMALL_MUT_ARR_PTRS_DIRTY_info);
ASSERT_IN_BOUNDS(dst_off + n - 1, StgSmallMutArrPtrs_ptrs(dst));
ASSERT_IN_BOUNDS(src_off + n - 1, StgSmallMutArrPtrs_ptrs(src));
dst_p = dst + SIZEOF_StgSmallMutArrPtrs + WDS(dst_off);
src_p = src + SIZEOF_StgSmallMutArrPtrs + WDS(src_off);
bytes = WDS(n);
if (src == dst) {
prim %memmove(dst_p, src_p, bytes, SIZEOF_W);
} else {
prim %memcpy(dst_p, src_p, bytes, SIZEOF_W);
}
}
return ();
}
// RRN: Uses the ticketed approach; see casMutVar
stg_casSmallArrayzh ( gcptr arr, W_ ind, gcptr old, gcptr new )
/* SmallMutableArray# s a -> Int# -> a -> a -> State# s -> (# State# s, Int#, Any a #) */
{
gcptr h;
W_ p, len;
ASSERT_IN_BOUNDS(ind, StgSmallMutArrPtrs_ptrs(arr));
p = arr + SIZEOF_StgSmallMutArrPtrs + WDS(ind);
(h) = prim %cmpxchgW(p, old, new);
if (h != old) {
// Failure, return what was there instead of 'old':
return (1,h);
} else {
// Compare and Swap Succeeded:
SET_HDR(arr, stg_SMALL_MUT_ARR_PTRS_DIRTY_info, CCCS);
// Concurrent GC write barrier
updateRemembSetPushPtr(old);
return (0,new);
}
}
/* -----------------------------------------------------------------------------
MutVar primitives
-------------------------------------------------------------------------- */
stg_newMutVarzh ( gcptr init )
{
W_ mv;
ALLOC_PRIM_P (SIZEOF_StgMutVar, stg_newMutVarzh, init);
mv = Hp - SIZEOF_StgMutVar + WDS(1);
/* No write barrier needed since this is a new allocation. */
SET_HDR(mv,stg_MUT_VAR_DIRTY_info,CCCS);
StgMutVar_var(mv) = init;
return (mv);
}
// RRN: To support the "ticketed" approach, we return the NEW rather
// than old value if the CAS is successful. This is received in an
// opaque form in the Haskell code, preventing the compiler from
// changing its pointer identity. The ticket can then be safely used
// in future CAS operations.
stg_casMutVarzh ( gcptr mv, gcptr old, gcptr new )
/* MutVar# s a -> a -> a -> State# s -> (# State#, Int#, Any a #) */
{
#if defined(THREADED_RTS)
gcptr h;
(h) = prim %cmpxchgW(mv + SIZEOF_StgHeader + OFFSET_StgMutVar_var, old, new);
if (h != old) {
return (1,h);
} else {
if (GET_INFO(mv) == stg_MUT_VAR_CLEAN_info) {
ccall dirty_MUT_VAR(BaseReg "ptr", mv "ptr", old "ptr");
}
return (0,new);
}
#else
gcptr prev_val;
prev_val = StgMutVar_var(mv);
if (prev_val != old) {
return (1,prev_val);
} else {
StgMutVar_var(mv) = new;
if (GET_INFO(mv) == stg_MUT_VAR_CLEAN_info) {
ccall dirty_MUT_VAR(BaseReg "ptr", mv "ptr", old "ptr");
}
return (0,new);
}
#endif
}
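/* A Haskell-level sketch of the ticketed protocol described above
 * (illustrative only): the value previously read from the MutVar# is the
 * "ticket" passed as the expected value; on failure casMutVar# hands back
 * what is actually there, which becomes the ticket for the next attempt.
 *
 *   {-# LANGUAGE MagicHash, UnboxedTuples #-}
 *   import GHC.Exts
 *   import GHC.IO (IO(..))
 *
 *   casModify :: MutVar# RealWorld a -> (a -> a) -> IO ()
 *   casModify mv f = IO $ \s0 ->
 *     case readMutVar# mv s0 of
 *       (# s1, ticket #) -> go ticket s1
 *     where
 *       go ticket s =
 *         case casMutVar# mv ticket (f ticket) s of
 *           (# s', 0#, _       #) -> (# s', () #)   -- success (0 as returned above)
 *           (# s', _,  current #) -> go current s'  -- lost the race; retry
 */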
stg_atomicModifyMutVar2zh ( gcptr mv, gcptr f )
{
W_ z, x, y, h;
/* If x is the current contents of the MutVar#, then
we want to make the new contents point to
(sel_0 (f x))
and the return value is
(# x, (f x) #)
obviously we can share (f x).
z = [stg_ap_2 f x] (SIZEOF_StgThunkHeader + WDS(2))
y = [stg_sel_0 z] (SIZEOF_StgThunkHeader + WDS(1))
*/
#define THUNK_1_SIZE (SIZEOF_StgThunkHeader + WDS(1))
#define TICK_ALLOC_THUNK_1() TICK_ALLOC_UP_THK(WDS(1),0)
#define THUNK_2_SIZE (SIZEOF_StgThunkHeader + WDS(2))
#define TICK_ALLOC_THUNK_2() TICK_ALLOC_UP_THK(WDS(2),0)
#define SIZE (THUNK_2_SIZE + THUNK_1_SIZE)
HP_CHK_GEN_TICKY(SIZE);
TICK_ALLOC_THUNK_2();
CCCS_ALLOC(THUNK_2_SIZE);
z = Hp - THUNK_2_SIZE + WDS(1);
SET_HDR(z, stg_ap_2_upd_info, CCCS);
PROF_HEADER_CREATE(z);
StgThunk_payload(z,0) = f;
TICK_ALLOC_THUNK_1();
CCCS_ALLOC(THUNK_1_SIZE);
y = z - THUNK_1_SIZE;
SET_HDR(y, stg_sel_0_upd_info, CCCS);
PROF_HEADER_CREATE(y);
StgThunk_payload(y,0) = z;
retry:
x = %relaxed StgMutVar_var(mv);
StgThunk_payload(z,1) = x;
#if defined(THREADED_RTS)
(h) = prim %cmpxchgW(mv + SIZEOF_StgHeader + OFFSET_StgMutVar_var, x, y);
if (h != x) { goto retry; }
#else
StgMutVar_var(mv) = y;
#endif
W_ info;
info = %relaxed GET_INFO(mv);
if (info == stg_MUT_VAR_CLEAN_info) {
ccall dirty_MUT_VAR(BaseReg "ptr", mv "ptr", x "ptr");
}
return (x,z);
}
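/* A Haskell-level usage sketch of atomicModifyMutVar2# (illustrative only),
 * matching the note at the top of stg_atomicModifyMutVar2zh: the primop
 * stores a thunk for the first component of (f x) and returns both the old
 * contents and the (lazily built) result pair.
 *
 *   {-# LANGUAGE MagicHash, UnboxedTuples #-}
 *   import GHC.Exts
 *   import GHC.IO (IO(..))
 *
 *   -- Bump a counter, returning its previous value.
 *   bump :: MutVar# RealWorld Int -> IO Int
 *   bump mv = IO $ \s0 ->
 *     case atomicModifyMutVar2# mv (\x -> (x + 1, x)) s0 of
 *       (# s1, old, _pair #) -> (# s1, old #)
 */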
stg_atomicModifyMutVarzuzh ( gcptr mv, gcptr f )
{
W_ z, x, h;
/* If x is the current contents of the MutVar#, then
we want to make the new contents point to
(f x)
and the return value is
(# x, (f x) #)
obviously we can share (f x).
z = [stg_ap_2 f x] (max (HS + 2) MIN_UPD_SIZE)
*/
#define THUNK_SIZE (SIZEOF_StgThunkHeader + WDS(2))
#define TICK_ALLOC_THUNK() TICK_ALLOC_UP_THK(WDS(2),0)
HP_CHK_GEN_TICKY(THUNK_SIZE);
TICK_ALLOC_THUNK();
CCCS_ALLOC(THUNK_SIZE);
z = Hp - THUNK_SIZE + WDS(1);
SET_HDR(z, stg_ap_2_upd_info, CCCS);
PROF_HEADER_CREATE(z);
StgThunk_payload(z,0) = f;
retry:
x = %relaxed StgMutVar_var(mv);
StgThunk_payload(z,1) = x;
#if defined(THREADED_RTS)
(h) = prim %cmpxchgW(mv + SIZEOF_StgHeader + OFFSET_StgMutVar_var, x, z);
if (h != x) { goto retry; }
#else
StgMutVar_var(mv) = z;
#endif
W_ info;
info = %relaxed GET_INFO(mv);
if (info == stg_MUT_VAR_CLEAN_info) {
ccall dirty_MUT_VAR(BaseReg "ptr", mv "ptr", x "ptr");
}
return (x,z);
}
/* -----------------------------------------------------------------------------
Weak Pointer Primitives
-------------------------------------------------------------------------- */
stg_mkWeakzh ( gcptr key,
gcptr value,
gcptr finalizer /* or stg_NO_FINALIZER_closure */ )
{
gcptr w;
ALLOC_PRIM (SIZEOF_StgWeak)
w = Hp - SIZEOF_StgWeak + WDS(1);
// No memory barrier needed as this is a new allocation.
SET_HDR(w, stg_WEAK_info, CCCS);
StgWeak_key(w) = key;
StgWeak_value(w) = value;
StgWeak_finalizer(w) = finalizer;
StgWeak_cfinalizers(w) = stg_NO_FINALIZER_closure;
StgWeak_link(w) = Capability_weak_ptr_list_hd(MyCapability());
Capability_weak_ptr_list_hd(MyCapability()) = w;
if (Capability_weak_ptr_list_tl(MyCapability()) == NULL) {
Capability_weak_ptr_list_tl(MyCapability()) = w;
}
IF_DEBUG(weak, ccall debugBelch("New weak pointer at %p\n",w));
return (w);
}
stg_mkWeakNoFinalizzerzh ( gcptr key, gcptr value )
{
jump stg_mkWeakzh (key, value, stg_NO_FINALIZER_closure);
}
stg_addCFinalizzerToWeakzh ( W_ fptr, // finalizer
W_ ptr,
W_ flag, // has environment (0 or 1)
W_ eptr,
gcptr w )
{
W_ c, info;
ALLOC_PRIM (SIZEOF_StgCFinalizerList)
c = Hp - SIZEOF_StgCFinalizerList + WDS(1);
SET_HDR(c, stg_C_FINALIZER_LIST_info, CCCS);
StgCFinalizerList_fptr(c) = fptr;
StgCFinalizerList_ptr(c) = ptr;
StgCFinalizerList_eptr(c) = eptr;
StgCFinalizerList_flag(c) = flag;
LOCK_CLOSURE(w, info);
if (info == stg_DEAD_WEAK_info) {
// Already dead.
unlockClosure(w, info);
return (0);
}
// Write barrier for concurrent non-moving collector
updateRemembSetPushPtr(StgWeak_cfinalizers(w))
StgCFinalizerList_link(c) = StgWeak_cfinalizers(w);
StgWeak_cfinalizers(w) = c;
unlockClosure(w, info);
recordMutable(w);
IF_DEBUG(weak, ccall debugBelch("Adding a finalizer to %p\n",w));
return (1);
}
stg_finalizzeWeakzh ( gcptr w )
{
gcptr f, list;
W_ info;
LOCK_CLOSURE(w, info);
// already dead?
if (info == stg_DEAD_WEAK_info) {
unlockClosure(w, info);
return (0,stg_NO_FINALIZER_closure);
}
f = StgWeak_finalizer(w);
list = StgWeak_cfinalizers(w);
// kill it
#if defined(PROFILING)
// @LDV profiling
// A weak pointer is inherently used, so we do not need to call
// LDV_recordDead_FILL_SLOP_DYNAMIC():
// LDV_recordDead_FILL_SLOP_DYNAMIC((StgClosure *)w);
// or, LDV_recordDead():
// LDV_recordDead((StgClosure *)w, sizeofW(StgWeak) - sizeofW(StgProfHeader));
// Furthermore, when PROFILING is turned on, dead weak pointers are exactly as
// large as weak pointers, so there is no need to fill the slop, either.
// See stg_DEAD_WEAK_info in StgMiscClosures.cmm.
#endif
//
// Todo: maybe use SET_HDR() and remove LDV_recordCreate()?
//
unlockClosure(w, stg_DEAD_WEAK_info);
PROF_HEADER_CREATE(w);
if (list != stg_NO_FINALIZER_closure) {
ccall runCFinalizers(list);
}
/* return the finalizer */
if (f == stg_NO_FINALIZER_closure) {
return (0,stg_NO_FINALIZER_closure);
} else {
return (1,f);
}
}
stg_deRefWeakzh ( gcptr w )
{
W_ code, info;
gcptr val;
info = GET_INFO_ACQUIRE(w);
if (info == stg_WHITEHOLE_info) {
// w is locked by another thread. Now it's not immediately clear if w is
// alive or not. We use lockClosure to wait for the info pointer to become
// something other than stg_WHITEHOLE_info.
LOCK_CLOSURE(w, info);
unlockClosure(w, info);
}
if (info == stg_WEAK_info) {
code = 1;
val = StgWeak_value(w);
// See Note [Concurrent read barrier on deRefWeak#] in NonMovingMark.c
updateRemembSetPushPtr(val);
} else {
code = 0;
val = w;
}
return (code,val);
}
/* -----------------------------------------------------------------------------
Floating point operations.
-------------------------------------------------------------------------- */
stg_decodeFloatzuIntzh ( F_ arg )
{
W_ p;
W_ tmp, mp_tmp1, mp_tmp_w, r1, r2;
STK_CHK_GEN_N (WDS(2));
reserve 2 = tmp {
mp_tmp1 = tmp + WDS(1);
mp_tmp_w = tmp;
/* Perform the operation */
ccall __decodeFloat_Int(mp_tmp1 "ptr", mp_tmp_w "ptr", arg);
r1 = W_[mp_tmp1];
r2 = W_[mp_tmp_w];
}
/* returns: (Int# (mantissa), Int# (exponent)) */
return (r1, r2);
}
stg_decodeDoublezu2Intzh ( D_ arg )
{
W_ p, tmp;
W_ mp_tmp1, mp_tmp2, mp_result1, mp_result2;
W_ r1, r2, r3, r4;
STK_CHK_GEN_N (WDS(4));
reserve 4 = tmp {
mp_tmp1 = tmp + WDS(3);
mp_tmp2 = tmp + WDS(2);
mp_result1 = tmp + WDS(1);
mp_result2 = tmp;
/* Perform the operation */
ccall __decodeDouble_2Int(mp_tmp1 "ptr", mp_tmp2 "ptr",
mp_result1 "ptr", mp_result2 "ptr",
arg);
r1 = W_[mp_tmp1];
r2 = W_[mp_tmp2];
r3 = W_[mp_result1];
r4 = W_[mp_result2];
}
/* returns:
(Int# (mant sign), Word# (mant high), Word# (mant low), Int# (expn)) */
return (r1, r2, r3, r4);
}
/* Double# -> (# Int64#, Int# #) */
stg_decodeDoublezuInt64zh ( D_ arg )
{
CInt exp;
I64 mant;
W_ mant_ptr;
STK_CHK_GEN_N (SIZEOF_INT64);
reserve BYTES_TO_WDS(SIZEOF_INT64) = mant_ptr {
(exp) = ccall __decodeDouble_Int64(mant_ptr "ptr", arg);
mant = I64[mant_ptr];
}
return (mant, TO_W_(exp));
}
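/* At the Haskell level these decode primops back the RealFloat class
 * methods (illustrative only); for example, with a 53-bit Double mantissa:
 *
 *   example :: (Integer, Int)
 *   example = decodeFloat (1.5 :: Double)
 *   -- == (6755399441055744, -52), since 1.5 = 6755399441055744 * 2^(-52)
 */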
/* -----------------------------------------------------------------------------
* Concurrency primitives
* -------------------------------------------------------------------------- */
stg_forkzh ( gcptr closure )
{
MAYBE_GC_P(stg_forkzh, closure);
gcptr threadid;
("ptr" threadid) = ccall createIOThread( MyCapability() "ptr",
TO_W_(RtsFlags_GcFlags_initialStkSize(RtsFlags)),
closure "ptr");
/* start blocked if the current thread is blocked */
StgTSO_flags(threadid) = %lobits16(
TO_W_(StgTSO_flags(threadid)) |
TO_W_(StgTSO_flags(CurrentTSO)) & (TSO_BLOCKEX | TSO_INTERRUPTIBLE));
ccall scheduleThread(MyCapability() "ptr", threadid "ptr");
// context switch soon, but not immediately: we don't want every
// forkIO to force a context-switch.
%relaxed Capability_context_switch(MyCapability()) = 1 :: CInt;
return (threadid);
}
stg_forkOnzh ( W_ cpu, gcptr closure )
{
again: MAYBE_GC(again);
gcptr threadid;
("ptr" threadid) = ccall createIOThread(
MyCapability() "ptr",
TO_W_(RtsFlags_GcFlags_initialStkSize(RtsFlags)),
closure "ptr");
/* start blocked if the current thread is blocked */
StgTSO_flags(threadid) = %lobits16(
TO_W_(StgTSO_flags(threadid)) |
TO_W_(StgTSO_flags(CurrentTSO)) & (TSO_BLOCKEX | TSO_INTERRUPTIBLE));
ccall scheduleThreadOn(MyCapability() "ptr", cpu, threadid "ptr");
return (threadid);
}
stg_yieldzh ()
{
// when we yield to the scheduler, we have to tell it to put the
// current thread to the back of the queue by setting the
// context_switch flag. If we don't do this, it will run the same
// thread again.
%relaxed Capability_context_switch(MyCapability()) = 1 :: CInt;
jump stg_yield_noregs();
}
stg_labelThreadzh ( gcptr threadid, W_ addr )
{
ccall labelThread(MyCapability() "ptr", threadid "ptr", addr "ptr");
return ();
}
stg_listThreadszh ()
{
P_ arr;
("ptr" arr) = ccall listThreads(MyCapability() "ptr");
return (arr);
}
stg_isCurrentThreadBoundzh (/* no args */)
{
W_ r;
(r) = ccall isThreadBound(CurrentTSO);
return (r);
}
stg_threadLabelzh ( gcptr tso )
{
W_ r;
r = StgTSO_label(tso);
if (r == 0) {
return (0, 0);
} else {
return (1, r);
}
}
stg_threadStatuszh ( gcptr tso )
{
W_ why_blocked, what_next, flags, cap;
W_ ret, cap_no, locked;
what_next = TO_W_(RELAXED_LOAD_CLOSURE_FIELD(StgTSO_what_next, tso));
why_blocked = TO_W_(RELAXED_LOAD_CLOSURE_FIELD(StgTSO_why_blocked, tso));
// Note: these two reads are not atomic, so they might end up
// being inconsistent. It doesn't matter, since we
// only return one or the other. If we wanted to return the
// contents of block_info too, then we'd have to do some synchronisation.
if (what_next == ThreadComplete) {
ret = 16; // NB. magic, matches up with GHC.Conc.threadStatus
} else {
if (what_next == ThreadKilled) {
ret = 17;
} else {
ret = why_blocked;
}
}
cap = RELAXED_LOAD_CLOSURE_FIELD(StgTSO_cap, tso);
cap_no = TO_W_(Capability_no(cap));
flags = TO_W_(RELAXED_LOAD_CLOSURE_FIELD(StgTSO_flags, tso));
if ((flags & TSO_LOCKED) != 0) {
locked = 1;
} else {
locked = 0;
}
return (ret,cap_no,locked);
}
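/* A Haskell-level usage sketch (illustrative only): the magic 16/17 return
 * values above are decoded by GHC.Conc.threadStatus into ThreadFinished,
 * ThreadDied, ThreadRunning or ThreadBlocked.
 *
 *   import Control.Concurrent (forkIO)
 *   import GHC.Conc (threadStatus)
 *
 *   main :: IO ()
 *   main = do
 *     tid <- forkIO (return ())
 *     st  <- threadStatus tid
 *     print st
 */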
/* -----------------------------------------------------------------------------
* TVar primitives
* -------------------------------------------------------------------------- */
stg_abort /* no arg list: explicit stack layout */
{
W_ frame_type;
W_ frame;
W_ trec;
W_ outer;
W_ r;
// STM operations may allocate
MAYBE_GC_ (stg_abort); // NB. not MAYBE_GC(), we cannot make a
// function call in an explicit-stack proc
// Find the enclosing ATOMICALLY_FRAME
SAVE_THREAD_STATE();
(frame_type) = ccall findAtomicallyFrameHelper(MyCapability(), CurrentTSO "ptr");
LOAD_THREAD_STATE();
frame = Sp;
trec = StgTSO_trec(CurrentTSO);
outer = StgTRecHeader_enclosing_trec(trec);
// We've reached the ATOMICALLY_FRAME
ASSERT(frame_type == ATOMICALLY_FRAME);
ASSERT(outer == NO_TREC);
// Restart the transaction.
("ptr" trec) = ccall stmStartTransaction(MyCapability() "ptr", outer "ptr");
StgTSO_trec(CurrentTSO) = trec;
Sp = frame;
R1 = StgAtomicallyFrame_code(frame);
jump stg_ap_v_fast [R1];
}
// Catch retry frame -----------------------------------------------------------
#define CATCH_RETRY_FRAME_FIELDS(w_,p_,info_ptr, \
p1, p2, \
running_alt_code, \
first_code, \
alt_code) \
w_ info_ptr, \
PROF_HDR_FIELDS(w_,p1,p2) \
w_ running_alt_code, \
p_ first_code, \
p_ alt_code
INFO_TABLE_RET(stg_catch_retry_frame, CATCH_RETRY_FRAME,
CATCH_RETRY_FRAME_FIELDS(W_,P_,
info_ptr, p1, p2,
running_alt_code,
first_code,
alt_code))
return (P_ ret)
{
unwind Sp = Sp + SIZEOF_StgCatchRetryFrame;
W_ r;
gcptr trec, outer, arg;
trec = StgTSO_trec(CurrentTSO);
outer = StgTRecHeader_enclosing_trec(trec);
(r) = ccall stmCommitNestedTransaction(MyCapability() "ptr", trec "ptr");
if (r != 0) {
// Succeeded (either first branch or second branch)
StgTSO_trec(CurrentTSO) = outer;
return (ret);
} else {
// Did not commit: abort and restart.
StgTSO_trec(CurrentTSO) = outer;
jump stg_abort();
}
}
// Atomically frame ------------------------------------------------------------
// This must match StgAtomicallyFrame in Closures.h
#define ATOMICALLY_FRAME_FIELDS(w_,p_,info_ptr,p1,p2,code,result) \
w_ info_ptr, \
PROF_HDR_FIELDS(w_,p1,p2) \
p_ code, \
p_ result
INFO_TABLE_RET(stg_atomically_frame, ATOMICALLY_FRAME,
// layout of the frame, and bind the field names
ATOMICALLY_FRAME_FIELDS(W_,P_,
info_ptr, p1, p2,
code,
frame_result))
return (P_ result) // value returned to the frame
{
W_ valid;
gcptr trec;
trec = StgTSO_trec(CurrentTSO);
/* Back at the atomically frame */
frame_result = result;
/* try to commit */
(valid) = ccall stmCommitTransaction(MyCapability() "ptr", trec "ptr");
if (valid != 0) {
/* Transaction was valid: commit succeeded */
StgTSO_trec(CurrentTSO) = NO_TREC;
return (frame_result);
} else {
/* Transaction was not valid: try again */
("ptr" trec) = ccall stmStartTransaction(MyCapability() "ptr",
NO_TREC "ptr");
StgTSO_trec(CurrentTSO) = trec;
jump stg_ap_v_fast
// push the StgAtomicallyFrame again: the code generator is
// clever enough to only assign the fields that have changed.
(ATOMICALLY_FRAME_FIELDS(,,info_ptr,p1,p2,
code,frame_result))
(code);
}
}
INFO_TABLE_RET(stg_atomically_waiting_frame, ATOMICALLY_FRAME,
// layout of the frame, and bind the field names
ATOMICALLY_FRAME_FIELDS(W_,P_,
info_ptr, p1, p2,
code,
frame_result))
return (/* no return values */)
{
W_ trec, valid;
/* The TSO is currently waiting: should we stop waiting? */
(valid) = ccall stmReWait(MyCapability() "ptr", CurrentTSO "ptr");
if (valid != 0) {
/* Previous attempt is still valid: no point trying again yet */
jump stg_block_noregs
(ATOMICALLY_FRAME_FIELDS(,,info_ptr, p1, p2,
code,frame_result))
();
} else {
/* Previous attempt is no longer valid: try again */
("ptr" trec) = ccall stmStartTransaction(MyCapability() "ptr", NO_TREC "ptr");
StgTSO_trec(CurrentTSO) = trec;
// change the frame header to stg_atomically_frame_info
jump stg_ap_v_fast
(ATOMICALLY_FRAME_FIELDS(,,stg_atomically_frame_info, p1, p2,
code,frame_result))
(code);
}
}
// STM catch frame -------------------------------------------------------------
/* Catch frames are very similar to update frames, but when entering
* one we just pop the frame off the stack and perform the correct
* kind of return to the activation record underneath us on the stack.
*/
#define CATCH_STM_FRAME_FIELDS(w_,p_,info_ptr,p1,p2,code,handler) \
w_ info_ptr, \
PROF_HDR_FIELDS(w_,p1,p2) \
p_ code, \
p_ handler
INFO_TABLE_RET(stg_catch_stm_frame, CATCH_STM_FRAME,
// layout of the frame, and bind the field names
CATCH_STM_FRAME_FIELDS(W_,P_,info_ptr,p1,p2,code,handler))
return (P_ ret)
{
W_ r, trec, outer;
trec = StgTSO_trec(CurrentTSO);
outer = StgTRecHeader_enclosing_trec(trec);
(r) = ccall stmCommitNestedTransaction(MyCapability() "ptr", trec "ptr");
if (r != 0) {
/* Commit succeeded */
StgTSO_trec(CurrentTSO) = outer;
return (ret);
} else {
/* Commit failed */
W_ new_trec;
("ptr" new_trec) = ccall stmStartTransaction(MyCapability() "ptr", outer "ptr");
StgTSO_trec(CurrentTSO) = new_trec;
jump stg_ap_v_fast
(CATCH_STM_FRAME_FIELDS(,,info_ptr,p1,p2,code,handler))
(code);
}
}
// Primop definition -----------------------------------------------------------
stg_atomicallyzh (P_ stm)
{
P_ old_trec;
P_ new_trec;
P_ code, frame_result;
// stmStartTransaction may allocate
MAYBE_GC_P(stg_atomicallyzh, stm);
STK_CHK_GEN();
old_trec = StgTSO_trec(CurrentTSO);
/* Nested transactions are not allowed; raise an exception */
if (old_trec != NO_TREC) {
jump stg_raisezh(ghczminternal_GHCziInternalziControlziExceptionziBase_nestedAtomically_closure);
}
code = stm;
frame_result = NO_TREC;
/* Start the memory transaction */
("ptr" new_trec) = ccall stmStartTransaction(MyCapability() "ptr", old_trec "ptr");
StgTSO_trec(CurrentTSO) = new_trec;
jump stg_ap_v_fast
(ATOMICALLY_FRAME_FIELDS(,,stg_atomically_frame_info, CCCS, 0,
code,frame_result))
(stm);
}
// A closure representing "atomically x". This is used when a thread
// inside a transaction receives an asynchronous exception; see #5866.
// It is somewhat similar to the stg_raise closure.
//
INFO_TABLE(stg_atomically,1,0,THUNK_1_0,"atomically","atomically")
(P_ thunk)
{
jump stg_atomicallyzh(StgThunk_payload(thunk,0));
}
stg_catchSTMzh (P_ code /* :: STM a */,
P_ handler /* :: Exception -> STM a */)
{
STK_CHK_GEN();
/* Start a nested transaction to run the body of the try block in */
W_ cur_trec;
W_ new_trec;
cur_trec = StgTSO_trec(CurrentTSO);
("ptr" new_trec) = ccall stmStartTransaction(MyCapability() "ptr",
cur_trec "ptr");
StgTSO_trec(CurrentTSO) = new_trec;
jump stg_ap_v_fast
(CATCH_STM_FRAME_FIELDS(,,stg_catch_stm_frame_info, CCCS, 0,
code, handler))
(code);
}
stg_catchRetryzh (P_ first_code, /* :: STM a */
P_ alt_code /* :: STM a */)
{
W_ new_trec;
// stmStartTransaction may allocate
MAYBE_GC_PP (stg_catchRetryzh, first_code, alt_code);
STK_CHK_GEN();
/* Start a nested transaction within which to run the first code */
("ptr" new_trec) = ccall stmStartTransaction(MyCapability() "ptr",
StgTSO_trec(CurrentTSO) "ptr");
StgTSO_trec(CurrentTSO) = new_trec;
// push the CATCH_RETRY stack frame, and apply first_code to realWorld#
jump stg_ap_v_fast
(CATCH_RETRY_FRAME_FIELDS(,, stg_catch_retry_frame_info, CCCS, 0,
0, /* not running_alt_code */
first_code,
alt_code))
(first_code);
}
stg_retryzh /* no arg list: explicit stack layout */
{
W_ frame_type;
W_ frame;
W_ trec;
W_ outer;
W_ r;
// STM operations may allocate
MAYBE_GC_ (stg_retryzh); // NB. not MAYBE_GC(), we cannot make a
// function call in an explicit-stack proc
// Find the enclosing ATOMICALLY_FRAME or CATCH_RETRY_FRAME
retry_pop_stack:
SAVE_THREAD_STATE();
(frame_type) = ccall findRetryFrameHelper(MyCapability(), CurrentTSO "ptr");
LOAD_THREAD_STATE();
frame = Sp;
trec = StgTSO_trec(CurrentTSO);
outer = StgTRecHeader_enclosing_trec(trec);
if (frame_type == CATCH_RETRY_FRAME) {
// The retry reaches a CATCH_RETRY_FRAME before the atomic frame
ASSERT(outer != NO_TREC);
// Abort the transaction attempting the current branch
ccall stmAbortTransaction(MyCapability() "ptr", trec "ptr");
ccall stmFreeAbortedTRec(MyCapability() "ptr", trec "ptr");
if (!StgCatchRetryFrame_running_alt_code(frame) != 0) {
// Retry in the first branch: try the alternative
("ptr" trec) = ccall stmStartTransaction(MyCapability() "ptr", outer "ptr");
StgTSO_trec(CurrentTSO) = trec;
StgCatchRetryFrame_running_alt_code(frame) = 1 :: CInt; // true;
R1 = StgCatchRetryFrame_alt_code(frame);
jump stg_ap_v_fast [R1];
} else {
// Retry in the alternative code: propagate the retry
StgTSO_trec(CurrentTSO) = outer;
Sp = Sp + SIZEOF_StgCatchRetryFrame;
goto retry_pop_stack;
}
}
// We've reached the ATOMICALLY_FRAME: attempt to wait
ASSERT(frame_type == ATOMICALLY_FRAME);
ASSERT(outer == NO_TREC);
(r) = ccall stmWait(MyCapability() "ptr", CurrentTSO "ptr", trec "ptr");
if (r != 0) {
// Transaction was valid: stmWait put us on the TVars' queues, we now block
StgHeader_info(frame) = stg_atomically_waiting_frame_info;
Sp = frame;
R3 = trec; // passing to stmWaitUnblock()
jump stg_block_stmwait [R3];
} else {
// Transaction was not valid: retry immediately
("ptr" trec) = ccall stmStartTransaction(MyCapability() "ptr", outer "ptr");
StgTSO_trec(CurrentTSO) = trec;
Sp = frame;
R1 = StgAtomicallyFrame_code(frame);
jump stg_ap_v_fast [R1];
}
}
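/* A Haskell-level usage sketch (illustrative only) of the machinery above:
 * atomically pushes the ATOMICALLY_FRAME, retry# walks back to the nearest
 * CATCH_RETRY_FRAME or ATOMICALLY_FRAME, and orElse is built on catchRetry#.
 *
 *   import GHC.Conc (TVar, atomically, retry, orElse, readTVar, writeTVar)
 *
 *   -- Take from whichever TVar is non-empty, blocking if both are empty.
 *   takeEither :: TVar (Maybe a) -> TVar (Maybe a) -> IO a
 *   takeEither a b = atomically (takeOne a `orElse` takeOne b)
 *     where
 *       takeOne tv = do
 *         m <- readTVar tv
 *         case m of
 *           Nothing -> retry
 *           Just x  -> writeTVar tv Nothing >> return x
 */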
stg_newTVarzh (P_ init)
{
W_ tv;
ALLOC_PRIM_P (SIZEOF_StgTVar, stg_newTVarzh, init);
tv = Hp - SIZEOF_StgTVar + WDS(1);
SET_HDR (tv, stg_TVAR_DIRTY_info, CCCS);
StgTVar_current_value(tv) = init;
StgTVar_first_watch_queue_entry(tv) = stg_END_STM_WATCH_QUEUE_closure;
StgTVar_num_updates(tv) = 0;
return (tv);
}
stg_readTVarzh (P_ tvar)
{
P_ trec;
P_ result;
// Call to stmReadTVar may allocate
MAYBE_GC_P (stg_readTVarzh, tvar);
trec = StgTSO_trec(CurrentTSO);
("ptr" result) = ccall stmReadTVar(MyCapability() "ptr", trec "ptr",
tvar "ptr");
return (result);
}
stg_readTVarIOzh ( P_ tvar /* :: TVar a */ )
{
W_ result, resultinfo;
again:
result = %acquire StgTVar_current_value(tvar);
resultinfo = %INFO_PTR(result);
if (resultinfo == stg_TREC_HEADER_info) {
goto again;
}
return (result);
}
stg_writeTVarzh (P_ tvar, /* :: TVar a */
P_ new_value /* :: a */)
{
W_ trec;
// Call to stmWriteTVar may allocate
MAYBE_GC_PP (stg_writeTVarzh, tvar, new_value);
trec = StgTSO_trec(CurrentTSO);
ccall stmWriteTVar(MyCapability() "ptr", trec "ptr", tvar "ptr",
new_value "ptr");
return ();
}
/* -----------------------------------------------------------------------------
* MVar primitives
*
* take & putMVar work as follows. Firstly, an important invariant:
*
* If the MVar is full, then the blocking queue contains only
* threads blocked on putMVar, and if the MVar is empty then the
* blocking queue contains only threads blocked on takeMVar.
*
* takeMVar:
* MVar empty : then add ourselves to the blocking queue
* MVar full : remove the value from the MVar, and
* blocking queue empty : return
* blocking queue non-empty : perform the first blocked putMVar
* from the queue, and wake up the
* thread (MVar is now full again)
*
* putMVar is just the dual of the above algorithm.
*
* How do we "perform a putMVar"? Well, we have to fiddle around with
* the stack of the thread waiting to do the putMVar. See
* stg_block_putmvar and stg_block_takemvar in HeapStackCheck.cmm for
* the stack layout, and the PerformPut and PerformTake macros below.
*
* It is important that a blocked take or put is woken up with the
* take/put already performed, because otherwise there would be a
* small window of vulnerability where the thread could receive an
* exception and never perform its take or put, and we'd end up with a
* deadlock.
*
* Note [Nonmoving write barrier in Perform{Put,Take}]
* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
* As noted in Note [Non-moving garbage collector] in NonMoving.c, the
* non-moving GC requires that all overwritten pointers be pushed to the update
* remembered set. In the case of stack mutation this typically happens by
* "dirtying" the stack, which eagerly traces the entire stack chunk.
*
* An exception to this rule is PerformPut, which mutates the stack of a
* blocked thread (overwriting an stg_block_putmvar frame). To ensure that the
* collector sees the MVar and value reachable from the overwritten frame, we
* must push them to the update remembered set. Failing to do so was the cause
* of #20399.
*
* Note that unlike PerformPut, the callers of PerformTake first dirty the
* stack prior to mutating it (since they introduce a *new*, potentially
* inter-generational reference to the stack) and therefore the barrier
* described above is unnecessary in this case.
* -------------------------------------------------------------------------- */
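/* A Haskell-level sketch of the protocol described above (illustrative
 * only): the reader blocks in takeMVar#, queuing itself on the MVar, and is
 * woken by putMVar# with the take already performed on its stack.
 *
 *   import Control.Concurrent (forkIO)
 *   import Control.Concurrent.MVar (newEmptyMVar, takeMVar, putMVar)
 *
 *   handoff :: IO Int
 *   handoff = do
 *     box <- newEmptyMVar
 *     _ <- forkIO (putMVar box (42 :: Int))
 *     takeMVar box
 */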
stg_isEmptyMVarzh ( P_ mvar /* :: MVar a */ )
{
if (StgMVar_value(mvar) == stg_END_TSO_QUEUE_closure) {
return (1);
} else {
return (0);
}
}
stg_newMVarzh ()
{
W_ mvar;
ALLOC_PRIM_ (SIZEOF_StgMVar, stg_newMVarzh);
mvar = Hp - SIZEOF_StgMVar + WDS(1);
// No memory barrier needed as this is a new allocation.
SET_HDR(mvar,stg_MVAR_DIRTY_info,CCCS);
// MVARs start dirty: generation 0 has no mutable list
StgMVar_head(mvar) = stg_END_TSO_QUEUE_closure;
StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure;
return (mvar);
}
// See Note [Nonmoving write barrier in Perform{Put,Take}].
// Precondition: the stack must be dirtied.
#define PerformTake(stack, value) \
W_ sp; \
sp = StgStack_sp(stack); \
W_[sp + WDS(1)] = value; \
W_[sp + WDS(0)] = stg_ret_p_info;
// See Note [Nonmoving write barrier in Perform{Put,Take}].
#define PerformPut(stack,lval) \
W_ sp; \
sp = StgStack_sp(stack) + WDS(3); \
IF_NONMOVING_WRITE_BARRIER_ENABLED { \
ccall updateRemembSetPushClosure_(BaseReg "ptr", W_[sp - WDS(1)] "ptr"); \
ccall updateRemembSetPushClosure_(BaseReg "ptr", W_[sp - WDS(2)] "ptr"); \
} \
StgStack_sp(stack) = sp; \
lval = W_[sp - WDS(1)];
stg_takeMVarzh ( P_ mvar /* :: MVar a */ )
{
W_ val, info, tso, q, qinfo;
LOCK_CLOSURE(mvar, info);
/* If the MVar is empty, put ourselves on its blocking queue,
* and wait until we're woken up.
*/
if (StgMVar_value(mvar) == stg_END_TSO_QUEUE_closure) {
if (info == stg_MVAR_CLEAN_info) {
ccall dirty_MVAR(BaseReg "ptr", mvar "ptr", StgMVar_value(mvar) "ptr");
}
// We want to put the heap check down here in the slow path,
// but be careful to unlock the closure before returning to
// the RTS if the check fails.
ALLOC_PRIM_WITH_CUSTOM_FAILURE
(SIZEOF_StgMVarTSOQueue,
unlockClosure(mvar, stg_MVAR_DIRTY_info);
GC_PRIM_P(stg_takeMVarzh, mvar));
q = Hp - SIZEOF_StgMVarTSOQueue + WDS(1);
StgMVarTSOQueue_link(q) = END_TSO_QUEUE;
StgMVarTSOQueue_tso(q) = CurrentTSO;
SET_HDR(q, stg_MVAR_TSO_QUEUE_info, CCS_SYSTEM);
if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
%release StgMVar_head(mvar) = q;
} else {
%release StgMVarTSOQueue_link(StgMVar_tail(mvar)) = q;
ccall recordClosureMutated(MyCapability() "ptr",
StgMVar_tail(mvar));
}
StgTSO__link(CurrentTSO) = q;
StgTSO_block_info(CurrentTSO) = mvar;
%release StgTSO_why_blocked(CurrentTSO) = BlockedOnMVar::I16;
StgMVar_tail(mvar) = q;
jump stg_block_takemvar(mvar);
}
/* we got the value... */
val = StgMVar_value(mvar);
q = StgMVar_head(mvar);
loop:
if (q == stg_END_TSO_QUEUE_closure) {
/* No further putMVars, MVar is now empty */
StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure;
// If the MVar is not already dirty, then we don't need to make
// it dirty, as it is empty with nothing blocking on it.
unlockClosure(mvar, info);
// However, we do need to ensure that the nonmoving collector
// knows about the reference to the value that we just removed...
updateRemembSetPushPtr(val);
return (val);
}
qinfo = GET_INFO_ACQUIRE(q);
if (qinfo == stg_IND_info ||
qinfo == stg_MSG_NULL_info) {
q = %acquire StgInd_indirectee(q);
goto loop;
}
// There are putMVar(s) waiting... wake up the first thread on the queue
if (info == stg_MVAR_CLEAN_info) {
ccall dirty_MVAR(BaseReg "ptr", mvar "ptr", val "ptr");
}
tso = StgMVarTSOQueue_tso(q);
StgMVar_head(mvar) = StgMVarTSOQueue_link(q);
if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
}
ASSERT(StgTSO_why_blocked(tso) == BlockedOnMVar::I16);
ASSERT(StgTSO_block_info(tso) == mvar);
// actually perform the putMVar for the thread that we just woke up
W_ stack;
stack = StgTSO_stackobj(tso);
PerformPut(stack, StgMVar_value(mvar));
// indicate that the MVar operation has now completed.
StgTSO__link(tso) = stg_END_TSO_QUEUE_closure;
// no need to mark the TSO dirty, we have only written END_TSO_QUEUE.
ccall tryWakeupThread(MyCapability() "ptr", tso);
unlockClosure(mvar, stg_MVAR_DIRTY_info);
return (val);
}
stg_tryTakeMVarzh ( P_ mvar /* :: MVar a */ )
{
W_ val, info, tso, q, qinfo;
LOCK_CLOSURE(mvar, info);
/* If the MVar is empty, return 0. */
if (StgMVar_value(mvar) == stg_END_TSO_QUEUE_closure) {
#if defined(THREADED_RTS)
unlockClosure(mvar, info);
#endif
/* HACK: we need a pointer to pass back,
* so we abuse NO_FINALIZER_closure
*/
return (0, stg_NO_FINALIZER_closure);
}
/* we got the value... */
val = StgMVar_value(mvar);
q = StgMVar_head(mvar);
loop:
if (q == stg_END_TSO_QUEUE_closure) {
/* No further putMVars, MVar is now empty */
StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure;
unlockClosure(mvar, info);
return (1, val);
}
qinfo = GET_INFO_ACQUIRE(q);
if (qinfo == stg_IND_info ||
qinfo == stg_MSG_NULL_info) {
q = %acquire StgInd_indirectee(q);
goto loop;
}
// There are putMVar(s) waiting... wake up the first thread on the queue
if (info == stg_MVAR_CLEAN_info) {
ccall dirty_MVAR(BaseReg "ptr", mvar "ptr", val "ptr");
}
tso = StgMVarTSOQueue_tso(q);
StgMVar_head(mvar) = StgMVarTSOQueue_link(q);
if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
}
ASSERT(StgTSO_why_blocked(tso) == BlockedOnMVar::I16);
ASSERT(StgTSO_block_info(tso) == mvar);
// actually perform the putMVar for the thread that we just woke up
W_ stack;
stack = StgTSO_stackobj(tso);
PerformPut(stack, StgMVar_value(mvar));
// indicate that the MVar operation has now completed.
StgTSO__link(tso) = stg_END_TSO_QUEUE_closure;
// no need to mark the TSO dirty, we have only written END_TSO_QUEUE.
ccall tryWakeupThread(MyCapability() "ptr", tso);
unlockClosure(mvar, stg_MVAR_DIRTY_info);
return (1,val);
}
stg_putMVarzh ( P_ mvar, /* :: MVar a */
P_ val, /* :: a */ )
{
W_ info, tso, q, qinfo;
LOCK_CLOSURE(mvar, info);
if (StgMVar_value(mvar) != stg_END_TSO_QUEUE_closure) {
if (info == stg_MVAR_CLEAN_info) {
ccall dirty_MVAR(BaseReg "ptr", mvar "ptr", StgMVar_value(mvar) "ptr");
}
// We want to put the heap check down here in the slow path,
// but be careful to unlock the closure before returning to
// the RTS if the check fails.
ALLOC_PRIM_WITH_CUSTOM_FAILURE
(SIZEOF_StgMVarTSOQueue,
unlockClosure(mvar, stg_MVAR_DIRTY_info);
GC_PRIM_PP(stg_putMVarzh, mvar, val));
q = Hp - SIZEOF_StgMVarTSOQueue + WDS(1);
StgMVarTSOQueue_link(q) = END_TSO_QUEUE;
StgMVarTSOQueue_tso(q) = CurrentTSO;
SET_HDR(q, stg_MVAR_TSO_QUEUE_info, CCS_SYSTEM);
if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
%release StgMVar_head(mvar) = q;
} else {
%release StgMVarTSOQueue_link(StgMVar_tail(mvar)) = q;
ccall recordClosureMutated(MyCapability() "ptr",
StgMVar_tail(mvar));
}
StgTSO__link(CurrentTSO) = q;
StgTSO_block_info(CurrentTSO) = mvar;
%release StgTSO_why_blocked(CurrentTSO) = BlockedOnMVar::I16;
StgMVar_tail(mvar) = q;
jump stg_block_putmvar(mvar,val);
}
// We are going to mutate the closure, make sure its current pointers
// are marked.
if (info == stg_MVAR_CLEAN_info) {
ccall update_MVAR(BaseReg "ptr", mvar "ptr", StgMVar_value(mvar) "ptr");
}
q = StgMVar_head(mvar);
loop:
if (q == stg_END_TSO_QUEUE_closure) {
/* No further takes, the MVar is now full. */
StgMVar_value(mvar) = val;
if (info == stg_MVAR_CLEAN_info) {
ccall dirty_MVAR(BaseReg "ptr", mvar "ptr", StgMVar_value(mvar) "ptr");
}
unlockClosure(mvar, stg_MVAR_DIRTY_info);
return ();
}
qinfo = GET_INFO_ACQUIRE(q);
if (qinfo == stg_IND_info ||
qinfo == stg_MSG_NULL_info) {
q = %acquire StgInd_indirectee(q);
goto loop;
}
// There are readMVar/takeMVar(s) waiting: wake up the first one
tso = StgMVarTSOQueue_tso(q);
q = StgMVarTSOQueue_link(q);
StgMVar_head(mvar) = q;
if (q == stg_END_TSO_QUEUE_closure) {
StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
} else {
if (info == stg_MVAR_CLEAN_info) {
// Resolve #18919.
ccall dirty_MVAR(BaseReg "ptr", mvar "ptr",
StgMVar_value(mvar) "ptr");
info = stg_MVAR_DIRTY_info;
}
}
ASSERT(StgTSO_block_info(tso) == mvar);
// save why_blocked here, because waking up the thread destroys
// this information
W_ why_blocked;
why_blocked = TO_W_(StgTSO_why_blocked(tso));
// actually perform the takeMVar
W_ stack;
stack = StgTSO_stackobj(tso);
if (IS_STACK_CLEAN(stack)) {
ccall dirty_STACK(MyCapability() "ptr", stack "ptr");
}
PerformTake(stack, val);
// indicate that the MVar operation has now completed.
StgTSO__link(tso) = stg_END_TSO_QUEUE_closure;
ccall tryWakeupThread(MyCapability() "ptr", tso);
// If it was a readMVar, then we can still do work,
// so loop back. (XXX: This could take a while)
if (why_blocked == BlockedOnMVarRead) {
goto loop;
}
ASSERT(why_blocked == BlockedOnMVar);
unlockClosure(mvar, info);
return ();
}
// NOTE: there is another implementation of this function in
// Threads.c:performTryPutMVar(). Keep them in sync! It was
// measurably slower to call the C function from here (70% for a
// tight loop doing tryPutMVar#).
//
// TODO: we could kill the duplication by making tryPutMVar# into an
// inline primop that expands into a C call to performTryPutMVar().
stg_tryPutMVarzh ( P_ mvar, /* :: MVar a */
P_ val, /* :: a */ )
{
W_ info, tso, q, qinfo;
LOCK_CLOSURE(mvar, info);
if (StgMVar_value(mvar) != stg_END_TSO_QUEUE_closure) {
#if defined(THREADED_RTS)
unlockClosure(mvar, info);
#endif
return (0);
}
q = StgMVar_head(mvar);
loop:
if (q == stg_END_TSO_QUEUE_closure) {
/* No further takes, the MVar is now full. */
if (info == stg_MVAR_CLEAN_info) {
ccall dirty_MVAR(BaseReg "ptr", mvar "ptr", StgMVar_value(mvar) "ptr");
}
StgMVar_value(mvar) = val;
unlockClosure(mvar, stg_MVAR_DIRTY_info);
return (1);
}
qinfo = GET_INFO_ACQUIRE(q);
if (qinfo == stg_IND_info ||
qinfo == stg_MSG_NULL_info) {
q = %acquire StgInd_indirectee(q);
goto loop;
}
// There are takeMVar(s) waiting: wake up the first one
tso = StgMVarTSOQueue_tso(q);
q = StgMVarTSOQueue_link(q);
StgMVar_head(mvar) = q;
if (q == stg_END_TSO_QUEUE_closure) {
StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
} else {
if (info == stg_MVAR_CLEAN_info) {
// Resolve #18919.
ccall dirty_MVAR(BaseReg "ptr", mvar "ptr",
StgMVar_value(mvar) "ptr");
info = stg_MVAR_DIRTY_info;
}
}
// save why_blocked here, because waking up the thread destroys
// this information
W_ why_blocked;
why_blocked = TO_W_(StgTSO_why_blocked(tso)); // TODO: Missing barrier
ASSERT(StgTSO_block_info(tso) == mvar);
// actually perform the takeMVar
W_ stack;
stack = StgTSO_stackobj(tso);
if (IS_STACK_CLEAN(stack)) {
ccall dirty_STACK(MyCapability() "ptr", stack "ptr");
}
PerformTake(stack, val);
// indicate that the MVar operation has now completed.
StgTSO__link(tso) = stg_END_TSO_QUEUE_closure;
ccall tryWakeupThread(MyCapability() "ptr", tso);
// If it was a readMVar, then we can still do work,
// so loop back. (XXX: This could take a while)
if (why_blocked == BlockedOnMVarRead) {
goto loop;
}
ASSERT(why_blocked == BlockedOnMVar);
unlockClosure(mvar, info);
return (1);
}
stg_readMVarzh ( P_ mvar, /* :: MVar a */ )
{
W_ val, info, tso, q;
LOCK_CLOSURE(mvar, info);
/* If the MVar is empty, put ourselves on the blocked readers
* list and wait until we're woken up.
*/
if (StgMVar_value(mvar) == stg_END_TSO_QUEUE_closure) {
// Add MVar to mutable list
if (info == stg_MVAR_CLEAN_info) {
ccall dirty_MVAR(BaseReg "ptr", mvar "ptr", StgMVar_value(mvar));
}
ALLOC_PRIM_WITH_CUSTOM_FAILURE
(SIZEOF_StgMVarTSOQueue,
unlockClosure(mvar, stg_MVAR_DIRTY_info);
GC_PRIM_P(stg_readMVarzh, mvar));
q = Hp - SIZEOF_StgMVarTSOQueue + WDS(1);
// readMVars are pushed to the front of the queue, so
// they get handled immediately
StgMVarTSOQueue_link(q) = StgMVar_head(mvar);
StgMVarTSOQueue_tso(q) = CurrentTSO;
SET_HDR(q, stg_MVAR_TSO_QUEUE_info, CCS_SYSTEM);
%release StgMVar_head(mvar) = q;
StgTSO__link(CurrentTSO) = q;
StgTSO_block_info(CurrentTSO) = mvar;
%release StgTSO_why_blocked(CurrentTSO) = BlockedOnMVarRead::I16;
if (StgMVar_tail(mvar) == stg_END_TSO_QUEUE_closure) {
StgMVar_tail(mvar) = q;
}
jump stg_block_readmvar(mvar);
}
val = StgMVar_value(mvar);
unlockClosure(mvar, info);
return (val);
}
stg_tryReadMVarzh ( P_ mvar, /* :: MVar a */ )
{
W_ val, info, tso, q;
LOCK_CLOSURE(mvar, info);
if (StgMVar_value(mvar) == stg_END_TSO_QUEUE_closure) {
unlockClosure(mvar, info);
return (0, stg_NO_FINALIZER_closure);
}
val = StgMVar_value(mvar);
unlockClosure(mvar, info);
return (1, val);
}
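/* A minimal Haskell-level sketch (not part of this file) of how readMVar# and
 * tryReadMVar# surface through Control.Concurrent.MVar: readMVar blocks on an
 * empty MVar (queued at the front, as above) and never removes the value,
 * while tryReadMVar never blocks:
 *
 *   import Control.Concurrent.MVar
 *
 *   demo :: IO ()
 *   demo = do
 *     m <- newEmptyMVar
 *     r <- tryReadMVar m         -- Nothing: the MVar is empty
 *     putMVar m (42 :: Int)
 *     x <- readMVar m            -- 42; the value is left in the MVar
 *     y <- readMVar m            -- 42 again
 *     print (r, x, y)
 */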
/* -----------------------------------------------------------------------------
* IOPort primitives
*
* readIOPort & writeIOPort work as follows. Firstly, an important invariant:
*
* Only one read and one write is allowed for an IOPort.
* Reading or writing to the same port twice will throw an exception.
*
* readIOPort:
* IOPort empty : then add ourselves to the blocking queue
* IOPort full : remove the value from the IOPort, and
* blocking queue empty : return
* blocking queue non-empty : perform the only blocked
* writeIOPort from the queue, and
* wake up the thread
* (IOPort is now empty)
*
* writeIOPort is just the dual of the above algorithm.
*
 * How do we "perform a writeIOPort"? Well, by storing the value and the port
 * on the stack, the same way we do with MVars. Semantically the operations
 * mutate the stack in the same way, so we re-use the MVar logic and data
 * structures for IOPort. See stg_block_putmvar and stg_block_takemvar in HeapStackCheck.c
* for the stack layout, and the PerformPut and PerformTake macros below. We
* also re-use the closure types MVAR_CLEAN/_DIRTY for IOPort.
*
 * The remaining caveats of MVar thus also apply for an IOPort. The one
 * crucial difference between an MVar and an IOPort is that the scheduler
 * will not interrupt a thread blocked on an IOPort just because it thinks
 * there's a deadlock. This is especially crucial for the non-threaded runtime.
 *
 * To avoid double reads/writes we set only the head to a MVarTSOQueue when
 * a reader queues up on a port, and we set the tail to the port itself upon
 * reading. We can do this since there can only ever be one reader and one
 * writer for the port, in contrast to MVars, which need to keep a list of
 * blocked threads.
*
* This means IOPorts have these valid states and transitions:
*
┌─────────┐
│ Empty │ head == tail == value == END_TSO_QUEUE
├─────────┤
│ │
write │ │ read
v v
value != END_TSO_QUEUE ┌─────────┐ ┌─────────┐ value == END_TSO_QUEUE
head == END_TSO_QUEUE │ full │ │ reading │ head == queue with single reader
tail == END_TSO_QUEUE └─────────┘ └─────────┘ tail == END_TSO_QUEUE
│ │
read │ │ write
│ │
v v
┌──────────┐ value != END_TSO_QUEUE
│ Used │ head == END_TSO_QUEUE
└──────────┘ tail == ioport
*
* -------------------------------------------------------------------------- */
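/* A minimal Haskell-level sketch (not part of this file) of the intended
 * one-shot usage: it assumes thin wrappers around these primops exported from
 * GHC.IOPort (newEmptyIOPort, readIOPort, writeIOPort); the exact module name
 * and signatures may differ between GHC versions:
 *
 *   import Control.Concurrent (forkIO)
 *   import GHC.IOPort (newEmptyIOPort, readIOPort, writeIOPort)  -- assumed wrappers
 *
 *   demo :: IO ()
 *   demo = do
 *     p <- newEmptyIOPort
 *     _ <- forkIO $ do
 *            _ <- writeIOPort p (42 :: Int)   -- the one permitted write
 *            return ()
 *     x <- readIOPort p                       -- the one permitted read
 *     print x
 *     -- A second read or write of p would raise the doubleReadException
 *     -- used in the code below.
 */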
stg_readIOPortzh ( P_ ioport /* :: IOPort a */ )
{
W_ val, info, tso, q;
LOCK_CLOSURE(ioport, info);
/* If the Port is empty, put ourselves on the blocked readers
* list and wait until we're woken up.
*/
if (StgMVar_value(ioport) == stg_END_TSO_QUEUE_closure) {
// There is or was already another reader, throw exception.
if (StgMVar_head(ioport) != stg_END_TSO_QUEUE_closure ||
StgMVar_tail(ioport) != stg_END_TSO_QUEUE_closure) {
unlockClosure(ioport, info);
jump stg_raiseIOzh(ghczminternal_GHCziInternalziIOPort_doubleReadException_closure);
}
if (info == stg_MVAR_CLEAN_info) {
ccall dirty_MVAR(BaseReg "ptr", ioport "ptr", StgMVar_value(ioport));
}
ALLOC_PRIM_WITH_CUSTOM_FAILURE
(SIZEOF_StgMVarTSOQueue,
unlockClosure(ioport, stg_MVAR_DIRTY_info);
GC_PRIM_P(stg_readIOPortzh, ioport));
q = Hp - SIZEOF_StgMVarTSOQueue + WDS(1);
// link = stg_END_TSO_QUEUE_closure since we check that
// there is no other reader above.
StgMVarTSOQueue_link(q) = stg_END_TSO_QUEUE_closure;
StgMVarTSOQueue_tso(q) = CurrentTSO;
SET_HDR(q, stg_MVAR_TSO_QUEUE_info, CCS_SYSTEM);
%release StgMVar_head(ioport) = q;
StgTSO__link(CurrentTSO) = q;
StgTSO_block_info(CurrentTSO) = ioport;
// See Note [Heap memory barriers]
        %release StgTSO_why_blocked(CurrentTSO) = BlockedOnMVar::I16;
//Unlocks the closure as well
jump stg_block_readmvar(ioport);
}
    // This way we can check whether there has been a read already.
    // Upon reading we set the tail to indicate the port is now closed.
if (StgMVar_tail(ioport) == stg_END_TSO_QUEUE_closure) {
StgMVar_tail(ioport) = ioport;
StgMVar_head(ioport) = stg_END_TSO_QUEUE_closure;
} else {
        // Otherwise another thread has already read: throw an exception.
unlockClosure(ioport, info);
jump stg_raiseIOzh(ghczminternal_GHCziInternalziIOPort_doubleReadException_closure);
}
val = StgMVar_value(ioport);
unlockClosure(ioport, info);
return (val);
}
stg_writeIOPortzh ( P_ ioport, /* :: IOPort a */
P_ val, /* :: a */ )
{
W_ info, tso, q;
LOCK_CLOSURE(ioport, info);
/* If there is already a value in the port, then raise an exception
as it's the second write.
Correct usages of IOPort should never have a second
write. */
if (StgMVar_value(ioport) != stg_END_TSO_QUEUE_closure) {
unlockClosure(ioport, info);
jump stg_raiseIOzh(ghczminternal_GHCziInternalziIOPort_doubleReadException_closure);
return (0);
}
// We are going to mutate the closure, make sure its current pointers
// are marked.
if (info == stg_MVAR_CLEAN_info) {
ccall update_MVAR(BaseReg "ptr", ioport "ptr", StgMVar_value(ioport) "ptr");
}
q = StgMVar_head(ioport);
loop:
if (q == stg_END_TSO_QUEUE_closure) {
/* No takes, the IOPort is now full. */
if (info == stg_MVAR_CLEAN_info) {
ccall dirty_MVAR(BaseReg "ptr", ioport "ptr", StgMVar_value(ioport) "ptr");
}
StgMVar_value(ioport) = val;
unlockClosure(ioport, stg_MVAR_DIRTY_info);
return (1);
}
//Possibly IND added by removeFromMVarBlockedQueue
if (StgHeader_info(q) == stg_IND_info ||
StgHeader_info(q) == stg_MSG_NULL_info) {
q = %acquire StgInd_indirectee(q);
goto loop;
}
// There is a readIOPort waiting: wake it up
tso = StgMVarTSOQueue_tso(q);
// Assert no read has happened yet.
ASSERT(StgMVar_tail(ioport) == stg_END_TSO_QUEUE_closure);
// And there is only one reader queued up.
ASSERT(StgMVarTSOQueue_link(q) == stg_END_TSO_QUEUE_closure);
// We perform the read here, so set tail/head accordingly.
StgMVar_head(ioport) = stg_END_TSO_QUEUE_closure;
StgMVar_tail(ioport) = ioport;
// In contrast to MVars we do not need to move on to the
// next element in the waiting list here, as there can only ever
// be one thread blocked on a port.
// save why_blocked here, because waking up the thread destroys
// this information
W_ why_blocked;
why_blocked = TO_W_(StgTSO_why_blocked(tso)); // TODO Missing acquire
ASSERT(StgTSO_block_info(tso) == ioport);
// actually perform the takeMVar
W_ stack;
stack = StgTSO_stackobj(tso);
if (IS_STACK_CLEAN(stack)) {
ccall dirty_STACK(MyCapability() "ptr", stack "ptr");
}
PerformTake(stack, val);
// indicate that the operation has now completed.
StgTSO__link(tso) = stg_END_TSO_QUEUE_closure;
ccall tryWakeupThread(MyCapability() "ptr", tso);
    // For MVars we loop here, waking up all readers.
    // IOPorts however can only have one reader, so we are done
    // at this point.
    // Either there was no reader queued, or it must have been
    // blocked with why_blocked == BlockedOnMVar.
ASSERT(why_blocked == BlockedOnMVar);
unlockClosure(ioport, info);
return (1);
}
/* -----------------------------------------------------------------------------
IOPort primitives
-------------------------------------------------------------------------- */
stg_newIOPortzh ()
{
W_ ioport;
ALLOC_PRIM_ (SIZEOF_StgMVar, stg_newIOPortzh);
ioport = Hp - SIZEOF_StgMVar + WDS(1);
SET_HDR(ioport, stg_MVAR_DIRTY_info,CCCS);
// MVARs start dirty: generation 0 has no mutable list
StgMVar_head(ioport) = stg_END_TSO_QUEUE_closure;
StgMVar_tail(ioport) = stg_END_TSO_QUEUE_closure;
StgMVar_value(ioport) = stg_END_TSO_QUEUE_closure;
return (ioport);
}
/* -----------------------------------------------------------------------------
Stable name primitives
------------------------------------------------------------------------- */
stg_makeStableNamezh ( P_ obj )
{
W_ index, sn_obj;
MAYBE_GC_P(stg_makeStableNamezh, obj);
(index) = ccall lookupStableName(obj "ptr");
/* Is there already a StableName for this heap object?
* stable_name_table is a pointer to an array of snEntry structs.
*/
sn_obj = %acquire snEntry_sn_obj(W_[stable_name_table] + index*SIZEOF_snEntry);
if (sn_obj == NULL) {
// At this point we have a snEntry, but it doesn't look as used to the
// GC yet because we don't have a StableName object for the sn_obj field
// (remember that sn_obj == NULL means the entry is free). So if we call
// GC here we end up skipping the snEntry in gcStableNameTable(). This
// caused #15906. Solution: use allocate(), which does not call GC.
//
// (Alternatively we could use a special value for the sn_obj field to
// indicate that the entry is being allocated and not free, but that's
// too complicated and doesn't buy us much. See D5342?id=18700.)
("ptr" sn_obj) = ccall allocate(MyCapability() "ptr",
BYTES_TO_WDS(SIZEOF_StgStableName));
SET_HDR(sn_obj, stg_STABLE_NAME_info, CCCS);
StgStableName_sn(sn_obj) = index;
// This will make the StableName# object visible to other threads;
        // be sure that it's completely visible to other cores.
// See Note [Heap memory barriers] in SMP.h.
%release snEntry_sn_obj(W_[stable_name_table] + index*SIZEOF_snEntry) = sn_obj;
}
return (sn_obj);
}
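/* A minimal Haskell-level sketch (not part of this file) of how makeStableName#
 * is used via System.Mem.StableName: stable names taken from the same heap
 * object compare equal, which is useful for pointer-keyed tables that do not
 * retain the object itself:
 *
 *   import System.Mem.StableName (makeStableName, eqStableName, hashStableName)
 *
 *   demo :: IO ()
 *   demo = do
 *     let xs = [1 .. 10] :: [Int]
 *     sn1 <- makeStableName xs
 *     sn2 <- makeStableName xs
 *     print (eqStableName sn1 sn2)   -- expected True: same heap object
 *     print (hashStableName sn1)     -- an Int hash of the stable name
 */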
/* -----------------------------------------------------------------------------
Stable pointer primitives
------------------------------------------------------------------------- */
stg_makeStablePtrzh ( P_ obj )
{
W_ sp;
("ptr" sp) = ccall getStablePtr(obj "ptr");
return (sp);
}
stg_deRefStablePtrzh ( P_ sp )
{
W_ r;
// see Note [NULL StgStablePtr] in StablePtr.c
// here we assume that sp is a valid StablePtr#
r = spEntry_addr(W_[stable_ptr_table] + (sp-1)*SIZEOF_spEntry);
return (r);
}
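/* A minimal Haskell-level sketch (not part of this file) of the StablePtr
 * lifecycle these primops implement, via Foreign.StablePtr: a StablePtr keeps
 * its target alive until it is explicitly freed, so it can be handed to
 * foreign code and dereferenced later:
 *
 *   import Foreign.StablePtr (newStablePtr, deRefStablePtr, freeStablePtr)
 *
 *   demo :: IO ()
 *   demo = do
 *     sp <- newStablePtr "kept alive until freed"
 *     s  <- deRefStablePtr sp
 *     putStrLn s
 *     freeStablePtr sp           -- otherwise the target is retained forever
 */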
/* -----------------------------------------------------------------------------
Bytecode object primitives
------------------------------------------------------------------------- */
stg_newBCOzh ( P_ instrs,
P_ literals,
P_ ptrs,
W_ arity,
P_ bitmap_arr )
{
W_ bco, bytes, words;
words = BYTES_TO_WDS(SIZEOF_StgBCO) + BYTE_ARR_WDS(bitmap_arr);
bytes = WDS(words);
ALLOC_PRIM (bytes);
bco = Hp - bytes + WDS(1);
// No memory barrier necessary as this is a new allocation.
SET_HDR(bco, stg_BCO_info, CCS_MAIN);
StgBCO_instrs(bco) = instrs;
StgBCO_literals(bco) = literals;
StgBCO_ptrs(bco) = ptrs;
StgBCO_arity(bco) = HALF_W_(arity);
StgBCO_size(bco) = HALF_W_(words);
// Copy the arity/bitmap info into the BCO
W_ i;
i = 0;
for:
if (i < BYTE_ARR_WDS(bitmap_arr)) {
StgBCO_bitmap(bco,i) = StgArrBytes_payload(bitmap_arr,i);
i = i + 1;
goto for;
}
return (bco);
}
stg_mkApUpd0zh ( P_ bco )
{
W_ ap;
// This function is *only* used to wrap zero-arity BCOs in an
// updatable wrapper (see GHC.ByteCode.Linker). An AP thunk is always
// saturated and always points directly to a FUN or BCO.
ASSERT(%INFO_TYPE(%GET_STD_INFO(bco)) == HALF_W_(BCO) &&
StgBCO_arity(bco) == HALF_W_(0));
HP_CHK_P(SIZEOF_StgAP, stg_mkApUpd0zh, bco);
TICK_ALLOC_UP_THK(0, 0);
CCCS_ALLOC(SIZEOF_StgAP);
ap = Hp - SIZEOF_StgAP + WDS(1);
// No memory barrier necessary as this is a new allocation.
SET_HDR(ap, stg_AP_info, CCS_MAIN);
StgAP_n_args(ap) = HALF_W_(0);
StgAP_fun(ap) = bco;
return (ap);
}
stg_unpackClosurezh ( P_ closure )
{
W_ info, ptrs, nptrs, p, ptrs_arr, dat_arr;
MAYBE_GC_P(stg_unpackClosurezh, closure);
info = GET_INFO_ACQUIRE(UNTAG(closure));
info = %STD_INFO(info);
ptrs = TO_W_(%INFO_PTRS(info));
nptrs = TO_W_(%INFO_NPTRS(info));
W_ clos;
clos = UNTAG(closure);
W_ len;
// The array returned, dat_arr, is the raw data for the entire closure.
// The length is variable based upon the closure type, ptrs, and non-ptrs
(len) = foreign "C" heap_view_closureSize(clos "ptr");
W_ dat_arr_sz;
dat_arr_sz = SIZEOF_StgArrBytes + WDS(len);
("ptr" dat_arr) = ccall allocateMightFail(MyCapability() "ptr", BYTES_TO_WDS(dat_arr_sz));
if (dat_arr == NULL) (likely: False) {
jump stg_raisezh(ghczminternal_GHCziInternalziIOziException_heapOverflow_closure);
}
TICK_ALLOC_PRIM(SIZEOF_StgArrBytes, WDS(len), 0);
SET_HDR(dat_arr, stg_ARR_WORDS_info, CCCS);
StgArrBytes_bytes(dat_arr) = WDS(len);
p = 0;
for:
if(p < len) {
W_[BYTE_ARR_CTS(dat_arr) + WDS(p)] = W_[clos + WDS(p)];
p = p + 1;
goto for;
}
W_ ptrArray;
// Collect pointers.
("ptr" ptrArray) = foreign "C" heap_view_closurePtrs(MyCapability() "ptr", clos "ptr");
return (info, dat_arr, ptrArray);
}
stg_closureSizzezh (P_ clos)
{
W_ len;
(len) = foreign "C" heap_view_closureSize(UNTAG(clos) "ptr");
return (len);
}
stg_whereFromzh (P_ clos, W_ buf)
{
CBool success;
W_ info;
info = GET_INFO(UNTAG(clos));
(success) = foreign "C" lookupIPE(info, buf);
return (TO_W_(success));
}
/* -----------------------------------------------------------------------------
Thread I/O blocking primitives
-------------------------------------------------------------------------- */
stg_waitReadzh ( W_ fd )
{
ccall syncIOWaitReady(MyCapability() "ptr", CurrentTSO "ptr",
/* IORead */ 0::I32, fd);
jump stg_block_noregs();
}
stg_waitWritezh ( W_ fd )
{
ccall syncIOWaitReady(MyCapability() "ptr", CurrentTSO "ptr",
/* IOWrite */ 1::I32, fd);
jump stg_block_noregs();
}
stg_delayzh ( W_ us_delay )
{
ccall syncDelay(MyCapability() "ptr", CurrentTSO "ptr", us_delay);
/* Annoyingly, we cannot be consistent with how we wait and resume the
* blocked thread. The reason is that the win32 legacy I/O manager
* allocates a StgAsyncIOResult struct on the C heap which has to be
* freed when the thread resumes. It's a bit awkward to arrange to
* allocate it on the GC heap instead, so that's how it is for now. Sigh.
*/
#if defined(mingw32_HOST_OS)
jump stg_block_async_void();
#else
jump stg_block_noregs();
#endif
}
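/* A minimal Haskell-level sketch (not part of this file) of the user-facing
 * wrappers in Control.Concurrent that correspond to waitRead#, waitWrite# and
 * delay#; on the threaded RTS these are serviced by the I/O manager rather
 * than these primops, so this is only an illustration of the semantics:
 *
 *   import Control.Concurrent (threadDelay, threadWaitRead)
 *   import System.Posix.Types (Fd (..))
 *
 *   demo :: IO ()
 *   demo = do
 *     threadDelay 1000000        -- block this thread for ~1s (microseconds,
 *                                -- like us_delay above)
 *     threadWaitRead (Fd 0)      -- block until fd 0 (stdin) is readable
 *     putStrLn "fd 0 is ready"
 */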
#if defined(mingw32_HOST_OS)
stg_asyncReadzh ( W_ fd, W_ is_sock, W_ len, W_ buf )
{
W_ ares;
CInt reqID;
#if defined(THREADED_RTS)
ccall barf("asyncRead# on threaded RTS") never returns;
#else
/* could probably allocate this on the heap instead */
("ptr" ares) = ccall stgMallocBytes(SIZEOF_StgAsyncIOResult,
"stg_asyncReadzh");
(reqID) = ccall addIORequest(fd, 0/*FALSE*/,is_sock,len,buf "ptr");
StgAsyncIOResult_reqID(ares) = reqID;
StgAsyncIOResult_len(ares) = 0;
StgAsyncIOResult_errCode(ares) = 0;
StgTSO_block_info(CurrentTSO) = ares;
    ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
    %release StgTSO_why_blocked(CurrentTSO) = BlockedOnRead::I16;
ccall appendToIOBlockedQueue(MyCapability() "ptr", CurrentTSO "ptr");
jump stg_block_async();
#endif
}
stg_asyncWritezh ( W_ fd, W_ is_sock, W_ len, W_ buf )
{
W_ ares;
CInt reqID;
#if defined(THREADED_RTS)
ccall barf("asyncWrite# on threaded RTS") never returns;
#else
("ptr" ares) = ccall stgMallocBytes(SIZEOF_StgAsyncIOResult,
"stg_asyncWritezh");
(reqID) = ccall addIORequest(fd, 1/*TRUE*/,is_sock,len,buf "ptr");
StgAsyncIOResult_reqID(ares) = reqID;
StgAsyncIOResult_len(ares) = 0;
StgAsyncIOResult_errCode(ares) = 0;
StgTSO_block_info(CurrentTSO) = ares;
    ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
    %release StgTSO_why_blocked(CurrentTSO) = BlockedOnWrite::I16;
ccall appendToIOBlockedQueue(MyCapability() "ptr", CurrentTSO "ptr");
jump stg_block_async();
#endif
}
stg_asyncDoProczh ( W_ proc, W_ param )
{
W_ ares;
CInt reqID;
#if defined(THREADED_RTS)
ccall barf("asyncDoProc# on threaded RTS") never returns;
#else
/* could probably allocate this on the heap instead */
("ptr" ares) = ccall stgMallocBytes(SIZEOF_StgAsyncIOResult,
"stg_asyncDoProczh");
(reqID) = ccall addDoProcRequest(proc "ptr",param "ptr");
StgAsyncIOResult_reqID(ares) = reqID;
StgAsyncIOResult_len(ares) = 0;
StgAsyncIOResult_errCode(ares) = 0;
StgTSO_block_info(CurrentTSO) = ares;
    ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
    %release StgTSO_why_blocked(CurrentTSO) = BlockedOnDoProc::I16;
ccall appendToIOBlockedQueue(MyCapability() "ptr", CurrentTSO "ptr");
jump stg_block_async();
#endif
}
#endif
/* -----------------------------------------------------------------------------
* noDuplicate#
*
* noDuplicate# tries to ensure that none of the thunks under
* evaluation by the current thread are also under evaluation by
* another thread. It relies on *both* threads doing noDuplicate#;
* the second one will get blocked if they are duplicating some work.
*
* The idea is that noDuplicate# is used within unsafePerformIO to
* ensure that the IO operation is performed at most once.
* noDuplicate# calls threadPaused which acquires an exclusive lock on
* all the thunks currently under evaluation by the current thread.
*
* Consider the following scenario. There is a thunk A, whose
* evaluation requires evaluating thunk B, where thunk B is an
 * unsafePerformIO.  Two threads, 1 and 2, both enter A.  Thread 2
* is pre-empted before it enters B, and claims A by blackholing it
* (in threadPaused). Thread 1 now enters B, and calls noDuplicate#.
*
* thread 1 thread 2
* +-----------+ +---------------+
* | -------+-----> A <-------+------- |
* | update | BLACKHOLE | marked_update |
* +-----------+ +---------------+
* | | | |
* ... ...
* | | +---------------+
* +-----------+
* | ------+-----> B
* | update | BLACKHOLE
* +-----------+
*
* At this point: A is a blackhole, owned by thread 2. noDuplicate#
* calls threadPaused, which walks up the stack and
* - claims B on behalf of thread 1
* - then it reaches the update frame for A, which it sees is already
* a BLACKHOLE and is therefore owned by another thread. Since
* thread 1 is duplicating work, the computation up to the update
* frame for A is suspended, including thunk B.
* - thunk B, which is an unsafePerformIO, has now been reverted to
* an AP_STACK which could be duplicated - BAD!
* - The solution is as follows: before calling threadPaused, we
* leave a frame on the stack (stg_noDuplicate_info) that will call
* noDuplicate# again if the current computation is suspended and
* restarted.
*
* See the test program in concurrent/prog003 for a way to demonstrate
* this. It needs to be run with +RTS -N3 or greater, and the bug
 * only manifests occasionally (once every 10 runs or so).
* -------------------------------------------------------------------------- */
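/* A minimal Haskell-level sketch (not part of this file) of where noDuplicate#
 * surfaces, assuming the GHC.IO.Unsafe exports (noDuplicate,
 * unsafeDupablePerformIO): unsafePerformIO is essentially
 * unsafeDupablePerformIO with a noDuplicate in front, which is what protects
 * the IO action from being run twice in the race described above:
 *
 *   import GHC.IO.Unsafe (noDuplicate, unsafeDupablePerformIO)
 *
 *   onceish :: Int -> Int
 *   onceish x = unsafeDupablePerformIO $ do
 *     noDuplicate              -- claim the thunks under evaluation first
 *     putStrLn "side effect"   -- now expected to run at most once per thunk
 *     return (x + 1)
 */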
INFO_TABLE_RET(stg_noDuplicate, RET_SMALL, W_ info_ptr)
return (/* no return values */)
{
jump stg_noDuplicatezh();
}
stg_noDuplicatezh /* no arg list: explicit stack layout */
{
// With a single capability there's no chance of work duplication.
CInt n_caps;
n_caps = %relaxed CInt[n_capabilities];
if (n_caps == 1 :: CInt) {
jump %ENTRY_CODE(Sp(0)) [];
}
STK_CHK_LL (WDS(1), stg_noDuplicatezh);
// leave noDuplicate frame in case the current
// computation is suspended and restarted (see above).
Sp_adj(-1);
Sp(0) = stg_noDuplicate_info;
SAVE_THREAD_STATE();
ASSERT(StgTSO_what_next(CurrentTSO) == ThreadRunGHC::I16);
ccall threadPaused (MyCapability() "ptr", CurrentTSO "ptr");
if (StgTSO_what_next(CurrentTSO) == ThreadKilled::I16) {
jump stg_threadFinished [];
} else {
LOAD_THREAD_STATE();
ASSERT(StgTSO_what_next(CurrentTSO) == ThreadRunGHC::I16);
// remove the stg_noDuplicate frame if it is still there.
if (Sp(0) == stg_noDuplicate_info) {
Sp_adj(1);
}
jump %ENTRY_CODE(Sp(0)) [];
}
}
/* -----------------------------------------------------------------------------
Misc. primitives
-------------------------------------------------------------------------- */
stg_getApStackValzh ( P_ ap_stack, W_ offset )
{
W_ ap_stackinfo;
ap_stackinfo = GET_INFO_ACQUIRE(UNTAG(ap_stack));
if (ap_stackinfo == stg_AP_STACK_info) {
return (1,StgAP_STACK_payload(ap_stack,offset));
} else {
return (0,ap_stack);
}
}
stg_getSparkzh ()
{
W_ spark;
#if !defined(THREADED_RTS)
return (0,ghczmprim_GHCziTypes_False_closure);
#else
("ptr" spark) = ccall findSpark(MyCapability() "ptr");
if (spark != 0) {
return (1,spark);
} else {
return (0,ghczmprim_GHCziTypes_False_closure);
}
#endif
}
stg_clearCCSzh (P_ arg)
{
#if defined(PROFILING)
CCCS = CCS_MAIN;
#endif
jump stg_ap_v_fast(arg);
}
stg_numSparkszh ()
{
W_ n;
#if defined(THREADED_RTS)
(n) = ccall dequeElements(Capability_sparks(MyCapability()));
#else
n = 0;
#endif
return (n);
}
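/* A minimal Haskell-level sketch (not part of this file) of how sparks are
 * created and observed, assuming GHC.Conc's par and numSparks; getSpark# is
 * what idle capabilities use internally to steal this work:
 *
 *   import GHC.Conc (par, numSparks)
 *
 *   demo :: IO ()
 *   demo = do
 *     let heavy = sum [1 .. 10000000 :: Int]
 *     heavy `par` return ()    -- push a spark onto this capability's pool
 *     n <- numSparks           -- 0 on the non-threaded RTS (see above)
 *     print n
 */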
stg_traceEventzh ( W_ msg )
{
#if defined(TRACING) || defined(DEBUG)
ccall traceUserMsg(MyCapability() "ptr", msg "ptr");
#elif defined(DTRACE)
W_ enabled;
// We should go through the macro HASKELLEVENT_USER_MSG_ENABLED from
// RtsProbes.h, but that header file includes unistd.h, which doesn't
// work in Cmm
#if !defined(solaris2_HOST_OS)
(enabled) = ccall __dtrace_isenabled$HaskellEvent$user__msg$v1();
#else
    // Solaris' DTrace can't handle the
    // __dtrace_isenabled$HaskellEvent$user__msg$v1
    // call above. That call only tests whether the user__msg probe is
    // enabled and exists purely as a performance optimisation. Since
    // preparing the probe is not expensive, we skip the test on Solaris and
    // always take the probe path. Note that this does not mean the probe
    // will fire at runtime: you still need to enable it by consuming it in
    // your dtrace script, as with any other probe.
enabled = 1;
#endif
if (enabled != 0) {
ccall dtraceUserMsgWrapper(MyCapability() "ptr", msg "ptr");
}
#endif
return ();
}
stg_traceBinaryEventzh ( W_ msg, W_ len )
{
#if defined(TRACING) || defined(DEBUG)
ccall traceUserBinaryMsg(MyCapability() "ptr", msg "ptr", len);
#endif
return ();
}
// Same code as stg_traceEventzh above but a different kind of event
// Before changing this code, read the comments in the impl above
stg_traceMarkerzh ( W_ msg )
{
#if defined(TRACING) || defined(DEBUG)
ccall traceUserMarker(MyCapability() "ptr", msg "ptr");
#elif defined(DTRACE)
W_ enabled;
#if !defined(solaris2_HOST_OS)
(enabled) = ccall __dtrace_isenabled$HaskellEvent$user__marker$v1();
#else
enabled = 1;
#endif
if (enabled != 0) {
ccall dtraceUserMarkerWrapper(MyCapability() "ptr", msg "ptr");
}
#endif
return ();
}
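/* A minimal Haskell-level sketch (not part of this file) of emitting these
 * user events from Haskell via Debug.Trace; they appear in the eventlog when
 * the program is built with eventlog support and run with +RTS -l:
 *
 *   import Debug.Trace (traceEventIO, traceMarkerIO)
 *
 *   demo :: IO ()
 *   demo = do
 *     traceMarkerIO "phase 1"    -- traceMarker#: a point-in-time marker
 *     traceEventIO "doing work"  -- traceEvent#: a user message event
 *     putStrLn "done"
 */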
stg_getThreadAllocationCounterzh ()
{
// Account for the allocation in the current block
W_ offset;
offset = Hp - bdescr_start(CurrentNursery);
return (StgTSO_alloc_limit(CurrentTSO) - TO_I64(offset));
}
stg_setThreadAllocationCounterzh ( I64 counter )
{
// Allocation in the current block will be subtracted by
// getThreadAllocationCounter#, so we have to offset any existing
// allocation here. See also openNursery/closeNursery in
// GHC.StgToCmm.Foreign.
W_ offset;
offset = Hp - bdescr_start(CurrentNursery);
StgTSO_alloc_limit(CurrentTSO) = counter + TO_I64(offset);
return ();
}
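/* A minimal Haskell-level sketch (not part of this file) of the per-thread
 * allocation counter these two primops implement, assuming the GHC.Conc
 * wrappers setAllocationCounter / getAllocationCounter (the counter is in
 * bytes and counts down as the thread allocates):
 *
 *   import GHC.Conc (setAllocationCounter, getAllocationCounter)
 *
 *   demo :: IO ()
 *   demo = do
 *     setAllocationCounter 1000000
 *     print (length (replicate 1000 'x'))   -- force some allocation
 *     c <- getAllocationCounter             -- decreased by roughly the bytes
 *     print c                               -- allocated since the set
 */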
#define KEEP_ALIVE_FRAME_FIELDS(w_,p_,info_ptr,p1,p2,c) \
w_ info_ptr, \
PROF_HDR_FIELDS(w_,p1,p2) \
p_ c
stg_keepAlivezh ( P_ c, /* :: v */
P_ io /* :: IO p */ )
{
STK_CHK_GEN();
jump stg_ap_v_fast
(KEEP_ALIVE_FRAME_FIELDS(,,stg_keepAlive_frame_info, CCCS, 0, c))(io);
}
INFO_TABLE_RET(stg_keepAlive_frame, RET_SMALL, KEEP_ALIVE_FRAME_FIELDS(W_,P_, info_ptr, p1, p2, c))
return (P_ ret)
{
return (ret);
}
#if WORD_SIZE_IN_BITS == 64
#define DOUBLE_SIZE_WDS 1
#else
#define DOUBLE_SIZE_WDS 2
#endif
stg_castWord64ToDoublezh(I64 w)
{
D_ d;
P_ ptr;
ASSERT(RESERVED_STACK_WORDS >= DOUBLE_SIZE_WDS);
reserve DOUBLE_SIZE_WDS = ptr {
I64[ptr] = w;
d = D_[ptr];
}
return (d);
}
stg_castDoubleToWord64zh(D_ d)
{
I64 w;
P_ ptr;
ASSERT(RESERVED_STACK_WORDS >= DOUBLE_SIZE_WDS);
reserve DOUBLE_SIZE_WDS = ptr {
D_[ptr] = d;
w = I64[ptr];
}
return (w);
}
stg_castWord32ToFloatzh(W_ w)
{
F_ f;
P_ ptr;
ASSERT(RESERVED_STACK_WORDS >= 1);
reserve 1 = ptr {
I32[ptr] = %lobits32(w);
f = F_[ptr];
}
return (f);
}
stg_castFloatToWord32zh(F_ f)
{
W_ w;
P_ ptr;
ASSERT(RESERVED_STACK_WORDS >= 1);
reserve 1 = ptr {
F_[ptr] = f;
// Fix #16617: use zero-extending (TO_ZXW_) here
w = TO_ZXW_(I32[ptr]);
}
return (w);
}
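/* A minimal Haskell-level sketch (not part of this file) of the bit-cast
 * primops above, via GHC.Float (castWord64ToDouble and friends), which
 * reinterpret the bits rather than converting the numeric value:
 *
 *   import GHC.Float (castDoubleToWord64, castWord64ToDouble,
 *                     castFloatToWord32, castWord32ToFloat)
 *
 *   demo :: IO ()
 *   demo = do
 *     print (castDoubleToWord64 1.0)            -- 4607182418800017408
 *                                               -- (0x3ff0000000000000)
 *     print (castWord64ToDouble 0x3ff0000000000000)      -- 1.0
 *     print (castWord32ToFloat (castFloatToWord32 1.5))  -- 1.5 (bit-exact
 *                                                        --  round trip)
 */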