diff --git a/compiler/cmm/CmmMachOp.hs b/compiler/cmm/CmmMachOp.hs
index 7cd5c1bc20887799c780f05c6e2ae72e0809f24a..bee69f6cc9409fb0f607f564ff52bf26d55e0f27 100644
--- a/compiler/cmm/CmmMachOp.hs
+++ b/compiler/cmm/CmmMachOp.hs
@@ -589,6 +589,7 @@ data CallishMachOp
   | MO_SubIntC Width
   | MO_U_Mul2 Width
+  | MO_ReadBarrier
   | MO_WriteBarrier
   | MO_Touch         -- Keep variables live (when using interior pointers)
diff --git a/compiler/cmm/CmmParse.y b/compiler/cmm/CmmParse.y
index bb389d17ae68ecf2afbd426280673acb0945006a..bdc1e5dbc0d6b1ee3df041b9e308fa946d6ec432 100644
--- a/compiler/cmm/CmmParse.y
+++ b/compiler/cmm/CmmParse.y
@@ -999,6 +999,7 @@ machOps = listToUFM $
 callishMachOps :: UniqFM ([CmmExpr] -> (CallishMachOp, [CmmExpr]))
 callishMachOps = listToUFM $
   map (\(x, y) -> (mkFastString x, y)) [
+        ( "read_barrier", (,) MO_ReadBarrier ),
         ( "write_barrier", (,) MO_WriteBarrier ),
         ( "memcpy", memcpyLikeTweakArgs MO_Memcpy ),
         ( "memset", memcpyLikeTweakArgs MO_Memset ),
diff --git a/compiler/cmm/PprC.hs b/compiler/cmm/PprC.hs
index 822de431a46848e8379229a7f1a6a87145fa799a..dd2ea6cd753d553baefe1f59cf83bbc193c0dfb5 100644
--- a/compiler/cmm/PprC.hs
+++ b/compiler/cmm/PprC.hs
@@ -808,6 +808,7 @@ pprCallishMachOp_for_C mop
         MO_F32_Exp      -> text "expf"
         MO_F32_Sqrt     -> text "sqrtf"
         MO_F32_Fabs     -> text "fabsf"
+        MO_ReadBarrier  -> text "read_barrier"
         MO_WriteBarrier -> text "write_barrier"
         MO_Memcpy _     -> text "memcpy"
         MO_Memset _     -> text "memset"
diff --git a/compiler/llvmGen/LlvmCodeGen/CodeGen.hs b/compiler/llvmGen/LlvmCodeGen/CodeGen.hs
index 236b26dbdfee8237e4ac5720ab489dbd55bae6a0..3d5adaee3638056e1d27da91ec596c66296f1c51 100644
--- a/compiler/llvmGen/LlvmCodeGen/CodeGen.hs
+++ b/compiler/llvmGen/LlvmCodeGen/CodeGen.hs
@@ -169,17 +169,25 @@ barrier = do
     let s = Fence False SyncSeqCst
     return (unitOL s, [])
+-- | Insert a 'barrier', unless the target platform is in the provided list of
+-- exceptions, in which case no code is emitted.
+barrierUnless :: [Arch] -> LlvmM StmtData
+barrierUnless exs = do
+    platform <- getLlvmPlatform
+    if platformArch platform `elem` exs
+        then return (nilOL, [])
+        else barrier
+
 -- | Foreign Calls
 genCall :: ForeignTarget -> [CmmFormal] -> [CmmActual] -> LlvmM StmtData
--- Write barrier needs to be handled specially as it is implemented as an LLVM
--- intrinsic function.
+-- Barriers need to be handled specially as they are lowered to an LLVM
+-- fence instruction rather than an ordinary function call.
+genCall (PrimTarget MO_ReadBarrier) _ _ =
+    barrierUnless [ArchX86, ArchX86_64, ArchSPARC]
 genCall (PrimTarget MO_WriteBarrier) _ _ = do
-    platform <- getLlvmPlatform
-    if platformArch platform `elem` [ArchX86, ArchX86_64, ArchSPARC]
-       then return (nilOL, [])
-       else barrier
+    barrierUnless [ArchX86, ArchX86_64, ArchSPARC]
 genCall (PrimTarget MO_Touch) _ _ =
     return (nilOL, [])
@@ -827,6 +835,7 @@ cmmPrimOpFunctions mop = do
     -- We support MO_U_Mul2 through ordinary LLVM mul instruction, see the
    -- appropriate case of genCall.
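+    -- Likewise MO_ReadBarrier and MO_WriteBarrier: they are lowered to LLVM
+    -- fences in the corresponding cases of genCall (via barrierUnless), so
+    -- they too must never reach this out-of-line path.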
MO_U_Mul2 {} -> unsupported + MO_ReadBarrier -> unsupported MO_WriteBarrier -> unsupported MO_Touch -> unsupported MO_UF_Conv _ -> unsupported diff --git a/compiler/nativeGen/PPC/CodeGen.hs b/compiler/nativeGen/PPC/CodeGen.hs index 86525f4736debd72fc729e38fde9d102705e7268..7b0eda1f1b760dc48731b649f609d3cd7792bea4 100644 --- a/compiler/nativeGen/PPC/CodeGen.hs +++ b/compiler/nativeGen/PPC/CodeGen.hs @@ -1122,6 +1122,8 @@ genCCall :: ForeignTarget -- function to call -> [CmmFormal] -- where to put the result -> [CmmActual] -- arguments (of mixed type) -> NatM InstrBlock +genCCall (PrimTarget MO_ReadBarrier) _ _ + = return $ unitOL LWSYNC genCCall (PrimTarget MO_WriteBarrier) _ _ = return $ unitOL LWSYNC @@ -2027,6 +2029,7 @@ genCCall' dflags gcp target dest_regs args MO_AddIntC {} -> unsupported MO_SubIntC {} -> unsupported MO_U_Mul2 {} -> unsupported + MO_ReadBarrier -> unsupported MO_WriteBarrier -> unsupported MO_Touch -> unsupported MO_Prefetch_Data _ -> unsupported diff --git a/compiler/nativeGen/SPARC/CodeGen.hs b/compiler/nativeGen/SPARC/CodeGen.hs index 851a6f2f0a2670de73f412fdd5aac48993a7821d..52f03dde7e52e849420d4729c31cc4e5671f1f01 100644 --- a/compiler/nativeGen/SPARC/CodeGen.hs +++ b/compiler/nativeGen/SPARC/CodeGen.hs @@ -401,6 +401,8 @@ genCCall -- -- In the SPARC case we don't need a barrier. -- +genCCall (PrimTarget MO_ReadBarrier) _ _ + = return $ nilOL genCCall (PrimTarget MO_WriteBarrier) _ _ = return $ nilOL @@ -687,6 +689,7 @@ outOfLineMachOp_table mop MO_AddIntC {} -> unsupported MO_SubIntC {} -> unsupported MO_U_Mul2 {} -> unsupported + MO_ReadBarrier -> unsupported MO_WriteBarrier -> unsupported MO_Touch -> unsupported (MO_Prefetch_Data _) -> unsupported diff --git a/compiler/nativeGen/X86/CodeGen.hs b/compiler/nativeGen/X86/CodeGen.hs index 61686186f17f76ddca58c5228c53ecc9ed6cc1a1..027b1ac6dd2498fff13a4cf8bd6689440b539307 100644 --- a/compiler/nativeGen/X86/CodeGen.hs +++ b/compiler/nativeGen/X86/CodeGen.hs @@ -1916,8 +1916,9 @@ genCCall dflags _ (PrimTarget (MO_Memset align)) _ possibleWidth = minimum [left, sizeBytes] dst_addr = AddrBaseIndex (EABaseReg dst) EAIndexNone (ImmInteger (n - left)) +genCCall _ _ (PrimTarget MO_ReadBarrier) _ _ _ = return nilOL genCCall _ _ (PrimTarget MO_WriteBarrier) _ _ _ = return nilOL - -- write barrier compiles to no code on x86/x86-64; + -- barriers compile to no code on x86/x86-64; -- we keep it this long in order to prevent earlier optimisations. genCCall _ _ (PrimTarget MO_Touch) _ _ _ = return nilOL @@ -2990,6 +2991,7 @@ outOfLineCmmOp bid mop res args MO_AddWordC {} -> unsupported MO_SubWordC {} -> unsupported MO_U_Mul2 {} -> unsupported + MO_ReadBarrier -> unsupported MO_WriteBarrier -> unsupported MO_Touch -> unsupported (MO_Prefetch_Data _ ) -> unsupported diff --git a/includes/Cmm.h b/includes/Cmm.h index ba84328a73eca4e4d474673eef2d318e78d905a5..d3570309858641d9e52c013dad2474fc224cc8f9 100644 --- a/includes/Cmm.h +++ b/includes/Cmm.h @@ -303,7 +303,7 @@ #define ENTER_(ret,x) \ again: \ W_ info; \ - LOAD_INFO(ret,x) \ + LOAD_INFO(ret,x) \ switch [INVALID_OBJECT .. 
N_CLOSURE_TYPES]                                        \
       (TO_W_( %INFO_TYPE(%STD_INFO(info)) )) {             \
   case                                                     \
@@ -626,6 +626,11 @@
 #define OVERWRITING_CLOSURE_OFS(c,n) /* nothing */
 #endif
+#if defined(THREADED_RTS)
+#define prim_read_barrier prim %read_barrier()
+#else
+#define prim_read_barrier /* nothing */
+#endif
 #if defined(THREADED_RTS)
 #define prim_write_barrier prim %write_barrier()
 #else
diff --git a/includes/stg/SMP.h b/includes/stg/SMP.h
index 16761516ac7436bd38c0c60b2522e855cc98f645..eef372f6fc7b5a5224830dc0d6c180651def46e8 100644
--- a/includes/stg/SMP.h
+++ b/includes/stg/SMP.h
@@ -96,6 +96,57 @@ EXTERN_INLINE void write_barrier(void);
 EXTERN_INLINE void store_load_barrier(void);
 EXTERN_INLINE void load_load_barrier(void);
+/*
+ * Note [Heap memory barriers]
+ *
+ * Machines with weak memory ordering semantics impose constraints on how
+ * closures are observed and mutated. For example, consider a closure that
+ * needs to be updated to an indirection. In order for the indirection to be
+ * safe for concurrent observers to enter, said observers must read the
+ * indirection's info table before they read the indirectee. Furthermore, the
+ * entering observer makes assumptions about the closure based on its info
+ * table contents, e.g. an INFO_TYPE of IND implies the closure has an
+ * indirectee pointer that is safe to follow.
+ *
+ * When a closure is updated with an indirection, both its info table and its
+ * indirectee must be written. With weak memory ordering, these two writes can
+ * be arbitrarily reordered, and perhaps even interleaved with other threads'
+ * reads and writes (in the absence of memory barrier instructions). Consider
+ * this example of a bad reordering:
+ *
+ * - An updater writes to a closure's info table (INFO_TYPE is now IND).
+ * - A concurrent observer branches upon reading the closure's INFO_TYPE as IND.
+ * - A concurrent observer reads the closure's indirectee and enters it.
+ * - An updater writes the closure's indirectee.
+ *
+ * Here the update to the indirectee comes too late and the concurrent observer
+ * has jumped off into the abyss. Speculative execution can also cause
+ * problems; consider:
+ *
+ * - An observer is about to case on a value in the closure's info table.
+ * - The observer speculatively reads one or more of the closure's fields.
+ * - An updater writes to the closure's info table.
+ * - The observer takes a branch based on the new info table value, but with the
+ *   old closure fields!
+ * - The updater writes to the closure's other fields, but it's too late.
+ *
+ * Because of these effects, reads and writes to a closure's info table must be
+ * ordered carefully with respect to reads and writes to the closure's other
+ * fields, and memory barriers must be placed to ensure that reads and writes
+ * occur in program order. Specifically, updates to a closure must obey the
+ * following pattern:
+ *
+ * - Update the closure's (non-info table) fields.
+ * - Write barrier.
+ * - Update the closure's info table.
+ *
+ * Observing a closure's fields must obey the following pattern:
+ *
+ * - Read the closure's info pointer.
+ * - Read barrier.
+ * - Read the closure's (non-info table) fields.
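+ *
+ * As a purely illustrative sketch (the precise code differs at each use
+ * site), the two sides of an indirection update look like this, using the
+ * barrier operations declared above:
+ *
+ *     // updater
+ *     ((StgInd *)p)->indirectee = v;
+ *     write_barrier();
+ *     SET_INFO(p, &stg_IND_info);
+ *
+ *     // observer
+ *     const StgInfoTable *info = p->header.info;
+ *     load_load_barrier();
+ *     StgClosure *v = ((StgInd *)p)->indirectee;
+ *
+ * The write barrier makes the indirectee visible no later than the new info
+ * pointer; the read barrier keeps the observer from reading the indirectee
+ * before the info pointer that justifies that read.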
+ */ + /* ---------------------------------------------------------------------------- Implementations ------------------------------------------------------------------------- */ diff --git a/rts/Apply.cmm b/rts/Apply.cmm index b08a8bf5385bff9ddffe9c516eaa0589c0c93dd5..c1ab2dc71f9fd5a33608a266a4233ac6c5aacfa2 100644 --- a/rts/Apply.cmm +++ b/rts/Apply.cmm @@ -62,8 +62,11 @@ again: W_ info; P_ untaggedfun; W_ arity; + // We must obey the correct heap object observation pattern in + // note [Heap memory barriers] in SMP.h. untaggedfun = UNTAG(fun); info = %INFO_PTR(untaggedfun); + prim_read_barrier; switch [INVALID_OBJECT .. N_CLOSURE_TYPES] (TO_W_( %INFO_TYPE(%STD_INFO(info)) )) { case @@ -104,7 +107,6 @@ again: CCS_ALLOC(BYTES_TO_WDS(SIZEOF_StgPAP), CCS_OVERHEAD); P_ pap; pap = Hp - SIZEOF_StgPAP + WDS(1); - SET_HDR(pap, stg_PAP_info, CCCS); StgPAP_arity(pap) = arity; if (arity <= TAG_MASK) { // TODO: Shouldn't this already be tagged? If not why did we @@ -113,6 +115,8 @@ again: } StgPAP_fun(pap) = fun; StgPAP_n_args(pap) = 0; + prim_write_barrier; + SET_HDR(pap, stg_PAP_info, CCCS); return (pap); } } @@ -132,7 +136,6 @@ again: pap = Hp - size + WDS(1); // We'll lose the original PAP, so we should enter its CCS ccall enterFunCCS(BaseReg "ptr", StgHeader_ccs(untaggedfun) "ptr"); - SET_HDR(pap, stg_PAP_info, CCCS); StgPAP_arity(pap) = StgPAP_arity(untaggedfun); StgPAP_n_args(pap) = StgPAP_n_args(untaggedfun); StgPAP_fun(pap) = StgPAP_fun(fun); @@ -140,6 +143,8 @@ again: i = TO_W_(StgPAP_n_args(untaggedfun)); loop: if (i == 0) { + prim_write_barrier; + SET_HDR(pap, stg_PAP_info, CCCS); return (pap); } i = i - 1; @@ -282,6 +287,7 @@ for: info = %GET_FUN_INFO(UNTAG(R1)); W_ type; type = TO_W_(StgFunInfoExtra_fun_type(info)); + prim_read_barrier; if (type == ARG_GEN) { jump StgFunInfoExtra_slow_apply(info) [R1]; } @@ -360,6 +366,7 @@ for: info = %GET_FUN_INFO(UNTAG(R1)); W_ type; type = TO_W_(StgFunInfoExtra_fun_type(info)); + prim_read_barrier; if (type == ARG_GEN) { jump StgFunInfoExtra_slow_apply(info) [R1]; } @@ -424,12 +431,14 @@ for: TICK_ENT_VIA_NODE(); #if defined(NO_ARG_REGS) + prim_read_barrier; jump %GET_ENTRY(UNTAG(R1)) [R1]; #else W_ info; info = %GET_FUN_INFO(UNTAG(R1)); W_ type; type = TO_W_(StgFunInfoExtra_fun_type(info)); + prim_read_barrier; if (type == ARG_GEN) { jump StgFunInfoExtra_slow_apply(info) [R1]; } diff --git a/rts/Compact.cmm b/rts/Compact.cmm index 061646846d612c93bb5797bfcea48535bcb731f7..7982fb2f818d31b56dd2eff2fb482d7575258f7d 100644 --- a/rts/Compact.cmm +++ b/rts/Compact.cmm @@ -69,6 +69,7 @@ eval: tag = GETTAG(p); p = UNTAG(p); info = %INFO_PTR(p); + prim_read_barrier; type = TO_W_(%INFO_TYPE(%STD_INFO(info))); switch [0 .. 
N_CLOSURE_TYPES] type { @@ -168,7 +169,6 @@ eval: cards = SIZEOF_StgMutArrPtrs + WDS(ptrs); ALLOCATE(compact, BYTES_TO_WDS(size), p, to, tag); P_[pp] = tag | to; - SET_HDR(to, StgHeader_info(p), StgHeader_ccs(p)); StgMutArrPtrs_ptrs(to) = ptrs; StgMutArrPtrs_size(to) = StgMutArrPtrs_size(p); prim %memcpy(to + cards, p + cards , size - cards, 1); @@ -182,6 +182,7 @@ eval: i = i + 1; goto loop0; } + SET_HDR(to, StgHeader_info(p), StgHeader_ccs(p)); return(); } @@ -198,7 +199,6 @@ eval: ptrs = StgSmallMutArrPtrs_ptrs(p); ALLOCATE(compact, BYTES_TO_WDS(SIZEOF_StgSmallMutArrPtrs) + ptrs, p, to, tag); P_[pp] = tag | to; - SET_HDR(to, StgHeader_info(p), StgHeader_ccs(p)); StgSmallMutArrPtrs_ptrs(to) = ptrs; i = 0; loop1: @@ -210,6 +210,7 @@ eval: i = i + 1; goto loop1; } + SET_HDR(to, StgHeader_info(p), StgHeader_ccs(p)); return(); } @@ -235,7 +236,6 @@ eval: ALLOCATE(compact, size, p, to, tag); P_[pp] = tag | to; - SET_HDR(to, StgHeader_info(p), StgHeader_ccs(p)); // First, copy the non-pointers if (nptrs > 0) { @@ -245,6 +245,7 @@ eval: i = i + 1; if (i < ptrs + nptrs) ( likely: True ) goto loop2; } + SET_HDR(to, StgHeader_info(p), StgHeader_ccs(p)); // Next, recursively compact and copy the pointers if (ptrs == 0) { return(); } diff --git a/rts/Interpreter.c b/rts/Interpreter.c index e4b9d5696e46701075008c1c3453e6fa74a8cac9..b1a25d743a8cf2c0e2f42d47a39ca7102736f690 100644 --- a/rts/Interpreter.c +++ b/rts/Interpreter.c @@ -249,10 +249,11 @@ StgClosure * newEmptyPAP (Capability *cap, uint32_t arity) { StgPAP *pap = (StgPAP *)allocate(cap, sizeofW(StgPAP)); - SET_HDR(pap, &stg_PAP_info, cap->r.rCCCS); pap->arity = arity; pap->n_args = 0; pap->fun = tagged_obj; + write_barrier(); + SET_HDR(pap, &stg_PAP_info, cap->r.rCCCS); return (StgClosure *)pap; } @@ -266,7 +267,6 @@ StgClosure * copyPAP (Capability *cap, StgPAP *oldpap) uint32_t size = PAP_sizeW(oldpap->n_args); StgPAP *pap = (StgPAP *)allocate(cap, size); enterFunCCS(&cap->r, oldpap->header.prof.ccs); - SET_HDR(pap, &stg_PAP_info, cap->r.rCCCS); pap->arity = oldpap->arity; pap->n_args = oldpap->n_args; pap->fun = oldpap->fun; @@ -274,6 +274,8 @@ StgClosure * copyPAP (Capability *cap, StgPAP *oldpap) for (i = 0; i < ((StgPAP *)pap)->n_args; i++) { pap->payload[i] = oldpap->payload[i]; } + write_barrier(); + SET_HDR(pap, &stg_PAP_info, cap->r.rCCCS); return (StgClosure *)pap; } @@ -481,8 +483,9 @@ eval_obj: { StgUpdateFrame *__frame; __frame = (StgUpdateFrame *)Sp; - SET_INFO((StgClosure *)__frame, (StgInfoTable *)&stg_upd_frame_info); __frame->updatee = (StgClosure *)(ap); + write_barrier(); + SET_INFO((StgClosure *)__frame, (StgInfoTable *)&stg_upd_frame_info); } ENTER_CCS_THUNK(cap,ap); @@ -799,7 +802,6 @@ do_apply: // build a new PAP and return it. 
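+            // The PAP's header is written last, after a write barrier (see
+            // below), so that no thread can observe a PAP whose fields are
+            // still uninitialised; see Note [Heap memory barriers] in SMP.h.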
StgPAP *new_pap; new_pap = (StgPAP *)allocate(cap, PAP_sizeW(pap->n_args + m)); - SET_HDR(new_pap,&stg_PAP_info,cap->r.rCCCS); new_pap->arity = pap->arity - n; new_pap->n_args = pap->n_args + m; new_pap->fun = pap->fun; @@ -809,6 +811,8 @@ do_apply: for (i = 0; i < m; i++) { new_pap->payload[pap->n_args + i] = (StgClosure *)SpW(i); } + write_barrier(); + SET_HDR(new_pap,&stg_PAP_info,cap->r.rCCCS); tagged_obj = (StgClosure *)new_pap; Sp_addW(m); goto do_return; @@ -844,13 +848,14 @@ do_apply: StgPAP *pap; uint32_t i; pap = (StgPAP *)allocate(cap, PAP_sizeW(m)); - SET_HDR(pap, &stg_PAP_info,cap->r.rCCCS); pap->arity = arity - n; pap->fun = obj; pap->n_args = m; for (i = 0; i < m; i++) { pap->payload[i] = (StgClosure *)SpW(i); } + write_barrier(); + SET_HDR(pap, &stg_PAP_info,cap->r.rCCCS); tagged_obj = (StgClosure *)pap; Sp_addW(m); goto do_return; @@ -1081,7 +1086,6 @@ run_BCO: // the BCO size_words = BCO_BITMAP_SIZE(obj) + 2; new_aps = (StgAP_STACK *) allocate(cap, AP_STACK_sizeW(size_words)); - SET_HDR(new_aps,&stg_AP_STACK_info,cap->r.rCCCS); new_aps->size = size_words; new_aps->fun = &stg_dummy_ret_closure; @@ -1095,6 +1099,9 @@ run_BCO: new_aps->payload[i] = (StgClosure *)SpW(i-2); } + write_barrier(); + SET_HDR(new_aps,&stg_AP_STACK_info,cap->r.rCCCS); + // Arrange the stack to call the breakpoint IO action, and // continue execution of this BCO when the IO action returns. // @@ -1421,9 +1428,10 @@ run_BCO: StgAP* ap; int n_payload = BCO_NEXT; ap = (StgAP*)allocate(cap, AP_sizeW(n_payload)); - SpW(-1) = (W_)ap; ap->n_args = n_payload; + write_barrier(); SET_HDR(ap, &stg_AP_info, cap->r.rCCCS) + SpW(-1) = (W_)ap; Sp_subW(1); goto nextInsn; } @@ -1432,9 +1440,10 @@ run_BCO: StgAP* ap; int n_payload = BCO_NEXT; ap = (StgAP*)allocate(cap, AP_sizeW(n_payload)); - SpW(-1) = (W_)ap; ap->n_args = n_payload; + write_barrier(); SET_HDR(ap, &stg_AP_NOUPD_info, cap->r.rCCCS) + SpW(-1) = (W_)ap; Sp_subW(1); goto nextInsn; } @@ -1444,10 +1453,11 @@ run_BCO: int arity = BCO_NEXT; int n_payload = BCO_NEXT; pap = (StgPAP*)allocate(cap, PAP_sizeW(n_payload)); - SpW(-1) = (W_)pap; pap->n_args = n_payload; pap->arity = arity; + write_barrier(); SET_HDR(pap, &stg_PAP_info, cap->r.rCCCS) + SpW(-1) = (W_)pap; Sp_subW(1); goto nextInsn; } @@ -1518,16 +1528,18 @@ run_BCO: int o_itbl = BCO_GET_LARGE_ARG; int n_words = BCO_NEXT; StgInfoTable* itbl = INFO_PTR_TO_STRUCT((StgInfoTable *)BCO_LIT(o_itbl)); + load_load_barrier(); int request = CONSTR_sizeW( itbl->layout.payload.ptrs, itbl->layout.payload.nptrs ); StgClosure* con = (StgClosure*)allocate_NONUPD(cap,request); ASSERT( itbl->layout.payload.ptrs + itbl->layout.payload.nptrs > 0); - SET_HDR(con, (StgInfoTable*)BCO_LIT(o_itbl), cap->r.rCCCS); for (i = 0; i < n_words; i++) { con->payload[i] = (StgClosure*)SpW(i); } Sp_addW(n_words); Sp_subW(1); + write_barrier(); + SET_HDR(con, (StgInfoTable*)BCO_LIT(o_itbl), cap->r.rCCCS); SpW(0) = (W_)con; IF_DEBUG(interpreter, debugBelch("\tBuilt "); diff --git a/rts/Messages.c b/rts/Messages.c index 2b13b6306cd48436aecc46278264cfeebf859e5d..b75f26575d53a12d18158d6146b6bb137db745c5 100644 --- a/rts/Messages.c +++ b/rts/Messages.c @@ -28,6 +28,7 @@ void sendMessage(Capability *from_cap, Capability *to_cap, Message *msg) #if defined(DEBUG) { const StgInfoTable *i = msg->header.info; + load_load_barrier(); if (i != &stg_MSG_THROWTO_info && i != &stg_MSG_BLACKHOLE_info && i != &stg_MSG_TRY_WAKEUP_info && @@ -70,6 +71,7 @@ executeMessage (Capability *cap, Message *m) loop: write_barrier(); // allow m->header to be modified by 
another thread i = m->header.info; + load_load_barrier(); if (i == &stg_MSG_TRY_WAKEUP_info) { StgTSO *tso = ((MessageWakeup *)m)->tso; @@ -173,6 +175,7 @@ uint32_t messageBlackHole(Capability *cap, MessageBlackHole *msg) "blackhole %p", (W_)msg->tso->id, msg->bh); info = bh->header.info; + load_load_barrier(); // If we got this message in our inbox, it might be that the // BLACKHOLE has already been updated, and GC has shorted out the @@ -196,6 +199,7 @@ loop: // and turns this into an infinite loop. p = UNTAG_CLOSURE((StgClosure*)VOLATILE_LOAD(&((StgInd*)bh)->indirectee)); info = p->header.info; + load_load_barrier(); if (info == &stg_IND_info) { @@ -226,7 +230,6 @@ loop: bq = (StgBlockingQueue*)allocate(cap, sizeofW(StgBlockingQueue)); // initialise the BLOCKING_QUEUE object - SET_HDR(bq, &stg_BLOCKING_QUEUE_DIRTY_info, CCS_SYSTEM); bq->bh = bh; bq->queue = msg; bq->owner = owner; @@ -238,6 +241,8 @@ loop: // a collision to update a BLACKHOLE and a BLOCKING_QUEUE // becomes orphaned (see updateThunk()). bq->link = owner->bq; + write_barrier(); + SET_HDR(bq, &stg_BLOCKING_QUEUE_DIRTY_info, CCS_SYSTEM); owner->bq = bq; dirty_TSO(cap, owner); // we modified owner->bq @@ -289,6 +294,7 @@ loop: recordClosureMutated(cap,(StgClosure*)msg); if (info == &stg_BLOCKING_QUEUE_CLEAN_info) { + write_barrier(); bq->header.info = &stg_BLOCKING_QUEUE_DIRTY_info; recordClosureMutated(cap,(StgClosure*)bq); } @@ -319,6 +325,7 @@ StgTSO * blackHoleOwner (StgClosure *bh) StgClosure *p; info = bh->header.info; + load_load_barrier(); if (info != &stg_BLACKHOLE_info && info != &stg_CAF_BLACKHOLE_info && @@ -334,6 +341,7 @@ loop: // and turns this into an infinite loop. p = UNTAG_CLOSURE((StgClosure*)VOLATILE_LOAD(&((StgInd*)bh)->indirectee)); info = p->header.info; + load_load_barrier(); if (info == &stg_IND_info) goto loop; diff --git a/rts/PrimOps.cmm b/rts/PrimOps.cmm index 47b9bca04b8d142c7b101fc48c98fc975d1eab56..ed5d5a23710913c47e94ef875993941fee7d4ac0 100644 --- a/rts/PrimOps.cmm +++ b/rts/PrimOps.cmm @@ -68,8 +68,9 @@ stg_newByteArrayzh ( W_ n ) jump stg_raisezh(base_GHCziIOziException_heapOverflow_closure); } TICK_ALLOC_PRIM(SIZEOF_StgArrBytes,WDS(payload_words),0); - SET_HDR(p, stg_ARR_WORDS_info, CCCS); StgArrBytes_bytes(p) = n; + prim_write_barrier; + SET_HDR(p, stg_ARR_WORDS_info, CCCS); return (p); } @@ -105,8 +106,9 @@ stg_newPinnedByteArrayzh ( W_ n ) to BA_ALIGN bytes: */ p = p + ((-p - SIZEOF_StgArrBytes) & BA_MASK); - SET_HDR(p, stg_ARR_WORDS_info, CCCS); StgArrBytes_bytes(p) = n; + prim_write_barrier; + SET_HDR(p, stg_ARR_WORDS_info, CCCS); return (p); } @@ -147,8 +149,9 @@ stg_newAlignedPinnedByteArrayzh ( W_ n, W_ alignment ) <alignment> is a power of 2, which is technically not guaranteed */ p = p + ((-p - SIZEOF_StgArrBytes) & (alignment - 1)); - SET_HDR(p, stg_ARR_WORDS_info, CCCS); StgArrBytes_bytes(p) = n; + prim_write_barrier; + SET_HDR(p, stg_ARR_WORDS_info, CCCS); return (p); } @@ -257,7 +260,6 @@ stg_newArrayzh ( W_ n /* words */, gcptr init ) } TICK_ALLOC_PRIM(SIZEOF_StgMutArrPtrs, WDS(size), 0); - SET_HDR(arr, stg_MUT_ARR_PTRS_DIRTY_info, CCCS); StgMutArrPtrs_ptrs(arr) = n; StgMutArrPtrs_size(arr) = size; @@ -270,6 +272,9 @@ stg_newArrayzh ( W_ n /* words */, gcptr init ) goto for; } + prim_write_barrier; + SET_HDR(arr, stg_MUT_ARR_PTRS_DIRTY_info, CCCS); + return (arr); } @@ -281,11 +286,13 @@ stg_unsafeThawArrayzh ( gcptr arr ) // mut_list so no need to add it again. MUT_ARR_PTRS_FROZEN_CLEAN means it's // not and we should add it to a mut_list. 
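+    // The prim_write_barrier added below follows the update pattern of
+    // Note [Heap memory barriers] in SMP.h: writes to the array must be
+    // made visible before its info table is changed.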
if (StgHeader_info(arr) != stg_MUT_ARR_PTRS_FROZEN_DIRTY_info) { + prim_write_barrier; // see below: SET_INFO(arr,stg_MUT_ARR_PTRS_DIRTY_info); // must be done after SET_INFO, because it ASSERTs closure_MUTABLE(): recordMutable(arr); return (arr); } else { + prim_write_barrier; SET_INFO(arr,stg_MUT_ARR_PTRS_DIRTY_info); return (arr); } @@ -373,7 +380,6 @@ stg_newArrayArrayzh ( W_ n /* words */ ) } TICK_ALLOC_PRIM(SIZEOF_StgMutArrPtrs, WDS(size), 0); - SET_HDR(arr, stg_MUT_ARR_PTRS_DIRTY_info, W_[CCCS]); StgMutArrPtrs_ptrs(arr) = n; StgMutArrPtrs_size(arr) = size; @@ -386,6 +392,9 @@ stg_newArrayArrayzh ( W_ n /* words */ ) goto for; } + prim_write_barrier; + SET_HDR(arr, stg_MUT_ARR_PTRS_DIRTY_info, W_[CCCS]); + return (arr); } @@ -408,7 +417,6 @@ stg_newSmallArrayzh ( W_ n /* words */, gcptr init ) } TICK_ALLOC_PRIM(SIZEOF_StgSmallMutArrPtrs, WDS(n), 0); - SET_HDR(arr, stg_SMALL_MUT_ARR_PTRS_DIRTY_info, CCCS); StgSmallMutArrPtrs_ptrs(arr) = n; // Initialise all elements of the array with the value in R2 @@ -423,6 +431,9 @@ stg_newSmallArrayzh ( W_ n /* words */, gcptr init ) goto for; } + prim_write_barrier; + SET_HDR(arr, stg_SMALL_MUT_ARR_PTRS_DIRTY_info, CCCS); + return (arr); } @@ -431,11 +442,13 @@ stg_unsafeThawSmallArrayzh ( gcptr arr ) // See stg_unsafeThawArrayzh if (StgHeader_info(arr) != stg_SMALL_MUT_ARR_PTRS_FROZEN_DIRTY_info) { SET_INFO(arr, stg_SMALL_MUT_ARR_PTRS_DIRTY_info); + prim_write_barrier; recordMutable(arr); // must be done after SET_INFO, because it ASSERTs closure_MUTABLE() return (arr); } else { SET_INFO(arr, stg_SMALL_MUT_ARR_PTRS_DIRTY_info); + prim_write_barrier; return (arr); } } @@ -465,13 +478,14 @@ stg_copySmallArrayzh ( gcptr src, W_ src_off, gcptr dst, W_ dst_off, W_ n) { W_ dst_p, src_p, bytes; - SET_INFO(dst, stg_SMALL_MUT_ARR_PTRS_DIRTY_info); - dst_p = dst + SIZEOF_StgSmallMutArrPtrs + WDS(dst_off); src_p = src + SIZEOF_StgSmallMutArrPtrs + WDS(src_off); bytes = WDS(n); prim %memcpy(dst_p, src_p, bytes, SIZEOF_W); + prim_write_barrier; + SET_INFO(dst, stg_SMALL_MUT_ARR_PTRS_DIRTY_info); + return (); } @@ -479,8 +493,6 @@ stg_copySmallMutableArrayzh ( gcptr src, W_ src_off, gcptr dst, W_ dst_off, W_ n { W_ dst_p, src_p, bytes; - SET_INFO(dst, stg_SMALL_MUT_ARR_PTRS_DIRTY_info); - dst_p = dst + SIZEOF_StgSmallMutArrPtrs + WDS(dst_off); src_p = src + SIZEOF_StgSmallMutArrPtrs + WDS(src_off); bytes = WDS(n); @@ -490,6 +502,9 @@ stg_copySmallMutableArrayzh ( gcptr src, W_ src_off, gcptr dst, W_ dst_off, W_ n prim %memcpy(dst_p, src_p, bytes, SIZEOF_W); } + prim_write_barrier; + SET_INFO(dst, stg_SMALL_MUT_ARR_PTRS_DIRTY_info); + return (); } @@ -525,8 +540,9 @@ stg_newMutVarzh ( gcptr init ) ALLOC_PRIM_P (SIZEOF_StgMutVar, stg_newMutVarzh, init); mv = Hp - SIZEOF_StgMutVar + WDS(1); - SET_HDR(mv,stg_MUT_VAR_DIRTY_info,CCCS); StgMutVar_var(mv) = init; + prim_write_barrier; + SET_HDR(mv,stg_MUT_VAR_DIRTY_info,CCCS); return (mv); } @@ -609,16 +625,18 @@ stg_atomicModifyMutVar2zh ( gcptr mv, gcptr f ) TICK_ALLOC_THUNK_2(); CCCS_ALLOC(THUNK_2_SIZE); z = Hp - THUNK_2_SIZE + WDS(1); - SET_HDR(z, stg_ap_2_upd_info, CCCS); LDV_RECORD_CREATE(z); StgThunk_payload(z,0) = f; + prim_write_barrier; + SET_HDR(z, stg_ap_2_upd_info, CCCS); TICK_ALLOC_THUNK_1(); CCCS_ALLOC(THUNK_1_SIZE); y = z - THUNK_1_SIZE; - SET_HDR(y, stg_sel_0_upd_info, CCCS); LDV_RECORD_CREATE(y); StgThunk_payload(y,0) = z; + prim_write_barrier; + SET_HDR(y, stg_sel_0_upd_info, CCCS); retry: x = StgMutVar_var(mv); @@ -668,9 +686,10 @@ stg_atomicModifyMutVarzuzh ( gcptr mv, gcptr f ) 
TICK_ALLOC_THUNK(); CCCS_ALLOC(THUNK_SIZE); z = Hp - THUNK_SIZE + WDS(1); - SET_HDR(z, stg_ap_2_upd_info, CCCS); LDV_RECORD_CREATE(z); StgThunk_payload(z,0) = f; + prim_write_barrier; + SET_HDR(z, stg_ap_2_upd_info, CCCS); retry: x = StgMutVar_var(mv); @@ -703,7 +722,6 @@ stg_mkWeakzh ( gcptr key, ALLOC_PRIM (SIZEOF_StgWeak) w = Hp - SIZEOF_StgWeak + WDS(1); - SET_HDR(w, stg_WEAK_info, CCCS); StgWeak_key(w) = key; StgWeak_value(w) = value; @@ -711,6 +729,10 @@ stg_mkWeakzh ( gcptr key, StgWeak_cfinalizers(w) = stg_NO_FINALIZER_closure; StgWeak_link(w) = Capability_weak_ptr_list_hd(MyCapability()); + + prim_write_barrier; + SET_HDR(w, stg_WEAK_info, CCCS); + Capability_weak_ptr_list_hd(MyCapability()) = w; if (Capability_weak_ptr_list_tl(MyCapability()) == NULL) { Capability_weak_ptr_list_tl(MyCapability()) = w; @@ -737,13 +759,15 @@ stg_addCFinalizzerToWeakzh ( W_ fptr, // finalizer ALLOC_PRIM (SIZEOF_StgCFinalizerList) c = Hp - SIZEOF_StgCFinalizerList + WDS(1); - SET_HDR(c, stg_C_FINALIZER_LIST_info, CCCS); StgCFinalizerList_fptr(c) = fptr; StgCFinalizerList_ptr(c) = ptr; StgCFinalizerList_eptr(c) = eptr; StgCFinalizerList_flag(c) = flag; + prim_write_barrier; + SET_HDR(c, stg_C_FINALIZER_LIST_info, CCCS); + LOCK_CLOSURE(w, info); if (info == stg_DEAD_WEAK_info) { @@ -818,6 +842,7 @@ stg_deRefWeakzh ( gcptr w ) gcptr val; info = GET_INFO(w); + prim_read_barrier; if (info == stg_WHITEHOLE_info) { // w is locked by another thread. Now it's not immediately clear if w is @@ -1389,11 +1414,13 @@ stg_readTVarzh (P_ tvar) stg_readTVarIOzh ( P_ tvar /* :: TVar a */ ) { - W_ result; + W_ result, resultinfo; again: result = StgTVar_current_value(tvar); - if (%INFO_PTR(result) == stg_TREC_HEADER_info) { + resultinfo = %INFO_PTR(result); + prim_read_barrier; + if (resultinfo == stg_TREC_HEADER_info) { goto again; } return (result); @@ -1462,11 +1489,12 @@ stg_newMVarzh () ALLOC_PRIM_ (SIZEOF_StgMVar, stg_newMVarzh); mvar = Hp - SIZEOF_StgMVar + WDS(1); - SET_HDR(mvar,stg_MVAR_DIRTY_info,CCCS); - // MVARs start dirty: generation 0 has no mutable list StgMVar_head(mvar) = stg_END_TSO_QUEUE_closure; StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure; StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure; + prim_write_barrier; + SET_HDR(mvar,stg_MVAR_DIRTY_info,CCCS); + // MVARs start dirty: generation 0 has no mutable list return (mvar); } @@ -1486,7 +1514,7 @@ stg_newMVarzh () stg_takeMVarzh ( P_ mvar /* :: MVar a */ ) { - W_ val, info, tso, q; + W_ val, info, tso, q, qinfo; LOCK_CLOSURE(mvar, info); @@ -1508,10 +1536,12 @@ stg_takeMVarzh ( P_ mvar /* :: MVar a */ ) q = Hp - SIZEOF_StgMVarTSOQueue + WDS(1); - SET_HDR(q, stg_MVAR_TSO_QUEUE_info, CCS_SYSTEM); StgMVarTSOQueue_link(q) = END_TSO_QUEUE; StgMVarTSOQueue_tso(q) = CurrentTSO; + prim_write_barrier; + SET_HDR(q, stg_MVAR_TSO_QUEUE_info, CCS_SYSTEM); + if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) { StgMVar_head(mvar) = q; } else { @@ -1540,8 +1570,10 @@ loop: unlockClosure(mvar, info); return (val); } - if (StgHeader_info(q) == stg_IND_info || - StgHeader_info(q) == stg_MSG_NULL_info) { + qinfo = StgHeader_info(q); + prim_read_barrier; + if (qinfo == stg_IND_info || + qinfo == stg_MSG_NULL_info) { q = StgInd_indirectee(q); goto loop; } @@ -1579,7 +1611,7 @@ loop: stg_tryTakeMVarzh ( P_ mvar /* :: MVar a */ ) { - W_ val, info, tso, q; + W_ val, info, tso, q, qinfo; LOCK_CLOSURE(mvar, info); @@ -1606,8 +1638,11 @@ loop: return (1, val); } - if (StgHeader_info(q) == stg_IND_info || - StgHeader_info(q) == stg_MSG_NULL_info) { + qinfo = 
StgHeader_info(q); + prim_read_barrier; + + if (qinfo == stg_IND_info || + qinfo == stg_MSG_NULL_info) { q = StgInd_indirectee(q); goto loop; } @@ -1646,7 +1681,7 @@ loop: stg_putMVarzh ( P_ mvar, /* :: MVar a */ P_ val, /* :: a */ ) { - W_ info, tso, q; + W_ info, tso, q, qinfo; LOCK_CLOSURE(mvar, info); @@ -1666,10 +1701,12 @@ stg_putMVarzh ( P_ mvar, /* :: MVar a */ q = Hp - SIZEOF_StgMVarTSOQueue + WDS(1); - SET_HDR(q, stg_MVAR_TSO_QUEUE_info, CCS_SYSTEM); StgMVarTSOQueue_link(q) = END_TSO_QUEUE; StgMVarTSOQueue_tso(q) = CurrentTSO; + prim_write_barrier; + SET_HDR(q, stg_MVAR_TSO_QUEUE_info, CCS_SYSTEM); + if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) { StgMVar_head(mvar) = q; } else { @@ -1696,8 +1733,12 @@ loop: unlockClosure(mvar, stg_MVAR_DIRTY_info); return (); } - if (StgHeader_info(q) == stg_IND_info || - StgHeader_info(q) == stg_MSG_NULL_info) { + + qinfo = StgHeader_info(q); + prim_read_barrier; + + if (qinfo == stg_IND_info || + qinfo == stg_MSG_NULL_info) { q = StgInd_indirectee(q); goto loop; } @@ -1754,7 +1795,7 @@ loop: stg_tryPutMVarzh ( P_ mvar, /* :: MVar a */ P_ val, /* :: a */ ) { - W_ info, tso, q; + W_ info, tso, q, qinfo; LOCK_CLOSURE(mvar, info); @@ -1777,8 +1818,12 @@ loop: unlockClosure(mvar, stg_MVAR_DIRTY_info); return (1); } - if (StgHeader_info(q) == stg_IND_info || - StgHeader_info(q) == stg_MSG_NULL_info) { + + qinfo = StgHeader_info(q); + prim_read_barrier; + + if (qinfo == stg_IND_info || + qinfo == stg_MSG_NULL_info) { q = StgInd_indirectee(q); goto loop; } @@ -1849,10 +1894,12 @@ stg_readMVarzh ( P_ mvar, /* :: MVar a */ ) // readMVars are pushed to the front of the queue, so // they get handled immediately - SET_HDR(q, stg_MVAR_TSO_QUEUE_info, CCS_SYSTEM); StgMVarTSOQueue_link(q) = StgMVar_head(mvar); StgMVarTSOQueue_tso(q) = CurrentTSO; + prim_write_barrier; + SET_HDR(q, stg_MVAR_TSO_QUEUE_info, CCS_SYSTEM); + StgTSO__link(CurrentTSO) = q; StgTSO_block_info(CurrentTSO) = mvar; StgTSO_why_blocked(CurrentTSO) = BlockedOnMVarRead::I16; @@ -1915,9 +1962,10 @@ stg_makeStableNamezh ( P_ obj ) // too complicated and doesn't buy us much. See D5342?id=18700.) 
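+        // As elsewhere in this patch: initialise the StableName's fields
+        // first, then barrier, then publish the header; see Note [Heap
+        // memory barriers] in SMP.h.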
("ptr" sn_obj) = ccall allocate(MyCapability() "ptr", BYTES_TO_WDS(SIZEOF_StgStableName)); - SET_HDR(sn_obj, stg_STABLE_NAME_info, CCCS); StgStableName_sn(sn_obj) = index; snEntry_sn_obj(W_[stable_name_table] + index*SIZEOF_snEntry) = sn_obj; + prim_write_barrier; + SET_HDR(sn_obj, stg_STABLE_NAME_info, CCCS); } else { sn_obj = snEntry_sn_obj(W_[stable_name_table] + index*SIZEOF_snEntry); } @@ -1958,7 +2006,6 @@ stg_newBCOzh ( P_ instrs, ALLOC_PRIM (bytes); bco = Hp - bytes + WDS(1); - SET_HDR(bco, stg_BCO_info, CCS_MAIN); StgBCO_instrs(bco) = instrs; StgBCO_literals(bco) = literals; @@ -1976,6 +2023,9 @@ for: goto for; } + prim_write_barrier; + SET_HDR(bco, stg_BCO_info, CCS_MAIN); + return (bco); } @@ -1994,11 +2044,13 @@ stg_mkApUpd0zh ( P_ bco ) CCCS_ALLOC(SIZEOF_StgAP); ap = Hp - SIZEOF_StgAP + WDS(1); - SET_HDR(ap, stg_AP_info, CCS_MAIN); StgAP_n_args(ap) = HALF_W_(0); StgAP_fun(ap) = bco; + prim_write_barrier; + SET_HDR(ap, stg_AP_info, CCS_MAIN); + return (ap); } @@ -2006,6 +2058,7 @@ stg_unpackClosurezh ( P_ closure ) { W_ info, ptrs, nptrs, p, ptrs_arr, dat_arr; info = %GET_STD_INFO(UNTAG(closure)); + prim_read_barrier; ptrs = TO_W_(%INFO_PTRS(info)); nptrs = TO_W_(%INFO_NPTRS(info)); @@ -2026,7 +2079,6 @@ stg_unpackClosurezh ( P_ closure ) dat_arr = Hp - dat_arr_sz + WDS(1); - SET_HDR(dat_arr, stg_ARR_WORDS_info, CCCS); StgArrBytes_bytes(dat_arr) = WDS(len); p = 0; for: @@ -2041,6 +2093,9 @@ for: // Follow the pointers ("ptr" ptrArray) = foreign "C" heap_view_closurePtrs(MyCapability() "ptr", clos "ptr"); + prim_write_barrier; + SET_HDR(dat_arr, stg_ARR_WORDS_info, CCCS); + return (info, dat_arr, ptrArray); } @@ -2334,7 +2389,10 @@ stg_noDuplicatezh /* no arg list: explicit stack layout */ stg_getApStackValzh ( P_ ap_stack, W_ offset ) { - if (%INFO_PTR(UNTAG(ap_stack)) == stg_AP_STACK_info) { + W_ ap_stackinfo; + ap_stackinfo = %INFO_PTR(UNTAG(ap_stack)); + prim_read_barrier; + if (ap_stackinfo == stg_AP_STACK_info) { return (1,StgAP_STACK_payload(ap_stack,offset)); } else { return (0,ap_stack); diff --git a/rts/RaiseAsync.c b/rts/RaiseAsync.c index f58f9177c8c31fb14b984e9ee35827b7622b90bb..359718f9df8b9e6e991f8b561d2944b2ca456193 100644 --- a/rts/RaiseAsync.c +++ b/rts/RaiseAsync.c @@ -870,6 +870,7 @@ raiseAsync(Capability *cap, StgTSO *tso, StgClosure *exception, ap->payload[i] = (StgClosure *)*sp++; } + write_barrier(); SET_HDR(ap,&stg_AP_STACK_info, ((StgClosure *)frame)->header.prof.ccs /* ToDo */); TICK_ALLOC_UP_THK(WDS(words+1),0); @@ -921,6 +922,7 @@ raiseAsync(Capability *cap, StgTSO *tso, StgClosure *exception, ap->payload[i] = (StgClosure *)*sp++; } + write_barrier(); SET_HDR(ap,&stg_AP_STACK_NOUPD_info,stack->header.prof.ccs); TICK_ALLOC_SE_THK(WDS(words+1),0); @@ -959,6 +961,7 @@ raiseAsync(Capability *cap, StgTSO *tso, StgClosure *exception, // raise = (StgThunk *)allocate(cap,sizeofW(StgThunk)+1); TICK_ALLOC_SE_THK(WDS(1),0); + write_barrier(); SET_HDR(raise,&stg_raise_info,cf->header.prof.ccs); raise->payload[0] = exception; @@ -1039,8 +1042,9 @@ raiseAsync(Capability *cap, StgTSO *tso, StgClosure *exception, atomically = (StgThunk*)allocate(cap,sizeofW(StgThunk)+1); TICK_ALLOC_SE_THK(1,0); - SET_HDR(atomically,&stg_atomically_info,af->header.prof.ccs); atomically->payload[0] = af->code; + write_barrier(); + SET_HDR(atomically,&stg_atomically_info,af->header.prof.ccs); // discard stack up to and including the ATOMICALLY_FRAME frame += sizeofW(StgAtomicallyFrame); diff --git a/rts/RtsAPI.c b/rts/RtsAPI.c index 
26433ac209ba19aa13c1e6fd9d91899f254069db..1a2b7f0065b7bc1aef13baae43ee0e5c41016848 100644 --- a/rts/RtsAPI.c +++ b/rts/RtsAPI.c @@ -30,8 +30,9 @@ HaskellObj rts_mkChar (Capability *cap, HsChar c) { StgClosure *p = (StgClosure *)allocate(cap, CONSTR_sizeW(0,1)); - SET_HDR(p, Czh_con_info, CCS_SYSTEM); p->payload[0] = (StgClosure *)(StgWord)(StgChar)c; + write_barrier(); + SET_HDR(p, Czh_con_info, CCS_SYSTEM); return p; } @@ -39,8 +40,9 @@ HaskellObj rts_mkInt (Capability *cap, HsInt i) { StgClosure *p = (StgClosure *)allocate(cap,CONSTR_sizeW(0,1)); - SET_HDR(p, Izh_con_info, CCS_SYSTEM); p->payload[0] = (StgClosure *)(StgInt)i; + write_barrier(); + SET_HDR(p, Izh_con_info, CCS_SYSTEM); return p; } @@ -48,9 +50,10 @@ HaskellObj rts_mkInt8 (Capability *cap, HsInt8 i) { StgClosure *p = (StgClosure *)allocate(cap,CONSTR_sizeW(0,1)); - SET_HDR(p, I8zh_con_info, CCS_SYSTEM); /* Make sure we mask out the bits above the lowest 8 */ p->payload[0] = (StgClosure *)(StgInt)i; + write_barrier(); + SET_HDR(p, I8zh_con_info, CCS_SYSTEM); return p; } @@ -58,9 +61,10 @@ HaskellObj rts_mkInt16 (Capability *cap, HsInt16 i) { StgClosure *p = (StgClosure *)allocate(cap,CONSTR_sizeW(0,1)); - SET_HDR(p, I16zh_con_info, CCS_SYSTEM); /* Make sure we mask out the relevant bits */ p->payload[0] = (StgClosure *)(StgInt)i; + write_barrier(); + SET_HDR(p, I16zh_con_info, CCS_SYSTEM); return p; } @@ -68,8 +72,9 @@ HaskellObj rts_mkInt32 (Capability *cap, HsInt32 i) { StgClosure *p = (StgClosure *)allocate(cap,CONSTR_sizeW(0,1)); - SET_HDR(p, I32zh_con_info, CCS_SYSTEM); p->payload[0] = (StgClosure *)(StgInt)i; + write_barrier(); + SET_HDR(p, I32zh_con_info, CCS_SYSTEM); return p; } @@ -77,8 +82,9 @@ HaskellObj rts_mkInt64 (Capability *cap, HsInt64 i) { StgClosure *p = (StgClosure *)allocate(cap,CONSTR_sizeW(0,2)); - SET_HDR(p, I64zh_con_info, CCS_SYSTEM); ASSIGN_Int64((P_)&(p->payload[0]), i); + write_barrier(); + SET_HDR(p, I64zh_con_info, CCS_SYSTEM); return p; } @@ -86,8 +92,9 @@ HaskellObj rts_mkWord (Capability *cap, HsWord i) { StgClosure *p = (StgClosure *)allocate(cap,CONSTR_sizeW(0,1)); - SET_HDR(p, Wzh_con_info, CCS_SYSTEM); p->payload[0] = (StgClosure *)(StgWord)i; + write_barrier(); + SET_HDR(p, Wzh_con_info, CCS_SYSTEM); return p; } @@ -96,8 +103,9 @@ rts_mkWord8 (Capability *cap, HsWord8 w) { /* see rts_mkInt* comments */ StgClosure *p = (StgClosure *)allocate(cap,CONSTR_sizeW(0,1)); - SET_HDR(p, W8zh_con_info, CCS_SYSTEM); p->payload[0] = (StgClosure *)(StgWord)(w & 0xff); + write_barrier(); + SET_HDR(p, W8zh_con_info, CCS_SYSTEM); return p; } @@ -106,8 +114,9 @@ rts_mkWord16 (Capability *cap, HsWord16 w) { /* see rts_mkInt* comments */ StgClosure *p = (StgClosure *)allocate(cap,CONSTR_sizeW(0,1)); - SET_HDR(p, W16zh_con_info, CCS_SYSTEM); p->payload[0] = (StgClosure *)(StgWord)(w & 0xffff); + write_barrier(); + SET_HDR(p, W16zh_con_info, CCS_SYSTEM); return p; } @@ -116,8 +125,9 @@ rts_mkWord32 (Capability *cap, HsWord32 w) { /* see rts_mkInt* comments */ StgClosure *p = (StgClosure *)allocate(cap,CONSTR_sizeW(0,1)); - SET_HDR(p, W32zh_con_info, CCS_SYSTEM); p->payload[0] = (StgClosure *)(StgWord)(w & 0xffffffff); + write_barrier(); + SET_HDR(p, W32zh_con_info, CCS_SYSTEM); return p; } @@ -126,8 +136,9 @@ rts_mkWord64 (Capability *cap, HsWord64 w) { StgClosure *p = (StgClosure *)allocate(cap,CONSTR_sizeW(0,2)); /* see mk_Int8 comment */ - SET_HDR(p, W64zh_con_info, CCS_SYSTEM); ASSIGN_Word64((P_)&(p->payload[0]), w); + write_barrier(); + SET_HDR(p, W64zh_con_info, CCS_SYSTEM); return p; } @@ -136,8 
+147,9 @@ HaskellObj rts_mkFloat (Capability *cap, HsFloat f) { StgClosure *p = (StgClosure *)allocate(cap,CONSTR_sizeW(0,1)); - SET_HDR(p, Fzh_con_info, CCS_SYSTEM); ASSIGN_FLT((P_)p->payload, (StgFloat)f); + write_barrier(); + SET_HDR(p, Fzh_con_info, CCS_SYSTEM); return p; } @@ -145,8 +157,9 @@ HaskellObj rts_mkDouble (Capability *cap, HsDouble d) { StgClosure *p = (StgClosure *)allocate(cap,CONSTR_sizeW(0,sizeofW(StgDouble))); - SET_HDR(p, Dzh_con_info, CCS_SYSTEM); ASSIGN_DBL((P_)p->payload, (StgDouble)d); + write_barrier(); + SET_HDR(p, Dzh_con_info, CCS_SYSTEM); return p; } @@ -154,8 +167,9 @@ HaskellObj rts_mkStablePtr (Capability *cap, HsStablePtr s) { StgClosure *p = (StgClosure *)allocate(cap,sizeofW(StgHeader)+1); - SET_HDR(p, StablePtr_con_info, CCS_SYSTEM); p->payload[0] = (StgClosure *)s; + write_barrier(); + SET_HDR(p, StablePtr_con_info, CCS_SYSTEM); return p; } @@ -163,8 +177,9 @@ HaskellObj rts_mkPtr (Capability *cap, HsPtr a) { StgClosure *p = (StgClosure *)allocate(cap,sizeofW(StgHeader)+1); - SET_HDR(p, Ptr_con_info, CCS_SYSTEM); p->payload[0] = (StgClosure *)a; + write_barrier(); + SET_HDR(p, Ptr_con_info, CCS_SYSTEM); return p; } @@ -172,8 +187,9 @@ HaskellObj rts_mkFunPtr (Capability *cap, HsFunPtr a) { StgClosure *p = (StgClosure *)allocate(cap,sizeofW(StgHeader)+1); - SET_HDR(p, FunPtr_con_info, CCS_SYSTEM); p->payload[0] = (StgClosure *)a; + write_barrier(); + SET_HDR(p, FunPtr_con_info, CCS_SYSTEM); return p; } @@ -202,9 +218,10 @@ rts_apply (Capability *cap, HaskellObj f, HaskellObj arg) // Here we don't want to use CCS_SYSTEM, because it's a hidden cost centre, // and evaluating Haskell code under a hidden cost centre leads to // confusing profiling output. (#7753) - SET_HDR(ap, (StgInfoTable *)&stg_ap_2_upd_info, CCS_MAIN); ap->payload[0] = f; ap->payload[1] = arg; + write_barrier(); + SET_HDR(ap, (StgInfoTable *)&stg_ap_2_upd_info, CCS_MAIN); return (StgClosure *)ap; } diff --git a/rts/Sparks.c b/rts/Sparks.c index bd5e120863054a41178c0af23696fb092d3269ff..4022691da2a10a8ff25434e3386c97fcb8183eb3 100644 --- a/rts/Sparks.c +++ b/rts/Sparks.c @@ -182,6 +182,7 @@ pruneSparkQueue (Capability *cap) traceEventSparkFizzle(cap); } else { info = spark->header.info; + load_load_barrier(); if (IS_FORWARDING_PTR(info)) { tmp = (StgClosure*)UN_FORWARDING_PTR(info); /* if valuable work: shift inside the pool */ diff --git a/rts/StgMiscClosures.cmm b/rts/StgMiscClosures.cmm index fdd9f1565eb592ee6ffc5633f9801830585d1656..f2aae9e2a5ccd600b4a6502e4128bc04239d181b 100644 --- a/rts/StgMiscClosures.cmm +++ b/rts/StgMiscClosures.cmm @@ -292,12 +292,14 @@ INFO_TABLE(stg_BLACKHOLE,1,0,BLACKHOLE,"BLACKHOLE","BLACKHOLE") TICK_ENT_DYN_IND(); /* tick */ retry: + prim_read_barrier; p = StgInd_indirectee(node); if (GETTAG(p) != 0) { return (p); } info = StgHeader_info(p); + prim_read_barrier; if (info == stg_IND_info) { // This could happen, if e.g. 
we got a BLOCKING_QUEUE that has // just been replaced with an IND by another thread in @@ -313,9 +315,10 @@ retry: ("ptr" msg) = ccall allocate(MyCapability() "ptr", BYTES_TO_WDS(SIZEOF_MessageBlackHole)); - SET_HDR(msg, stg_MSG_BLACKHOLE_info, CCS_SYSTEM); MessageBlackHole_tso(msg) = CurrentTSO; MessageBlackHole_bh(msg) = node; + prim_write_barrier; + SET_HDR(msg, stg_MSG_BLACKHOLE_info, CCS_SYSTEM); (r) = ccall messageBlackHole(MyCapability() "ptr", msg "ptr"); @@ -375,6 +378,7 @@ INFO_TABLE(stg_WHITEHOLE, 0,0, WHITEHOLE, "WHITEHOLE", "WHITEHOLE") loop: // spin until the WHITEHOLE is updated info = StgHeader_info(node); + prim_read_barrier; if (info == stg_WHITEHOLE_info) { #if defined(PROF_SPIN) W_[whitehole_lockClosure_spin] = diff --git a/rts/ThreadPaused.c b/rts/ThreadPaused.c index a916891aa8cc37b29c2157de3e17e924adc2f7a6..ea8f298a89ce0361608a6fb4c772e8887f974985 100644 --- a/rts/ThreadPaused.c +++ b/rts/ThreadPaused.c @@ -195,6 +195,7 @@ threadPaused(Capability *cap, StgTSO *tso) const StgRetInfoTable *info; const StgInfoTable *bh_info; const StgInfoTable *cur_bh_info USED_IF_THREADS; + const StgInfoTable *frame_info; StgClosure *bh; StgPtr stack_end; uint32_t words_to_squeeze = 0; @@ -220,13 +221,16 @@ threadPaused(Capability *cap, StgTSO *tso) while ((P_)frame < stack_end) { info = get_ret_itbl(frame); + load_load_barrier(); switch (info->i.type) { case UPDATE_FRAME: // If we've already marked this frame, then stop here. - if (frame->header.info == (StgInfoTable *)&stg_marked_upd_frame_info) { + frame_info = frame->header.info; + load_load_barrier(); + if (frame_info == (StgInfoTable *)&stg_marked_upd_frame_info) { if (prev_was_update_frame) { words_to_squeeze += sizeofW(StgUpdateFrame); weight += weight_pending; @@ -235,10 +239,12 @@ threadPaused(Capability *cap, StgTSO *tso) goto end; } + write_barrier(); SET_INFO(frame, (StgInfoTable *)&stg_marked_upd_frame_info); bh = ((StgUpdateFrame *)frame)->updatee; bh_info = bh->header.info; + load_load_barrier(); #if defined(THREADED_RTS) retry: diff --git a/rts/Threads.c b/rts/Threads.c index 977635322d82defac33875606c1a1b5df974cbd9..2305f7b2273039a3db4b738061cb07435f08f8ea 100644 --- a/rts/Threads.c +++ b/rts/Threads.c @@ -82,14 +82,14 @@ createThread(Capability *cap, W_ size) stack_size = round_to_mblocks(size - sizeofW(StgTSO)); stack = (StgStack *)allocate(cap, stack_size); TICK_ALLOC_STACK(stack_size); - SET_HDR(stack, &stg_STACK_info, cap->r.rCCCS); stack->stack_size = stack_size - sizeofW(StgStack); stack->sp = stack->stack + stack->stack_size; stack->dirty = 1; + write_barrier(); + SET_HDR(stack, &stg_STACK_info, cap->r.rCCCS); tso = (StgTSO *)allocate(cap, sizeofW(StgTSO)); TICK_ALLOC_TSO(); - SET_HDR(tso, &stg_TSO_info, CCS_SYSTEM); // Always start with the compiled code evaluator tso->what_next = ThreadRunGHC; @@ -116,6 +116,9 @@ createThread(Capability *cap, W_ size) tso->prof.cccs = CCS_MAIN; #endif + write_barrier(); + SET_HDR(tso, &stg_TSO_info, CCS_SYSTEM); + // put a stop frame on the stack stack->sp -= sizeofW(StgStopFrame); SET_HDR((StgClosure*)stack->sp, @@ -257,8 +260,9 @@ tryWakeupThread (Capability *cap, StgTSO *tso) { MessageWakeup *msg; msg = (MessageWakeup *)allocate(cap,sizeofW(MessageWakeup)); - SET_HDR(msg, &stg_MSG_TRY_WAKEUP_info, CCS_SYSTEM); msg->tso = tso; + write_barrier(); + SET_HDR(msg, &stg_MSG_TRY_WAKEUP_info, CCS_SYSTEM); sendMessage(cap, tso->cap, (Message*)msg); debugTraceCap(DEBUG_sched, cap, "message: try wakeup thread %ld on cap %d", (W_)tso->id, tso->cap->no); @@ -363,6 +367,7 @@ 
wakeBlockingQueue(Capability *cap, StgBlockingQueue *bq) for (msg = bq->queue; msg != (MessageBlackHole*)END_TSO_QUEUE; msg = msg->link) { i = msg->header.info; + load_load_barrier(); if (i != &stg_IND_info) { ASSERT(i == &stg_MSG_BLACKHOLE_info); tryWakeupThread(cap,msg->tso); @@ -384,6 +389,8 @@ checkBlockingQueues (Capability *cap, StgTSO *tso) { StgBlockingQueue *bq, *next; StgClosure *p; + const StgInfoTable *bqinfo; + const StgInfoTable *pinfo; debugTraceCap(DEBUG_sched, cap, "collision occurred; checking blocking queues for thread %ld", @@ -392,15 +399,18 @@ checkBlockingQueues (Capability *cap, StgTSO *tso) for (bq = tso->bq; bq != (StgBlockingQueue*)END_TSO_QUEUE; bq = next) { next = bq->link; - if (bq->header.info == &stg_IND_info) { + bqinfo = bq->header.info; + load_load_barrier(); + if (bqinfo == &stg_IND_info) { // ToDo: could short it out right here, to avoid // traversing this IND multiple times. continue; } p = bq->bh; - - if (p->header.info != &stg_BLACKHOLE_info || + pinfo = p->header.info; + load_load_barrier(); + if (pinfo != &stg_BLACKHOLE_info || ((StgInd *)p)->indirectee != (StgClosure*)bq) { wakeBlockingQueue(cap,bq); @@ -424,6 +434,7 @@ updateThunk (Capability *cap, StgTSO *tso, StgClosure *thunk, StgClosure *val) const StgInfoTable *i; i = thunk->header.info; + load_load_barrier(); if (i != &stg_BLACKHOLE_info && i != &stg_CAF_BLACKHOLE_info && i != &__stg_EAGER_BLACKHOLE_info && @@ -444,6 +455,7 @@ updateThunk (Capability *cap, StgTSO *tso, StgClosure *thunk, StgClosure *val) } i = v->header.info; + load_load_barrier(); if (i == &stg_TSO_info) { checkBlockingQueues(cap, tso); return; @@ -597,12 +609,13 @@ threadStackOverflow (Capability *cap, StgTSO *tso) new_stack = (StgStack*) allocate(cap, chunk_size); cap->r.rCurrentTSO = NULL; - SET_HDR(new_stack, &stg_STACK_info, old_stack->header.prof.ccs); TICK_ALLOC_STACK(chunk_size); new_stack->dirty = 0; // begin clean, we'll mark it dirty below new_stack->stack_size = chunk_size - sizeofW(StgStack); new_stack->sp = new_stack->stack + new_stack->stack_size; + write_barrier(); + SET_HDR(new_stack, &stg_STACK_info, old_stack->header.prof.ccs); tso->tot_stack_size += new_stack->stack_size; @@ -651,8 +664,9 @@ threadStackOverflow (Capability *cap, StgTSO *tso) } else { new_stack->sp -= sizeofW(StgUnderflowFrame); frame = (StgUnderflowFrame*)new_stack->sp; - frame->info = &stg_stack_underflow_frame_info; frame->next_chunk = old_stack; + write_barrier(); + frame->info = &stg_stack_underflow_frame_info; } // copy the stack chunk between tso->sp and sp to @@ -738,6 +752,7 @@ threadStackUnderflow (Capability *cap, StgTSO *tso) bool performTryPutMVar(Capability *cap, StgMVar *mvar, StgClosure *value) { const StgInfoTable *info; + const StgInfoTable *qinfo; StgMVarTSOQueue *q; StgTSO *tso; @@ -752,6 +767,8 @@ bool performTryPutMVar(Capability *cap, StgMVar *mvar, StgClosure *value) q = mvar->head; loop: + qinfo = q->header.info; + load_load_barrier(); if (q == (StgMVarTSOQueue*)&stg_END_TSO_QUEUE_closure) { /* No further takes, the MVar is now full. 
*/ if (info == &stg_MVAR_CLEAN_info) { @@ -762,8 +779,8 @@ loop: unlockClosure((StgClosure*)mvar, &stg_MVAR_DIRTY_info); return true; } - if (q->header.info == &stg_IND_info || - q->header.info == &stg_MSG_NULL_info) { + if (qinfo == &stg_IND_info || + qinfo == &stg_MSG_NULL_info) { q = (StgMVarTSOQueue*)((StgInd*)q)->indirectee; goto loop; } diff --git a/rts/TopHandler.c b/rts/TopHandler.c index c0ac936b85ea5db318115711b2c9c1122e9ea740..d5175015e776b2ba92c7b930ac64857267efa427 100644 --- a/rts/TopHandler.c +++ b/rts/TopHandler.c @@ -29,6 +29,7 @@ StgTSO *getTopHandlerThread(void) { StgWeak *weak = (StgWeak*)deRefStablePtr(topHandlerPtr); RELEASE_LOCK(&m); const StgInfoTable *info = weak->header.info; + load_load_barrier(); if (info == &stg_WEAK_info) { StgClosure *key = ((StgWeak*)weak)->key; diff --git a/rts/Updates.h b/rts/Updates.h index 1ba398bd354d35540faf8526e1dd8a38d2ed231d..c4cc05d63f61ac4dbf34ffe47da50b9804a4e68a 100644 --- a/rts/Updates.h +++ b/rts/Updates.h @@ -39,10 +39,10 @@ PROF_HDR_FIELDS(w_,ccs,p2) \ p_ updatee - #define updateWithIndirection(p1, p2, and_then) \ W_ bd; \ \ + prim_write_barrier; \ OVERWRITING_CLOSURE(p1); \ StgInd_indirectee(p1) = p2; \ prim_write_barrier; \ @@ -69,6 +69,7 @@ INLINE_HEADER void updateWithIndirection (Capability *cap, ASSERT( (P_)p1 != (P_)p2 ); /* not necessarily true: ASSERT( !closure_IND(p1) ); */ /* occurs in RaiseAsync.c:raiseAsync() */ + write_barrier(); OVERWRITING_CLOSURE(p1); ((StgInd *)p1)->indirectee = p2; write_barrier(); diff --git a/rts/Weak.c b/rts/Weak.c index a322d822afccfe575cd073a8a535450e4c6c1032..d084f0daed677f27b587886ea2a2e8e4963b5a29 100644 --- a/rts/Weak.c +++ b/rts/Weak.c @@ -42,6 +42,7 @@ void runAllCFinalizers(StgWeak *list) { StgWeak *w; + const StgInfoTable *winfo; Task *task; task = myTask(); @@ -57,7 +58,9 @@ runAllCFinalizers(StgWeak *list) // If there's no major GC between the time that the finalizer for the // object from the oldest generation is manually called and shutdown // we end up running the same finalizer twice. See #7170. - if (w->header.info != &stg_DEAD_WEAK_info) { + winfo = w->header.info; + load_load_barrier(); + if (winfo != &stg_DEAD_WEAK_info) { runCFinalizers((StgCFinalizerList *)w->cfinalizers); } } @@ -126,6 +129,7 @@ scheduleFinalizers(Capability *cap, StgWeak *list) // there's a later call to finalizeWeak# on this weak pointer, // we don't run the finalizer again. 
SET_HDR(w, &stg_DEAD_WEAK_info, w->header.prof.ccs);
+            write_barrier();
         }
     n_finalizers = i;
@@ -138,7 +142,6 @@
     size = n + mutArrPtrsCardTableSize(n);
     arr = (StgMutArrPtrs *)allocate(cap, sizeofW(StgMutArrPtrs) + size);
     TICK_ALLOC_PRIM(sizeofW(StgMutArrPtrs), n, 0);
-    SET_HDR(arr, &stg_MUT_ARR_PTRS_FROZEN_CLEAN_info, CCS_SYSTEM);
     arr->ptrs = n;
     arr->size = size;
@@ -154,6 +157,9 @@
         arr->payload[i] = (StgClosure *)(W_)(-1);
     }
+    write_barrier();
+    SET_HDR(arr, &stg_MUT_ARR_PTRS_FROZEN_CLEAN_info, CCS_SYSTEM);
+
     t = createIOThread(cap,
                        RtsFlags.GcFlags.initialStkSize,
                        rts_apply(cap,
diff --git a/rts/sm/CNF.c b/rts/sm/CNF.c
index 8d0ebccaf3fd5d0a2cff77759e107afcc11e438b..1e4f8166829e9d883c67dfcc4c346f9809021b9a 100644
--- a/rts/sm/CNF.c
+++ b/rts/sm/CNF.c
@@ -373,7 +373,6 @@ compactNew (Capability *cap, StgWord size)
                                ALLOCATE_NEW);
     self = firstBlockGetCompact(block);
-    SET_HDR((StgClosure*)self, &stg_COMPACT_NFDATA_CLEAN_info, CCS_SYSTEM);
     self->autoBlockW = aligned_size / sizeof(StgWord);
     self->nursery = block;
     self->last = block;
@@ -390,6 +389,9 @@
     debugTrace(DEBUG_compact, "compactNew: size %" FMT_Word, size);
+    write_barrier();
+    SET_HDR((StgClosure*)self, &stg_COMPACT_NFDATA_CLEAN_info, CCS_SYSTEM);
+
     return self;
 }
@@ -542,8 +544,10 @@ insertCompactHash (Capability *cap,
                    StgClosure *p, StgClosure *to)
 {
     insertHashTable(str->hash, (StgWord)p, (const void*)to);
-    if (str->header.info == &stg_COMPACT_NFDATA_CLEAN_info) {
-        str->header.info = &stg_COMPACT_NFDATA_DIRTY_info;
+    const StgInfoTable *strinfo = str->header.info;
+    load_load_barrier();
+    if (strinfo == &stg_COMPACT_NFDATA_CLEAN_info) {
+        str->header.info = &stg_COMPACT_NFDATA_DIRTY_info;
         recordClosureMutated(cap, (StgClosure*)str);
     }
 }
@@ -686,6 +690,7 @@ verify_consistency_block (StgCompactNFData *str, StgCompactNFDataBlock *block)
         ASSERT(LOOKS_LIKE_CLOSURE_PTR(q));
         info = get_itbl(q);
+        load_load_barrier();
         switch (info->type) {
         case CONSTR_1_0:
             check_object_in_compact(str, UNTAG_CLOSURE(q->payload[0]));
@@ -925,6 +930,7 @@ fixup_block(StgCompactNFDataBlock *block, StgWord *fixup_table, uint32_t count)
     while (p < bd->free) {
         ASSERT(LOOKS_LIKE_CLOSURE_PTR(p));
         info = get_itbl((StgClosure*)p);
+        load_load_barrier();
         switch (info->type) {
         case CONSTR_1_0:
diff --git a/rts/sm/Compact.c b/rts/sm/Compact.c
index 3bfefa7ceb5674ab3a5b53976b21949483ebedc6..177803596227d488fea5ba4743496f0bd0803c5e 100644
--- a/rts/sm/Compact.c
+++ b/rts/sm/Compact.c
@@ -160,6 +160,7 @@ get_threaded_info( StgPtr p )
     StgWord q;
     q = (W_)GET_INFO(UNTAG_CLOSURE((StgClosure *)p));
+    load_load_barrier();
 loop:
     switch (GET_CLOSURE_TAG((StgClosure *)q))
@@ -361,6 +362,7 @@ thread_stack(StgPtr p, StgPtr stack_end)
             fun_info = FUN_INFO_PTR_TO_STRUCT((StgInfoTable *)UNTAG_CLOSURE((StgClosure *)
                            get_threaded_info((StgPtr)ret_fun->fun)));
+            load_load_barrier();
                  // *before* threading it!
thread(&ret_fun->fun); p = thread_arg_block(fun_info, ret_fun->payload); @@ -383,6 +385,7 @@ thread_PAP_payload (StgClosure *fun, StgClosure **payload, StgWord size) fun_info = FUN_INFO_PTR_TO_STRUCT((StgInfoTable *)UNTAG_CLOSURE((StgClosure *) get_threaded_info((StgPtr)fun))); + load_load_barrier(); ASSERT(fun_info->i.type != PAP); p = (StgPtr)payload; @@ -550,6 +553,8 @@ update_fwd_large( bdescr *bd ) static /* STATIC_INLINE */ StgPtr thread_obj (const StgInfoTable *info, StgPtr p) { + load_load_barrier(); + switch (info->type) { case THUNK_0_1: return p + sizeofW(StgThunk) + 1; @@ -823,6 +828,7 @@ update_fwd_compact( bdescr *blocks ) // definitely have enough room. Also see bug #1147. iptr = get_threaded_info(p); info = INFO_PTR_TO_STRUCT((StgInfoTable *)UNTAG_CLOSURE((StgClosure *)iptr)); + load_load_barrier(); q = p; diff --git a/rts/sm/Evac.c b/rts/sm/Evac.c index b4050512a6dd0140f422853372fa8e20a4c24caf..c0a31af7de6738b90cd0d4f7450e2b31afa46b17 100644 --- a/rts/sm/Evac.c +++ b/rts/sm/Evac.c @@ -111,6 +111,7 @@ copy_tag(StgClosure **p, const StgInfoTable *info, { const StgInfoTable *new_info; new_info = (const StgInfoTable *)cas((StgPtr)&src->header.info, (W_)info, MK_FORWARDING_PTR(to)); + load_load_barrier(); if (new_info != info) { #if defined(PROFILING) // We copied this object at the same time as another @@ -129,8 +130,11 @@ copy_tag(StgClosure **p, const StgInfoTable *info, } } #else - src->header.info = (const StgInfoTable *)MK_FORWARDING_PTR(to); + // if somebody else reads the forwarding pointer, we better make + // sure there's a closure at the end of it. + write_barrier(); *p = TAG_CLOSURE(tag,(StgClosure*)to); + src->header.info = (const StgInfoTable *)MK_FORWARDING_PTR(to); #endif #if defined(PROFILING) @@ -205,6 +209,7 @@ spin: } #else info = (W_)src->header.info; + load_load_barrier(); #endif to = alloc_for_copy(size_to_reserve, gen_no); @@ -216,8 +221,8 @@ spin: } write_barrier(); - src->header.info = (const StgInfoTable*)MK_FORWARDING_PTR(to); *p = (StgClosure *)to; + src->header.info = (const StgInfoTable*)MK_FORWARDING_PTR(to); #if defined(PROFILING) // We store the size of the just evacuated object in the LDV word so that @@ -610,6 +615,7 @@ loop: gen_no = bd->dest_no; info = q->header.info; + load_load_barrier(); if (IS_FORWARDING_PTR(info)) { /* Already evacuated, just return the forwarding address. @@ -720,11 +726,14 @@ loop: StgClosure *r; const StgInfoTable *i; r = ((StgInd*)q)->indirectee; + load_load_barrier(); if (GET_CLOSURE_TAG(r) == 0) { i = r->header.info; + load_load_barrier(); if (IS_FORWARDING_PTR(i)) { r = (StgClosure *)UN_FORWARDING_PTR(i); i = r->header.info; + load_load_barrier(); } if (i == &stg_TSO_info || i == &stg_WHITEHOLE_info @@ -917,6 +926,7 @@ evacuate_BLACKHOLE(StgClosure **p) } gen_no = bd->dest_no; info = q->header.info; + load_load_barrier(); if (IS_FORWARDING_PTR(info)) { StgClosure *e = (StgClosure*)UN_FORWARDING_PTR(info); @@ -1099,6 +1109,7 @@ selector_chain: // need the write-barrier stuff. // - undo the chain we've built to point to p. SET_INFO((StgClosure *)p, (const StgInfoTable *)info_ptr); + write_barrier(); *q = (StgClosure *)p; if (evac) evacuate(q); unchain_thunk_selectors(prev_thunk_selector, (StgClosure *)p); @@ -1108,6 +1119,7 @@ selector_chain: #else // Save the real info pointer (NOTE: not the same as get_itbl()). 
info_ptr = (StgWord)p->header.info;
+    load_load_barrier();
     SET_INFO((StgClosure *)p,&stg_WHITEHOLE_info);
 #endif
@@ -1126,6 +1138,7 @@ selector_loop:
     // that evacuate() doesn't mind if it gets passed a to-space pointer.
     info = (StgInfoTable*)selectee->header.info;
+    load_load_barrier();
     if (IS_FORWARDING_PTR(info)) {
         // We don't follow pointers into to-space; the constructor
@@ -1135,6 +1148,7 @@
     }
     info = INFO_PTR_TO_STRUCT(info);
+    load_load_barrier();
     switch (info->type) {
       case WHITEHOLE:
           goto bale_out; // about to be evacuated by another thread (or a loop).
@@ -1165,6 +1179,7 @@
               SET_INFO((StgClosure*)p, (StgInfoTable *)info_ptr);
               OVERWRITING_CLOSURE((StgClosure*)p);
               SET_INFO((StgClosure*)p, &stg_WHITEHOLE_info);
+              write_barrier();
           }
 #endif
@@ -1178,6 +1193,7 @@
           if (!IS_FORWARDING_PTR(info_ptr)) {
               info = INFO_PTR_TO_STRUCT((StgInfoTable *)info_ptr);
+              load_load_barrier();
               switch (info->type) {
               case IND:
               case IND_STATIC:
@@ -1229,9 +1245,11 @@
               // indirection, as in evacuate().
               if (GET_CLOSURE_TAG(r) == 0) {
                   i = r->header.info;
+                  load_load_barrier();
                   if (IS_FORWARDING_PTR(i)) {
                       r = (StgClosure *)UN_FORWARDING_PTR(i);
                       i = r->header.info;
+                      load_load_barrier();
                   }
                   if (i == &stg_TSO_info
                       || i == &stg_WHITEHOLE_info
diff --git a/rts/sm/GCAux.c b/rts/sm/GCAux.c
index 23ed3f0622f77ea3060d637c1cbbf06ed5bc6851..9d0709c8d0e7b5c589ae4775d515bef8dc54ecbc 100644
--- a/rts/sm/GCAux.c
+++ b/rts/sm/GCAux.c
@@ -76,6 +76,7 @@ isAlive(StgClosure *p)
     }
     info = q->header.info;
+    load_load_barrier();
     if (IS_FORWARDING_PTR(info)) {
         // alive!
@@ -83,6 +84,7 @@
     }
     info = INFO_PTR_TO_STRUCT(info);
+    load_load_barrier();
     switch (info->type) {
@@ -121,8 +123,12 @@ revertCAFs( void )
          c = (StgIndStatic *)c->static_link)
     {
         c = (StgIndStatic *)UNTAG_STATIC_LIST_PTR(c);
-        SET_INFO((StgClosure *)c, c->saved_info);
-        c->saved_info = NULL;
+        // Read saved_info before clearing it, so the SET_INFO below does
+        // not write a NULL info pointer.
+        const StgInfoTable *saved_info = c->saved_info;
+        c->saved_info = NULL;
+        write_barrier();
+        SET_INFO((StgClosure *)c, saved_info);
         // could, but not necessary: c->static_link = NULL;
     }
     revertible_caf_list = (StgIndStatic*)END_OF_CAF_LIST;
diff --git a/rts/sm/MarkWeak.c b/rts/sm/MarkWeak.c
index d5982e2f64686cb8ba91445c96931053a0bd34b5..c6c345b6fb3ac1fe22bdfccdfe590eb237d906ff 100644
--- a/rts/sm/MarkWeak.c
+++ b/rts/sm/MarkWeak.c
@@ -234,16 +234,19 @@ static bool tidyWeakList(generation *gen)
     last_w = &gen->old_weak_ptr_list;
     for (w = gen->old_weak_ptr_list; w != NULL; w = next_w) {
+        info = w->header.info;
+        load_load_barrier();
+
         /* There might be a DEAD_WEAK on the list if finalizeWeak# was
          * called on a live weak pointer object.  Just remove it.
diff --git a/rts/sm/MarkWeak.c b/rts/sm/MarkWeak.c
index d5982e2f64686cb8ba91445c96931053a0bd34b5..c6c345b6fb3ac1fe22bdfccdfe590eb237d906ff 100644
--- a/rts/sm/MarkWeak.c
+++ b/rts/sm/MarkWeak.c
@@ -234,16 +234,17 @@ static bool tidyWeakList(generation *gen)
     last_w = &gen->old_weak_ptr_list;
 
     for (w = gen->old_weak_ptr_list; w != NULL; w = next_w) {
+        info = get_itbl((StgClosure *)w);
+
         /* There might be a DEAD_WEAK on the list if finalizeWeak# was
          * called on a live weak pointer object.  Just remove it.
          */
-        if (w->header.info == &stg_DEAD_WEAK_info) {
+        if (info == &stg_DEAD_WEAK_info) {
             next_w = w->link;
             *last_w = next_w;
             continue;
         }
 
-        info = get_itbl((StgClosure *)w);
         switch (info->type) {
 
         case WEAK:
diff --git a/rts/sm/Sanity.c b/rts/sm/Sanity.c
index 8082b7e6d08978edd10a351de40b1d7f4d934a58..ff76f747c98780a4067bba54a1d20738c673d3d2 100644
--- a/rts/sm/Sanity.c
+++ b/rts/sm/Sanity.c
@@ -233,6 +233,7 @@ checkClosure( const StgClosure* p )
     p = UNTAG_CONST_CLOSURE(p);
     info = p->header.info;
+    load_load_barrier();
 
     if (IS_FORWARDING_PTR(info)) {
         barf("checkClosure: found EVACUATED closure %d", info->type);
@@ -243,6 +244,7 @@ checkClosure( const StgClosure* p )
 #endif
 
     info = INFO_PTR_TO_STRUCT(info);
+    load_load_barrier();
 
     switch (info->type) {
 
@@ -564,6 +566,7 @@ checkTSO(StgTSO *tso)
     next = tso->_link;
     info = (const StgInfoTable*) tso->_link->header.info;
+    load_load_barrier();
 
     ASSERT(next == END_TSO_QUEUE ||
            info == &stg_MVAR_TSO_QUEUE_info ||
diff --git a/rts/sm/Scav.c b/rts/sm/Scav.c
index 73a790e9eda2ca5e28735d250f5b30d0646d132e..a27158cd6708705e9f3e35edf63d8a181c02b522 100644
--- a/rts/sm/Scav.c
+++ b/rts/sm/Scav.c
@@ -187,6 +187,7 @@ scavenge_compact(StgCompactNFData *str)
                str, Bdescr((P_)str)->gen_no, str->totalW * sizeof(W_))
 
     gct->eager_promotion = saved_eager;
+    write_barrier();
     if (gct->failed_to_evac) {
         ((StgClosure *)str)->header.info = &stg_COMPACT_NFDATA_DIRTY_info;
     } else {
@@ -374,6 +375,7 @@ scavenge_thunk_srt(const StgInfoTable *info)
     if (!major_gc) return;
 
     thunk_info = itbl_to_thunk_itbl(info);
+    load_load_barrier();
     if (thunk_info->i.srt) {
         StgClosure *srt = (StgClosure*)GET_SRT(thunk_info);
         evacuate(&srt);
@@ -388,6 +390,7 @@ scavenge_fun_srt(const StgInfoTable *info)
     if (!major_gc) return;
 
     fun_info = itbl_to_fun_itbl(info);
+    load_load_barrier();
     if (fun_info->i.srt) {
         StgClosure *srt = (StgClosure*)GET_FUN_SRT(fun_info);
         evacuate(&srt);
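
tidyWeakList now reads the weak object's info table once, up front, rather than once for the DEAD_WEAK test and again for the dispatch; under concurrency the two reads could disagree. The same snapshot-then-fence shape appears in checkClosure and the SRT scavengers above. A compilable model, with stand-in types and a hypothetical dead_weak_info symbol in place of stg_DEAD_WEAK_info (as before, the fence approximates the SMP.h barrier):

    #define load_load_barrier() __atomic_thread_fence(__ATOMIC_ACQUIRE)

    typedef struct Weak_ { const void *info; struct Weak_ *link; } Weak;

    extern const char dead_weak_info[];  /* stand-in for stg_DEAD_WEAK_info */

    static Weak *tidy_one(Weak *w)
    {
        const void *info = w->info;  /* single snapshot of the header       */
        load_load_barrier();         /* fields read below are ordered after */
                                     /* the header read                     */
        if (info == dead_weak_info)
            return w->link;          /* unlink, using the same snapshot     */
        /* ... dispatch on the snapshot here, never re-reading w->info ... */
        return w;
    }
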
@@ -431,7 +434,7 @@ scavenge_block (bdescr *bd)
   // time around the loop.
   while (p < bd->free || (bd == ws->todo_bd && p < ws->todo_free)) {
 
-      ASSERT(bd->link == NULL);
+      ASSERT(bd->link == NULL);
       ASSERT(LOOKS_LIKE_CLOSURE_PTR(p));
       info = get_itbl((StgClosure *)p);
@@ -450,6 +453,7 @@ scavenge_block (bdescr *bd)
         evacuate((StgClosure **)&mvar->value);
         gct->eager_promotion = saved_eager_promotion;
 
+        write_barrier();
         if (gct->failed_to_evac) {
             mvar->header.info = &stg_MVAR_DIRTY_info;
         } else {
@@ -467,6 +471,7 @@ scavenge_block (bdescr *bd)
         evacuate((StgClosure **)&tvar->first_watch_queue_entry);
         gct->eager_promotion = saved_eager_promotion;
 
+        write_barrier();
         if (gct->failed_to_evac) {
             tvar->header.info = &stg_TVAR_DIRTY_info;
         } else {
@@ -601,6 +606,7 @@ scavenge_block (bdescr *bd)
         evacuate(&((StgMutVar *)p)->var);
         gct->eager_promotion = saved_eager_promotion;
 
+        write_barrier();
         if (gct->failed_to_evac) {
             ((StgClosure *)q)->header.info = &stg_MUT_VAR_DIRTY_info;
         } else {
@@ -620,6 +626,7 @@ scavenge_block (bdescr *bd)
         evacuate((StgClosure**)&bq->link);
         gct->eager_promotion = saved_eager_promotion;
 
+        write_barrier();
         if (gct->failed_to_evac) {
             bq->header.info = &stg_BLOCKING_QUEUE_DIRTY_info;
         } else {
@@ -672,6 +679,7 @@ scavenge_block (bdescr *bd)
 
         p = scavenge_mut_arr_ptrs((StgMutArrPtrs*)p);
 
+        write_barrier();
         if (gct->failed_to_evac) {
             ((StgClosure *)q)->header.info = &stg_MUT_ARR_PTRS_DIRTY_info;
         } else {
@@ -689,6 +697,7 @@ scavenge_block (bdescr *bd)
     {
         p = scavenge_mut_arr_ptrs((StgMutArrPtrs*)p);
 
+        write_barrier();
         if (gct->failed_to_evac) {
             ((StgClosure *)q)->header.info = &stg_MUT_ARR_PTRS_FROZEN_DIRTY_info;
         } else {
@@ -714,6 +723,7 @@ scavenge_block (bdescr *bd)
         }
         gct->eager_promotion = saved_eager_promotion;
 
+        write_barrier();
         if (gct->failed_to_evac) {
             ((StgClosure *)q)->header.info = &stg_SMALL_MUT_ARR_PTRS_DIRTY_info;
         } else {
@@ -735,6 +745,7 @@ scavenge_block (bdescr *bd)
             evacuate((StgClosure **)p);
         }
 
+        write_barrier();
         if (gct->failed_to_evac) {
             ((StgClosure *)q)->header.info = &stg_SMALL_MUT_ARR_PTRS_FROZEN_DIRTY_info;
         } else {
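
Every scavenge_block case above shares one recipe: evacuate the fields, restore eager_promotion, then write_barrier() before flipping the header between the _CLEAN and _DIRTY info tables, so a thread that observes the new header also observes the scavenged fields behind it. A model of the recipe, where evac, failed, and the info symbols are stand-ins for evacuate(), gct->failed_to_evac, and the stg_MUT_VAR_* tables:

    #define write_barrier() __atomic_thread_fence(__ATOMIC_RELEASE)

    typedef struct { const void *info; void *var; } MutVar;

    extern const char mut_var_clean_info[], mut_var_dirty_info[];

    static void scavenge_mut_var(MutVar *q, void *(*evac)(void *), int failed)
    {
        q->var = evac(q->var);   /* scavenge the payload                  */
        write_barrier();         /* payload visible before the header flip */
        q->info = failed ? mut_var_dirty_info : mut_var_clean_info;
    }
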
@@ -875,6 +886,7 @@ scavenge_mark_stack(void)
             evacuate((StgClosure **)&mvar->value);
             gct->eager_promotion = saved_eager_promotion;
 
+            write_barrier();
             if (gct->failed_to_evac) {
                 mvar->header.info = &stg_MVAR_DIRTY_info;
             } else {
@@ -891,6 +903,7 @@ scavenge_mark_stack(void)
             evacuate((StgClosure **)&tvar->first_watch_queue_entry);
             gct->eager_promotion = saved_eager_promotion;
 
+            write_barrier();
             if (gct->failed_to_evac) {
                 tvar->header.info = &stg_TVAR_DIRTY_info;
             } else {
@@ -997,6 +1010,7 @@ scavenge_mark_stack(void)
             evacuate(&((StgMutVar *)p)->var);
             gct->eager_promotion = saved_eager_promotion;
 
+            write_barrier();
             if (gct->failed_to_evac) {
                 ((StgClosure *)q)->header.info = &stg_MUT_VAR_DIRTY_info;
             } else {
@@ -1016,6 +1030,7 @@ scavenge_mark_stack(void)
             evacuate((StgClosure**)&bq->link);
             gct->eager_promotion = saved_eager_promotion;
 
+            write_barrier();
             if (gct->failed_to_evac) {
                 bq->header.info = &stg_BLOCKING_QUEUE_DIRTY_info;
             } else {
@@ -1064,6 +1079,7 @@ scavenge_mark_stack(void)
 
             scavenge_mut_arr_ptrs((StgMutArrPtrs *)p);
 
+            write_barrier();
             if (gct->failed_to_evac) {
                 ((StgClosure *)q)->header.info = &stg_MUT_ARR_PTRS_DIRTY_info;
             } else {
@@ -1083,6 +1099,7 @@ scavenge_mark_stack(void)
 
             scavenge_mut_arr_ptrs((StgMutArrPtrs *)p);
 
+            write_barrier();
             if (gct->failed_to_evac) {
                 ((StgClosure *)q)->header.info = &stg_MUT_ARR_PTRS_FROZEN_DIRTY_info;
             } else {
@@ -1110,6 +1127,7 @@ scavenge_mark_stack(void)
             }
             gct->eager_promotion = saved_eager;
 
+            write_barrier();
             if (gct->failed_to_evac) {
                 ((StgClosure *)q)->header.info =
                     &stg_SMALL_MUT_ARR_PTRS_DIRTY_info;
             } else {
@@ -1131,6 +1149,7 @@ scavenge_mark_stack(void)
                 evacuate((StgClosure **)p);
             }
 
+            write_barrier();
             if (gct->failed_to_evac) {
                 ((StgClosure *)q)->header.info = &stg_SMALL_MUT_ARR_PTRS_FROZEN_DIRTY_info;
             } else {
@@ -1237,6 +1256,7 @@ scavenge_one(StgPtr p)
         evacuate((StgClosure **)&mvar->value);
         gct->eager_promotion = saved_eager_promotion;
 
+        write_barrier();
         if (gct->failed_to_evac) {
             mvar->header.info = &stg_MVAR_DIRTY_info;
         } else {
@@ -1253,6 +1273,7 @@ scavenge_one(StgPtr p)
         evacuate((StgClosure **)&tvar->first_watch_queue_entry);
         gct->eager_promotion = saved_eager_promotion;
 
+        write_barrier();
         if (gct->failed_to_evac) {
             tvar->header.info = &stg_TVAR_DIRTY_info;
         } else {
@@ -1317,6 +1338,7 @@ scavenge_one(StgPtr p)
         evacuate(&((StgMutVar *)p)->var);
         gct->eager_promotion = saved_eager_promotion;
 
+        write_barrier();
         if (gct->failed_to_evac) {
             ((StgClosure *)q)->header.info = &stg_MUT_VAR_DIRTY_info;
         } else {
@@ -1336,6 +1358,7 @@ scavenge_one(StgPtr p)
         evacuate((StgClosure**)&bq->link);
         gct->eager_promotion = saved_eager_promotion;
 
+        write_barrier();
         if (gct->failed_to_evac) {
             bq->header.info = &stg_BLOCKING_QUEUE_DIRTY_info;
         } else {
@@ -1384,6 +1407,7 @@ scavenge_one(StgPtr p)
 
         scavenge_mut_arr_ptrs((StgMutArrPtrs *)p);
 
+        write_barrier();
         if (gct->failed_to_evac) {
             ((StgClosure *)p)->header.info = &stg_MUT_ARR_PTRS_DIRTY_info;
         } else {
@@ -1401,6 +1425,7 @@ scavenge_one(StgPtr p)
         // follow everything
         scavenge_mut_arr_ptrs((StgMutArrPtrs *)p);
 
+        write_barrier();
         if (gct->failed_to_evac) {
             ((StgClosure *)p)->header.info = &stg_MUT_ARR_PTRS_FROZEN_DIRTY_info;
         } else {
@@ -1428,6 +1453,7 @@ scavenge_one(StgPtr p)
         }
         gct->eager_promotion = saved_eager;
 
+        write_barrier();
         if (gct->failed_to_evac) {
             ((StgClosure *)q)->header.info = &stg_SMALL_MUT_ARR_PTRS_DIRTY_info;
         } else {
@@ -1449,6 +1475,7 @@ scavenge_one(StgPtr p)
             evacuate((StgClosure **)p);
         }
 
+        write_barrier();
         if (gct->failed_to_evac) {
             ((StgClosure *)q)->header.info = &stg_SMALL_MUT_ARR_PTRS_FROZEN_DIRTY_info;
         } else {
@@ -1572,6 +1599,10 @@ scavenge_mutable_list(bdescr *bd, generation *gen)
     StgPtr p, q;
     uint32_t gen_no;
+#if defined(DEBUG)
+    const StgInfoTable *pinfo;
+#endif
+
     gen_no = gen->no;
     gct->evac_gen_no = gen_no;
     for (; bd != NULL; bd = bd->link) {
@@ -1599,9 +1630,11 @@ scavenge_mutable_list(bdescr *bd, generation *gen)
                 case TREC_CHUNK: mutlist_TREC_CHUNK++; break;
                 case MUT_PRIM:
-                    if (((StgClosure*)p)->header.info == &stg_TVAR_WATCH_QUEUE_info)
+                    pinfo = ((StgClosure*)p)->header.info;
+                    load_load_barrier();
+                    if (pinfo == &stg_TVAR_WATCH_QUEUE_info)
                         mutlist_TVAR_WATCH_QUEUE++;
-                    else if (((StgClosure*)p)->header.info == &stg_TREC_HEADER_info)
+                    else if (pinfo == &stg_TREC_HEADER_info)
                         mutlist_TREC_HEADER++;
                     else
                         mutlist_OTHERS++;
@@ -1631,6 +1664,7 @@ scavenge_mutable_list(bdescr *bd, generation *gen)
 
             scavenge_mut_arr_ptrs_marked((StgMutArrPtrs *)p);
 
+            write_barrier();
             if (gct->failed_to_evac) {
                 ((StgClosure *)p)->header.info = &stg_MUT_ARR_PTRS_DIRTY_info;
             } else {
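
The DEBUG-only mutable-list accounting gets the same treatment: one pinfo snapshot, one fence, then both comparisons against that snapshot, since re-reading the header could straddle a concurrent info-pointer update and misclassify the closure. A model of the before and after, with stand-in symbols:

    #define load_load_barrier() __atomic_thread_fence(__ATOMIC_ACQUIRE)

    typedef struct { const void *info; } ClosureHdr;

    extern const char tvar_watch_queue_info[], trec_header_info[];

    /* Racy under concurrency: p->info is read twice and may change between
     * the reads, so neither branch may match even though one "should".
     * Safe: classify a single, fenced snapshot. */
    static int classify(const ClosureHdr *p)
    {
        const void *pinfo = p->info;
        load_load_barrier();
        if (pinfo == tvar_watch_queue_info) return 1;
        if (pinfo == trec_header_info)      return 2;
        return 0;
    }
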
diff --git a/rts/sm/Storage.c b/rts/sm/Storage.c
index f889e2262beec19c89dbfefb7314dc8e161b7c23..34d103e52f4d8e1a74ed4106d96f9a9e2e813bb9 100644
--- a/rts/sm/Storage.c
+++ b/rts/sm/Storage.c
@@ -407,8 +407,9 @@ lockCAF (StgRegTable *reg, StgIndStatic *caf)
     // Allocate the blackhole indirection closure
     bh = (StgInd *)allocate(cap, sizeofW(*bh));
-    SET_HDR(bh, &stg_CAF_BLACKHOLE_info, caf->header.prof.ccs);
     bh->indirectee = (StgClosure *)cap->r.rCurrentTSO;
+    write_barrier();
+    SET_HDR(bh, &stg_CAF_BLACKHOLE_info, caf->header.prof.ccs);
 
     caf->indirectee = (StgClosure *)bh;
     write_barrier();
@@ -1081,6 +1082,8 @@ void
 dirty_MUT_VAR(StgRegTable *reg, StgClosure *p)
 {
     Capability *cap = regTableToCapability(reg);
+    // No barrier required here as no other heap object fields are read. See
+    // Note [Heap memory barriers] in SMP.h.
     if (p->header.info == &stg_MUT_VAR_CLEAN_info) {
         p->header.info = &stg_MUT_VAR_DIRTY_info;
         recordClosureMutated(cap,p);
@@ -1090,6 +1093,8 @@ void
 dirty_TVAR(Capability *cap, StgTVar *p)
 {
+    // No barrier required here as no other heap object fields are read. See
+    // Note [Heap memory barriers] in SMP.h.
     if (p->header.info == &stg_TVAR_CLEAN_info) {
         p->header.info = &stg_TVAR_DIRTY_info;
         recordClosureMutated(cap,(StgClosure*)p);