Commit b339c8b1 authored by Simon Marlow

Put the contents of Evac.c-inc back in Evac.c, and just compile the file twice

Similarly for Scav.c/Scav.c-inc.
parent 2ac31c7f
@@ -397,6 +397,20 @@ endif
# -O3 helps unroll some loops (especially in copy() with a constant argument).
sm/Evac_HC_OPTS += -optc-funroll-loops
ifneq "$(findstring thr, $(way))" ""
EXTRA_SRCS += sm/Evac_thr.c sm/Scav_thr.c
sm/Evac_thr.c : sm/Evac.c
cp $< $@
sm/Scav_thr.c : sm/Scav.c
cp $< $@
sm/Evac_thr_HC_OPTS += -optc-DPARALLEL_GC
sm/Scav_thr_HC_OPTS += -optc-DPARALLEL_GC
else
EXCLUDED_SRCS += sm/Evac_thr.c sm/Scav_thr.c
endif
# Without this, thread_obj will not be inlined (at least on x86 with GCC 4.1.0)
sm/Compact_HC_OPTS += -optc-finline-limit=2500
@@ -22,10 +22,19 @@
#include "Prelude.h"
#include "LdvProfile.h"
#if defined(PROF_SPIN) && defined(THREADED_RTS) && defined(PARALLEL_GC)
StgWord64 whitehole_spin = 0;
#endif
#if defined(THREADED_RTS) && !defined(PARALLEL_GC)
#define evacuate(p) evacuate1(p)
#endif
#if !defined(PARALLEL_GC)
#define copy_tag_nolock(p, info, src, size, stp, tag) \
copy_tag(p, info, src, size, stp, tag)
#endif
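/* An illustrative sketch, not part of this patch: the compile-twice idiom
 * in miniature. One source file is compiled twice; a macro renames the
 * entry point in one compilation so both objects can be linked into the
 * same binary. All names here are hypothetical.
 *
 *   cc -c Example.c -o Example.o                    # defines evac_example1
 *   cc -DPARALLEL_GC -c Example.c -o Example_thr.o  # defines evac_example
 */
#if !defined(PARALLEL_GC)
#define evac_example evac_example1
#endif

void evac_example (void)
{
    /* sequential or parallel body, depending on PARALLEL_GC */
}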
/* Used to avoid long recursion due to selector thunks
*/
#define MAX_THUNK_SELECTOR_DEPTH 16
@@ -76,14 +85,592 @@ alloc_for_copy (nat size, step *stp)
The evacuate() code
-------------------------------------------------------------------------- */
STATIC_INLINE void
copy_tag(StgClosure **p, const StgInfoTable *info,
StgClosure *src, nat size, step *stp, StgWord tag)
{
StgPtr to, from;
nat i;
to = alloc_for_copy(size,stp);
TICK_GC_WORDS_COPIED(size);
from = (StgPtr)src;
to[0] = (W_)info;
for (i = 1; i < size; i++) { // unroll for small i
to[i] = from[i];
}
// if (to+size+2 < bd->start + BLOCK_SIZE_W) {
// __builtin_prefetch(to + size + 2, 1);
// }
#if defined(PARALLEL_GC)
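    // In the parallel GC we must claim the closure atomically: cas()
    // overwrites the info pointer with a forwarding pointer only if it
    // still holds the info we read earlier; if another thread won the
    // race, we retry via evacuate(), which follows the forwarding pointer.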
{
const StgInfoTable *new_info;
new_info = (const StgInfoTable *)cas((StgPtr)&src->header.info, (W_)info, MK_FORWARDING_PTR(to));
if (new_info != info) {
return evacuate(p); // does the failed_to_evac stuff
} else {
*p = TAG_CLOSURE(tag,(StgClosure*)to);
}
}
#else
src->header.info = (const StgInfoTable *)MK_FORWARDING_PTR(to);
*p = TAG_CLOSURE(tag,(StgClosure*)to);
#endif
#ifdef PROFILING
// We store the size of the just evacuated object in the LDV word so that
// the profiler can guess the position of the next object later.
SET_EVACUAEE_FOR_LDV(from, size);
#endif
}
#if defined(PARALLEL_GC)
STATIC_INLINE void
copy_tag_nolock(StgClosure **p, const StgInfoTable *info,
StgClosure *src, nat size, step *stp, StgWord tag)
{
StgPtr to, from;
nat i;
to = alloc_for_copy(size,stp);
*p = TAG_CLOSURE(tag,(StgClosure*)to);
src->header.info = (const StgInfoTable *)MK_FORWARDING_PTR(to);
TICK_GC_WORDS_COPIED(size);
from = (StgPtr)src;
to[0] = (W_)info;
for (i = 1; i < size; i++) { // unroll for small i
to[i] = from[i];
}
// if (to+size+2 < bd->start + BLOCK_SIZE_W) {
// __builtin_prefetch(to + size + 2, 1);
// }
#ifdef PROFILING
// We store the size of the just evacuated object in the LDV word so that
// the profiler can guess the position of the next object later.
SET_EVACUAEE_FOR_LDV(from, size);
#endif
}
#endif
/* Special version of copy() for when we only want to copy the info
* pointer of an object, but reserve some padding after it. This is
* used to optimise evacuation of BLACKHOLEs.
*/
static void
copyPart(StgClosure **p, StgClosure *src, nat size_to_reserve, nat size_to_copy, step *stp)
{
StgPtr to, from;
nat i;
StgWord info;
#if defined(PARALLEL_GC)
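    // Lock the closure by atomically swapping in a WHITEHOLE info pointer:
    // spin while another thread holds the lock, and if the closure has
    // already been forwarded, put the info pointer back and defer to
    // evacuate().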
spin:
info = xchg((StgPtr)&src->header.info, (W_)&stg_WHITEHOLE_info);
if (info == (W_)&stg_WHITEHOLE_info) {
#ifdef PROF_SPIN
whitehole_spin++;
#endif
goto spin;
}
if (IS_FORWARDING_PTR(info)) {
src->header.info = (const StgInfoTable *)info;
evacuate(p); // does the failed_to_evac stuff
return ;
}
#else
info = (W_)src->header.info;
#endif
to = alloc_for_copy(size_to_reserve, stp);
*p = (StgClosure *)to;
TICK_GC_WORDS_COPIED(size_to_copy);
from = (StgPtr)src;
to[0] = info;
for (i = 1; i < size_to_copy; i++) { // unroll for small i
to[i] = from[i];
}
#if defined(PARALLEL_GC)
write_barrier();
#endif
src->header.info = (const StgInfoTable*)MK_FORWARDING_PTR(to);
#ifdef PROFILING
// We store the size of the just evacuated object in the LDV word so that
// the profiler can guess the position of the next object later.
SET_EVACUAEE_FOR_LDV(from, size_to_reserve);
// fill the slop
if (size_to_reserve - size_to_copy > 0)
LDV_FILL_SLOP(to + size_to_copy - 1, (int)(size_to_reserve - size_to_copy));
#endif
}
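/* copyPart() is used later in this file for BLACKHOLEs (reserving the full
 * BLACKHOLE_sizeW() but copying only the header) and for TSOs, where the
 * stack contents are copied separately after the StgTSO prefix. */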
/* Copy wrappers that don't tag the closure after copying */
STATIC_INLINE void
copy(StgClosure **p, const StgInfoTable *info,
StgClosure *src, nat size, step *stp)
{
copy_tag(p,info,src,size,stp,0);
}
/* ----------------------------------------------------------------------------
Evacuate
This is called (eventually) for every live object in the system.
The caller to evacuate specifies a desired generation in the
gct->evac_step thread-local variable. The following conditions apply to
evacuating an object which resides in generation M when we're
collecting up to generation N
     if  M >= gct->evac_step
         if  M > N             do nothing
         else                  evac to step->to

     if  M <  gct->evac_step   evac to gct->evac_step, step 0

  if the object is already evacuated, then we check which generation
  it now resides in.

     if  M >= gct->evac_step   do nothing
     if  M <  gct->evac_step   set gct->failed_to_evac flag to indicate that we
                               didn't manage to evacuate this object into gct->evac_step.
OPTIMISATION NOTES:
evacuate() is the single most important function performance-wise
in the GC. Various things have been tried to speed it up, but as
far as I can tell the code generated by gcc 3.2 with -O2 is about
as good as it's going to get. We pass the argument to evacuate()
in a register using the 'regparm' attribute (see the prototype for
evacuate() near the top of this file).
Changing evacuate() to take an (StgClosure **) rather than
returning the new pointer seems attractive, because we can avoid
writing back the pointer when it hasn't changed (eg. for a static
object, or an object in a generation > N). However, I tried it and
it doesn't help. One reason is that the (StgClosure **) pointer
gets spilled to the stack inside evacuate(), resulting in far more
extra reads/writes than we save.
------------------------------------------------------------------------- */
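/* As a sketch (pseudocode, not compiled), the promotion decision above
   reads roughly:

       if (M >= gct->evac_step) {
           if (M > N) return;               // not being collected
           else copy to bd->step->to;       // normal aging
       } else {
           copy to gct->evac_step, step 0;  // forced promotion
       }
*/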
REGPARM1 void
evacuate(StgClosure **p)
{
bdescr *bd = NULL;
step *stp;
StgClosure *q;
const StgInfoTable *info;
StgWord tag;
q = *p;
loop:
/* The tag and the pointer are split, to be merged after evacing */
tag = GET_CLOSURE_TAG(q);
q = UNTAG_CLOSURE(q);
ASSERT(LOOKS_LIKE_CLOSURE_PTR(q));
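    // Pointer tagging in miniature (illustrative, with an assumed TAG_MASK;
    // the real macros live in the RTS headers): closures are word-aligned,
    // so the low bits of q are spare, roughly
    //     tag = (StgWord)q & TAG_MASK;
    //     q   = (StgClosure *)((StgWord)q & ~TAG_MASK);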
if (!HEAP_ALLOCED(q)) {
if (!major_gc) return;
info = get_itbl(q);
switch (info->type) {
case THUNK_STATIC:
if (info->srt_bitmap != 0) {
if (*THUNK_STATIC_LINK((StgClosure *)q) == NULL) {
#ifndef THREADED_RTS
*THUNK_STATIC_LINK((StgClosure *)q) = gct->static_objects;
gct->static_objects = (StgClosure *)q;
#else
StgPtr link;
link = (StgPtr)cas((StgPtr)THUNK_STATIC_LINK((StgClosure *)q),
(StgWord)NULL,
(StgWord)gct->static_objects);
if (link == NULL) {
gct->static_objects = (StgClosure *)q;
}
#endif
}
}
return;
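    // The same cas()-based push onto gct->static_objects recurs for
    // FUN_STATIC, IND_STATIC and CONSTR_STATIC below: the thread whose
    // cas() reads back NULL won the race and links the object onto its
    // own static list.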
case FUN_STATIC:
if (info->srt_bitmap != 0 &&
*FUN_STATIC_LINK((StgClosure *)q) == NULL) {
#ifndef THREADED_RTS
*FUN_STATIC_LINK((StgClosure *)q) = gct->static_objects;
gct->static_objects = (StgClosure *)q;
#else
StgPtr link;
link = (StgPtr)cas((StgPtr)FUN_STATIC_LINK((StgClosure *)q),
(StgWord)NULL,
(StgWord)gct->static_objects);
if (link == NULL) {
gct->static_objects = (StgClosure *)q;
}
#endif
}
return;
case IND_STATIC:
/* If q->saved_info != NULL, then it's a revertible CAF - it'll be
* on the CAF list, so don't do anything with it here (we'll
* scavenge it later).
*/
if (((StgIndStatic *)q)->saved_info == NULL) {
if (*IND_STATIC_LINK((StgClosure *)q) == NULL) {
#ifndef THREADED_RTS
*IND_STATIC_LINK((StgClosure *)q) = gct->static_objects;
gct->static_objects = (StgClosure *)q;
#else
StgPtr link;
link = (StgPtr)cas((StgPtr)IND_STATIC_LINK((StgClosure *)q),
(StgWord)NULL,
(StgWord)gct->static_objects);
if (link == NULL) {
gct->static_objects = (StgClosure *)q;
}
#endif
}
}
return;
case CONSTR_STATIC:
if (*STATIC_LINK(info,(StgClosure *)q) == NULL) {
#ifndef THREADED_RTS
*STATIC_LINK(info,(StgClosure *)q) = gct->static_objects;
gct->static_objects = (StgClosure *)q;
#else
StgPtr link;
link = (StgPtr)cas((StgPtr)STATIC_LINK(info,(StgClosure *)q),
(StgWord)NULL,
(StgWord)gct->static_objects);
if (link == NULL) {
gct->static_objects = (StgClosure *)q;
}
#endif
}
/* I am assuming that static_objects pointers are not
* written to other objects, and thus there is no need to retag. */
return;
case CONSTR_NOCAF_STATIC:
/* no need to put these on the static linked list, they don't need
* to be scavenged.
*/
return;
default:
barf("evacuate(static): strange closure type %d", (int)(info->type));
}
}
bd = Bdescr((P_)q);
if ((bd->flags & (BF_LARGE | BF_COMPACTED | BF_EVACUATED)) != 0) {
// pointer into to-space: just return it. It might be a pointer
// into a generation that we aren't collecting (> N), or it
// might just be a pointer into to-space. The latter doesn't
// happen often, but allowing it makes certain things a bit
// easier; e.g. scavenging an object is idempotent, so it's OK to
// have an object on the mutable list multiple times.
if (bd->flags & BF_EVACUATED) {
// We aren't copying this object, so we have to check
// whether it is already in the target generation. (this is
// the write barrier).
if (bd->step < gct->evac_step) {
gct->failed_to_evac = rtsTrue;
TICK_GC_FAILED_PROMOTION();
}
return;
}
/* evacuate large objects by re-linking them onto a different list.
*/
if (bd->flags & BF_LARGE) {
info = get_itbl(q);
if (info->type == TSO &&
((StgTSO *)q)->what_next == ThreadRelocated) {
q = (StgClosure *)((StgTSO *)q)->_link;
*p = q;
goto loop;
}
evacuate_large((P_)q);
return;
}
/* If the object is in a step that we're compacting, then we
* need to use an alternative evacuate procedure.
*/
if (bd->flags & BF_COMPACTED) {
if (!is_marked((P_)q,bd)) {
mark((P_)q,bd);
if (mark_stack_full()) {
mark_stack_overflowed = rtsTrue;
reset_mark_stack();
}
push_mark_stack((P_)q);
}
return;
}
}
stp = bd->step->to;
info = q->header.info;
if (IS_FORWARDING_PTR(info))
{
/* Already evacuated, just return the forwarding address.
* HOWEVER: if the requested destination generation (gct->evac_step) is
* older than the actual generation (because the object was
* already evacuated to a younger generation) then we have to
* set the gct->failed_to_evac flag to indicate that we couldn't
* manage to promote the object to the desired generation.
*/
/*
* Optimisation: the check is fairly expensive, but we can often
* shortcut it if either the required generation is 0, or the
* current object (the EVACUATED) is in a high enough generation.
* We know that an EVACUATED always points to an object in the
* same or an older generation. stp is the lowest step that the
* current object would be evacuated to, so we only do the full
* check if stp is too low.
*/
StgClosure *e = (StgClosure*)UN_FORWARDING_PTR(info);
*p = TAG_CLOSURE(tag,e);
if (stp < gct->evac_step) { // optimisation
if (Bdescr((P_)e)->step < gct->evac_step) {
gct->failed_to_evac = rtsTrue;
TICK_GC_FAILED_PROMOTION();
}
}
return;
}
switch (INFO_PTR_TO_STRUCT(info)->type) {
case WHITEHOLE:
goto loop;
case MUT_VAR_CLEAN:
case MUT_VAR_DIRTY:
case MVAR_CLEAN:
case MVAR_DIRTY:
copy(p,info,q,sizeW_fromITBL(INFO_PTR_TO_STRUCT(info)),stp);
return;
case CONSTR_0_1:
{
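        // Small Char# and Int# constructors are not copied at all: they
        // are redirected to statically allocated shared closures.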
StgWord w = (StgWord)q->payload[0];
if (info == Czh_con_info &&
// unsigned, so always true: (StgChar)w >= MIN_CHARLIKE &&
(StgChar)w <= MAX_CHARLIKE) {
*p = TAG_CLOSURE(tag,
(StgClosure *)CHARLIKE_CLOSURE((StgChar)w)
);
}
else if (info == Izh_con_info &&
(StgInt)w >= MIN_INTLIKE && (StgInt)w <= MAX_INTLIKE) {
*p = TAG_CLOSURE(tag,
(StgClosure *)INTLIKE_CLOSURE((StgInt)w)
);
}
else {
copy_tag_nolock(p,info,q,sizeofW(StgHeader)+1,stp,tag);
}
return;
}
case FUN_0_1:
case FUN_1_0:
case CONSTR_1_0:
copy_tag_nolock(p,info,q,sizeofW(StgHeader)+1,stp,tag);
return;
case THUNK_1_0:
case THUNK_0_1:
copy(p,info,q,sizeofW(StgThunk)+1,stp);
return;
case THUNK_1_1:
case THUNK_2_0:
case THUNK_0_2:
#ifdef NO_PROMOTE_THUNKS
if (bd->gen_no == 0 &&
bd->step->no != 0 &&
bd->step->no == generations[bd->gen_no].n_steps-1) {
stp = bd->step;
}
#endif
copy(p,info,q,sizeofW(StgThunk)+2,stp);
return;
case FUN_1_1:
case FUN_2_0:
case FUN_0_2:
case CONSTR_1_1:
case CONSTR_2_0:
copy_tag_nolock(p,info,q,sizeofW(StgHeader)+2,stp,tag);
return;
case CONSTR_0_2:
copy_tag_nolock(p,info,q,sizeofW(StgHeader)+2,stp,tag);
return;
case THUNK:
copy(p,info,q,thunk_sizeW_fromITBL(INFO_PTR_TO_STRUCT(info)),stp);
return;
case FUN:
case IND_PERM:
case IND_OLDGEN_PERM:
case CONSTR:
copy_tag_nolock(p,info,q,sizeW_fromITBL(INFO_PTR_TO_STRUCT(info)),stp,tag);
return;
case WEAK:
case STABLE_NAME:
copy_tag(p,info,q,sizeW_fromITBL(INFO_PTR_TO_STRUCT(info)),stp,tag);
return;
case BCO:
copy(p,info,q,bco_sizeW((StgBCO *)q),stp);
return;
case CAF_BLACKHOLE:
case SE_CAF_BLACKHOLE:
case SE_BLACKHOLE:
case BLACKHOLE:
copyPart(p,q,BLACKHOLE_sizeW(),sizeofW(StgHeader),stp);
return;
case THUNK_SELECTOR:
eval_thunk_selector(p, (StgSelector *)q, rtsTrue);
return;
case IND:
case IND_OLDGEN:
// follow chains of indirections, don't evacuate them
q = ((StgInd*)q)->indirectee;
*p = q;
goto loop;
case RET_BCO:
case RET_SMALL:
case RET_BIG:
case RET_DYN:
case UPDATE_FRAME:
case STOP_FRAME:
case CATCH_FRAME:
case CATCH_STM_FRAME:
case CATCH_RETRY_FRAME:
case ATOMICALLY_FRAME:
// shouldn't see these
barf("evacuate: stack frame at %p\n", q);
case PAP:
copy(p,info,q,pap_sizeW((StgPAP*)q),stp);
return;
case AP:
copy(p,info,q,ap_sizeW((StgAP*)q),stp);
return;
case AP_STACK:
copy(p,info,q,ap_stack_sizeW((StgAP_STACK*)q),stp);
return;
case ARR_WORDS:
// just copy the block
copy(p,info,q,arr_words_sizeW((StgArrWords *)q),stp);
return;
case MUT_ARR_PTRS_CLEAN:
case MUT_ARR_PTRS_DIRTY:
case MUT_ARR_PTRS_FROZEN:
case MUT_ARR_PTRS_FROZEN0:
// just copy the block
copy(p,info,q,mut_arr_ptrs_sizeW((StgMutArrPtrs *)q),stp);
return;
case TSO:
{
StgTSO *tso = (StgTSO *)q;
/* Deal with redirected TSOs (a TSO that's had its stack enlarged).
*/
if (tso->what_next == ThreadRelocated) {
q = (StgClosure *)tso->_link;
*p = q;
goto loop;
}
/* To evacuate a small TSO, we need to relocate the update frame
* list it contains.
*/
{
StgTSO *new_tso;
StgPtr r, s;
copyPart(p,(StgClosure *)tso, tso_sizeW(tso), sizeofW(StgTSO), stp);
new_tso = (StgTSO *)*p;
move_TSO(tso, new_tso);
for (r = tso->sp, s = new_tso->sp;
r < tso->stack+tso->stack_size;) {
*s++ = *r++;
}
return;
}
}
case TREC_HEADER:
copy(p,info,q,sizeofW(StgTRecHeader),stp);
return;
case TVAR_WATCH_QUEUE:
copy(p,info,q,sizeofW(StgTVarWatchQueue),stp);
return;
case TVAR:
copy(p,info,q,sizeofW(StgTVar),stp);
return;
case TREC_CHUNK:
copy(p,info,q,sizeofW(StgTRecChunk),stp);
return;
case ATOMIC_INVARIANT:
copy(p,info,q,sizeofW(StgAtomicInvariant),stp);
return;
case INVARIANT_CHECK_QUEUE:
copy(p,info,q,sizeofW(StgInvariantCheckQueue),stp);
return;
default:
barf("evacuate: strange closure type %d", (int)(INFO_PTR_TO_STRUCT(info)->type));