Glasgow Haskell Compiler / GHC

Commit b339c8b1, authored Jun 03, 2008 by Simon Marlow
Put the contents of Evac.c-inc back in Evac.c, and just compile the file twice
Similarly for Scav.c/Scav.c-inc.
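The build trick behind this commit is to compile one source file twice: in the threaded build, Evac.c is copied to Evac_thr.c and the copy is compiled with -optc-DPARALLEL_GC, while the variant compiled without PARALLEL_GC renames its entry point (the `#define evacuate(p) evacuate1(p)` hunk below), so both object files can be linked into the same RTS. A minimal sketch of that pattern, using hypothetical names (sketch.c, process/process1) rather than actual GHC code:

/* sketch.c -- illustrative only, not part of GHC.  Built twice:
 *   cc -c sketch.c -o sketch.o                       (sequential variant)
 *   cp sketch.c sketch_thr.c
 *   cc -c -DPARALLEL_GC sketch_thr.c -o sketch_thr.o (parallel variant)
 */
#include <stdio.h>

#if !defined(PARALLEL_GC)
/* In the build without PARALLEL_GC, rename the entry point so the two
 * object files export distinct symbols and can be linked together. */
#define process(p) process1(p)
#endif

void process(int *p)
{
#if defined(PARALLEL_GC)
    printf("parallel variant saw %d\n", *p);   /* real code would use CAS here */
#else
    printf("sequential variant saw %d\n", *p);
#endif
}

Linking both objects yields a sequential process1() and a parallel process() from a single source, which mirrors the Evac.c / Evac_thr.c arrangement in the diff below.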
parent 2ac31c7f
Changes: 8
rts/Makefile
@@ -397,6 +397,20 @@ endif
# -O3 helps unroll some loops (especially in copy() with a constant argument).
sm/Evac_HC_OPTS += -optc-funroll-loops

ifneq "$(findstring thr, $(way))" ""
EXTRA_SRCS += sm/Evac_thr.c sm/Scav_thr.c

sm/Evac_thr.c : sm/Evac.c
        cp $< $@
sm/Scav_thr.c : sm/Scav.c
        cp $< $@

sm/Evac_thr_HC_OPTS += -optc-DPARALLEL_GC
sm/Scav_thr_HC_OPTS += -optc-DPARALLEL_GC
else
EXCLUDED_SRCS += sm/Evac_thr.c sm/Scav_thr.c
endif

# Without this, thread_obj will not be inlined (at least on x86 with GCC 4.1.0)
sm/Compact_HC_OPTS += -optc-finline-limit=2500
rts/sm/Evac.c
@@ -22,10 +22,19 @@
#include "Prelude.h"
#include "LdvProfile.h"

-#if defined(PROF_SPIN) && defined(THREADED_RTS)
#if defined(PROF_SPIN) && defined(THREADED_RTS) && defined(PARALLEL_GC)
StgWord64 whitehole_spin = 0;
#endif

#if defined(THREADED_RTS) && !defined(PARALLEL_GC)
#define evacuate(p) evacuate1(p)
#endif

#if !defined(PARALLEL_GC)
#define copy_tag_nolock(p, info, src, size, stp, tag) \
        copy_tag(p, info, src, size, stp, tag)
#endif

/* Used to avoid long recursion due to selector thunks
 */
#define MAX_THUNK_SELECTOR_DEPTH 16
@@ -76,14 +85,592 @@ alloc_for_copy (nat size, step *stp)
   The evacuate() code
   -------------------------------------------------------------------------- */

-#undef PARALLEL_GC
-#include "Evac.c-inc"
-#ifdef THREADED_RTS
-#define PARALLEL_GC
-#include "Evac.c-inc"

STATIC_INLINE void
copy_tag(StgClosure **p, const StgInfoTable *info,
         StgClosure *src, nat size, step *stp, StgWord tag)
{
    StgPtr to, from;
    nat i;

    to = alloc_for_copy(size, stp);

    TICK_GC_WORDS_COPIED(size);

    from = (StgPtr)src;
    to[0] = (W_)info;
    for (i = 1; i < size; i++) { // unroll for small i
        to[i] = from[i];
    }

//  if (to+size+2 < bd->start + BLOCK_SIZE_W) {
//      __builtin_prefetch(to + size + 2, 1);
//  }

#if defined(PARALLEL_GC)
    {
        const StgInfoTable *new_info;
        new_info = (const StgInfoTable *)cas((StgPtr)&src->header.info, (W_)info,
                                             MK_FORWARDING_PTR(to));
        if (new_info != info) {
            return evacuate(p); // does the failed_to_evac stuff
        } else {
            *p = TAG_CLOSURE(tag, (StgClosure*)to);
        }
    }
#else
    src->header.info = (const StgInfoTable *)MK_FORWARDING_PTR(to);
    *p = TAG_CLOSURE(tag, (StgClosure*)to);
#endif

#ifdef PROFILING
    // We store the size of the just evacuated object in the LDV word so that
    // the profiler can guess the position of the next object later.
    SET_EVACUAEE_FOR_LDV(from, size);
#endif
}
#if defined(PARALLEL_GC)
STATIC_INLINE void
copy_tag_nolock(StgClosure **p, const StgInfoTable *info,
                StgClosure *src, nat size, step *stp, StgWord tag)
{
    StgPtr to, from;
    nat i;

    to = alloc_for_copy(size, stp);
    *p = TAG_CLOSURE(tag, (StgClosure*)to);
    src->header.info = (const StgInfoTable *)MK_FORWARDING_PTR(to);

    TICK_GC_WORDS_COPIED(size);

    from = (StgPtr)src;
    to[0] = (W_)info;
    for (i = 1; i < size; i++) { // unroll for small i
        to[i] = from[i];
    }

//  if (to+size+2 < bd->start + BLOCK_SIZE_W) {
//      __builtin_prefetch(to + size + 2, 1);
//  }

#ifdef PROFILING
    // We store the size of the just evacuated object in the LDV word so that
    // the profiler can guess the position of the next object later.
    SET_EVACUAEE_FOR_LDV(from, size);
#endif
}
#endif
/* Special version of copy() for when we only want to copy the info
 * pointer of an object, but reserve some padding after it.  This is
 * used to optimise evacuation of BLACKHOLEs.
 */
static void
copyPart(StgClosure **p, StgClosure *src, nat size_to_reserve,
         nat size_to_copy, step *stp)
{
    StgPtr to, from;
    nat i;
    StgWord info;

#if defined(PARALLEL_GC)
spin:
    info = xchg((StgPtr)&src->header.info, (W_)&stg_WHITEHOLE_info);
    if (info == (W_)&stg_WHITEHOLE_info) {
#ifdef PROF_SPIN
        whitehole_spin++;
#endif
        goto spin;
    }
    if (IS_FORWARDING_PTR(info)) {
        src->header.info = (const StgInfoTable *)info;
        evacuate(p); // does the failed_to_evac stuff
        return;
    }
#else
    info = (W_)src->header.info;
#endif

    to = alloc_for_copy(size_to_reserve, stp);
    *p = (StgClosure *)to;

    TICK_GC_WORDS_COPIED(size_to_copy);

    from = (StgPtr)src;
    to[0] = info;
    for (i = 1; i < size_to_copy; i++) { // unroll for small i
        to[i] = from[i];
    }

#if defined(PARALLEL_GC)
    write_barrier();
#endif
    src->header.info = (const StgInfoTable *)MK_FORWARDING_PTR(to);

#ifdef PROFILING
    // We store the size of the just evacuated object in the LDV word so that
    // the profiler can guess the position of the next object later.
    SET_EVACUAEE_FOR_LDV(from, size_to_reserve);
    // fill the slop
    if (size_to_reserve - size_to_copy > 0)
        LDV_FILL_SLOP(to + size_to_copy - 1, (int)(size_to_reserve - size_to_copy));
#endif
}
/* Copy wrappers that don't tag the closure after copying */
STATIC_INLINE void
copy(StgClosure **p, const StgInfoTable *info,
     StgClosure *src, nat size, step *stp)
{
    copy_tag(p, info, src, size, stp, 0);
}
/* ----------------------------------------------------------------------------
   Evacuate

   This is called (eventually) for every live object in the system.

   The caller to evacuate specifies a desired generation in the
   gct->evac_step thread-local variable.  The following conditions apply to
   evacuating an object which resides in generation M when we're
   collecting up to generation N

   if  M >= gct->evac_step
           if  M > N     do nothing
           else          evac to step->to

   if  M < gct->evac_step      evac to gct->evac_step, step 0

   if the object is already evacuated, then we check which generation
   it now resides in.

   if  M >= gct->evac_step     do nothing
   if  M <  gct->evac_step     set gct->failed_to_evac flag to indicate that we
                               didn't manage to evacuate this object into
                               gct->evac_step.

   OPTIMISATION NOTES:

   evacuate() is the single most important function performance-wise
   in the GC.  Various things have been tried to speed it up, but as
   far as I can tell the code generated by gcc 3.2 with -O2 is about
   as good as it's going to get.  We pass the argument to evacuate()
   in a register using the 'regparm' attribute (see the prototype for
   evacuate() near the top of this file).

   Changing evacuate() to take an (StgClosure **) rather than
   returning the new pointer seems attractive, because we can avoid
   writing back the pointer when it hasn't changed (eg. for a static
   object, or an object in a generation > N).  However, I tried it and
   it doesn't help.  One reason is that the (StgClosure **) pointer
   gets spilled to the stack inside evacuate(), resulting in far more
   extra reads/writes than we save.
   ------------------------------------------------------------------------- */
REGPARM1 void
evacuate(StgClosure **p)
{
  bdescr *bd = NULL;
  step *stp;
  StgClosure *q;
  const StgInfoTable *info;
  StgWord tag;

  q = *p;

loop:
  /* The tag and the pointer are split, to be merged after evacing */
  tag = GET_CLOSURE_TAG(q);
  q = UNTAG_CLOSURE(q);

  ASSERT(LOOKS_LIKE_CLOSURE_PTR(q));

  if (!HEAP_ALLOCED(q)) {

      if (!major_gc) return;

      info = get_itbl(q);
      switch (info->type) {

      case THUNK_STATIC:
          if (info->srt_bitmap != 0) {
              if (*THUNK_STATIC_LINK((StgClosure *)q) == NULL) {
#ifndef THREADED_RTS
                  *THUNK_STATIC_LINK((StgClosure *)q) = gct->static_objects;
                  gct->static_objects = (StgClosure *)q;
#else
                  StgPtr link;
                  link = (StgPtr)cas((StgPtr)THUNK_STATIC_LINK((StgClosure *)q),
                                     (StgWord)NULL,
                                     (StgWord)gct->static_objects);
                  if (link == NULL) {
                      gct->static_objects = (StgClosure *)q;
                  }
#endif
              }
          }
          return;

      case FUN_STATIC:
          if (info->srt_bitmap != 0 &&
              *FUN_STATIC_LINK((StgClosure *)q) == NULL) {
#ifndef THREADED_RTS
              *FUN_STATIC_LINK((StgClosure *)q) = gct->static_objects;
              gct->static_objects = (StgClosure *)q;
#else
              StgPtr link;
              link = (StgPtr)cas((StgPtr)FUN_STATIC_LINK((StgClosure *)q),
                                 (StgWord)NULL,
                                 (StgWord)gct->static_objects);
              if (link == NULL) {
                  gct->static_objects = (StgClosure *)q;
              }
#endif
          }
          return;

      case IND_STATIC:
          /* If q->saved_info != NULL, then it's a revertible CAF - it'll be
           * on the CAF list, so don't do anything with it here (we'll
           * scavenge it later).
           */
          if (((StgIndStatic *)q)->saved_info == NULL) {
              if (*IND_STATIC_LINK((StgClosure *)q) == NULL) {
#ifndef THREADED_RTS
                  *IND_STATIC_LINK((StgClosure *)q) = gct->static_objects;
                  gct->static_objects = (StgClosure *)q;
#else
                  StgPtr link;
                  link = (StgPtr)cas((StgPtr)IND_STATIC_LINK((StgClosure *)q),
                                     (StgWord)NULL,
                                     (StgWord)gct->static_objects);
                  if (link == NULL) {
                      gct->static_objects = (StgClosure *)q;
                  }
#endif
              }
          }
          return;

      case CONSTR_STATIC:
          if (*STATIC_LINK(info, (StgClosure *)q) == NULL) {
#ifndef THREADED_RTS
              *STATIC_LINK(info, (StgClosure *)q) = gct->static_objects;
              gct->static_objects = (StgClosure *)q;
#else
              StgPtr link;
              link = (StgPtr)cas((StgPtr)STATIC_LINK(info, (StgClosure *)q),
                                 (StgWord)NULL,
                                 (StgWord)gct->static_objects);
              if (link == NULL) {
                  gct->static_objects = (StgClosure *)q;
              }
#endif
          }
          /* I am assuming that static_objects pointers are not
           * written to other objects, and thus, no need to retag. */
          return;

      case CONSTR_NOCAF_STATIC:
          /* no need to put these on the static linked list, they don't need
           * to be scavenged.
           */
          return;

      default:
          barf("evacuate(static): strange closure type %d", (int)(info->type));
      }
  }

  bd = Bdescr((P_)q);

  if ((bd->flags & (BF_LARGE | BF_COMPACTED | BF_EVACUATED)) != 0) {

      // pointer into to-space: just return it.  It might be a pointer
      // into a generation that we aren't collecting (> N), or it
      // might just be a pointer into to-space.  The latter doesn't
      // happen often, but allowing it makes certain things a bit
      // easier; e.g. scavenging an object is idempotent, so it's OK to
      // have an object on the mutable list multiple times.
      if (bd->flags & BF_EVACUATED) {
          // We aren't copying this object, so we have to check
          // whether it is already in the target generation.  (this is
          // the write barrier).
          if (bd->step < gct->evac_step) {
              gct->failed_to_evac = rtsTrue;
              TICK_GC_FAILED_PROMOTION();
          }
          return;
      }

      /* evacuate large objects by re-linking them onto a different list.
       */
      if (bd->flags & BF_LARGE) {
          info = get_itbl(q);
          if (info->type == TSO &&
              ((StgTSO *)q)->what_next == ThreadRelocated) {
              q = (StgClosure *)((StgTSO *)q)->_link;
              *p = q;
              goto loop;
          }
          evacuate_large((P_)q);
          return;
      }

      /* If the object is in a step that we're compacting, then we
       * need to use an alternative evacuate procedure.
       */
      if (bd->flags & BF_COMPACTED) {
          if (!is_marked((P_)q, bd)) {
              mark((P_)q, bd);
              if (mark_stack_full()) {
                  mark_stack_overflowed = rtsTrue;
                  reset_mark_stack();
              }
              push_mark_stack((P_)q);
          }
          return;
      }
  }

  stp = bd->step->to;

  info = q->header.info;
  if (IS_FORWARDING_PTR(info))
  {
    /* Already evacuated, just return the forwarding address.
     * HOWEVER: if the requested destination generation (gct->evac_step) is
     * older than the actual generation (because the object was
     * already evacuated to a younger generation) then we have to
     * set the gct->failed_to_evac flag to indicate that we couldn't
     * manage to promote the object to the desired generation.
     */
    /*
     * Optimisation: the check is fairly expensive, but we can often
     * shortcut it if either the required generation is 0, or the
     * current object (the EVACUATED) is in a high enough generation.
     * We know that an EVACUATED always points to an object in the
     * same or an older generation.  stp is the lowest step that the
     * current object would be evacuated to, so we only do the full
     * check if stp is too low.
     */
      StgClosure *e = (StgClosure*)UN_FORWARDING_PTR(info);
      *p = TAG_CLOSURE(tag, e);
      if (stp < gct->evac_step) {  // optimisation
          if (Bdescr((P_)e)->step < gct->evac_step) {
              gct->failed_to_evac = rtsTrue;
              TICK_GC_FAILED_PROMOTION();
          }
      }
      return;
  }

  switch (INFO_PTR_TO_STRUCT(info)->type) {

  case WHITEHOLE:
      goto loop;

  case MUT_VAR_CLEAN:
  case MUT_VAR_DIRTY:
  case MVAR_CLEAN:
  case MVAR_DIRTY:
      copy(p, info, q, sizeW_fromITBL(INFO_PTR_TO_STRUCT(info)), stp);
      return;

  case CONSTR_0_1:
  {
      StgWord w = (StgWord)q->payload[0];
      if (info == Czh_con_info &&
          // unsigned, so always true:  (StgChar)w >= MIN_CHARLIKE &&
          (StgChar)w <= MAX_CHARLIKE) {
          *p = TAG_CLOSURE(tag,
                           (StgClosure *)CHARLIKE_CLOSURE((StgChar)w)
                          );
      }
      else if (info == Izh_con_info &&
               (StgInt)w >= MIN_INTLIKE && (StgInt)w <= MAX_INTLIKE) {
          *p = TAG_CLOSURE(tag,
                           (StgClosure *)INTLIKE_CLOSURE((StgInt)w)
                          );
      }
      else {
          copy_tag_nolock(p, info, q, sizeofW(StgHeader)+1, stp, tag);
      }
      return;
  }

  case FUN_0_1:
  case FUN_1_0:
  case CONSTR_1_0:
      copy_tag_nolock(p, info, q, sizeofW(StgHeader)+1, stp, tag);
      return;

  case THUNK_1_0:
  case THUNK_0_1:
      copy(p, info, q, sizeofW(StgThunk)+1, stp);
      return;

  case THUNK_1_1:
  case THUNK_2_0:
  case THUNK_0_2:
#ifdef NO_PROMOTE_THUNKS
      if (bd->gen_no == 0 &&
          bd->step->no != 0 &&
          bd->step->no == generations[bd->gen_no].n_steps-1) {
          stp = bd->step;
      }
#endif
      copy(p, info, q, sizeofW(StgThunk)+2, stp);
      return;

  case FUN_1_1:
  case FUN_2_0:
  case FUN_0_2:
  case CONSTR_1_1:
  case CONSTR_2_0:
      copy_tag_nolock(p, info, q, sizeofW(StgHeader)+2, stp, tag);
      return;

  case CONSTR_0_2:
      copy_tag_nolock(p, info, q, sizeofW(StgHeader)+2, stp, tag);
      return;

  case THUNK:
      copy(p, info, q, thunk_sizeW_fromITBL(INFO_PTR_TO_STRUCT(info)), stp);
      return;

  case FUN:
  case IND_PERM:
  case IND_OLDGEN_PERM:
  case CONSTR:
      copy_tag_nolock(p, info, q, sizeW_fromITBL(INFO_PTR_TO_STRUCT(info)), stp, tag);
      return;

  case WEAK:
  case STABLE_NAME:
      copy_tag(p, info, q, sizeW_fromITBL(INFO_PTR_TO_STRUCT(info)), stp, tag);
      return;

  case BCO:
      copy(p, info, q, bco_sizeW((StgBCO *)q), stp);
      return;

  case CAF_BLACKHOLE:
  case SE_CAF_BLACKHOLE:
  case SE_BLACKHOLE:
  case BLACKHOLE:
      copyPart(p, q, BLACKHOLE_sizeW(), sizeofW(StgHeader), stp);
      return;

  case THUNK_SELECTOR:
      eval_thunk_selector(p, (StgSelector *)q, rtsTrue);
      return;

  case IND:
  case IND_OLDGEN:
    // follow chains of indirections, don't evacuate them
    q = ((StgInd*)q)->indirectee;
    *p = q;
    goto loop;

  case RET_BCO:
  case RET_SMALL:
  case RET_BIG:
  case RET_DYN:
  case UPDATE_FRAME:
  case STOP_FRAME:
  case CATCH_FRAME:
  case CATCH_STM_FRAME:
  case CATCH_RETRY_FRAME:
  case ATOMICALLY_FRAME:
    // shouldn't see these
    barf("evacuate: stack frame at %p\n", q);

  case PAP:
      copy(p, info, q, pap_sizeW((StgPAP*)q), stp);
      return;

  case AP:
      copy(p, info, q, ap_sizeW((StgAP*)q), stp);
      return;

  case AP_STACK:
      copy(p, info, q, ap_stack_sizeW((StgAP_STACK*)q), stp);
      return;

  case ARR_WORDS:
      // just copy the block
      copy(p, info, q, arr_words_sizeW((StgArrWords *)q), stp);
      return;

  case MUT_ARR_PTRS_CLEAN:
  case MUT_ARR_PTRS_DIRTY:
  case MUT_ARR_PTRS_FROZEN:
  case MUT_ARR_PTRS_FROZEN0:
      // just copy the block
      copy(p, info, q, mut_arr_ptrs_sizeW((StgMutArrPtrs *)q), stp);
      return;

  case TSO:
    {
      StgTSO *tso = (StgTSO *)q;

      /* Deal with redirected TSOs (a TSO that's had its stack enlarged).
       */
      if (tso->what_next == ThreadRelocated) {
          q = (StgClosure *)tso->_link;
          *p = q;
          goto loop;
      }

      /* To evacuate a small TSO, we need to relocate the update frame
       * list it contains.
       */
      {
          StgTSO *new_tso;
          StgPtr r, s;

          copyPart(p, (StgClosure *)tso, tso_sizeW(tso), sizeofW(StgTSO), stp);
          new_tso = (StgTSO *)*p;
          move_TSO(tso, new_tso);
          for (r = tso->sp, s = new_tso->sp;
               r < tso->stack + tso->stack_size;) {
              *s++ = *r++;
          }
          return;
      }
    }

  case TREC_HEADER:
      copy(p, info, q, sizeofW(StgTRecHeader), stp);
      return;

  case TVAR_WATCH_QUEUE:
      copy(p, info, q, sizeofW(StgTVarWatchQueue), stp);
      return;

  case TVAR:
      copy(p, info, q, sizeofW(StgTVar), stp);
      return;

  case TREC_CHUNK:
      copy(p, info, q, sizeofW(StgTRecChunk), stp);
      return;

  case ATOMIC_INVARIANT:
      copy(p, info, q, sizeofW(StgAtomicInvariant), stp);
      return;

  case INVARIANT_CHECK_QUEUE:
      copy(p, info, q, sizeofW(StgInvariantCheckQueue), stp);
      return;

  default: