/* -----------------------------------------------------------------------------
 *
 * (c) The GHC Team 1998-2006
 *
 * Tidying up a thread when it stops running
 *
 * ---------------------------------------------------------------------------*/

// #include "PosixSource.h"
#include "Rts.h"

#include "ThreadPaused.h"
#include "sm/Storage.h"
#include "Updates.h"
#include "RaiseAsync.h"
#include "Trace.h"
#include "Threads.h"

#include <string.h> // for memmove()

/* -----------------------------------------------------------------------------
 * Stack squeezing
 *
 * Code largely pinched from old RTS, then hacked to bits.  We also do
 * lazy black holing here.
 *
 * -------------------------------------------------------------------------- */

// A stack gap left behind by squeezing: records how many words were
// squeezed out here, and links to the next gap further down the stack.
struct stack_gap { StgWord gap_size; struct stack_gap *next_gap; };

// Collapse a run of 'count' adjacent update frames, of which 'upd' is
// the highest-addressed, into one update frame plus a stack_gap
// written over the frame immediately below it.  Returns the new gap,
// linked in front of 'next', so the caller accumulates a list of gaps
// to be closed up later by the second stage of stackSqueeze().
static struct stack_gap *
updateAdjacentFrames (Capability *cap, StgTSO *tso, StgUpdateFrame *upd,
                      uint32_t count, struct stack_gap *next)
{
    StgClosure *updatee;
    struct stack_gap *gap;
    uint32_t i;

    // The first one (highest address) is the frame we take the
    // "master" updatee from; all the others will be made indirections
    // to this one.  It is essential that we do it this way around: we
    // used to make the lowest-addressed frame the "master" frame and
    // shuffle it down, but a bad case cropped up (#5505) where this
    // happened repeatedly, generating a chain of indirections which
    // the GC repeatedly traversed (indirection chains longer than one
    // are not supposed to happen).  So now after identifying a block
    // of adjacent update frames we walk downwards again updating them
    // all to point to the highest one, before squeezing out all but
    // the highest one.
    updatee = upd->updatee;
    count--;

    // The frame just below the master becomes the stack_gap record.
    // NB. 'gap' aliases that frame, so the gap fields must not be
    // written until the loop below has finished reading upd->updatee.
    upd--;
    gap = (struct stack_gap*)upd;

    // Walk downwards over the remaining frames, updating each of
    // their updatees to point at the master updatee.
    for (i = count; i > 0; i--, upd--) {
        /*
         * Check two things: that the two update frames
         * don't point to the same object, and that the
         * updatee_bypass isn't already an indirection.
         * Both of these cases only happen when we're in a
         * block hole-style loop (and there are multiple
         * update frames on the stack pointing to the same
         * closure), but they can both screw us up if we
         * don't check.
         */
        if (upd->updatee != updatee && !closure_IND(upd->updatee)) {
            updateThunk(cap, tso, upd->updatee, updatee);
        }
    }

    // Record the size of the squeezed-out region (all frames except
    // the master) and chain this gap onto the list.
    gap->gap_size = count * sizeofW(StgUpdateFrame);
    gap->next_gap = next;

    return gap;
}

// Squeeze redundant adjacent update frames out of tso's stack, between
// the current stack pointer and 'bottom' (exclusive upper limit of the
// walk).  Runs in two stages: first mark runs of adjacent update
// frames and replace each run with a stack_gap, then slide the
// remaining stack data down over the gaps.  Called from threadPaused()
// when the squeezing heuristic fires.
static void
stackSqueeze(Capability *cap, StgTSO *tso, StgPtr bottom)
{
    StgPtr frame;
    uint32_t adjacent_update_frames;  // length of the current run of update frames
    struct stack_gap *gap;            // head of the gap list built so far

    // Stage 1:
    //    Traverse the stack upwards, replacing adjacent update frames
    //    with a single update frame and a "stack gap".  A stack gap
    //    contains two values: the size of the gap, and the distance
    //    to the next gap (or the stack top).

    frame = tso->stackobj->sp;

    ASSERT(frame < bottom);

    adjacent_update_frames = 0;
    // Sentinel gap just below sp; stage 2 stops when it reaches it.
    gap = (struct stack_gap *) (frame - sizeofW(StgUpdateFrame));

    while (frame <= bottom)
    {
        switch (get_ret_itbl((StgClosure *)frame)->i.type) {

        case UPDATE_FRAME:
        {
            // Every update frame after the first in a run is one we
            // will squeeze out.
            if (adjacent_update_frames > 0) {
                TICK_UPD_SQUEEZED();
            }
            adjacent_update_frames++;

            frame += sizeofW(StgUpdateFrame);
            continue;
        }

        default:
            // we're not in a gap... check whether this is the end of a gap
            // (an update frame can't be the end of a gap).
            if (adjacent_update_frames > 1) {
                gap = updateAdjacentFrames(cap, tso,
                                           (StgUpdateFrame*)(frame - sizeofW(StgUpdateFrame)),
                                           adjacent_update_frames, gap);
            }
            adjacent_update_frames = 0;

            frame += stack_frame_sizeW((StgClosure *)frame);
            continue;
        }
    }

    // Close off a run of update frames that reached the end of the walk.
    if (adjacent_update_frames > 1) {
        gap = updateAdjacentFrames(cap, tso,
                                   (StgUpdateFrame*)(frame - sizeofW(StgUpdateFrame)),
                                   adjacent_update_frames, gap);
    }

    // Now we have a stack with gap-structs in it, and we have to walk down
    // shoving the stack up to fill in the gaps.  A diagram might
    // help:
    //
    //    +| ********* |
    //     | ********* | <- sp
    //     |           |
    //     |           | <- gap_start
    //     | ......... |                |
    //     | stack_gap | <- gap         | chunk_size
    //     | ......... |                |
    //     | ......... | <- gap_end     v
    //     | ********* |
    //     | ********* |
    //     | ********* |
    //    -| ********* |
    //
    // 'sp'  points the current top-of-stack
    // 'gap' points to the stack_gap structure inside the gap
    // *****   indicates real stack data
    // .....   indicates gap
    // <empty> indicates unused
    //
    {
        StgWord8 *sp;
        StgWord8 *gap_start, *next_gap_start, *gap_end;
        uint32_t chunk_size;

        next_gap_start = (StgWord8*)gap + sizeof(StgUpdateFrame);
        sp = next_gap_start;

        // Walk the gap list from the topmost (lowest-address) gap
        // downwards, sliding each chunk of real stack data up under sp.
        while ((StgPtr)gap > tso->stackobj->sp) {

            // we're working in *bytes* now...
            gap_start = next_gap_start;
            gap_end = gap_start - gap->gap_size * sizeof(W_);

            gap = gap->next_gap;
            next_gap_start = (StgWord8*)gap + sizeof(StgUpdateFrame);

            // Move the chunk of live data between this gap and the
            // next one up to the current top of stack.  Regions may
            // overlap, hence memmove rather than memcpy.
            chunk_size = gap_end - next_gap_start;
            sp -= chunk_size;
            memmove(sp, next_gap_start, chunk_size);
        }

        tso->stackobj->sp = (StgPtr)sp;
    }
}
/* -----------------------------------------------------------------------------
 * Pausing a thread
 *
 * We have to prepare for GC - this means doing lazy black holing
 * here.  We also take the opportunity to do stack squeezing if it's
 * turned on.
 * -------------------------------------------------------------------------- */
void
threadPaused(Capability *cap, StgTSO *tso)
{
    StgClosure *frame;
    const StgRetInfoTable *info;
    const StgInfoTable *bh_info;
    const StgInfoTable *cur_bh_info USED_IF_THREADS;
    const StgInfoTable *frame_info;
    StgClosure *bh;
    StgPtr stack_end;
    // Bookkeeping for the stack-squeezing heuristic: how many words
    // squeezing would save, vs. the weight of frames we'd have to move.
    uint32_t words_to_squeeze = 0;
    uint32_t weight           = 0;
    uint32_t weight_pending   = 0;
    bool prev_was_update_frame = false;
    StgWord heuristic_says_squeeze;

    // Check to see whether we have threads waiting to raise
    // exceptions, and we're not blocking exceptions, or are blocked
    // interruptibly.  This is important; if a thread is running with
    // TSO_BLOCKEX and becomes blocked interruptibly, this is the only
    // place we ensure that the blocked_exceptions get a chance.
    maybePerformBlockedException (cap, tso);
    if (tso->what_next == ThreadKilled) { return; }

    // NB. Updatable thunks *must* be blackholed, either by eager blackholing or
    // lazy blackholing.  See Note [upd-black-hole] in sm/Scav.c.

    stack_end = tso->stackobj->stack + tso->stackobj->stack_size;

    frame = (StgClosure *)tso->stackobj->sp;

    // N.B. We know that the TSO is owned by the current capability so no
    // memory barriers are needed here.
    // Walk up the stack from sp, blackholing the updatee of each
    // update frame and marking the frame, until we hit a frame we have
    // already marked (work done on a previous pause) or the stack end.
    while ((P_)frame < stack_end) {
        info = get_ret_itbl(frame);

        switch (info->i.type) {

        case UPDATE_FRAME:

            // If we've already marked this frame, then stop here.
            frame_info = frame->header.info;
            if (frame_info == (StgInfoTable *)&stg_marked_upd_frame_info) {
                if (prev_was_update_frame) {
                    words_to_squeeze += sizeofW(StgUpdateFrame);
                    weight += weight_pending;
                    weight_pending = 0;
                }
                goto end;
            }

            SET_INFO(frame, (StgInfoTable *)&stg_marked_upd_frame_info);

            bh = ((StgUpdateFrame *)frame)->updatee;
            bh_info = bh->header.info;

#if defined(THREADED_RTS)
        retry:
#endif
            // Note [suspend duplicate work]
            //
            // If the info table is a WHITEHOLE or a BLACKHOLE, then
            // another thread has claimed it (via the SET_INFO()
            // below), or is in the process of doing so.  In that case
            // we want to suspend the work that the current thread has
            // done on this thunk and wait until the other thread has
            // finished.
            //
            // If eager blackholing is taking place, it could be the
            // case that the blackhole points to the current
            // TSO. e.g.:
            //
            //    this thread                   other thread
            //    --------------------------------------------------------
            //                                  c->indirectee = other_tso;
            //                                  c->header.info = EAGER_BH
            //                                  threadPaused():
            //                                    c->header.info = WHITEHOLE
            //                                    c->indirectee = other_tso
            //    c->indirectee = this_tso;
            //    c->header.info = EAGER_BH
            //                                    c->header.info = BLACKHOLE
            //    threadPaused()
            //    *** c->header.info is now BLACKHOLE,
            //        c->indirectee  points to this_tso
            //
            // So in this case do *not* suspend the work of the
            // current thread, because the current thread will become
            // deadlocked on itself.  See #5226 for an instance of
            // this bug.
            //
            // Note that great care is required when entering computations
            // suspended by this mechanism. See Note [AP_STACKs must be eagerly
            // blackholed] for details.
            if (((bh_info == &stg_BLACKHOLE_info)
                 && ((StgInd*)bh)->indirectee != (StgClosure*)tso)
                || (bh_info == &stg_WHITEHOLE_info))
            {
                debugTrace(DEBUG_squeeze,
                           "suspending duplicate work: %ld words of stack",
                           (long)((StgPtr)frame - tso->stackobj->sp));

                // If this closure is already an indirection, then
                // suspend the computation up to this point.
                // NB. check raiseAsync() to see what happens when
                // we're in a loop (#2783).
                suspendComputation(cap,tso,(StgUpdateFrame*)frame);

                // Now drop the update frame, and arrange to return
                // the value to the frame underneath:
                tso->stackobj->sp = (StgPtr)frame + sizeofW(StgUpdateFrame) - 2;
                tso->stackobj->sp[1] = (StgWord)bh;
                ASSERT(bh->header.info != &stg_TSO_info);
                tso->stackobj->sp[0] = (W_)&stg_enter_info;

                // And continue with threadPaused; there might be
                // yet more computation to suspend.
                frame = (StgClosure *)(tso->stackobj->sp + 2);
                prev_was_update_frame = false;
                continue;
            }

            // zero out the slop so that the sanity checker can tell
            // where the next closure is.
            OVERWRITING_CLOSURE(bh);

            // an EAGER_BLACKHOLE or CAF_BLACKHOLE gets turned into a
            // BLACKHOLE here.
#if defined(THREADED_RTS)
            // first we turn it into a WHITEHOLE to claim it, and if
            // successful we write our TSO and then the BLACKHOLE info pointer.
            cur_bh_info = (const StgInfoTable *)
                cas((StgVolatilePtr)&bh->header.info,
                    (StgWord)bh_info,
                    (StgWord)&stg_WHITEHOLE_info);

            // CAS failed: some other thread changed the info pointer
            // under us; re-read it and re-run the duplicate-work check.
            if (cur_bh_info != bh_info) {
                bh_info = cur_bh_info;
#if defined(PROF_SPIN)
                ++whitehole_threadPaused_spin;
#endif
                busy_wait_nop();
                goto retry;
            }
#endif

            IF_NONMOVING_WRITE_BARRIER_ENABLED {
                if (ip_THUNK(INFO_PTR_TO_STRUCT(bh_info))) {
                    // We are about to replace a thunk with a blackhole.
                    // Add the free variables of the closure we are about to
                    // overwrite to the update remembered set.
                    // N.B. We caught the WHITEHOLE case above.
                    updateRemembSetPushThunkEager(cap,
                                                 THUNK_INFO_PTR_TO_STRUCT(bh_info),
                                                 (StgThunk *) bh);
                }
            }

            // The payload of the BLACKHOLE points to the TSO
            // (indirectee must be visible before the BLACKHOLE info
            // pointer, hence the write_barrier between the two stores).
            ((StgInd *)bh)->indirectee = (StgClosure *)tso;
            write_barrier();
            SET_INFO(bh,&stg_BLACKHOLE_info);

            // .. and we need a write barrier, since we just mutated the closure:
            recordClosureMutated(cap,bh);

            // We pretend that bh has just been created.
            LDV_RECORD_CREATE(bh);

            // Advance past this update frame and update the squeeze
            // bookkeeping: a pair of adjacent update frames is a
            // squeezing opportunity.
            frame = (StgClosure *) ((StgUpdateFrame *)frame + 1);
            if (prev_was_update_frame) {
                words_to_squeeze += sizeofW(StgUpdateFrame);
                weight += weight_pending;
                weight_pending = 0;
            }
            prev_was_update_frame = true;
            break;

        case UNDERFLOW_FRAME:
        case STOP_FRAME:
            goto end;

            // normal stack frames; do nothing except advance the pointer
        default:
        {
            uint32_t frame_size = stack_frame_sizeW(frame);
            weight_pending += frame_size;
            frame = (StgClosure *)((StgPtr)frame + frame_size);
            prev_was_update_frame = false;
        }
        }
    }

end:
    // Should we squeeze or not?  Arbitrary heuristic: we squeeze if
    // the number of words we have to shift down is less than the
    // number of stack words we squeeze away by doing so.
    // The threshold was bumped from 5 to 8 as a result of #2797
    heuristic_says_squeeze = ((weight <= 8 && words_to_squeeze > 0)
                            || weight < words_to_squeeze);

    debugTrace(DEBUG_squeeze,
        "words_to_squeeze: %d, weight: %d, squeeze: %s",
        words_to_squeeze, weight,
        heuristic_says_squeeze ? "YES" : "NO");

    if (RtsFlags.GcFlags.squeezeUpdFrames == true &&
        heuristic_says_squeeze) {
        stackSqueeze(cap, tso, (StgPtr)frame);
        tso->flags |= TSO_SQUEEZED;
        // This flag tells threadStackOverflow() that the stack was
        // squeezed, because it may not need to be expanded.
    } else {
        tso->flags &= ~TSO_SQUEEZED;
    }
}