/* ---------------------------------------------------------------------------
 *
 * (c) The GHC Team, 1998-2005
 *
 * The scheduler and thread-related functionality
 *
 * --------------------------------------------------------------------------*/

#include "PosixSource.h"
#include "Rts.h"
#include "SchedAPI.h"
#include "RtsUtils.h"
#include "RtsFlags.h"
#include "BlockAlloc.h"
#include "OSThreads.h"
#include "Storage.h"
#include "StgRun.h"
#include "Hooks.h"
#include "Schedule.h"
#include "StgMiscClosures.h"
#include "Interpreter.h"
#include "Exception.h"
#include "Printer.h"
#include "RtsSignals.h"
#include "Sanity.h"
#include "Stats.h"
#include "STM.h"
#include "Timer.h"
#include "Prelude.h"
#include "ThreadLabels.h"
#include "LdvProfile.h"
#include "Updates.h"
#ifdef PROFILING
#include "Proftimer.h"
#include "ProfHeap.h"
#endif
#if defined(GRAN) || defined(PARALLEL_HASKELL)
# include "GranSimRts.h"
# include "GranSim.h"
# include "ParallelRts.h"
# include "Parallel.h"
# include "ParallelDebug.h"
# include "FetchMe.h"
# include "HLC.h"
#endif
#include "Sparks.h"
#include "Capability.h"
#include "Task.h"
#include "AwaitEvent.h"
#if defined(mingw32_HOST_OS)
#include "win32/IOManager.h"
#endif

#ifdef HAVE_SYS_TYPES_H
#include <sys/types.h>
#endif
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif

#include <string.h>
#include <stdlib.h>
#include <stdarg.h>

#ifdef HAVE_ERRNO_H
#include <errno.h>
#endif

// Turn off inlining when debugging - it obfuscates things
#ifdef DEBUG
# undef  STATIC_INLINE
# define STATIC_INLINE static
#endif

/* -----------------------------------------------------------------------------
 * Global variables
 * -------------------------------------------------------------------------- */

#if defined(GRAN)

StgTSO* ActiveTSO = NULL; /* for assigning system costs; GranSim-Light only */
/* rtsTime TimeOfNextEvent, EndOfTimeSlice;            now in GranSim.c */

/* 
   In GranSim we have a runnable and a blocked queue for each processor.
   In order to minimise code changes new arrays run_queue_hds/tls
   are created. run_queue_hd is then a short cut (macro) for
   run_queue_hds[CurrentProc] (see GranSim.h).
   -- HWL
*/
StgTSO *run_queue_hds[MAX_PROC], *run_queue_tls[MAX_PROC];
StgTSO *blocked_queue_hds[MAX_PROC], *blocked_queue_tls[MAX_PROC];
StgTSO *ccalling_threadss[MAX_PROC];
/* We use the same global list of threads (all_threads) in GranSim as in
   the std RTS (i.e. we are cheating). However, we don't use this list in
   the GranSim specific code at the moment (so we are only potentially
   cheating).  */

#else /* !GRAN */

#if !defined(THREADED_RTS)
// Blocked/sleeping threads
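// (non-threaded RTS only: scheduleCheckBlockedThreads() below consults
//  these queues before calling awaitEvent() to wait for I/O or timer
//  events)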
StgTSO *blocked_queue_hd = NULL;
StgTSO *blocked_queue_tl = NULL;
StgTSO *sleeping_queue = NULL;    // perhaps replace with a hash table?
#endif

/* Threads blocked on blackholes.
 * LOCK: sched_mutex+capability, or all capabilities
 */
StgTSO *blackhole_queue = NULL;
#endif

/* The blackhole_queue should be checked for threads to wake up.  See
 * Schedule.h for a more thorough comment.
 * LOCK: none (doesn't matter if we miss an update)
 */
rtsBool blackholes_need_checking = rtsFalse;

/* Linked list of all threads.
 * Used for detecting garbage collected threads.
 * LOCK: sched_mutex+capability, or all capabilities
 */
StgTSO *all_threads = NULL;

/* flag set by signal handler to precipitate a context switch
 * LOCK: none (just an advisory flag)
 */
int context_switch = 0;

/* flag that tracks whether we have done any execution in this time slice.
 * LOCK: currently none, perhaps we should lock (but needs to be
 * updated in the fast path of the scheduler).
 */
nat recent_activity = ACTIVITY_YES;

/* Overall state of the scheduler: SCHED_RUNNING, SCHED_INTERRUPTING,
 * SCHED_INTERRUPTED or SCHED_SHUTTING_DOWN (see the shutdown sequence
 * described in schedule()).
 * LOCK: none (only ever advances towards shutdown)
 */
rtsBool sched_state = SCHED_RUNNING;

/* Next thread ID to allocate.
 * LOCK: sched_mutex
 */
static StgThreadID next_thread_id = 1;

/* The smallest stack size that makes any sense is:
 *    RESERVED_STACK_WORDS    (so we can get back from the stack overflow)
 *  + sizeofW(StgStopFrame)   (the stg_stop_thread_info frame)
 *  + 1                       (the closure to enter)
 *  + 1			      (stg_ap_v_ret)
 *  + 1			      (spare slot req'd by stg_ap_v_ret)
 *
 * A thread with this stack will bomb immediately with a stack
 * overflow, which will increase its stack size.  
 */
#define MIN_STACK_WORDS (RESERVED_STACK_WORDS + sizeofW(StgStopFrame) + 3)
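/* The "+ 3" covers the three single-word items listed above: the closure
 * to enter, stg_ap_v_ret, and its spare slot. */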

#if defined(GRAN)
StgTSO *CurrentTSO;
#endif

/*  This is used in `TSO.h' and gcc 2.96 insists that this variable actually 
 *  exists - earlier gccs apparently didn't.
 *  -= chak
 */
StgTSO dummy_tso;

/*
 * Set to TRUE when entering a shutdown state (via shutdownHaskellAndExit()) --
 * in an MT setting, needed to signal that a worker thread shouldn't hang around
 * in the scheduler when it is out of work.
 */
rtsBool shutting_down_scheduler = rtsFalse;

/*
 * This mutex protects most of the global scheduler data in
 * the THREADED_RTS runtime.
 */
#if defined(THREADED_RTS)
Mutex sched_mutex;
#endif

#if defined(PARALLEL_HASKELL)
StgTSO *LastTSO;
rtsTime TimeOfLastYield;
rtsBool emitSchedule = rtsTrue;
#endif

/* -----------------------------------------------------------------------------
 * static function prototypes
 * -------------------------------------------------------------------------- */

static Capability *schedule (Capability *initialCapability, Task *task);

//
// These functions all encapsulate parts of the scheduler loop, and are
// abstracted only to make the structure and control flow of the
// scheduler clearer.
//
static void schedulePreLoop (void);
#if defined(THREADED_RTS)
static void schedulePushWork(Capability *cap, Task *task);
#endif
static void scheduleStartSignalHandlers (Capability *cap);
static void scheduleCheckBlockedThreads (Capability *cap);
static void scheduleCheckBlackHoles (Capability *cap);
static void scheduleDetectDeadlock (Capability *cap, Task *task);
#if defined(GRAN)
static StgTSO *scheduleProcessEvent(rtsEvent *event);
#endif
#if defined(PARALLEL_HASKELL)
static StgTSO *scheduleSendPendingMessages(void);
static void scheduleActivateSpark(void);
static rtsBool scheduleGetRemoteWork(rtsBool *receivedFinish);
#endif
#if defined(PAR) || defined(GRAN)
static void scheduleGranParReport(void);
#endif
static void schedulePostRunThread(void);
static rtsBool scheduleHandleHeapOverflow( Capability *cap, StgTSO *t );
static void scheduleHandleStackOverflow( Capability *cap, Task *task, 
					 StgTSO *t);
static rtsBool scheduleHandleYield( Capability *cap, StgTSO *t, 
				    nat prev_what_next );
static void scheduleHandleThreadBlocked( StgTSO *t );
static rtsBool scheduleHandleThreadFinished( Capability *cap, Task *task,
					     StgTSO *t );
static rtsBool scheduleDoHeapProfile(rtsBool ready_to_gc);
static Capability *scheduleDoGC(Capability *cap, Task *task,
				rtsBool force_major, 
				void (*get_roots)(evac_fn));

static void unblockThread(Capability *cap, StgTSO *tso);
static rtsBool checkBlackHoles(Capability *cap);
static void AllRoots(evac_fn evac);

static StgTSO *threadStackOverflow(Capability *cap, StgTSO *tso);

static void raiseAsync_(Capability *cap, StgTSO *tso, StgClosure *exception, 
			rtsBool stop_at_atomically, StgPtr stop_here);

static void deleteThread (Capability *cap, StgTSO *tso);
static void deleteAllThreads (Capability *cap);

#ifdef DEBUG
static void printThreadBlockage(StgTSO *tso);
static void printThreadStatus(StgTSO *tso);
void printThreadQueue(StgTSO *tso);
#endif

#if defined(PARALLEL_HASKELL)
StgTSO * createSparkThread(rtsSpark spark);
StgTSO * activateSpark (rtsSpark spark);  
#endif

#ifdef DEBUG
static char *whatNext_strs[] = {
  "(unknown)",
  "ThreadRunGHC",
  "ThreadInterpret",
  "ThreadKilled",
  "ThreadRelocated",
  "ThreadComplete"
};
#endif

/* -----------------------------------------------------------------------------
 * Putting a thread on the run queue: different scheduling policies
 * -------------------------------------------------------------------------- */

STATIC_INLINE void
addToRunQueue( Capability *cap, StgTSO *t )
{
#if defined(PARALLEL_HASKELL)
    if (RtsFlags.ParFlags.doFairScheduling) { 
	// this does round-robin scheduling; good for concurrency
	appendToRunQueue(cap,t);
    } else {
	// this does unfair scheduling; good for parallelism
	pushOnRunQueue(cap,t);
    }
#else
    // this does round-robin scheduling; good for concurrency
    appendToRunQueue(cap,t);
#endif
}

/* ---------------------------------------------------------------------------
   Main scheduling loop.

   We use round-robin scheduling, each thread returning to the
   scheduler loop when one of these conditions is detected:

      * out of heap space
      * timer expires (thread yields)
      * thread blocks
      * thread ends
      * stack overflow

301 302 303 304 305 306 307 308 309 310 311 312 313 314 315
   GRAN version:
     In a GranSim setup this loop iterates over the global event queue.
     This revolves around the global event queue, which determines what 
     to do next. Therefore, it's more complicated than either the 
     concurrent or the parallel (GUM) setup.

   GUM version:
     GUM iterates over incoming messages.
     It starts with nothing to do (thus CurrentTSO == END_TSO_QUEUE),
     and sends out a fish whenever it has nothing to do; in-between
     doing the actual reductions (shared code below) it processes the
     incoming messages and deals with delayed operations 
     (see PendingFetches).
     This is not the ugliest code you could imagine, but it's bloody close.

   ------------------------------------------------------------------------ */

static Capability *
schedule (Capability *initialCapability, Task *task)
{
  StgTSO *t;
  Capability *cap;
  StgThreadReturnCode ret;
#if defined(GRAN)
  rtsEvent *event;
#elif defined(PARALLEL_HASKELL)
  StgTSO *tso;
  GlobalTaskId pe;
  rtsBool receivedFinish = rtsFalse;
# if defined(DEBUG)
  nat tp_size, sp_size; // stats only
# endif
#endif
  nat prev_what_next;
  rtsBool ready_to_gc;
#if defined(THREADED_RTS)
  rtsBool first = rtsTrue;
#endif
  
  cap = initialCapability;

  // Pre-condition: this task owns initialCapability.
  // The sched_mutex is *NOT* held
  // NB. on return, we still hold a capability.

  IF_DEBUG(scheduler,
	   sched_belch("### NEW SCHEDULER LOOP (task: %p, cap: %p)",
		       task, initialCapability);
      );

  schedulePreLoop();

  // -----------------------------------------------------------
  // Scheduler loop starts here:

#if defined(PARALLEL_HASKELL)
#define TERMINATION_CONDITION        (!receivedFinish)
#elif defined(GRAN)
#define TERMINATION_CONDITION        ((event = get_next_event()) != (rtsEvent*)NULL) 
#else
#define TERMINATION_CONDITION        rtsTrue
#endif
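  // In the standard build TERMINATION_CONDITION is rtsTrue, so the loop
  // below is only left via the explicit returns in its body (a worker
  // shutting down, or a bound thread that has finished).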

  while (TERMINATION_CONDITION) {

#if defined(GRAN)
      /* Choose the processor with the next event */
      CurrentProc = event->proc;
      CurrentTSO = event->tso;
#endif

#if defined(THREADED_RTS)
      if (first) {
	  // don't yield the first time, we want a chance to run this
	  // thread for a bit, even if there are others banging at the
	  // door.
	  first = rtsFalse;
	  ASSERT_FULL_CAPABILITY_INVARIANTS(cap,task);
      } else {
	  // Yield the capability to higher-priority tasks if necessary.
	  yieldCapability(&cap, task);
      }
#endif
      
#if defined(THREADED_RTS)
      schedulePushWork(cap,task);
#endif

    // Check whether we have re-entered the RTS from Haskell without
    // going via suspendThread()/resumeThread (i.e. a 'safe' foreign
    // call).
    if (cap->in_haskell) {
    	  errorBelch("schedule: re-entered unsafely.\n"
    		     "   Perhaps a 'foreign import unsafe' should be 'safe'?");
    	  stg_exit(EXIT_FAILURE);
    }

    // The interruption / shutdown sequence.
    // 
    // In order to cleanly shut down the runtime, we want to:
    //   * make sure that all main threads return to their callers
    //     with the state 'Interrupted'.
    //   * clean up all OS threads associated with the runtime
    //   * free all memory etc.
    //
    // So the sequence for ^C goes like this:
    //
    //   * ^C handler sets sched_state := SCHED_INTERRUPTING and
    //     arranges for some Capability to wake up
    //
    //   * all threads in the system are halted, and the zombies are
    //     placed on the run queue for cleaning up.  We acquire all
    //     the capabilities in order to delete the threads, this is
    //     done by scheduleDoGC() for convenience (because GC already
    //     needs to acquire all the capabilities).  We can't kill
    //     threads involved in foreign calls.
    // 
    //   * sched_state := SCHED_INTERRUPTED
    //
    //   * somebody calls shutdownHaskell(), which calls exitScheduler()
    //
    //   * sched_state := SCHED_SHUTTING_DOWN
    //
    //   * all workers exit when the run queue on their capability
    //     drains.  All main threads will also exit when their TSO
    //     reaches the head of the run queue and they can return.
    //
    //   * eventually all Capabilities will shut down, and the RTS can
    //     exit.
    //
    //   * We might be left with threads blocked in foreign calls;
    //     we should really attempt to kill these somehow (TODO).
    
    switch (sched_state) {
    case SCHED_RUNNING:
	break;
    case SCHED_INTERRUPTING:
	IF_DEBUG(scheduler, sched_belch("SCHED_INTERRUPTING"));
#if defined(THREADED_RTS)
	discardSparksCap(cap);
#endif
	/* scheduleDoGC() deletes all the threads */
	cap = scheduleDoGC(cap,task,rtsFalse,GetRoots);
	break;
    case SCHED_INTERRUPTED:
	IF_DEBUG(scheduler, sched_belch("SCHED_INTERRUPTED"));
	break;
    case SCHED_SHUTTING_DOWN:
	IF_DEBUG(scheduler, sched_belch("SCHED_SHUTTING_DOWN"));
	// If we are a worker, just exit.  If we're a bound thread
	// then we will exit below when we've removed our TSO from
	// the run queue.
	if (task->tso == NULL && emptyRunQueue(cap)) {
	    return cap;
455
	}
456 457 458
	break;
    default:
	barf("sched_state: %d", sched_state);
459
    }
460

461
#if defined(THREADED_RTS)
462
    // If the run queue is empty, take a spark and turn it into a thread.
463
    {
464 465 466 467
	if (emptyRunQueue(cap)) {
	    StgClosure *spark;
	    spark = findSpark(cap);
	    if (spark != NULL) {
468
		IF_DEBUG(scheduler,
469
			 sched_belch("turning spark of closure %p into a thread",
470
				     (StgClosure *)spark));
471
		createSparkThread(cap,spark);	  
472 473
	    }
	}
474
    }
475
#endif // THREADED_RTS
476

477
    scheduleStartSignalHandlers(cap);
478

479 480 481 482
    // Only check the black holes here if we've nothing else to do.
    // During normal execution, the black hole list only gets checked
    // at GC time, to avoid repeatedly traversing this possibly long
    // list each time around the scheduler.
483
    if (emptyRunQueue(cap)) { scheduleCheckBlackHoles(cap); }
484

485
    scheduleCheckBlockedThreads(cap);
486

487
    scheduleDetectDeadlock(cap,task);
488 489 490
#if defined(THREADED_RTS)
    cap = task->cap;    // reload cap, it might have changed
#endif
491 492 493 494 495 496

    // Normally, the only way we can get here with no threads to
    // run is if a keyboard interrupt was received during
    // scheduleCheckBlockedThreads() or scheduleDetectDeadlock().
    // Additionally, it is not fatal for the
    // threaded RTS to reach here with no threads to run.
497
    //
498 499
    // win32: might be here due to awaitEvent() being abandoned
    // as a result of a console event having been delivered.
500 501
    if ( emptyRunQueue(cap) ) {
#if !defined(THREADED_RTS) && !defined(mingw32_HOST_OS)
502
	ASSERT(sched_state >= SCHED_INTERRUPTING);
503
#endif
504
	continue; // nothing to do
505
    }
506

507 508
#if defined(PARALLEL_HASKELL)
    scheduleSendPendingMessages();
509
    if (emptyRunQueue(cap) && scheduleActivateSpark()) 
510
	continue;
511

512 513 514
#if defined(SPARKS)
    ASSERT(next_fish_to_send_at==0);  // i.e. no delayed fishes left!
#endif
515

516 517
    /* If we still have no work we need to send a FISH to get a spark
       from another PE */
518
    if (emptyRunQueue(cap)) {
519 520 521 522 523 524 525 526 527 528 529 530
	if (!scheduleGetRemoteWork(&receivedFinish)) continue;
	ASSERT(rtsFalse); // should not happen at the moment
    }
    // from here: non-empty run queue.
    //  TODO: merge above case with this, only one call processMessages() !
    if (PacketsWaiting()) {  /* process incoming messages, if
				any pending...  only in else
				because getRemoteWork waits for
				messages as well */
	receivedFinish = processMessages();
    }
#endif
531

532 533 534
#if defined(GRAN)
    scheduleProcessEvent(event);
#endif
535

536 537 538
    // 
    // Get a thread to run
    //
539
    t = popRunQueue(cap);
540

541 542 543 544 545 546
#if defined(GRAN) || defined(PAR)
    scheduleGranParReport(); // some kind of debuging output
#else
    // Sanity check the thread we're about to run.  This can be
    // expensive if there is lots of thread switching going on...
    IF_DEBUG(sanity,checkTSO(t));
547 548
#endif

549
#if defined(THREADED_RTS)
550 551 552
    // Check whether we can run this thread in the current task.
    // If not, we have to pass our capability to the right task.
    {
553
	Task *bound = t->bound;
554
      
555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578
	if (bound) {
	    if (bound == task) {
		IF_DEBUG(scheduler,
			 sched_belch("### Running thread %d in bound thread",
				     t->id));
		// yes, the Haskell thread is bound to the current native thread
	    } else {
		IF_DEBUG(scheduler,
			 sched_belch("### thread %d bound to another OS thread",
				     t->id));
		// no, bound to a different Haskell thread: pass to that thread
		pushOnRunQueue(cap,t);
		continue;
	    }
	} else {
	    // The thread we want to run is unbound.
	    if (task->tso) { 
		IF_DEBUG(scheduler,
			 sched_belch("### this OS thread cannot run thread %d", t->id));
		// no, the current native thread is bound to a different
		// Haskell thread, so pass it to any worker thread
		pushOnRunQueue(cap,t);
		continue; 
	    }
579
	}
580 581 582
    }
#endif

583 584
    cap->r.rCurrentTSO = t;
    
585
    /* context switches are initiated by the timer signal, unless
586 587
     * the user specified "context switch as often as possible", with
     * +RTS -C0
sof's avatar
sof committed
588
     */
589 590
    if (RtsFlags.ConcFlags.ctxtSwitchTicks == 0
	&& !emptyThreadQueues(cap)) {
591
	context_switch = 1;
592 593
    }
	 
594
run_thread:
595

596 597
    IF_DEBUG(scheduler, sched_belch("-->> running thread %ld %s ...", 
			      (long)t->id, whatNext_strs[t->what_next]));
598

599 600 601 602
#if defined(PROFILING)
    startHeapProfTimer();
#endif

603 604 605
    // ----------------------------------------------------------------------
    // Run the current thread 

Simon Marlow's avatar

608 609 610
    prev_what_next = t->what_next;

    errno = t->saved_errno;
611
    cap->in_haskell = rtsTrue;
612

Simon Marlow's avatar

615 616
    recent_activity = ACTIVITY_YES;

617
    switch (prev_what_next) {
618
	
619 620 621 622 623
    case ThreadKilled:
    case ThreadComplete:
	/* Thread already finished, return to scheduler. */
	ret = ThreadFinished;
	break;
624
	
625
    case ThreadRunGHC:
626 627 628 629 630
    {
	StgRegTable *r;
	r = StgRun((StgFunPtr) stg_returnToStackTop, &cap->r);
	cap = regTableToCapability(r);
	ret = r->rRet;
631
	break;
632 633
    }
    
634
    case ThreadInterpret:
635 636
	cap = interpretBCO(cap);
	ret = cap->r.rRet;
637
	break;
638
	
639
    default:
640
	barf("schedule: invalid what_next field");
641 642
    }

643
    cap->in_haskell = rtsFalse;
644

645 646 647 648
    // The TSO might have moved, eg. if it re-entered the RTS and a GC
    // happened.  So find the new location:
    t = cap->r.rCurrentTSO;

649 650 651 652 653 654 655 656 657 658 659 660
    // We have run some Haskell code: there might be blackhole-blocked
    // threads to wake up now.
    // Lock-free test here should be ok, we're just setting a flag.
    if ( blackhole_queue != END_TSO_QUEUE ) {
	blackholes_need_checking = rtsTrue;
    }
    
    // And save the current errno in this thread.
    // XXX: possibly bogus for SMP because this thread might already
    // be running again, see code below.
    t->saved_errno = errno;

661
#if defined(THREADED_RTS)
662 663 664 665 666 667
    // If ret is ThreadBlocked, and this Task is bound to the TSO that
    // blocked, we are in limbo - the TSO is now owned by whatever it
    // is blocked on, and may in fact already have been woken up,
    // perhaps even on a different Capability.  It may be the case
    // that task->cap != cap.  We better yield this Capability
    // immediately and return to normaility.
668 669
    if (ret == ThreadBlocked) {
	IF_DEBUG(scheduler,
670 671
		 sched_belch("--<< thread %d (%s) stopped: blocked\n",
			     t->id, whatNext_strs[t->what_next]));
672 673
	continue;
    }
674 675
#endif

676
    ASSERT_FULL_CAPABILITY_INVARIANTS(cap,task);
677

678
    // ----------------------------------------------------------------------
679
    
680
    // Costs for the scheduler are assigned to CCS_SYSTEM
681 682 683 684 685
#if defined(PROFILING)
    stopHeapProfTimer();
    CCCS = CCS_SYSTEM;
#endif
    
686 687
#if defined(THREADED_RTS)
    IF_DEBUG(scheduler,debugBelch("sched (task %p): ", (void *)(unsigned long)(unsigned int)osThreadId()););
688 689 690 691 692 693
#elif !defined(GRAN) && !defined(PARALLEL_HASKELL)
    IF_DEBUG(scheduler,debugBelch("sched: "););
#endif
    
    schedulePostRunThread();

694 695
    ready_to_gc = rtsFalse;

696 697 698 699 700 701
    switch (ret) {
    case HeapOverflow:
	ready_to_gc = scheduleHandleHeapOverflow(cap,t);
	break;

    case StackOverflow:
702
	scheduleHandleStackOverflow(cap,task,t);
703 704 705
	break;

    case ThreadYielding:
706
	if (scheduleHandleYield(cap, t, prev_what_next)) {
707 708 709 710 711 712 713 714 715 716
            // shortcut for switching between compiler/interpreter:
	    goto run_thread; 
	}
	break;

    case ThreadBlocked:
	scheduleHandleThreadBlocked(t);
	break;

    case ThreadFinished:
717
	if (scheduleHandleThreadFinished(cap, task, t)) return cap;
718
	ASSERT_FULL_CAPABILITY_INVARIANTS(cap,task);
719 720 721 722 723 724
	break;

    default:
      barf("schedule: invalid thread return code %d", (int)ret);
    }

725
    if (scheduleDoHeapProfile(ready_to_gc)) { ready_to_gc = rtsFalse; }
726
    if (ready_to_gc) {
727
      cap = scheduleDoGC(cap,task,rtsFalse,GetRoots);
728
    }
729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760
  } /* end of while() */

  IF_PAR_DEBUG(verbose,
	       debugBelch("== Leaving schedule() after having received Finish\n"));
}

/* ----------------------------------------------------------------------------
 * Setting up the scheduler loop
 * ------------------------------------------------------------------------- */

static void
schedulePreLoop(void)
{
#if defined(GRAN) 
    /* set up first event to get things going */
    /* ToDo: assign costs for system setup and init MainTSO ! */
    new_event(CurrentProc, CurrentProc, CurrentTime[CurrentProc],
	      ContinueThread, 
	      CurrentTSO, (StgClosure*)NULL, (rtsSpark*)NULL);
    
    IF_DEBUG(gran,
	     debugBelch("GRAN: Init CurrentTSO (in schedule) = %p\n", 
			CurrentTSO);
	     G_TSO(CurrentTSO, 5));
    
    if (RtsFlags.GranFlags.Light) {
	/* Save current time; GranSim Light only */
	CurrentTSO->gran.clock = CurrentTime[CurrentProc];
    }      
#endif
}

/* -----------------------------------------------------------------------------
 * schedulePushWork()
 *
 * Push work to other Capabilities if we have some.
 * -------------------------------------------------------------------------- */

#if defined(THREADED_RTS)
static void
schedulePushWork(Capability *cap USED_IF_THREADS, 
		 Task *task      USED_IF_THREADS)
{
    Capability *free_caps[n_capabilities], *cap0;
    nat i, n_free_caps;

    // Check whether we have more threads on our run queue, or sparks
    // in our pool, that we could hand to another Capability.
    if ((emptyRunQueue(cap) || cap->run_queue_hd->link == END_TSO_QUEUE)
	&& sparkPoolSizeCap(cap) < 2) {
	return;
    }

    // First grab as many free Capabilities as we can.
    for (i=0, n_free_caps=0; i < n_capabilities; i++) {
	cap0 = &capabilities[i];
	if (cap != cap0 && tryGrabCapability(cap0,task)) {
	    if (!emptyRunQueue(cap0) || cap->returning_tasks_hd != NULL) {
		// it already has some work, we just grabbed it at 
		// the wrong moment.  Or maybe it's deadlocked!
		releaseCapability(cap0);
	    } else {
		free_caps[n_free_caps++] = cap0;
	    }
	}
    }

    // we now have n_free_caps free capabilities stashed in
    // free_caps[].  Share our run queue equally with them.  This is
    // probably the simplest thing we could do; improvements we might
    // want to do include:
    //
    //   - giving high priority to moving relatively new threads, on 
    //     the grounds that they haven't had time to build up a
    //     working set in the cache on this CPU/Capability.
    //
    //   - giving low priority to moving long-lived threads

    if (n_free_caps > 0) {
	StgTSO *prev, *t, *next;
	rtsBool pushed_to_all;

	IF_DEBUG(scheduler, sched_belch("excess threads on run queue and %d free capabilities, sharing...", n_free_caps));

	i = 0;
	pushed_to_all = rtsFalse;

	if (cap->run_queue_hd != END_TSO_QUEUE) {
	    prev = cap->run_queue_hd;
	    t = prev->link;
	    prev->link = END_TSO_QUEUE;
	    for (; t != END_TSO_QUEUE; t = next) {
		next = t->link;
		t->link = END_TSO_QUEUE;
		if (t->what_next == ThreadRelocated
		    || t->bound == task) { // don't move my bound thread
		    prev->link = t;
		    prev = t;
		} else if (i == n_free_caps) {
		    pushed_to_all = rtsTrue;
		    i = 0;
		    // keep one for us
		    prev->link = t;
		    prev = t;
		} else {
		    IF_DEBUG(scheduler, sched_belch("pushing thread %d to capability %d", t->id, free_caps[i]->no));
		    appendToRunQueue(free_caps[i],t);
		    if (t->bound) { t->bound->cap = free_caps[i]; }
		    i++;
		}
	    }
	    cap->run_queue_tl = prev;
	}

	// If there are some free capabilities that we didn't push any
	// threads to, then try to push a spark to each one.
	if (!pushed_to_all) {
	    StgClosure *spark;
	    // i is the next free capability to push to
	    for (; i < n_free_caps; i++) {
		if (emptySparkPoolCap(free_caps[i])) {
		    spark = findSpark(cap);
		    if (spark != NULL) {
			IF_DEBUG(scheduler, sched_belch("pushing spark %p to capability %d", spark, free_caps[i]->no));
			newSpark(&(free_caps[i]->r), spark);
		    }
		}
	    }
	}

	// release the capabilities
	for (i = 0; i < n_free_caps; i++) {
	    task->cap = free_caps[i];
	    releaseCapability(free_caps[i]);
	}
    }
    task->cap = cap; // reset to point to our Capability.
}
#endif

/* ----------------------------------------------------------------------------
 * Start any pending signal handlers
 * ------------------------------------------------------------------------- */

#if defined(RTS_USER_SIGNALS) && (!defined(THREADED_RTS) || defined(mingw32_HOST_OS))
static void
scheduleStartSignalHandlers(Capability *cap)
{
    if (signals_pending()) { // safe outside the lock
	startSignalHandlers(cap);
    }
}
#else
static void
scheduleStartSignalHandlers(Capability *cap STG_UNUSED)
{
}
#endif

/* ----------------------------------------------------------------------------
 * Check for blocked threads that can be woken up.
 * ------------------------------------------------------------------------- */

static void
scheduleCheckBlockedThreads(Capability *cap USED_IF_NOT_THREADS)
{
#if !defined(THREADED_RTS)
    //
    // Check whether any waiting threads need to be woken up.  If the
    // run queue is empty, and there are no other tasks running, we
    // can wait indefinitely for something to happen.
    //
    if ( !emptyQueue(blocked_queue_hd) || !emptyQueue(sleeping_queue) )
    {
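	// The argument to awaitEvent() says whether we may block waiting
	// for an event: only when nothing is runnable and no blackholes
	// need checking.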
	awaitEvent( emptyRunQueue(cap) && !blackholes_need_checking );
    }
#endif
}


/* ----------------------------------------------------------------------------
 * Check for threads blocked on BLACKHOLEs that can be woken up
 * ------------------------------------------------------------------------- */
static void
scheduleCheckBlackHoles (Capability *cap)
{
    if ( blackholes_need_checking ) // check without the lock first
    {
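	// Re-test the flag while holding sched_mutex, in case another
	// task got in first and has already checked the blackholes.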
	ACQUIRE_LOCK(&sched_mutex);
	if ( blackholes_need_checking ) {
	    checkBlackHoles(cap);
	    blackholes_need_checking = rtsFalse;
	}
	RELEASE_LOCK(&sched_mutex);
    }
}

/* ----------------------------------------------------------------------------
 * Detect deadlock conditions and attempt to resolve them.
 * ------------------------------------------------------------------------- */

static void
scheduleDetectDeadlock (Capability *cap, Task *task)
{

#if defined(PARALLEL_HASKELL)
    // ToDo: add deadlock detection in GUM (similar to THREADED_RTS) -- HWL
    return;
#endif

    /* 
     * Detect deadlock: when we have no threads to run, there are no
     * threads blocked, waiting for I/O, or sleeping, and all the
     * other tasks are waiting for work, we must have a deadlock of
     * some description.
     */
    if ( emptyThreadQueues(cap) )
    {
#if defined(THREADED_RTS)
	/* 
	 * In the threaded RTS, we only check for deadlock if there
	 * has been no activity in a complete timeslice.  This means
	 * we won't eagerly start a full GC just because we don't have
	 * any threads to run currently.
	 */
	if (recent_activity != ACTIVITY_INACTIVE) return;
#endif

	IF_DEBUG(scheduler, sched_belch("deadlocked, forcing major GC..."));

	// Garbage collection can release some new threads due to
	// either (a) finalizers or (b) threads resurrected because
	// they are unreachable and will therefore be sent an
	// exception.  Any threads thus released will be immediately
	// runnable.
	cap = scheduleDoGC (cap, task, rtsTrue/*force  major GC*/, GetRoots);

	recent_activity = ACTIVITY_DONE_GC;
	
	if ( !emptyRunQueue(cap) ) return;

#if defined(RTS_USER_SIGNALS) && (!defined(THREADED_RTS) || defined(mingw32_HOST_OS))
	/* If we have user-installed signal handlers, then wait
	 * for signals to arrive rather than bombing out with a
	 * deadlock.
	 */
	if ( anyUserHandlers() ) {
	    IF_DEBUG(scheduler, 
		     sched_belch("still deadlocked, waiting for signals..."));

	    awaitUserSignals();

	    if (signals_pending()) {
982
		startSignalHandlers(cap);
983 984 985
	    }

	    // either we have threads to run, or we were interrupted:
986
	    ASSERT(!emptyRunQueue(cap) || sched_state >= SCHED_INTERRUPTING);
987 988 989
	}
#endif

#if !defined(THREADED_RTS)