Schedule.c 115 KB
Newer Older
1
/* ---------------------------------------------------------------------------
2
 * $Id: Schedule.c,v 1.173 2003/08/15 12:43:57 simonmar Exp $
3
 *
4
 * (c) The GHC Team, 1998-2000
5
6
7
 *
 * Scheduler
 *
8
9
10
11
12
 * Different GHC ways use this scheduler quite differently (see comments below)
 * Here is the global picture:
 *
 * WAY  Name     CPP flag  What's it for
 * --------------------------------------
sof's avatar
sof committed
13
14
15
16
17
 * mp   GUM      PAR          Parallel execution on a distributed memory machine
 * s    SMP      SMP          Parallel execution on a shared memory machine
 * mg   GranSim  GRAN         Simulation of parallel execution
 * md   GUM/GdH  DIST         Distributed execution (based on GUM)
 *
18
19
20
21
 * --------------------------------------------------------------------------*/

//@node Main scheduling code, , ,
//@section Main scheduling code
22

23
24
/* 
 * Version with scheduler monitor support for SMPs (WAY=s):
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41

   This design provides a high-level API to create and schedule threads etc.
   as documented in the SMP design document.

   It uses a monitor design controlled by a single mutex to exercise control
   over accesses to shared data structures, and builds on the Posix threads
   library.

   The majority of state is shared.  In order to keep essential per-task state,
   there is a Capability structure, which contains all the information
   needed to run a thread: its STG registers, a pointer to its TSO, a
   nursery etc.  During STG execution, a pointer to the capability is
   kept in a register (BaseReg).

   In a non-SMP build, there is one global capability, namely MainRegTable.

   SDM & KH, 10/99
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59

 * Version with support for distributed memory parallelism aka GUM (WAY=mp):

   The main scheduling loop in GUM iterates until a finish message is received.
   In that case a global flag @receivedFinish@ is set and this instance of
   the RTS shuts down. See ghc/rts/parallel/HLComms.c:processMessages()
   for the handling of incoming messages, such as PP_FINISH.
   Note that in the parallel case we have a system manager that coordinates
   different PEs, each of which are running one instance of the RTS.
   See ghc/rts/parallel/SysMan.c for the main routine of the parallel program.
   From this routine processes executing ghc/rts/Main.c are spawned. -- HWL

 * Version with support for simulating parallel execution aka GranSim (WAY=mg):

   The main scheduling code in GranSim is quite different from that in std
   (concurrent) Haskell: while concurrent Haskell just iterates over the
   threads in the runnable queue, GranSim is event driven, i.e. it iterates
   over the events in the global event queue.  -- HWL
60
61
*/

62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
//@menu
//* Includes::			
//* Variables and Data structures::  
//* Main scheduling loop::	
//* Suspend and Resume::	
//* Run queue code::		
//* Garbage Collection Routines::  
//* Blocking Queue Routines::	
//* Exception Handling Routines::  
//* Debugging Routines::	
//* Index::			
//@end menu

//@node Includes, Variables and Data structures, Main scheduling code, Main scheduling code
//@subsection Includes

78
#include "PosixSource.h"
79
80
81
82
83
84
85
86
#include "Rts.h"
#include "SchedAPI.h"
#include "RtsUtils.h"
#include "RtsFlags.h"
#include "Storage.h"
#include "StgRun.h"
#include "StgStartup.h"
#include "Hooks.h"
sof's avatar
sof committed
87
#define COMPILING_SCHEDULER
88
89
90
#include "Schedule.h"
#include "StgMiscClosures.h"
#include "Storage.h"
91
#include "Interpreter.h"
92
#include "Exception.h"
93
94
95
#include "Printer.h"
#include "Signals.h"
#include "Sanity.h"
96
#include "Stats.h"
sof's avatar
sof committed
97
#include "Timer.h"
98
#include "Prelude.h"
99
#include "ThreadLabels.h"
100
101
102
103
#ifdef PROFILING
#include "Proftimer.h"
#include "ProfHeap.h"
#endif
104
105
106
107
108
109
110
111
112
#if defined(GRAN) || defined(PAR)
# include "GranSimRts.h"
# include "GranSim.h"
# include "ParallelRts.h"
# include "Parallel.h"
# include "ParallelDebug.h"
# include "FetchMe.h"
# include "HLC.h"
#endif
113
#include "Sparks.h"
sof's avatar
sof committed
114
115
#include "Capability.h"
#include "OSThreads.h"
sof's avatar
sof committed
116
#include  "Task.h"
117

118
119
120
121
122
123
124
#ifdef HAVE_SYS_TYPES_H
#include <sys/types.h>
#endif
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif

125
126
#include <string.h>
#include <stdlib.h>
127
#include <stdarg.h>
128

129
130
131
//@node Variables and Data structures, Prototypes, Includes, Main scheduling code
//@subsection Variables and Data structures

132
133
134
/* Main thread queue.
 * Locks required: sched_mutex.
 */
135
StgMainThread *main_threads = NULL;
136

137
138
139
140
141
142
143
144
145
#ifdef THREADED_RTS
// Pointer to the thread that executes main
// When this thread is finished, the program terminates
// by calling shutdownHaskellAndExit.
// It would be better to add a call to shutdownHaskellAndExit
// to the Main.main wrapper and to remove this hack.
StgMainThread *main_main_thread = NULL;
#endif

146
147
148
/* Thread queues.
 * Locks required: sched_mutex.
 */
149
150
151
#if defined(GRAN)

StgTSO* ActiveTSO = NULL; /* for assigning system costs; GranSim-Light only */
152
/* rtsTime TimeOfNextEvent, EndOfTimeSlice;            now in GranSim.c */
153
154

/* 
sof's avatar
sof committed
155
   In GranSim we have a runnable and a blocked queue for each processor.
156
157
158
159
160
161
162
163
   In order to minimise code changes new arrays run_queue_hds/tls
   are created. run_queue_hd is then a short cut (macro) for
   run_queue_hds[CurrentProc] (see GranSim.h).
   -- HWL
*/
StgTSO *run_queue_hds[MAX_PROC], *run_queue_tls[MAX_PROC];
StgTSO *blocked_queue_hds[MAX_PROC], *blocked_queue_tls[MAX_PROC];
StgTSO *ccalling_threadss[MAX_PROC];
164
165
166
167
/* We use the same global list of threads (all_threads) in GranSim as in
   the std RTS (i.e. we are cheating). However, we don't use this list in
   the GranSim specific code at the moment (so we are only potentially
   cheating).  */
168
169
170

#else /* !GRAN */

171
172
173
174
175
StgTSO *run_queue_hd = NULL;
StgTSO *run_queue_tl = NULL;
StgTSO *blocked_queue_hd = NULL;
StgTSO *blocked_queue_tl = NULL;
StgTSO *sleeping_queue = NULL;    /* perhaps replace with a hash table? */
176

177
178
#endif

179
180
181
/* Linked list of all threads.
 * Used for detecting garbage collected threads.
 */
182
StgTSO *all_threads = NULL;
183

sof's avatar
sof committed
184
185
186
/* When a thread performs a safe C call (_ccall_GC, using old
 * terminology), it gets put on the suspended_ccalling_threads
 * list. Used by the garbage collector.
187
188
189
 */
static StgTSO *suspended_ccalling_threads;

190
191
static StgTSO *threadStackOverflow(StgTSO *tso);

192
193
194
195
196
/* KH: The following two flags are shared memory locations.  There is no need
       to lock them, since they are only unset at the end of a scheduler
       operation.
*/

197
/* flag set by signal handler to precipitate a context switch */
198
//@cindex context_switch
199
nat context_switch = 0;
200

201
/* if this flag is set as well, give up execution */
202
//@cindex interrupted
203
rtsBool interrupted = rtsFalse;
204

205
/* Next thread ID to allocate.
sof's avatar
sof committed
206
 * Locks required: thread_id_mutex
207
 */
208
//@cindex next_thread_id
209
static StgThreadID next_thread_id = 1;
210
211
212
213
214
215

/*
 * Pointers to the state of the current thread.
 * Rule of thumb: if CurrentTSO != NULL, then we're running a Haskell
 * thread.  If CurrentTSO == NULL, then we're at the scheduler level.
 */
216
 
217
218
219
220
/* The smallest stack size that makes any sense is:
 *    RESERVED_STACK_WORDS    (so we can get back from the stack overflow)
 *  + sizeofW(StgStopFrame)   (the stg_stop_thread_info frame)
 *  + 1                       (the closure to enter)
221
222
 *  + 1			      (stg_ap_v_ret)
 *  + 1			      (spare slot req'd by stg_ap_v_ret)
223
224
225
226
227
 *
 * A thread with this stack will bomb immediately with a stack
 * overflow, which will increase its stack size.  
 */

228
#define MIN_STACK_WORDS (RESERVED_STACK_WORDS + sizeofW(StgStopFrame) + 3)
229

sof's avatar
sof committed
230

231
#if defined(GRAN)
232
StgTSO *CurrentTSO;
233
234
#endif

235
236
237
238
239
240
/*  This is used in `TSO.h' and gcc 2.96 insists that this variable actually 
 *  exists - earlier gccs apparently didn't.
 *  -= chak
 */
StgTSO dummy_tso;

241
static rtsBool ready_to_gc;
sof's avatar
sof committed
242
243
244
245
246
247
248

/*
 * Set to TRUE when entering a shutdown state (via shutdownHaskellAndExit()) --
 * in an MT setting, needed to signal that a worker thread shouldn't hang around
 * in the scheduler when it is out of work.
 */
static rtsBool shutting_down_scheduler = rtsFalse;
249
250
251
252

void            addToBlockedQueue ( StgTSO *tso );

static void     schedule          ( void );
253
       void     interruptStgRts   ( void );
254

255
256
static void     detectBlackHoles  ( void );

257
258
259
#ifdef DEBUG
static void sched_belch(char *s, ...);
#endif
260

sof's avatar
sof committed
261
262
263
264
#if defined(RTS_SUPPORTS_THREADS)
/* ToDo: carefully document the invariants that go together
 *       with these synchronisation objects.
 */
sof's avatar
sof committed
265
266
Mutex     sched_mutex       = INIT_MUTEX_VAR;
Mutex     term_mutex        = INIT_MUTEX_VAR;
sof's avatar
sof committed
267

sof's avatar
sof committed
268
269
270
271
272
273
274
/*
 * A heavyweight solution to the problem of protecting
 * the thread_id from concurrent update.
 */
Mutex     thread_id_mutex   = INIT_MUTEX_VAR;


sof's avatar
sof committed
275
276
# if defined(SMP)
static Condition gc_pending_cond = INIT_COND_VAR;
277
nat await_death;
sof's avatar
sof committed
278
# endif
279

sof's avatar
sof committed
280
#endif /* RTS_SUPPORTS_THREADS */
sof's avatar
sof committed
281

282
283
284
#if defined(PAR)
StgTSO *LastTSO;
rtsTime TimeOfLastYield;
285
rtsBool emitSchedule = rtsTrue;
286
287
#endif

288
#if DEBUG
289
static char *whatNext_strs[] = {
290
  "ThreadRunGHC",
291
  "ThreadInterpret",
292
  "ThreadKilled",
293
  "ThreadRelocated",
294
295
296
297
  "ThreadComplete"
};
#endif

sof's avatar
sof committed
298
#if defined(PAR)
299
300
301
302
StgTSO * createSparkThread(rtsSpark spark);
StgTSO * activateSpark (rtsSpark spark);  
#endif

303
304
305
306
307
308
/*
 * The thread state for the main thread.
// ToDo: check whether not needed any more
StgTSO   *MainTSO;
 */

sof's avatar
sof committed
309
310
311
312
313
314
315
316
317
#if defined(PAR) || defined(RTS_SUPPORTS_THREADS)
/* taskStart
 *
 * Entry point handed to the task subsystem for each worker task:
 * the task does nothing but enter the scheduler loop.  Only needed
 * in the parallel (PAR) and threaded builds, where tasks other than
 * the main one exist.
 *
 * Note: the redundant forward declaration that preceded this
 * definition has been dropped; the definition itself suffices.
 */
static void
taskStart(void)
{
  schedule();
}
#endif

318
319
320
321
322
323
324
#if defined(RTS_SUPPORTS_THREADS)
/* startSchedulerTask
 *
 * Spawn a fresh worker task whose sole job is to run the scheduler
 * loop (via taskStart).  Threaded RTS only.
 */
void startSchedulerTask(void)
{
    startTask(taskStart);
}
#endif
sof's avatar
sof committed
325

326
327
328
329
//@node Main scheduling loop, Suspend and Resume, Prototypes, Main scheduling code
//@subsection Main scheduling loop

/* ---------------------------------------------------------------------------
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
   Main scheduling loop.

   We use round-robin scheduling, each thread returning to the
   scheduler loop when one of these conditions is detected:

      * out of heap space
      * timer expires (thread yields)
      * thread blocks
      * thread ends
      * stack overflow

   Locking notes:  we acquire the scheduler lock once at the beginning
   of the scheduler loop, and release it when
    
      * running a thread, or
      * waiting for work, or
      * waiting for a GC to complete.

348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
   GRAN version:
     In a GranSim setup this loop iterates over the global event queue.
     This revolves around the global event queue, which determines what 
     to do next. Therefore, it's more complicated than either the 
     concurrent or the parallel (GUM) setup.

   GUM version:
     GUM iterates over incoming messages.
     It starts with nothing to do (thus CurrentTSO == END_TSO_QUEUE),
     and sends out a fish whenever it has nothing to do; in-between
     doing the actual reductions (shared code below) it processes the
     incoming messages and deals with delayed operations 
     (see PendingFetches).
     This is not the ugliest code you could imagine, but it's bloody close.

363
364
   ------------------------------------------------------------------------ */
//@cindex schedule
365
366
367
368
369
370
static void
schedule( void )
{
  StgTSO *t;
  Capability *cap;
  StgThreadReturnCode ret;
371
372
373
#if defined(GRAN)
  rtsEvent *event;
#elif defined(PAR)
374
  StgSparkPool *pool;
375
376
377
  rtsSpark spark;
  StgTSO *tso;
  GlobalTaskId pe;
378
379
380
381
  rtsBool receivedFinish = rtsFalse;
# if defined(DEBUG)
  nat tp_size, sp_size; // stats only
# endif
382
#endif
383
  rtsBool was_interrupted = rtsFalse;
384
  StgTSOWhatNext prev_what_next;
385
386
  
  ACQUIRE_LOCK(&sched_mutex);
sof's avatar
sof committed
387
388
 
#if defined(RTS_SUPPORTS_THREADS)
sof's avatar
sof committed
389
  waitForWorkCapability(&sched_mutex, &cap, rtsFalse);
390
  IF_DEBUG(scheduler, sched_belch("worker thread (osthread %p): entering RTS", osThreadId()));
sof's avatar
sof committed
391
392
393
#else
  /* simply initialise it in the non-threaded case */
  grabCapability(&cap);
sof's avatar
sof committed
394
#endif
395

396
#if defined(GRAN)
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
  /* set up first event to get things going */
  /* ToDo: assign costs for system setup and init MainTSO ! */
  new_event(CurrentProc, CurrentProc, CurrentTime[CurrentProc],
	    ContinueThread, 
	    CurrentTSO, (StgClosure*)NULL, (rtsSpark*)NULL);

  IF_DEBUG(gran,
	   fprintf(stderr, "GRAN: Init CurrentTSO (in schedule) = %p\n", CurrentTSO);
	   G_TSO(CurrentTSO, 5));

  if (RtsFlags.GranFlags.Light) {
    /* Save current time; GranSim Light only */
    CurrentTSO->gran.clock = CurrentTime[CurrentProc];
  }      

  event = get_next_event();

  while (event!=(rtsEvent*)NULL) {
    /* Choose the processor with the next event */
    CurrentProc = event->proc;
    CurrentTSO = event->tso;

419
#elif defined(PAR)
420

421
422
  while (!receivedFinish) {    /* set by processMessages */
                               /* when receiving PP_FINISH message         */ 
423
#else
424

425
  while (1) {
426

427
#endif
428

429
430
    IF_DEBUG(scheduler, printAllThreads());

sof's avatar
sof committed
431
432
433
434
#if defined(RTS_SUPPORTS_THREADS)
    /* Check to see whether there are any worker threads
       waiting to deposit external call results. If so,
       yield our capability */
sof's avatar
sof committed
435
    yieldToReturningWorker(&sched_mutex, &cap);
sof's avatar
sof committed
436
437
#endif

438
439
440
441
442
    /* If we're interrupted (the user pressed ^C, or some other
     * termination condition occurred), kill all the currently running
     * threads.
     */
    if (interrupted) {
443
      IF_DEBUG(scheduler, sched_belch("interrupted"));
444
445
      interrupted = rtsFalse;
      was_interrupted = rtsTrue;
446
447
448
449
450
451
452
453
454
455
456
#if defined(RTS_SUPPORTS_THREADS)
      // In the threaded RTS, deadlock detection doesn't work,
      // so just exit right away.
      prog_belch("interrupted");
      releaseCapability(cap);
      startTask(taskStart);	// thread-safe-call to shutdownHaskellAndExit
      RELEASE_LOCK(&sched_mutex);
      shutdownHaskellAndExit(EXIT_SUCCESS);
#else
      deleteAllThreads();
#endif
457
458
459
460
461
462
463
    }

    /* Go through the list of main threads and wake up any
     * clients whose computations have finished.  ToDo: this
     * should be done more efficiently without a linear scan
     * of the main threads list, somehow...
     */
sof's avatar
sof committed
464
#if defined(RTS_SUPPORTS_THREADS)
465
466
467
    { 
      StgMainThread *m, **prev;
      prev = &main_threads;
468
      for (m = main_threads; m != NULL; prev = &m->link, m = m->link) {
469
	switch (m->tso->what_next) {
470
	case ThreadComplete:
471
	  if (m->ret) {
472
473
              // NOTE: return val is tso->sp[1] (see StgStartup.hc)
	      *(m->ret) = (StgClosure *)m->tso->sp[1]; 
474
475
476
	  }
	  *prev = m->link;
	  m->stat = Success;
sof's avatar
sof committed
477
	  broadcastCondition(&m->wakeup);
478
#ifdef DEBUG
sof's avatar
sof committed
479
	  removeThreadLabel((StgWord)m->tso);
480
#endif
481
482
483
484
485
486
487
          if(m == main_main_thread)
          {
              releaseCapability(cap);
              startTask(taskStart);	// thread-safe-call to shutdownHaskellAndExit
              RELEASE_LOCK(&sched_mutex);
              shutdownHaskellAndExit(EXIT_SUCCESS);
          }
488
489
	  break;
	case ThreadKilled:
490
	  if (m->ret) *(m->ret) = NULL;
491
	  *prev = m->link;
492
	  if (was_interrupted) {
493
494
495
496
	    m->stat = Interrupted;
	  } else {
	    m->stat = Killed;
	  }
sof's avatar
sof committed
497
	  broadcastCondition(&m->wakeup);
498
#ifdef DEBUG
sof's avatar
sof committed
499
	  removeThreadLabel((StgWord)m->tso);
500
#endif
501
502
503
504
505
506
507
          if(m == main_main_thread)
          {
              releaseCapability(cap);
              startTask(taskStart);	// thread-safe-call to shutdownHaskellAndExit
              RELEASE_LOCK(&sched_mutex);
              shutdownHaskellAndExit(EXIT_SUCCESS);
          }
508
509
510
	  break;
	default:
	  break;
511
512
513
	}
      }
    }
514

sof's avatar
sof committed
515
#else /* not threaded */
516

517
518
519
520
# if defined(PAR)
    /* in GUM do this only on the Main PE */
    if (IAmMainThread)
# endif
521
522
523
524
    /* If our main thread has finished or been killed, return.
     */
    {
      StgMainThread *m = main_threads;
525
526
      if (m->tso->what_next == ThreadComplete
	  || m->tso->what_next == ThreadKilled) {
527
#ifdef DEBUG
528
	removeThreadLabel((StgWord)m->tso);
529
#endif
530
	main_threads = main_threads->link;
531
	if (m->tso->what_next == ThreadComplete) {
532
533
534
535
536
	    // We finished successfully, fill in the return value
	    // NOTE: return val is tso->sp[1] (see StgStartup.hc)
	    if (m->ret) { *(m->ret) = (StgClosure *)m->tso->sp[1]; };
	    m->stat = Success;
	    return;
537
	} else {
538
	  if (m->ret) { *(m->ret) = NULL; };
539
	  if (was_interrupted) {
540
541
542
543
	    m->stat = Interrupted;
	  } else {
	    m->stat = Killed;
	  }
544
545
546
547
548
549
	  return;
	}
      }
    }
#endif

550
551
552
    /* Top up the run queue from our spark pool.  We try to make the
     * number of threads in the run queue equal to the number of
     * free capabilities.
sof's avatar
sof committed
553
554
555
     *
     * Disable spark support in SMP for now, non-essential & requires
     * a little bit of work to make it compile cleanly. -- sof 1/02.
556
     */
sof's avatar
sof committed
557
#if 0 /* defined(SMP) */
558
    {
sof's avatar
sof committed
559
      nat n = getFreeCapabilities();
560
561
562
563
564
565
566
567
568
569
      StgTSO *tso = run_queue_hd;

      /* Count the run queue */
      while (n > 0 && tso != END_TSO_QUEUE) {
	tso = tso->link;
	n--;
      }

      for (; n > 0; n--) {
	StgClosure *spark;
570
	spark = findSpark(rtsFalse);
571
572
573
	if (spark == NULL) {
	  break; /* no more sparks in the pool */
	} else {
574
575
576
	  /* I'd prefer this to be done in activateSpark -- HWL */
	  /* tricky - it needs to hold the scheduler lock and
	   * not try to re-acquire it -- SDM */
577
	  createSparkThread(spark);	  
578
	  IF_DEBUG(scheduler,
579
		   sched_belch("==^^ turning spark of closure %p into a thread",
580
581
582
583
584
585
			       (StgClosure *)spark));
	}
      }
      /* We need to wake up the other tasks if we just created some
       * work for them.
       */
sof's avatar
sof committed
586
      if (getFreeCapabilities() - n > 1) {
sof's avatar
sof committed
587
   	  signalCondition( &thread_ready_cond );
588
589
      }
    }
590
#endif // SMP
591

592
    /* check for signals each time around the scheduler */
sof's avatar
sof committed
593
#if defined(RTS_USER_SIGNALS)
594
    if (signals_pending()) {
sof's avatar
sof committed
595
      RELEASE_LOCK(&sched_mutex); /* ToDo: kill */
596
      startSignalHandlers();
sof's avatar
sof committed
597
      ACQUIRE_LOCK(&sched_mutex);
598
599
600
    }
#endif

601
602
603
    /* Check whether any waiting threads need to be woken up.  If the
     * run queue is empty, and there are no other tasks running, we
     * can wait indefinitely for something to happen.
604
     */
605
606
607
608
609
610
    if ( !EMPTY_QUEUE(blocked_queue_hd) || !EMPTY_QUEUE(sleeping_queue) 
#if defined(RTS_SUPPORTS_THREADS) && !defined(SMP)
		|| EMPTY_RUN_QUEUE()
#endif
        )
    {
sof's avatar
sof committed
611
      awaitEvent( EMPTY_RUN_QUEUE()
sof's avatar
sof committed
612
613
#if defined(SMP)
	&& allFreeCapabilities()
614
615
#endif
	);
616
    }
617
618
619
    /* we can be interrupted while waiting for I/O... */
    if (interrupted) continue;

620
621
622
623
624
625
626
627
628
629
    /* 
     * Detect deadlock: when we have no threads to run, there are no
     * threads waiting on I/O or sleeping, and all the other tasks are
     * waiting for work, we must have a deadlock of some description.
     *
     * We first try to find threads blocked on themselves (ie. black
     * holes), and generate NonTermination exceptions where necessary.
     *
     * If no threads are black holed, we have a deadlock situation, so
     * inform all the main threads.
630
     */
631
#if !defined(PAR) && !defined(RTS_SUPPORTS_THREADS)
632
    if (   EMPTY_THREAD_QUEUES()
sof's avatar
sof committed
633
#if defined(RTS_SUPPORTS_THREADS)
sof's avatar
sof committed
634
	&& EMPTY_QUEUE(suspended_ccalling_threads)
sof's avatar
sof committed
635
636
637
#endif
#ifdef SMP
	&& allFreeCapabilities()
638
639
#endif
	)
640
    {
641
	IF_DEBUG(scheduler, sched_belch("deadlocked, forcing major GC..."));
sof's avatar
sof committed
642
643
644
645
#if defined(THREADED_RTS)
	/* and SMP mode ..? */
	releaseCapability(cap);
#endif
646
647
648
649
	// Garbage collection can release some new threads due to
	// either (a) finalizers or (b) threads resurrected because
	// they are about to be send BlockedOnDeadMVar.  Any threads
	// thus released will be immediately runnable.
650
	GarbageCollect(GetRoots,rtsTrue);
651
652
653
654
655
656
657
658
659

	if ( !EMPTY_RUN_QUEUE() ) { goto not_deadlocked; }

	IF_DEBUG(scheduler, 
		 sched_belch("still deadlocked, checking for black holes..."));
	detectBlackHoles();

	if ( !EMPTY_RUN_QUEUE() ) { goto not_deadlocked; }

sof's avatar
sof committed
660
#if defined(RTS_USER_SIGNALS)
661
662
663
664
	/* If we have user-installed signal handlers, then wait
	 * for signals to arrive rather then bombing out with a
	 * deadlock.
	 */
sof's avatar
sof committed
665
666
667
668
669
670
671
672
673
674
675
#if defined(RTS_SUPPORTS_THREADS)
	if ( 0 ) { /* hmm..what to do? Simply stop waiting for
		      a signal with no runnable threads (or I/O
		      suspended ones) leads nowhere quick.
		      For now, simply shut down when we reach this
		      condition.
		      
		      ToDo: define precisely under what conditions
		      the Scheduler should shut down in an MT setting.
		   */
#else
676
	if ( anyUserHandlers() ) {
sof's avatar
sof committed
677
#endif
678
679
680
681
682
683
684
685
686
	    IF_DEBUG(scheduler, 
		     sched_belch("still deadlocked, waiting for signals..."));

	    awaitUserSignals();

	    // we might be interrupted...
	    if (interrupted) { continue; }

	    if (signals_pending()) {
sof's avatar
sof committed
687
		RELEASE_LOCK(&sched_mutex);
688
		startSignalHandlers();
sof's avatar
sof committed
689
		ACQUIRE_LOCK(&sched_mutex);
690
691
692
693
694
695
696
697
698
699
700
701
702
	    }
	    ASSERT(!EMPTY_RUN_QUEUE());
	    goto not_deadlocked;
	}
#endif

	/* Probably a real deadlock.  Send the current main thread the
	 * Deadlock exception (or in the SMP build, send *all* main
	 * threads the deadlock exception, since none of them can make
	 * progress).
	 */
	{
	    StgMainThread *m;
sof's avatar
sof committed
703
#if defined(RTS_SUPPORTS_THREADS)
704
	    for (m = main_threads; m != NULL; m = m->link) {
705
706
		switch (m->tso->why_blocked) {
		case BlockedOnBlackHole:
sof's avatar
sof committed
707
		    raiseAsync(m->tso, (StgClosure *)NonTermination_closure);
708
709
710
		    break;
		case BlockedOnException:
		case BlockedOnMVar:
sof's avatar
sof committed
711
		    raiseAsync(m->tso, (StgClosure *)Deadlock_closure);
712
713
714
715
		    break;
		default:
		    barf("deadlock: main thread blocked in a strange way");
		}
716
	    }
717
718
719
720
#else
	    m = main_threads;
	    switch (m->tso->why_blocked) {
	    case BlockedOnBlackHole:
sof's avatar
sof committed
721
		raiseAsync(m->tso, (StgClosure *)NonTermination_closure);
722
723
724
		break;
	    case BlockedOnException:
	    case BlockedOnMVar:
sof's avatar
sof committed
725
		raiseAsync(m->tso, (StgClosure *)Deadlock_closure);
726
727
728
		break;
	    default:
		barf("deadlock: main thread blocked in a strange way");
sof's avatar
sof committed
729
	    }
sof's avatar
sof committed
730
#endif
731
	}
732
733
734
735
736

#if defined(RTS_SUPPORTS_THREADS)
	/* ToDo: revisit conditions (and mechanism) for shutting
	   down a multi-threaded world  */
	IF_DEBUG(scheduler, sched_belch("all done, i think...shutting down."));
sof's avatar
sof committed
737
738
739
	RELEASE_LOCK(&sched_mutex);
	shutdownHaskell();
	return;
740
#endif
741
    }
742
743
  not_deadlocked:

744
745
#elif defined(RTS_SUPPORTS_THREADS)
    /* ToDo: add deadlock detection in threaded RTS */
746
747
#elif defined(PAR)
    /* ToDo: add deadlock detection in GUM (similar to SMP) -- HWL */
748
749
#endif

sof's avatar
sof committed
750
#if defined(SMP)
751
752
753
754
    /* If there's a GC pending, don't do anything until it has
     * completed.
     */
    if (ready_to_gc) {
755
      IF_DEBUG(scheduler,sched_belch("waiting for GC"));
sof's avatar
sof committed
756
      waitCondition( &gc_pending_cond, &sched_mutex );
757
    }
sof's avatar
sof committed
758
759
#endif    

sof's avatar
sof committed
760
#if defined(RTS_SUPPORTS_THREADS)
761
#if defined(SMP)
762
763
    /* block until we've got a thread on the run queue and a free
     * capability.
sof's avatar
sof committed
764
     *
765
     */
sof's avatar
sof committed
766
767
768
    if ( EMPTY_RUN_QUEUE() ) {
      /* Give up our capability */
      releaseCapability(cap);
sof's avatar
sof committed
769
770
771
772
773
774
775
776

      /* If we're in the process of shutting down (& running the
       * a batch of finalisers), don't wait around.
       */
      if ( shutting_down_scheduler ) {
	RELEASE_LOCK(&sched_mutex);
	return;
      }
sof's avatar
sof committed
777
778
779
      IF_DEBUG(scheduler, sched_belch("thread %d: waiting for work", osThreadId()));
      waitForWorkCapability(&sched_mutex, &cap, rtsTrue);
      IF_DEBUG(scheduler, sched_belch("thread %d: work now available", osThreadId()));
780
    }
781
782
783
784
785
#else
    if ( EMPTY_RUN_QUEUE() ) {
      continue; // nothing to do
    }
#endif
786
#endif
787
788

#if defined(GRAN)
789
790
791
792
793
794
795
796
797
798
799
800
    if (RtsFlags.GranFlags.Light)
      GranSimLight_enter_system(event, &ActiveTSO); // adjust ActiveTSO etc

    /* adjust time based on time-stamp */
    if (event->time > CurrentTime[CurrentProc] &&
        event->evttype != ContinueThread)
      CurrentTime[CurrentProc] = event->time;
    
    /* Deal with the idle PEs (may issue FindWork or MoveSpark events) */
    if (!RtsFlags.GranFlags.Light)
      handleIdlePEs();

801
    IF_DEBUG(gran, fprintf(stderr, "GRAN: switch by event-type\n"));
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914

    /* main event dispatcher in GranSim */
    switch (event->evttype) {
      /* Should just be continuing execution */
    case ContinueThread:
      IF_DEBUG(gran, fprintf(stderr, "GRAN: doing ContinueThread\n"));
      /* ToDo: check assertion
      ASSERT(run_queue_hd != (StgTSO*)NULL &&
	     run_queue_hd != END_TSO_QUEUE);
      */
      /* Ignore ContinueThreads for fetching threads (if synchr comm) */
      if (!RtsFlags.GranFlags.DoAsyncFetch &&
	  procStatus[CurrentProc]==Fetching) {
	belch("ghuH: Spurious ContinueThread while Fetching ignored; TSO %d (%p) [PE %d]",
	      CurrentTSO->id, CurrentTSO, CurrentProc);
	goto next_thread;
      }	
      /* Ignore ContinueThreads for completed threads */
      if (CurrentTSO->what_next == ThreadComplete) {
	belch("ghuH: found a ContinueThread event for completed thread %d (%p) [PE %d] (ignoring ContinueThread)", 
	      CurrentTSO->id, CurrentTSO, CurrentProc);
	goto next_thread;
      }	
      /* Ignore ContinueThreads for threads that are being migrated */
      if (PROCS(CurrentTSO)==Nowhere) { 
	belch("ghuH: trying to run the migrating TSO %d (%p) [PE %d] (ignoring ContinueThread)",
	      CurrentTSO->id, CurrentTSO, CurrentProc);
	goto next_thread;
      }
      /* The thread should be at the beginning of the run queue */
      if (CurrentTSO!=run_queue_hds[CurrentProc]) { 
	belch("ghuH: TSO %d (%p) [PE %d] is not at the start of the run_queue when doing a ContinueThread",
	      CurrentTSO->id, CurrentTSO, CurrentProc);
	break; // run the thread anyway
      }
      /*
      new_event(proc, proc, CurrentTime[proc],
		FindWork,
		(StgTSO*)NULL, (StgClosure*)NULL, (rtsSpark*)NULL);
      goto next_thread; 
      */ /* Catches superfluous CONTINUEs -- should be unnecessary */
      break; // now actually run the thread; DaH Qu'vam yImuHbej 

    case FetchNode:
      do_the_fetchnode(event);
      goto next_thread;             /* handle next event in event queue  */
      
    case GlobalBlock:
      do_the_globalblock(event);
      goto next_thread;             /* handle next event in event queue  */
      
    case FetchReply:
      do_the_fetchreply(event);
      goto next_thread;             /* handle next event in event queue  */
      
    case UnblockThread:   /* Move from the blocked queue to the tail of */
      do_the_unblock(event);
      goto next_thread;             /* handle next event in event queue  */
      
    case ResumeThread:  /* Move from the blocked queue to the tail of */
      /* the runnable queue ( i.e. Qu' SImqa'lu') */ 
      event->tso->gran.blocktime += 
	CurrentTime[CurrentProc] - event->tso->gran.blockedat;
      do_the_startthread(event);
      goto next_thread;             /* handle next event in event queue  */
      
    case StartThread:
      do_the_startthread(event);
      goto next_thread;             /* handle next event in event queue  */
      
    case MoveThread:
      do_the_movethread(event);
      goto next_thread;             /* handle next event in event queue  */
      
    case MoveSpark:
      do_the_movespark(event);
      goto next_thread;             /* handle next event in event queue  */
      
    case FindWork:
      do_the_findwork(event);
      goto next_thread;             /* handle next event in event queue  */
      
    default:
      barf("Illegal event type %u\n", event->evttype);
    }  /* switch */
    
    /* This point was scheduler_loop in the old RTS */

    IF_DEBUG(gran, belch("GRAN: after main switch"));

    TimeOfLastEvent = CurrentTime[CurrentProc];
    TimeOfNextEvent = get_time_of_next_event();
    IgnoreEvents=(TimeOfNextEvent==0); // HWL HACK
    // CurrentTSO = ThreadQueueHd;

    IF_DEBUG(gran, belch("GRAN: time of next event is: %ld", 
			 TimeOfNextEvent));

    if (RtsFlags.GranFlags.Light) 
      GranSimLight_leave_system(event, &ActiveTSO); 

    EndOfTimeSlice = CurrentTime[CurrentProc]+RtsFlags.GranFlags.time_slice;

    IF_DEBUG(gran, 
	     belch("GRAN: end of time-slice is %#lx", EndOfTimeSlice));

    /* in a GranSim setup the TSO stays on the run queue */
    t = CurrentTSO;
    /* Take a thread from the run queue. */
    t = POP_RUN_QUEUE(); // take_off_run_queue(t);

    IF_DEBUG(gran, 
	     fprintf(stderr, "GRAN: About to run current thread, which is\n");
	     G_TSO(t,5));

    context_switch = 0; // turned on via GranYield, checking events and time slice

    IF_DEBUG(gran, 
	     DumpGranEvent(GR_SCHEDULE, t));

    procStatus[CurrentProc] = Busy;

#elif defined(PAR)
    if (PendingFetches != END_BF_QUEUE) {
        processFetches();
    }

    /* ToDo: phps merge with spark activation above */
    /* check whether we have local work and send requests if we have none */
    if (EMPTY_RUN_QUEUE()) {  /* no runnable threads */
      /* :-[  no local threads => look out for local sparks */

      /* the spark pool for the current PE */
      pool = &(MainRegTable.rSparks); // generalise to cap = &MainRegTable

      if (advisory_thread_count < RtsFlags.ParFlags.maxThreads &&
	  pool->hd < pool->tl) {
	/* 
	 * ToDo: add GC code check that we really have enough heap afterwards!!
	 * Old comment:
	 * If we're here (no runnable threads) and we have pending
	 * sparks, we must have a space problem.  Get enough space
	 * to turn one of those pending sparks into a
	 * thread... 
	 */

	spark = findSpark(rtsFalse);                /* get a spark */
	if (spark != (rtsSpark) NULL) {
	  tso = activateSpark(spark);       /* turn the spark into a thread */
	  IF_PAR_DEBUG(schedule,
		       belch("==== schedule: Created TSO %d (%p); %d threads active",
			     tso->id, tso, advisory_thread_count));

	  if (tso==END_TSO_QUEUE) { /* failed to activate spark->back to loop */
	    belch("==^^ failed to activate spark");
	    goto next_thread;
	  }               /* otherwise fall through & pick-up new tso */
	} else {
	  IF_PAR_DEBUG(verbose,
		       belch("==^^ no local sparks (spark pool contains only NFs: %d)", 
			     spark_queue_len(pool)));
	  goto next_thread;
	}
      }

      /* If we still have no work we need to send a FISH to get a spark
	 from another PE 
      */
      if (EMPTY_RUN_QUEUE()) {
      /* =8-[  no local sparks => look for work on other PEs */
	/*
	 * We really have absolutely no work.  Send out a fish
	 * (there may be some out there already), and wait for
	 * something to arrive.  We clearly can't run any threads
	 * until a SCHEDULE or RESUME arrives, and so that's what
	 * we're hoping to see.  (Of course, we still have to
	 * respond to other types of messages.)
	 */
	TIME now = msTime() /*CURRENT_TIME*/;
	IF_PAR_DEBUG(verbose, 
		     belch("--  now=%ld", now));
	IF_PAR_DEBUG(verbose,
		     if (outstandingFishes < RtsFlags.ParFlags.maxFishes &&
			 (last_fish_arrived_at!=0 &&
			  last_fish_arrived_at+RtsFlags.ParFlags.fishDelay > now)) {
		       belch("--$$ delaying FISH until %ld (last fish %ld, delay %ld, now %ld)",
			     last_fish_arrived_at+RtsFlags.ParFlags.fishDelay,
			     last_fish_arrived_at,
			     RtsFlags.ParFlags.fishDelay, now);
		     });
	
	if (outstandingFishes < RtsFlags.ParFlags.maxFishes &&
	    (last_fish_arrived_at==0 ||
	     (last_fish_arrived_at+RtsFlags.ParFlags.fishDelay <= now))) {
	  /* outstandingFishes is set in sendFish, processFish;
	     avoid flooding system with fishes via delay */
	  pe = choosePE();
	  sendFish(pe, mytid, NEW_FISH_AGE, NEW_FISH_HISTORY, 
		   NEW_FISH_HUNGER);

	  // Global statistics: count no. of fishes
	  if (RtsFlags.ParFlags.ParStats.Global &&
	      RtsFlags.GcFlags.giveStats > NO_GC_STATS) {
	    globalParStats.tot_fish_mess++;
	  }
	}
      
	receivedFinish = processMessages();
	goto next_thread;
      }
    } else if (PacketsWaiting()) {  /* Look for incoming messages */
      receivedFinish = processMessages();
    }

    /* Now we are sure that we have some work available */
    ASSERT(run_queue_hd != END_TSO_QUEUE);

    /* Take a thread from the run queue, if we have work */
    t = POP_RUN_QUEUE();  // take_off_run_queue(END_TSO_QUEUE);
    IF_DEBUG(sanity,checkTSO(t));

    /* ToDo: write something to the log-file
    if (RTSflags.ParFlags.granSimStats && !sameThread)
        DumpGranEvent(GR_SCHEDULE, RunnableThreadsHd);

    CurrentTSO = t;
    */
    /* the spark pool for the current PE */
    pool = &(MainRegTable.rSparks); // generalise to cap = &MainRegTable

    IF_DEBUG(scheduler, 
	     belch("--=^ %d threads, %d sparks on [%#x]", 
		   run_queue_len(), spark_queue_len(pool), CURRENT_PROC));

# if 1
    if (0 && RtsFlags.ParFlags.ParStats.Full && 
	t && LastTSO && t->id != LastTSO->id && 
	LastTSO->why_blocked == NotBlocked && 
	LastTSO->what_next != ThreadComplete) {
      // if previously scheduled TSO not blocked we have to record the context switch
      DumpVeryRawGranEvent(TimeOfLastYield, CURRENT_PROC, CURRENT_PROC,
			   GR_DESCHEDULE, LastTSO, (StgClosure *)NULL, 0, 0);
    }

    if (RtsFlags.ParFlags.ParStats.Full && 
	(emitSchedule /* forced emit */ ||
        (t && LastTSO && t->id != LastTSO->id))) {
      /* 
	 we are running a different TSO, so write a schedule event to log file
	 NB: If we use fair scheduling we also have to write  a deschedule 
	     event for LastTSO; with unfair scheduling we know that the
	     previous tso has blocked whenever we switch to another tso, so
	     we don't need it in GUM for now
      */
      DumpRawGranEvent(CURRENT_PROC, CURRENT_PROC,
		       GR_SCHEDULE, t, (StgClosure *)NULL, 0, 0);
      emitSchedule = rtsFalse;
    }
     
# endif
#else /* !GRAN && !PAR */
  
    /* grab a thread from the run queue */
    ASSERT(run_queue_hd != END_TSO_QUEUE);
    t = POP_RUN_QUEUE();

    // Sanity check the thread we're about to run.  This can be
    // expensive if there is lots of thread switching going on...
    IF_DEBUG(sanity,checkTSO(t));
#endif

    cap->r.rCurrentTSO = t;
    
    /* context switches are now initiated by the timer signal, unless
     * the user specified "context switch as often as possible", with
     * +RTS -C0
     */
    if ((RtsFlags.ConcFlags.ctxtSwitchTicks == 0
	 && (run_queue_hd != END_TSO_QUEUE
	     || blocked_queue_hd != END_TSO_QUEUE
	     || sleeping_queue != END_TSO_QUEUE)))
	context_switch = 1;
    else
	context_switch = 0;
run_thread:

    RELEASE_LOCK(&sched_mutex);

    IF_DEBUG(scheduler, sched_belch("-->> running thread %ld %s ...", 
			      t->id, whatNext_strs[t->what_next]));

#ifdef PROFILING
    startHeapProfTimer();
#endif

    /* +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */
    /* Run the current thread 
     */
    prev_what_next = t->what_next;
    switch (prev_what_next) {
    case ThreadKilled:
    case ThreadComplete:
	/* Thread already finished, return to scheduler. */
	ret = ThreadFinished;
	break;
    case ThreadRunGHC:
	ret = StgRun((StgFunPtr) stg_returnToStackTop, &cap->r);
	break;
    case ThreadInterpret:
	ret = interpretBCO(cap);
	break;
    default:
      barf("schedule: invalid what_next field");
    }
    /* +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */
    
    /* Costs for the scheduler are assigned to CCS_SYSTEM */
#ifdef PROFILING
    stopHeapProfTimer();
    CCCS = CCS_SYSTEM;
#endif
    
    ACQUIRE_LOCK(&sched_mutex);
    
#ifdef RTS_SUPPORTS_THREADS
    IF_DEBUG(scheduler,fprintf(stderr,"scheduler (task %ld): ", osThreadId()););
#elif !defined(GRAN) && !defined(PAR)
    IF_DEBUG(scheduler,fprintf(stderr,"scheduler: "););
#endif
    t = cap->r.rCurrentTSO;
    
#if defined(PAR)
    /* HACK 675: if the last thread didn't yield, make sure to print a 
       SCHEDULE event to the log file when StgRunning the next thread, even
       if it is the same one as before */
    LastTSO = t; 
    TimeOfLastYield = CURRENT_TIME;
#endif

    switch (ret) {
    case HeapOverflow:
#if defined(GRAN)
      IF_DEBUG(gran, DumpGranEvent(GR_DESCHEDULE, t));
      globalGranStats.tot_heapover++;
#elif defined(PAR)
      globalParStats.tot_heapover++;
#endif

      // did the task ask for a large block?
      if (cap->r.rHpAlloc > BLOCK_SIZE_W) {
	  // if so, get one and push it on the front of the nursery.
	  bdescr *bd;
	  nat blocks;
	  
	  blocks = (nat)BLOCK_ROUND_UP(cap->r.rHpAlloc * sizeof(W_)) / BLOCK_SIZE;

	  IF_DEBUG(scheduler,belch("--<< thread %ld (%s) stopped: requesting a large block (size %d)", 
				   t->id, whatNext_strs[t->what_next], blocks));

	  // don't do this if it would push us over the
	  // alloc_blocks_lim limit; we'll GC first.
	  if (alloc_blocks + blocks < alloc_blocks_lim) {

	      alloc_blocks += blocks;
	      bd = allocGroup( blocks );

	      // link the new group into the list
	      bd->link = cap->r.rCurrentNursery;
	      bd->u.back = cap->r.rCurrentNursery->u.back;
	      if (cap->r.rCurrentNursery->u.back != NULL) {
		  cap->r.rCurrentNursery->u.back->link = bd;
	      } else {
		  ASSERT(g0s0->blocks == cap->r.rCurrentNursery &&
			 g0s0->blocks == cap->r.rNursery);
		  cap->r.rNursery = g0s0->blocks = bd;
	      }		  
	      cap->r.rCurrentNursery->u.back = bd;

	      // initialise it as a nursery block.  We initialise the
	      // step, gen_no, and flags field of *every* sub-block in
	      // this large block, because this is easier than making
	      // sure that we always find the block head of a large
	      // block whenever we call Bdescr() (eg. evacuate() and
	      // isAlive() in the GC would both have to do this, at
	      // least).
	      { 
		  bdescr *x;
		  for (x = bd; x < bd + blocks; x++) {
		      x->step = g0s0;
		      x->gen_no = 0;
		      x->flags = 0;
		  }
	      }

	      // don't forget to update the block count in g0s0.
	      g0s0->n_blocks += blocks;
	      // This assert can be a killer if the app is doing lots
	      // of large block allocations.
	      ASSERT(countBlocks(g0s0->blocks) == g0s0->n_blocks);

	      // now update the nursery to point to the new block
	      cap->r.rCurrentNursery = bd;

	      // we might be unlucky and have another thread get on the
	      // run queue before us and steal the large block, but in that
	      // case the thread will just end up requesting another large
	      // block.
	      PUSH_ON_RUN_QUEUE(t);
	      break;
	  }
      }

      /* make all the running tasks block on a condition variable,
       * maybe set context_switch and wait till they all pile in,
       * then have them wait on a GC condition variable.
       */
      IF_DEBUG(scheduler,belch("--<< thread %ld (%s) stopped: HeapOverflow", 
			       t->id, whatNext_strs[t->what_next]));
      threadPaused(t);
#if defined(GRAN)
      ASSERT(!is_on_queue(t,CurrentProc));
#elif defined(PAR)
      /* Currently we emit a DESCHEDULE event before GC in GUM.
         ToDo: either add separate event to distinguish SYSTEM time from rest
	       or just nuke this DESCHEDULE (and the following SCHEDULE) */
      if (0 && RtsFlags.ParFlags.ParStats.Full) {
	DumpRawGranEvent(CURRENT_PROC, CURRENT_PROC,
			 GR_DESCHEDULE, t, (StgClosure *)NULL, 0, 0);
	emitSchedule = rtsTrue;
      }
#endif
      
      ready_to_gc = rtsTrue;
      context_switch = 1;		/* stop other threads ASAP */
      PUSH_ON_RUN_QUEUE(t);
      /* actual GC is done at the end of the while loop */
      break;
      
    case StackOverflow:
#if defined(GRAN)
      IF_DEBUG(gran, 
	       DumpGranEvent(GR_DESCHEDULE, t));
      globalGranStats.tot_stackover++;
#elif defined(PAR)
      // IF_DEBUG(par, 
      // DumpGranEvent(GR_DESCHEDULE, t);
      globalParStats.tot_stackover++;
#endif
      IF_DEBUG(scheduler,belch("--<< thread %ld (%s) stopped, StackOverflow", 
			       t->id, whatNext_strs[t->what_next]));
      /* just adjust the stack for this thread, then pop it back
       * on the run queue.
       */
      threadPaused(t);
      { 
	StgMainThread *m;
	/* enlarge the stack */
	StgTSO *new_t = threadStackOverflow(t);
	
	/* This TSO has moved, so update any pointers to it from the
	 * main thread stack.  It better not be on any other queues...
	 * (it shouldn't be).
	 */
	for (m = main_threads; m != NULL; m = m->link) {
	  if (m->tso == t) {
	    m->tso = new_t;
	  }
	}
	threadPaused(new_t);
	PUSH_ON_RUN_QUEUE(new_t);
      }
      break;

    case ThreadYielding:
#if defined(GRAN)
      IF_DEBUG(gran, 
	       DumpGranEvent(GR_DESCHEDULE, t));
      globalGranStats.tot_yields++;
#elif defined(PAR)
      // IF_DEBUG(par, 
      // DumpGranEvent(GR_DESCHEDULE, t);
      globalParStats.tot_yields++;
#endif
      /* put the thread back on the run queue.  Then, if we're ready to
       * GC, check whether this is the last task to stop.  If so, wake
       * up the GC thread.  getThread will block during a GC until the
       * GC is finished.
       */