From a5fda6b2dd3cfea1566e5a297ab243762d050fc5 Mon Sep 17 00:00:00 2001
From: simonmar <unknown>
Date: Mon, 17 Jan 2000 17:33:30 +0000
Subject: [PATCH] [project @ 2000-01-17 17:33:30 by simonmar] Put a giant loop
 around awaitEvent, to protect against awaitEvent(rtsTrue) returning with no
 threads to run.

This can happen if we try to delay for some time X, and select(2)
decides to wait for a shorter time X-\delta instead.  It appears that
Solaris is more prone to doing this than Linux.

This fixes the "schedule: invalid whatNext field" crashes that people
may have seen.
---
 ghc/rts/Select.c | 298 ++++++++++++++++++++++++-----------------------
 1 file changed, 153 insertions(+), 145 deletions(-)

diff --git a/ghc/rts/Select.c b/ghc/rts/Select.c
index 87f3267c82f9..b7361298c0de 100644
--- a/ghc/rts/Select.c
+++ b/ghc/rts/Select.c
@@ -1,5 +1,5 @@
 /* -----------------------------------------------------------------------------
- * $Id: Select.c,v 1.7 2000/01/13 12:40:16 simonmar Exp $
+ * $Id: Select.c,v 1.8 2000/01/17 17:33:30 simonmar Exp $
  *
  * (c) The GHC Team 1995-1999
  *
@@ -60,179 +60,187 @@ awaitEvent(rtsBool wait)
 
     IF_DEBUG(scheduler,belch("Checking for threads blocked on I/O...\n"));
 
-    /* see how long it's been since we last checked the blocked queue.
-     * ToDo: make this check atomic, so we don't lose any ticks.
+    /* loop until we've woken up some threads.  This loop is needed
+     * because the select timing isn't accurate, we sometimes sleep
+     * for a while but not long enough to wake up a thread in
+     * a threadDelay.
      */
-    delta = ticks_since_select;
-    ticks_since_select = 0;
-    delta = delta * TICK_MILLISECS * 1000;
+    do {
 
-    min = wait == rtsTrue ? 0x7fffffff : 0;
-
-    /* 
-     * Collect all of the fd's that we're interested in, and capture
-     * the minimum waiting time (in microseconds) for the delayed threads.
-     */
-    FD_ZERO(&rfd);
-    FD_ZERO(&wfd);
-
-    for(tso = blocked_queue_hd; tso != END_TSO_QUEUE; tso = next) {
-      next = tso->link;
-
-      switch (tso->why_blocked) {
-      case BlockedOnRead:
-	{ 
-	  int fd = tso->block_info.fd;
-	  maxfd = (fd > maxfd) ? fd : maxfd;
-	  FD_SET(fd, &rfd);
-	  continue;
-	}
+      /* see how long it's been since we last checked the blocked queue.
+       * ToDo: make this check atomic, so we don't lose any ticks.
+       */
+      delta = ticks_since_select;
+      ticks_since_select = 0;
+      delta = delta * TICK_MILLISECS * 1000;
 
-      case BlockedOnWrite:
-	{ 
-	  int fd = tso->block_info.fd;
-	  maxfd = (fd > maxfd) ? fd : maxfd;
-	  FD_SET(fd, &wfd);
-	  continue;
-	}
+      min = wait == rtsTrue ? 0x7fffffff : 0;
 
-      case BlockedOnDelay:
-	{
-	  if (tso->block_info.delay < min)
-	    min = tso->block_info.delay;
-	  continue;
+      /* 
+       * Collect all of the fd's that we're interested in, and capture
+       * the minimum waiting time (in microseconds) for the delayed threads.
+       */
+      FD_ZERO(&rfd);
+      FD_ZERO(&wfd);
+
+      for(tso = blocked_queue_hd; tso != END_TSO_QUEUE; tso = next) {
+	next = tso->link;
+
+	switch (tso->why_blocked) {
+	case BlockedOnRead:
+	  { 
+	    int fd = tso->block_info.fd;
+	    maxfd = (fd > maxfd) ? fd : maxfd;
+	    FD_SET(fd, &rfd);
+	    continue;
+	  }
+
+	case BlockedOnWrite:
+	  { 
+	    int fd = tso->block_info.fd;
+	    maxfd = (fd > maxfd) ? fd : maxfd;
+	    FD_SET(fd, &wfd);
+	    continue;
+	  }
+
+	case BlockedOnDelay:
+	  {
+	    if (tso->block_info.delay < min)
+	      min = tso->block_info.delay;
+	    continue;
+	  }
+
+	default:
+	  barf("AwaitEvent");
 	}
-
-      default:
-	barf("AwaitEvent");
       }
-    }
-
-    /* Release the scheduler lock while we do the poll.
-     * this means that someone might muck with the blocked_queue
-     * while we do this, but it shouldn't matter:
-     *
-     *   - another task might poll for I/O and remove one
-     *     or more threads from the blocked_queue.
-     *   - more I/O threads may be added to blocked_queue.
-     *   - more delayed threads may be added to blocked_queue. We'll
-     *     just subtract delta from their delays after the poll.
-     *
-     * I believe none of these cases lead to trouble --SDM.
-     */
-    RELEASE_LOCK(&sched_mutex);
 
-    /* Check for any interesting events */
+      /* Release the scheduler lock while we do the poll.
+       * this means that someone might muck with the blocked_queue
+       * while we do this, but it shouldn't matter:
+       *
+       *   - another task might poll for I/O and remove one
+       *     or more threads from the blocked_queue.
+       *   - more I/O threads may be added to blocked_queue.
+       *   - more delayed threads may be added to blocked_queue. We'll
+       *     just subtract delta from their delays after the poll.
+       *
+       * I believe none of these cases lead to trouble --SDM.
+       */
+      RELEASE_LOCK(&sched_mutex);
+
+      /* Check for any interesting events */
 
-    tv.tv_sec = min / 1000000;
-    tv.tv_usec = min % 1000000;
+      tv.tv_sec = min / 1000000;
+      tv.tv_usec = min % 1000000;
 
 #ifndef linux_TARGET_OS
-    gettimeofday(&tv_before, (struct timezone *) NULL);
+      gettimeofday(&tv_before, (struct timezone *) NULL);
 #endif
 
-    while (!interrupted &&
-	   (numFound = select(maxfd+1, &rfd, &wfd, NULL, &tv)) < 0) {
-      if (errno != EINTR) {
-	/* fflush(stdout); */
-	perror("select");
-	barf("select failed");
-      }
-      ACQUIRE_LOCK(&sched_mutex);
-
-      /* We got a signal; could be one of ours.  If so, we need
-       * to start up the signal handler straight away, otherwise
-       * we could block for a long time before the signal is
-       * serviced.
-       */
-      if (signals_pending()) {
-	start_signal_handlers();
-	RELEASE_LOCK(&sched_mutex);
-	break;
-      }
+      while (!interrupted &&
+	     (numFound = select(maxfd+1, &rfd, &wfd, NULL, &tv)) < 0) {
+	if (errno != EINTR) {
+	  /* fflush(stdout); */
+	  perror("select");
+	  barf("select failed");
+	}
+	ACQUIRE_LOCK(&sched_mutex);
+
+	/* We got a signal; could be one of ours.  If so, we need
+	 * to start up the signal handler straight away, otherwise
+	 * we could block for a long time before the signal is
+	 * serviced.
+	 */
+	if (signals_pending()) {
+	  start_signal_handlers();
+	  RELEASE_LOCK(&sched_mutex);
+	  break;
+	}
 
-      /* If new runnable threads have arrived, stop waiting for
-       * I/O and run them.
-       */
-      if (run_queue_hd != END_TSO_QUEUE) {
+	/* If new runnable threads have arrived, stop waiting for
+	 * I/O and run them.
+	 */
+	if (run_queue_hd != END_TSO_QUEUE) {
+	  RELEASE_LOCK(&sched_mutex);
+	  break;
+	}
+	
 	RELEASE_LOCK(&sched_mutex);
-	break;
-      }
-
-      RELEASE_LOCK(&sched_mutex);
-    }	
+      }	
 
 #ifdef linux_TARGET_OS
-    /* on Linux, tv is set to indicate the amount of time not
-     * slept, so we don't need to gettimeofday() to find out.
-     */
-    delta += min - (tv.tv_sec * 1000000 + tv.tv_usec);
+      /* on Linux, tv is set to indicate the amount of time not
+       * slept, so we don't need to gettimeofday() to find out.
+       */
+      delta += min - (tv.tv_sec * 1000000 + tv.tv_usec);
 #else
-    gettimeofday(&tv_after, (struct timezone *) NULL);
-    delta += (tv_after.tv_sec - tv_before.tv_sec) * 1000000 +
-      tv_after.tv_usec - tv_before.tv_usec;
+      gettimeofday(&tv_after, (struct timezone *) NULL);
+      delta += (tv_after.tv_sec - tv_before.tv_sec) * 1000000 +
+	tv_after.tv_usec - tv_before.tv_usec;
 #endif
 
 #if 0
-    if (delta != 0) { fprintf(stderr,"waited: %d %d %d\n", min, delta,
-			      interrupted); }
+      if (delta != 0) { fprintf(stderr,"waited: %d %d %d\n", min, delta,
+				interrupted); }
 #endif
 
-    ACQUIRE_LOCK(&sched_mutex);
-
-    /*
-      Step through the waiting queue, unblocking every thread that now has
-      a file descriptor in a ready state.
+      ACQUIRE_LOCK(&sched_mutex);
 
-      For the delayed threads, decrement the number of microsecs
-      we've been blocked for. Unblock the threads that have thusly expired.
-     */
+      /* Step through the waiting queue, unblocking every thread that now has
+       * a file descriptor in a ready state.
+	
+       * For the delayed threads, decrement the number of microsecs
+       * we've been blocked for. Unblock the threads that have thusly expired.
+       */
 
-    prev = NULL;
-    for(tso = blocked_queue_hd; tso != END_TSO_QUEUE; tso = next) {
-      next = tso->link;
-      switch (tso->why_blocked) {
-      case BlockedOnRead:
-	ready = FD_ISSET(tso->block_info.fd, &rfd);
-	break;
+      prev = NULL;
+      for(tso = blocked_queue_hd; tso != END_TSO_QUEUE; tso = next) {
+	next = tso->link;
+	switch (tso->why_blocked) {
+	case BlockedOnRead:
+	  ready = FD_ISSET(tso->block_info.fd, &rfd);
+	  break;
+	
+	case BlockedOnWrite:
+	  ready = FD_ISSET(tso->block_info.fd, &wfd);
+	  break;
 	
-      case BlockedOnWrite:
-	ready = FD_ISSET(tso->block_info.fd, &wfd);
-	break;
+	case BlockedOnDelay:
+	  if (tso->block_info.delay > delta) {
+	    tso->block_info.delay -= delta;
+	    ready = 0;
+	  } else {
+	    tso->block_info.delay = 0;
+	    ready = 1;
+	  }
+	  break;
 	
-      case BlockedOnDelay:
-	if (tso->block_info.delay > delta) {
-	  tso->block_info.delay -= delta;
-	  ready = 0;
+	default:
+	  barf("awaitEvent");
+	}
+      
+	if (ready) {
+	  IF_DEBUG(scheduler,belch("Waking up thread %d\n", tso->id));
+	  tso->why_blocked = NotBlocked;
+	  tso->link = END_TSO_QUEUE;
+	  PUSH_ON_RUN_QUEUE(tso);
 	} else {
-	  tso->block_info.delay = 0;
-	  ready = 1;
+	  if (prev == NULL)
+	    blocked_queue_hd = tso;
+	  else
+	    prev->link = tso;
+	  prev = tso;
 	}
-	break;
-	
-      default:
-	barf("awaitEvent");
       }
-      
-      if (ready) {
-	IF_DEBUG(scheduler,belch("Waking up thread %d\n", tso->id));
-	tso->why_blocked = NotBlocked;
-	tso->link = END_TSO_QUEUE;
-	PUSH_ON_RUN_QUEUE(tso);
-      } else {
-	if (prev == NULL)
-	  blocked_queue_hd = tso;
-	else
-	  prev->link = tso;
-	prev = tso;
+
+      if (prev == NULL)
+	blocked_queue_hd = blocked_queue_tl = END_TSO_QUEUE;
+      else {
+	prev->link = END_TSO_QUEUE;
+	blocked_queue_tl = prev;
       }
-    }
-
-    if (prev == NULL)
-      blocked_queue_hd = blocked_queue_tl = END_TSO_QUEUE;
-    else {
-      prev->link = END_TSO_QUEUE;
-      blocked_queue_tl = prev;
-    }
+
+    } while (wait && run_queue_hd == END_TSO_QUEUE);
 #endif
 }
-- 
GitLab