From a1791350ab5702a356bbb0082a4da97a83bce40e Mon Sep 17 00:00:00 2001
From: Ben Gamari <ben@well-typed.com>
Date: Mon, 9 Dec 2024 10:21:57 -0500
Subject: [PATCH] rts: Determine max_n_capabilities at RTS startup
Previously the maximum number of capabilities supported by the RTS was
statically capped at 256. However, this bound is uncomfortably low given
the size of today's machines.
While supporting unbounded, fully-dynamic adjustment would be nice, it
is complex and so instead we do something simpler: Probe the logical
core count at RTS startup and use this as the static bound for the rest
of our execution.
This should avoid users running into the capability limit on large
machines while avoiding wasting memory on a large capabilities array for
most users and keeping complexity at bay.
Addresses #25560.
(cherry picked from commit 71f050b74eaa2fdc2ca5da53f85497ac94ab6a2a)
---
 docs/users_guide/using-concurrent.rst |  5 ++--
 rts/Capability.c                      | 34 +++++++++++++++++++++++----
 rts/Capability.h                      |  6 +++--
 rts/RtsSymbols.c                      |  1 +
 rts/Schedule.c                        |  7 +++---
 rts/include/rts/Config.h              |  1 +
 rts/include/rts/Threads.h             |  4 ++++
 7 files changed, 46 insertions(+), 12 deletions(-)
diff --git a/docs/users_guide/using-concurrent.rst b/docs/users_guide/using-concurrent.rst
index 37dc212d0bc..9f13c711ca2 100644
--- a/docs/users_guide/using-concurrent.rst
+++ b/docs/users_guide/using-concurrent.rst
@@ -157,8 +157,9 @@ use the RTS :rts-flag:`-N ⟨x⟩` options.
 .. note::
 
     The maximum number of capabilities supported by the GHC runtime system is
-    determined when the compiler is built and currently defaults to 256
-    capabilities.
+    determined at RTS startup to be either 256, the value given by
+    :rts-flag:`-N ⟨x⟩`, or the number of logical CPU cores, whichever is
+    greater.
 
 The following options affect the way the runtime schedules threads on
 CPUs:
diff --git a/rts/Capability.c b/rts/Capability.c
index e20332ed98b..5dee858d29e 100644
--- a/rts/Capability.c
+++ b/rts/Capability.c
@@ -16,6 +16,7 @@
  *
  * --------------------------------------------------------------------------*/
 
+#include "rts/Config.h"
 #include "rts/PosixSource.h"
 #include "Rts.h"
 
@@ -40,12 +41,16 @@ Capability MainCapability;
 uint32_t n_capabilities = 0;
 uint32_t enabled_capabilities = 0;
 
+// The size of the `capabilities` array initialized at RTS startup. Referenced
+// by GHC.Internal.Conc.Sync
+uint32_t max_n_capabilities = MAX_N_CAPABILITIES;
+
 // The array of Capabilities.  It's important that when we need
 // to allocate more Capabilities we don't have to move the existing
 // Capabilities, because there may be pointers to them in use
 // (e.g. threads in waitForCapability(), see #8209), so this is
 // an array of Capability* rather than an array of Capability.
-Capability *capabilities[MAX_N_CAPABILITIES];
+Capability **capabilities;
 
 // Holds the Capability which last became free.  This is used so that
 // an in-call has a chance of quickly finding a free Capability.
@@ -386,12 +391,30 @@ void initCapabilities (void)
     }
 #endif
 
-    if (RtsFlags.ParFlags.nCapabilities > MAX_N_CAPABILITIES) {
-        errorBelch("warning: this GHC runtime system only supports up to %d capabilities",
-                   MAX_N_CAPABILITIES);
-        RtsFlags.ParFlags.nCapabilities = MAX_N_CAPABILITIES;
+    /*
+     * Note [Capabilities array sizing]
+     * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+     * Determine the size of the capabilities array as the maximum of:
+     *   * the static lower bound, `MAX_N_CAPABILITIES`
+     *   * the logical core count
+     *   * the user's choice of `+RTS -N`
+     * This will serve as the upper bound on the capability count for the rest
+     * of execution. Calls to `setNumCapabilities` exceeding this bound will
+     * issue a warning and be clamped to this bound.
+     *
+     * See #25560.
+     */
+    uint32_t core_count = getNumberOfProcessors();
+    if (core_count > max_n_capabilities) {
+        max_n_capabilities = core_count;
     }
 
+    if (RtsFlags.ParFlags.nCapabilities > max_n_capabilities) {
+        max_n_capabilities = RtsFlags.ParFlags.nCapabilities;
+    }
+    // `capabilities` is an array of Capability pointers: size it by pointer.
+    capabilities = stgMallocBytes(sizeof(Capability *) * max_n_capabilities, "initCapabilities");
+
     n_capabilities = 0;
     moreCapabilities(0, RtsFlags.ParFlags.nCapabilities);
     n_capabilities = RtsFlags.ParFlags.nCapabilities;
@@ -399,6 +422,7 @@ void initCapabilities (void)
 #else /* !THREADED_RTS */
 
     n_capabilities = 1;
+    capabilities = stgMallocBytes(sizeof(Capability *), "initCapabilities"); // one pointer slot
     capabilities[0] = &MainCapability;
 
     initCapability(&MainCapability, 0);
diff --git a/rts/Capability.h b/rts/Capability.h
index 463e03cf133..f7014def5ab 100644
--- a/rts/Capability.h
+++ b/rts/Capability.h
@@ -266,11 +266,13 @@ INLINE_HEADER void releaseCapability_ (Capability* cap STG_UNUSED,
 // extern Capability MainCapability;
 
 // declared in rts/include/rts/Threads.h:
+// extern uint32_t max_n_capabilities;
 // extern uint32_t n_capabilities;
 // extern uint32_t enabled_capabilities;
 
-// Array of all the capabilities
-extern Capability *capabilities[MAX_N_CAPABILITIES];
+// Array of all the capabilities, of size max_n_capabilities
+// See Note [Capabilities array sizing] in rts/Capability.c.
+extern Capability **capabilities;
 
 INLINE_HEADER Capability *getCapability(uint32_t i)
 {
diff --git a/rts/RtsSymbols.c b/rts/RtsSymbols.c
index 84fd378e7c1..fef4856356b 100644
--- a/rts/RtsSymbols.c
+++ b/rts/RtsSymbols.c
@@ -911,6 +911,7 @@ extern char **environ;
       SymI_NeedsDataProto(rts_stop_on_exception)                        \
       SymI_HasProto(stopTimer)                                          \
       SymI_HasProto(n_capabilities)                                     \
+      SymI_HasProto(max_n_capabilities)                                 \
       SymI_HasProto(enabled_capabilities)                               \
       SymI_HasDataProto(stg_traceEventzh)                                   \
       SymI_HasDataProto(stg_traceMarkerzh)                                  \
diff --git a/rts/Schedule.c b/rts/Schedule.c
index dabd25bad0c..5dc7b940f59 100644
--- a/rts/Schedule.c
+++ b/rts/Schedule.c
@@ -2276,9 +2276,10 @@ setNumCapabilities (uint32_t new_n_capabilities USED_IF_THREADS)
     } else if (new_n_capabilities <= 0) {
         errorBelch("setNumCapabilities: Capability count must be positive");
         return;
-    } else if (new_n_capabilities > MAX_N_CAPABILITIES) {
-        errorBelch("Attempt to increase capability count beyond MAX_N_CAPABILITIES\n");
-        return;
+    } else if (new_n_capabilities > max_n_capabilities) {
+        // See Note [Capabilities array sizing] in rts/Capability.c.
+        errorBelch("setNumCapabilities: Attempt to increase capability count beyond maximum capability count %" PRIu32 "; clamping...\n", max_n_capabilities);
+        new_n_capabilities = max_n_capabilities;
     }
 
     debugTrace(DEBUG_sched, "changing the number of Capabilities from %d to %d",
diff --git a/rts/include/rts/Config.h b/rts/include/rts/Config.h
index 56a7d7060a4..40b5e87f570 100644
--- a/rts/include/rts/Config.h
+++ b/rts/include/rts/Config.h
@@ -79,6 +79,7 @@ code.
 
 #if defined(THREADED_RTS)
 /*
+ * See Note [Capabilities array sizing] in rts/Capability.c.
  * Update the note in docs/users_guide/using-concurrent.rst when updating this.
  */
 #define MAX_N_CAPABILITIES 256
diff --git a/rts/include/rts/Threads.h b/rts/include/rts/Threads.h
index 2c965d41fc9..bee7c92b391 100644
--- a/rts/include/rts/Threads.h
+++ b/rts/include/rts/Threads.h
@@ -77,6 +77,10 @@ INLINE_HEADER unsigned int getNumCapabilities(void)
 // The number of Capabilities that are not disabled
 extern uint32_t enabled_capabilities;
 
+// The maximum number of Capabilities supported by the RTS.
+// See Note [Capabilities array sizing] in rts/Capability.c.
+extern uint32_t max_n_capabilities;
+
 #if !IN_STG_CODE
 extern Capability MainCapability;
 #endif
-- 
GitLab