From 21e3f3250e88640087a1a60bee2cc113bf04509f Mon Sep 17 00:00:00 2001
From: Cheng Shao <terrorjack@type.dance>
Date: Mon, 4 Mar 2024 20:45:23 +0000
Subject: [PATCH] rts: add -xr option to control two step allocator reserved
 space size

This patch adds a -xr RTS option to control the size of virtual memory
address space reserved by the two step allocator on a 64-bit platform,
see added documentation for explanation. Closes #24498.
---
 docs/users_guide/9.10.1-notes.rst    |  5 +++++
 docs/users_guide/runtime_control.rst | 12 ++++++++++++
 rts/RtsFlags.c                       | 19 +++++++++++++++++--
 rts/include/rts/Flags.h              |  2 ++
 rts/sm/MBlock.c                      | 10 ++--------
 testsuite/tests/rts/all.T            |  2 +-
 6 files changed, 39 insertions(+), 11 deletions(-)

diff --git a/docs/users_guide/9.10.1-notes.rst b/docs/users_guide/9.10.1-notes.rst
index a72874e12f35..38bbcae39f89 100644
--- a/docs/users_guide/9.10.1-notes.rst
+++ b/docs/users_guide/9.10.1-notes.rst
@@ -232,6 +232,11 @@ Runtime system
 - Add a :rts-flag:`--no-automatic-time-samples` flag which stops time profiling samples being automatically started on
   startup. Time profiling can be controlled manually using functions in ``GHC.Profiling``.
 
+- Add a :rts-flag:`-xr ⟨size⟩` which controls the size of virtual
+  memory address space reserved by the two step allocator on a 64-bit
+  platform. The default size is now 1T on aarch64 as well. See
+  :ghc-ticket:`24498`.
+
 ``base`` library
 ~~~~~~~~~~~~~~~~
 
diff --git a/docs/users_guide/runtime_control.rst b/docs/users_guide/runtime_control.rst
index 5f14e9c17e30..8ab6b2c8ea1a 100644
--- a/docs/users_guide/runtime_control.rst
+++ b/docs/users_guide/runtime_control.rst
@@ -368,6 +368,18 @@ Miscellaneous RTS options
     thread can execute its exception handlers. The ``-xq`` controls the
     size of this additional quota.
 
+.. rts-flag:: -xr ⟨size⟩
+
+    :default: 1T
+
+    This option controls the size of virtual memory address space
+    reserved by the two step allocator on a 64-bit platform. It can be
+    useful in scenarios where even reserving a large address range
+    without committing can be expensive (e.g. WSL1), or when you
+    actually have enough physical memory and want to support a Haskell
+    heap larger than 1T. ``-xr`` is a no-op if GHC is configured with
+    ``--disable-large-address-space`` or if the platform is 32-bit.
+
 .. _rts-options-gc:
 
 RTS options to control the garbage collector
diff --git a/rts/RtsFlags.c b/rts/RtsFlags.c
index 36f8bd485aec..9e26df76f5e0 100644
--- a/rts/RtsFlags.c
+++ b/rts/RtsFlags.c
@@ -186,6 +186,9 @@ void initRtsFlagsDefaults(void)
     RtsFlags.GcFlags.ringBell           = false;
     RtsFlags.GcFlags.longGCSync         = 0; /* detection turned off */
 
+    // 1 TBytes
+    RtsFlags.GcFlags.addressSpaceSize   = (StgWord64)1 << 40;
+
     RtsFlags.DebugFlags.scheduler       = false;
     RtsFlags.DebugFlags.interpreter     = false;
     RtsFlags.DebugFlags.weak            = false;
@@ -552,6 +555,11 @@ usage_text[] = {
 "  -xq        The allocation limit given to a thread after it receives",
 "             an AllocationLimitExceeded exception. (default: 100k)",
 "",
+#if defined(USE_LARGE_ADDRESS_SPACE)
+"  -xr        The size of virtual memory address space reserved by the",
+"             two step allocator (default: 1T)",
+"",
+#endif
 "  -Mgrace=<n>",
 "             The amount of allocation after the program receives a",
 "             HeapOverflow exception before the exception is thrown again, if",
@@ -1820,6 +1828,12 @@ error = true;
                           / BLOCK_SIZE;
                   break;
 
+                case 'r':
+                    OPTION_UNSAFE;
+                    RtsFlags.GcFlags.addressSpaceSize
+                      = decodeSize(rts_argv[arg], 3, MBLOCK_SIZE, HS_WORD64_MAX);
+                    break;
+
                   default:
                     OPTION_SAFE;
                     errorBelch("unknown RTS option: %s",rts_argv[arg]);
@@ -2118,7 +2132,9 @@ decodeSize(const char *flag, uint32_t offset, StgWord64 min, StgWord64 max)
         m = atof(s);
         c = s[strlen(s)-1];
 
-        if (c == 'g' || c == 'G')
+        if (c == 't' || c == 'T')
+            m *= (StgWord64)1024*1024*1024*1024;
+        else if (c == 'g' || c == 'G')
             m *= 1024*1024*1024;
         else if (c == 'm' || c == 'M')
             m *= 1024*1024;
@@ -2737,4 +2753,3 @@ doingErasProfiling( void )
             || RtsFlags.ProfFlags.eraSelector != 0);
 }
 #endif /* PROFILING */
-
diff --git a/rts/include/rts/Flags.h b/rts/include/rts/Flags.h
index f73f6978c7cf..66f8fa568ecf 100644
--- a/rts/include/rts/Flags.h
+++ b/rts/include/rts/Flags.h
@@ -89,6 +89,8 @@ typedef struct _GC_FLAGS {
 
     bool numa;                   /* Use NUMA */
     StgWord numaMask;
+
+    StgWord64 addressSpaceSize;  /* large address space size in bytes */
 } GC_FLAGS;
 
 /* See Note [Synchronization of flags and base APIs] */
diff --git a/rts/sm/MBlock.c b/rts/sm/MBlock.c
index 6eb337530453..0ca7bb160086 100644
--- a/rts/sm/MBlock.c
+++ b/rts/sm/MBlock.c
@@ -659,20 +659,14 @@ initMBlocks(void)
 
 #if defined(USE_LARGE_ADDRESS_SPACE)
     {
-        W_ size;
-#if defined(aarch64_HOST_ARCH)
-        size = (W_)1 << 38; // 1/4 TByte
-#else
-        size = (W_)1 << 40; // 1 TByte
-#endif
         void *startAddress = NULL;
         if (RtsFlags.GcFlags.heapBase) {
             startAddress = (void*) RtsFlags.GcFlags.heapBase;
         }
-        void *addr = osReserveHeapMemory(startAddress, &size);
+        void *addr = osReserveHeapMemory(startAddress, &RtsFlags.GcFlags.addressSpaceSize);
 
         mblock_address_space.begin = (W_)addr;
-        mblock_address_space.end = (W_)addr + size;
+        mblock_address_space.end = (W_)addr + RtsFlags.GcFlags.addressSpaceSize;
         mblock_high_watermark = (W_)addr;
     }
 #elif SIZEOF_VOID_P == 8
diff --git a/testsuite/tests/rts/all.T b/testsuite/tests/rts/all.T
index d89a9d82e599..19ac227d757c 100644
--- a/testsuite/tests/rts/all.T
+++ b/testsuite/tests/rts/all.T
@@ -3,7 +3,7 @@ test('testblockalloc',
      compile_and_run, [''])
 
 test('testmblockalloc',
-     [c_src, only_ways(['normal','threaded1']), extra_run_opts('+RTS -I0'),
+     [c_src, only_ways(['normal','threaded1']), extra_run_opts('+RTS -I0 -xr0.125T'),
       when(arch('wasm32'), skip)], # MBlocks can't be freed on wasm32, see Note [Megablock allocator on wasm] in rts
      compile_and_run, [''])
 # -I0 is important: the idle GC will run the memory leak detector,
-- 
GitLab