Commit 0171e09e authored by blaze's avatar blaze Committed by Ben Gamari

Make RTS keep less memory (fixes #14702)

Currently runtime keeps hold to 4*used_memory. This includes, in
particular, nursery, which can be quite large on multiprocessor
machines: 16 CPUs x 64Mb each is 1GB. Multiplying it by 4 means whatever
actual memory usage is, runtime will never release memory under 4GB, and
this is quite excessive for processes which only need a lot of memory
shortly (think building data structures from large files).

This diff makes multiplier to apply only to GC-managed memory, leaving
all "static" allocations alone.

Test Plan: make test TEST="T14702"

Reviewers: bgamari, erikd, simonmar

Reviewed By: simonmar

Subscribers: rwbarton, thomie, carter

GHC Trac Issues: #14702

Differential Revision: https://phabricator.haskell.org/D4338
parent 5f922fbb
......@@ -279,7 +279,6 @@ isEmptyRetainerStack( void )
/* -----------------------------------------------------------------------------
* Returns size of stack
* -------------------------------------------------------------------------- */
#if defined(DEBUG)
W_
retainerStackBlocks( void )
{
......@@ -291,7 +290,6 @@ retainerStackBlocks( void )
return res;
}
#endif
/* -----------------------------------------------------------------------------
* Returns true if stackTop is at the stack boundary of the current stack,
......
......@@ -41,9 +41,7 @@ retainerSetOf( const StgClosure *c )
}
// Used by Storage.c:memInventory()
#if defined(DEBUG)
extern W_ retainerStackBlocks ( void );
#endif
#include "EndPrivate.h"
......
......@@ -28,6 +28,7 @@
#include "Sparks.h"
#include "Sweep.h"
#include "Arena.h"
#include "Storage.h"
#include "RtsUtils.h"
#include "Apply.h"
......@@ -50,6 +51,10 @@
#include "CNF.h"
#include "RtsFlags.h"
#if defined(PROFILING)
#include "RetainerProfile.h"
#endif
#include <string.h> // for memset()
#include <unistd.h>
......@@ -756,24 +761,51 @@ GarbageCollect (uint32_t collect_gen,
ACQUIRE_SM_LOCK;
if (major_gc) {
W_ need, got;
need = BLOCKS_TO_MBLOCKS(n_alloc_blocks);
got = mblocks_allocated;
W_ need_prealloc, need_live, need, got;
uint32_t i;
need_live = 0;
for (i = 0; i < RtsFlags.GcFlags.generations; i++) {
need_live += genLiveBlocks(&generations[i]);
}
need_live = stg_max(RtsFlags.GcFlags.minOldGenSize, need_live);
need_prealloc = 0;
for (i = 0; i < n_nurseries; i++) {
need_prealloc += nurseries[i].n_blocks;
}
need_prealloc += RtsFlags.GcFlags.largeAllocLim;
need_prealloc += countAllocdBlocks(exec_block);
need_prealloc += arenaBlocks();
#if defined(PROFILING)
if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_RETAINER) {
need_prealloc = retainerStackBlocks();
}
#endif
/* If the amount of data remains constant, next major GC we'll
require (F+1)*need. We leave (F+2)*need in order to reduce
repeated deallocation and reallocation. */
need = (RtsFlags.GcFlags.oldGenFactor + 2) * need;
* require (F+1)*live + prealloc. We leave (F+2)*live + prealloc
* in order to reduce repeated deallocation and reallocation. #14702
*/
need = need_prealloc + (RtsFlags.GcFlags.oldGenFactor + 2) * need_live;
/* Also, if user set heap size, do not drop below it.
*/
need = stg_max(RtsFlags.GcFlags.heapSizeSuggestion, need);
/* But with a large nursery, the above estimate might exceed
* maxHeapSize. A large resident set size might make the OS
* kill this process, or swap unnecessarily. Therefore we
* ensure that our estimate does not exceed maxHeapSize.
*/
if (RtsFlags.GcFlags.maxHeapSize != 0) {
W_ max = BLOCKS_TO_MBLOCKS(RtsFlags.GcFlags.maxHeapSize);
if (need > max) {
need = max;
}
need = stg_min(RtsFlags.GcFlags.maxHeapSize, need);
}
need = BLOCKS_TO_MBLOCKS(need);
got = mblocks_allocated;
if (got > need) {
returnMemoryToOS(got - need);
}
......
module Main where
import Control.Monad
import Data.Array.IO.Safe
import Data.Word
import GHC.Stats
import System.Exit
import System.Mem
printAlloc :: String -> IO (Word64, Word64)
printAlloc name = do
performGC
details <- gc <$> getRTSStats
let dat = (gcdetails_live_bytes details, gcdetails_mem_in_use_bytes details)
putStrLn $ name ++ ": " ++ show dat
pure dat
allocateAndPrint :: IO ()
allocateAndPrint = do
-- allocate and touch a lot of memory (4MB * 260 ~ 1GB)
memoryHog <- forM [1 .. 300] $ \_ ->
(newArray (0, 1000000) 0 :: IO (IOUArray Word Word32))
_ <- printAlloc "with large allocation"
-- do something with memory to prevent it from being GC'ed until now
forM_ memoryHog $ \a -> void $ readArray a 0
main :: IO ()
main = do
(firstLive, firstTotal) <- printAlloc "initial"
allocateAndPrint
(lastLive, lastTotal) <- printAlloc "final"
-- Now there is no reason to have more memory allocated than at start
let ratio = fromIntegral lastTotal / fromIntegral firstTotal
putStrLn $ "alloc ratio " ++ show ratio
when (ratio > 1.5) $ exitFailure
......@@ -383,3 +383,8 @@ test('T13832', exit_code(1), compile_and_run, ['-threaded'])
test('T13894', normal, compile_and_run, [''])
test('T14497', normal, compile_and_run, ['-O'])
test('T14695', normal, run_command, ['$MAKE -s --no-print-directory T14695'])
test('T14702', [ ignore_stdout
, only_ways(['threaded1', 'threaded2'])
, extra_run_opts('+RTS -A32m -N8 -T -RTS')
]
, compile_and_run, [''])
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment