diff --git a/rts/sm/BlockAlloc.c b/rts/sm/BlockAlloc.c
index 180a06f33ebccbff4e05c92c3c9d67d09f0f4885..d5651db6d8e8d201de992375893607478a0b7356 100644
--- a/rts/sm/BlockAlloc.c
+++ b/rts/sm/BlockAlloc.c
@@ -161,11 +161,23 @@ static void initMBlock(void *mblock, uint32_t node);
 static bdescr *free_list[MAX_NUMA_NODES][NUM_FREE_LISTS];
 static bdescr *free_mblock_list[MAX_NUMA_NODES];
 
+// To avoid quadratic runtime when freeing a large number of mblocks during a
+// single GC run, frees are deferred to a separate free list that foregoes
+// sorting and coalescence. As the final step of a GC run we can then
+// separately sort the deferred list and merge it into the actual free list
+// in one go.
+static bool defer_mblock_frees;
+static bdescr *deferred_free_mblock_list[MAX_NUMA_NODES];
+
 W_ n_alloc_blocks;   // currently allocated blocks
 W_ hw_alloc_blocks;  // high-water allocated blocks
 W_ n_alloc_blocks_by_node[MAX_NUMA_NODES];
 
+
+static bdescr* splitDeferredList(bdescr* head);
+static void sortDeferredList(bdescr** head);
+
 /* -----------------------------------------------------------------------------
    Initialisation
    -------------------------------------------------------------------------- */
@@ -371,41 +383,79 @@ split_block_low (bdescr *bd, W_ n)
     return bd;
 }
 
-/* Only initializes the start pointers on the first megablock and the
- * blocks field of the first bdescr; callers are responsible for calling
- * initGroup afterwards.
+
+/* Find a fitting block for the allocation request in the given free list.
+   Returns:
+     - not NULL: when an exact match was found in the free list.
+     - NULL: when no exact match was found. In this case, the out parameter
+       `best` can be inspected to get the best fitting block from the free list.
 */
 static bdescr *
-alloc_mega_group (uint32_t node, StgWord mblocks)
+alloc_mega_group_from_free_list (bdescr** free_list_head, StgWord n, bdescr** best)
 {
-    bdescr *best, *bd, *prev;
-    StgWord n;
-
-    n = MBLOCK_GROUP_BLOCKS(mblocks);
-
-    best = NULL;
+    bdescr *bd, *prev;
+    *best = NULL;
     prev = NULL;
-    for (bd = free_mblock_list[node]; bd != NULL; prev = bd, bd = bd->link)
+    for (bd = *free_list_head; bd != NULL; prev = bd, bd = bd->link)
     {
         if (bd->blocks == n)
         {
             if (prev) {
                 prev->link = bd->link;
             } else {
-                free_mblock_list[node] = bd->link;
+                *free_list_head = bd->link;
             }
             return bd;
         }
         else if (bd->blocks > n)
         {
-            if (!best || bd->blocks < best->blocks)
+            if (!*best || bd->blocks < (*best)->blocks)
             {
-                best = bd;
+                *best = bd;
             }
         }
     }
+    return NULL;
+}
 
-    if (best)
+/* Only initializes the start pointers on the first megablock and the
+ * blocks field of the first bdescr; callers are responsible for calling
+ * initGroup afterwards.
+ */
+static bdescr *
+alloc_mega_group (uint32_t node, StgWord mblocks)
+{
+    bdescr *best, *bd;
+    StgWord n;
+
+    n = MBLOCK_GROUP_BLOCKS(mblocks);
+
+    if(defer_mblock_frees)
+    {
+        // Freeing mega groups is currently deferred. Try to serve new requests
+        // preferentially from our deferred free list.
+        bd = alloc_mega_group_from_free_list(&deferred_free_mblock_list[node], n, &best);
+        if(bd)
+        {
+            return bd;
+        }
+        else if(!best)
+        {
+            // If there was neither an exact nor a best match, try the regular free list.
+            bd = alloc_mega_group_from_free_list(&free_mblock_list[node], n, &best);
+        }
+    }
+    else
+    {
+        // Otherwise, just always use the regular free list.
+        bd = alloc_mega_group_from_free_list(&free_mblock_list[node], n, &best);
+    }
+
+    if (bd)
+    {
+        return bd;
+    }
+    else if (best)
     {
         // we take our chunk off the end here.
         StgWord best_mblocks = BLOCKS_TO_MBLOCKS(best->blocks);
@@ -758,30 +808,90 @@ free_mega_group (bdescr *mg)
     bdescr *bd, *prev;
     uint32_t node;
 
-    // Find the right place in the free list.  free_mblock_list is
-    // sorted by *address*, not by size as the free_list is.
-    prev = NULL;
     node = mg->node;
-    bd = free_mblock_list[node];
-    while (bd && bd->start < mg->start) {
-        prev = bd;
-        bd = bd->link;
-    }
 
-    // coalesce backwards
-    if (prev)
-    {
-        mg->link = prev->link;
-        prev->link = mg;
-        mg = coalesce_mblocks(prev);
+    if(defer_mblock_frees) {
+        // Put the block on the deferred free list without coalescing.
+        mg->link = deferred_free_mblock_list[node];
+        deferred_free_mblock_list[node] = mg;
+    } else {
+        // Find the right place in the free list. free_mblock_list is
+        // sorted by *address*, not by size as the free_list is.
+        prev = NULL;
+        bd = free_mblock_list[node];
+        while (bd && bd->start < mg->start) {
+            prev = bd;
+            bd = bd->link;
+        }
+
+        // coalesce backwards
+        if (prev)
+        {
+            mg->link = prev->link;
+            prev->link = mg;
+            mg = coalesce_mblocks(prev);
+        }
+        else
+        {
+            mg->link = free_mblock_list[node];
+            free_mblock_list[node] = mg;
+        }
+        // coalesce forwards
+        coalesce_mblocks(mg);
+
+        IF_DEBUG(sanity, checkFreeListSanity());
     }
-    else
-    {
-        mg->link = free_mblock_list[node];
-        free_mblock_list[node] = mg;
+}
+
+static void
+free_deferred_mega_groups (uint32_t node)
+{
+    bdescr *mg, *bd, *prev, *new_head;
+
+    sortDeferredList(&deferred_free_mblock_list[node]);
+
+    new_head = deferred_free_mblock_list[node];
+    deferred_free_mblock_list[node] = NULL;
+
+    // Keep track of our current position in the free list.
+    prev = NULL;
+    bd = free_mblock_list[node];
+
+    while(new_head != NULL) {
+        // Obtain the next mblock to free.
+        mg = new_head;
+        new_head = new_head->link;
+
+        // Find the right place in the free list. This is similar to the process
+        // in `free_mega_group`, but we can exploit that the deferred list is
+        // sorted: the search starts out where the previous mblock was inserted.
+        // This means we only need to traverse the free list once to free all
+        // the mblocks, rather than once per mblock.
+        while (bd && bd->start < mg->start) {
+            prev = bd;
+            bd = bd->link;
+        }
+
+        // coalesce backwards
+        if (prev)
+        {
+            mg->link = prev->link;
+            prev->link = mg;
+            mg = coalesce_mblocks(prev);
+        }
+        else
+        {
+            mg->link = free_mblock_list[node];
+            free_mblock_list[node] = mg;
+        }
+
+        // coalesce forwards
+        coalesce_mblocks(mg);
+
+        // initialize the search for the next round
+        prev = mg;
+        bd = prev->link;
     }
-    // coalesce forwards
-    coalesce_mblocks(mg);
 
     IF_DEBUG(sanity, checkFreeListSanity());
 }
@@ -949,6 +1059,93 @@ initMBlock(void *mblock, uint32_t node)
     }
 }
 
+// Split the linked list at its midpoint, returning the second half.
+static bdescr* splitDeferredList(bdescr* head) {
+    bdescr *fast, *slow, *second_half;
+
+    slow = head;
+    fast = slow->link;
+
+    while(fast != NULL) {
+        fast = fast->link;
+        if(fast) {
+            fast = fast->link;
+            slow = slow->link;
+        }
+    }
+
+    second_half = slow->link;
+    // Cap the first list at half its original length.
+    slow->link = NULL;
+    return second_half;
+}
+
+static void sortDeferredList(bdescr** head) {
+    bdescr *first_half, *second_half, *cur;
+
+    if(*head == NULL || (*head)->link == NULL) {
+        // 0 or 1 elements, already sorted
+        return;
+    }
+
+    first_half = *head;
+    second_half = splitDeferredList(*head);
+
+    sortDeferredList(&first_half);
+    sortDeferredList(&second_half);
+
+    // Merge the two halves, ordering by address.
+    if(first_half->start < second_half->start) {
+        *head = first_half;
+        first_half = first_half->link;
+    } else {
+        *head = second_half;
+        second_half = second_half->link;
+    }
+    cur = *head;
+
+    while(first_half != NULL && second_half != NULL) {
+        if(first_half->start < second_half->start) {
+            cur->link = first_half;
+            first_half = first_half->link;
+        } else {
+            cur->link = second_half;
+            second_half = second_half->link;
+        }
+        cur = cur->link;
+    }
+
+    // Now one of the two is exhausted, so order doesn't matter.
+    while(first_half != NULL) {
+        cur->link = first_half;
+        first_half = first_half->link;
+        cur = cur->link;
+    }
+    while(second_half != NULL) {
+        cur->link = second_half;
+        second_half = second_half->link;
+        cur = cur->link;
+    }
+}
+
+void deferMBlockFreeing(void) {
+    if(defer_mblock_frees) {
+        barf("MBlock freeing is already deferred");
+    }
+    defer_mblock_frees = true;
+}
+
+void commitMBlockFreeing(void) {
+    if(!defer_mblock_frees) {
+        barf("MBlock freeing was never deferred");
+    }
+    defer_mblock_frees = false;
+
+    for(uint32_t node = 0; node < n_numa_nodes; node++) {
+        free_deferred_mega_groups(node);
+    }
+}
+
 /* -----------------------------------------------------------------------------
    Stats / metrics
    -------------------------------------------------------------------------- */
diff --git a/rts/sm/BlockAlloc.h b/rts/sm/BlockAlloc.h
index f28e35fc87a615ae2498179c8ce9e281140ec9eb..addee6cb834136bdc592fdc8f3e1c093bc410dec 100644
--- a/rts/sm/BlockAlloc.h
+++ b/rts/sm/BlockAlloc.h
@@ -13,6 +13,9 @@ bdescr *allocLargeChunk (W_ min, W_ max);
 bdescr *allocLargeChunkOnNode (uint32_t node, W_ min, W_ max);
 
+void deferMBlockFreeing(void);
+void commitMBlockFreeing(void);
+
 /* Debugging  -------------------------------------------------------------- */
 
 extern W_ countBlocks (bdescr *bd);
diff --git a/rts/sm/GC.c b/rts/sm/GC.c
index 15aef3a9fc54c908aa9bfab8808693f3947ae2e5..71c1ecbfeb90f7868b5d41acea9d1d7cf084438e 100644
--- a/rts/sm/GC.c
+++ b/rts/sm/GC.c
@@ -442,6 +442,11 @@ GarbageCollect (uint32_t collect_gen,
   memInventory(DEBUG_gc);
 #endif
 
+  // Defer all free calls to the megablock allocator to avoid quadratic runtime
+  // when freeing a large amount of memory in a single GC
+  // (https://gitlab.haskell.org/ghc/ghc/-/issues/19897).
+  deferMBlockFreeing();
+
   // do this *before* we start scavenging
   collectFreshWeakPtrs();
 
@@ -977,6 +982,11 @@ GarbageCollect (uint32_t collect_gen,
   resurrectThreads(resurrected_threads);
   ACQUIRE_SM_LOCK;
 
+  // Finally free the deferred mblocks by sorting the deferred free list and
+  // merging it into the actual sorted free list. This needs to happen here so
+  // that the `returnMemoryToOS` call below can successfully free memory.
+  commitMBlockFreeing();
+
   if (major_gc) {
     W_ need_prealloc, need_live, need, got;
     uint32_t i;
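
Appendix (not part of the patch): the complexity argument behind the change, in brief. The old code performed one address-ordered insertion into the megablock free list per freed mblock, which is O(m*n) when freeing m mblocks into a free list of length n. The patch instead merge-sorts the deferred batch and folds it into the free list in a single linear pass, i.e. O(m log m + n + m). The following self-contained C sketch illustrates that sort-then-merge pattern under stated assumptions: node_t, split, sort_by_addr, and merge_batch are hypothetical stand-ins for bdescr, splitDeferredList, sortDeferredList, and free_deferred_mega_groups, and coalescing plus NUMA bookkeeping are omitted.

#include <stdint.h>
#include <stdio.h>

/* Hypothetical stand-in for bdescr: a singly linked list ordered by start address. */
typedef struct node {
    uintptr_t start;
    struct node *link;
} node_t;

/* Split a list at its midpoint using slow/fast pointers; returns the second half. */
static node_t *split(node_t *head) {
    node_t *slow = head, *fast = head->link;
    while (fast != NULL) {
        fast = fast->link;
        if (fast) {
            fast = fast->link;
            slow = slow->link;
        }
    }
    node_t *second = slow->link;
    slow->link = NULL;   /* cap the first half */
    return second;
}

/* Recursive merge sort by address, mirroring sortDeferredList in the patch. */
static void sort_by_addr(node_t **head) {
    if (*head == NULL || (*head)->link == NULL) return;   /* 0 or 1 elements */
    node_t *a = *head, *b = split(*head);
    sort_by_addr(&a);
    sort_by_addr(&b);
    node_t dummy = { 0, NULL }, *cur = &dummy;
    while (a && b) {
        if (a->start < b->start) { cur->link = a; a = a->link; }
        else                     { cur->link = b; b = b->link; }
        cur = cur->link;
    }
    cur->link = a ? a : b;   /* append whichever run is left */
    *head = dummy.link;
}

/* Merge a sorted batch into a sorted free list in one linear pass, analogous
   to free_deferred_mega_groups (coalescing omitted). Because the batch is
   sorted, the free-list scan resumes at the previous insertion point. */
static node_t *merge_batch(node_t *free_list, node_t *batch) {
    node_t dummy = { 0, free_list }, *prev = &dummy;
    while (batch) {
        node_t *mg = batch;
        batch = batch->link;
        while (prev->link && prev->link->start < mg->start)
            prev = prev->link;
        mg->link = prev->link;
        prev->link = mg;
        prev = mg;   /* next search starts here, not at the list head */
    }
    return dummy.link;
}

int main(void) {
    /* Free list {10, 40}; deferred batch arrives unsorted as {30, 20}. */
    node_t n40 = { 40, NULL }, n10 = { 10, &n40 };
    node_t n20 = { 20, NULL }, n30 = { 30, &n20 };

    node_t *batch = &n30;
    sort_by_addr(&batch);                     /* 20 -> 30 */
    node_t *fl = merge_batch(&n10, batch);    /* 10 -> 20 -> 30 -> 40 */

    for (node_t *p = fl; p; p = p->link)
        printf("%lu ", (unsigned long)p->start);
    printf("\n");
    return 0;
}

The dummy-head resumption in merge_batch is the key point: each insertion continues scanning from where the previous one ended, so the whole batch costs a single traversal of the free list, which is exactly what the free_deferred_mega_groups loop above achieves with its prev/bd pair.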