Skip to content
Snippets Groups Projects
Commit 9ca51f9e authored by Cheng Shao's avatar Cheng Shao :beach: Committed by Marge Bot
Browse files

rts: add the rts_clearMemory function

This patch adds the rts_clearMemory function that does its best to
zero out unused RTS memory for a wasm backend use case. See the
comment above rts_clearMemory() prototype declaration for more
detailed explanation. Closes #22920.
parent 79d8fd65
No related branches found
No related tags found
No related merge requests found
Pipeline #63408 failed
Pipeline: ghcup-ci

#63422

    Pipeline: head.hackage

    #63421

      Pipeline: head.hackage

      #63419

        ......@@ -925,6 +925,7 @@ extern char **environ;
        SymI_HasProto(newArena) \
        SymI_HasProto(arenaAlloc) \
        SymI_HasProto(arenaFree) \
        SymI_HasProto(rts_clearMemory) \
        RTS_USER_SIGNALS_SYMBOLS \
        RTS_INTCHAR_SYMBOLS
        ......
        ......@@ -599,6 +599,51 @@ extern StgWord base_GHCziTopHandler_runNonIO_closure[];
        /* ------------------------------------------------------------------------ */
        // This is a public RTS API function that does its best to zero out
        // unused RTS memory. rts_clearMemory() takes the storage manager
        // lock. It's only safe to call rts_clearMemory() when all mutators
        // have stopped and a minor or major garbage collection has just
        // been run.
        //
        // rts_clearMemory() works for all RTS ways on all platforms, though
        // the main intended use case is the pre-initialization of a
        // wasm32-wasi reactor module (#22920). A reactor module is like a
        // shared library on other platforms, with foreign exported Haskell
        // functions as entrypoints. At run-time, the user calls hs_init_ghc()
        // to initialize the RTS, after that they can invoke Haskell
        // computation by calling the exported Haskell functions, persisting
        // the memory state across these invocations.
        //
        // Besides hs_init_ghc(), the user may want to invoke some Haskell
        // function to initialize some global state in the user code, this
        // global state is used by subsequent invocations. Now, it's possible
        // to run hs_init_ghc() & custom init logic in Haskell, then snapshot
        // the entire memory into a new wasm module! And the user can call the
        // new wasm module's exports directly, thus eliminating the
        // initialization overhead at run-time entirely.
        //
        // There's one problem though. After the custom init logic runs, the
        // RTS memory contains a lot of garbage data in various places. This
        // garbage data will be snapshotted into the new wasm module, causing
        // a significant size bloat. Therefore, we need an RTS API function
        // that zeros out unused RTS memory.
        //
        // At the end of the day, the custom init function will be a small C
        // function that first calls hs_init_ghc(), then calls a foreign
        // exported Haskell function to initialize whatever global state the
        // other Haskell functions need, followed by a hs_perform_gc() call to
        // do a major GC, and finally an rts_clearMemory() call to zero out
        // the unused RTS memory.
        //
        // Why add rts_clearMemory() when there's already the -DZ RTS flag
        // that zeros freed memory on GC? The -DZ flag actually fills freed
        // memory with a garbage byte like 0xAA, and the flag only works in
        // the debug RTS. Why not add a new RTS flag that zeros freed memory
        // on the go? Because it only makes sense to do the zeroing once
        // before snapshotting the memory; there's no point in paying for
        // the zeroing overhead at the new module's run-time.
        void rts_clearMemory(void);
        #if defined(__cplusplus)
        }
        #endif
        ......@@ -1395,3 +1395,17 @@ reportUnmarkedBlocks (void)
        }
        #endif
        // Zero the memory of every block group reachable from the per-node
        // free lists: free_mblock_list[node] first, then each of the
        // NUM_FREE_LISTS chains in free_list[node][...].
        void clear_free_list(void) {
            uint32_t numa = 0;

            while (numa < n_numa_nodes) {
                bdescr *grp = free_mblock_list[numa];

                // Walk the free_mblock_list chain for this node.
                while (grp != NULL) {
                    clear_blocks(grp);
                    grp = grp->link;
                }

                // Walk each free_list chain for this node, in index order.
                for (int slot = 0; slot < NUM_FREE_LISTS; slot++) {
                    grp = free_list[numa][slot];
                    while (grp != NULL) {
                        clear_blocks(grp);
                        grp = grp->link;
                    }
                }

                numa++;
            }
        }
        ......@@ -32,4 +32,6 @@ void reportUnmarkedBlocks (void);
        extern W_ n_alloc_blocks; // currently allocated blocks
        extern W_ hw_alloc_blocks; // high-water allocated blocks
        RTS_PRIVATE void clear_free_list(void);
        #include "EndPrivate.h"
        ......@@ -356,6 +356,10 @@ void print_thread_list(StgTSO* tso);
        #endif
        RTS_PRIVATE void clear_segment(struct NonmovingSegment*);
        RTS_PRIVATE void clear_segment_free_blocks(struct NonmovingSegment*);
        #include "EndPrivate.h"
        #endif // CMINUSMINUS
        ......@@ -106,14 +106,16 @@ void nonmovingGcCafs()
        debug_caf_list_snapshot = (StgIndStatic*)END_OF_CAF_LIST;
        }
        static void
        #endif
        // Zero a nonmoving segment from its `bitmap` field up to the address
        // seg + NONMOVING_SEGMENT_SIZE. Anything stored at lower addresses
        // than `bitmap` is left untouched.
        void
        clear_segment(struct NonmovingSegment* seg)
        {
            void *const zero_from = &seg->bitmap;
            const size_t seg_limit = (size_t)seg + NONMOVING_SEGMENT_SIZE;

            memset(zero_from, 0, seg_limit - (size_t)zero_from);
        }
        static void
        void
        clear_segment_free_blocks(struct NonmovingSegment* seg)
        {
        unsigned int block_size = nonmovingSegmentBlockSize(seg);
        ......@@ -125,8 +127,6 @@ clear_segment_free_blocks(struct NonmovingSegment* seg)
        }
        }
        #endif
        GNUC_ATTR_HOT void nonmovingSweep(void)
        {
        while (nonmovingHeap.sweep_list) {
        ......
        ......@@ -1924,3 +1924,46 @@ The compacting collector does nothing to improve megablock
        level fragmentation. The role of the compacting GC is to remove object level
        fragmentation and to use less memory when collecting. - see #19248
        */
        void rts_clearMemory(void) {
        ACQUIRE_SM_LOCK;
        clear_free_list();
        for (uint32_t i = 0; i < n_nurseries; ++i) {
        for (bdescr *bd = nurseries[i].blocks; bd; bd = bd->link) {
        clear_blocks(bd);
        }
        }
        for (unsigned int i = 0; i < getNumCapabilities(); ++i) {
        for (bdescr *bd = getCapability(i)->pinned_object_empty; bd; bd = bd->link) {
        clear_blocks(bd);
        }
        for (bdescr *bd = gc_threads[i]->free_blocks; bd; bd = bd->link) {
        clear_blocks(bd);
        }
        }
        if (RtsFlags.GcFlags.useNonmoving)
        {
        for (struct NonmovingSegment *seg = nonmovingHeap.free; seg; seg = seg->link) {
        clear_segment(seg);
        }
        for (int i = 0; i < NONMOVING_ALLOCA_CNT; ++i) {
        struct NonmovingAllocator *alloc = nonmovingHeap.allocators[i];
        for (struct NonmovingSegment *seg = alloc->active; seg; seg = seg->link) {
        clear_segment_free_blocks(seg);
        }
        for (unsigned int j = 0; j < getNumCapabilities(); ++j) {
        clear_segment_free_blocks(alloc->current[j]);
        }
        }
        }
        RELEASE_SM_LOCK;
        }
        ......@@ -206,4 +206,8 @@ extern StgIndStatic * dyn_caf_list;
        extern StgIndStatic * debug_caf_list;
        extern StgIndStatic * revertible_caf_list;
        // Fill the memory of a block group with zero bytes: bd->blocks blocks of
        // BLOCK_SIZE bytes each, starting at bd->start.
        STATIC_INLINE void clear_blocks(bdescr *bd) {
            const size_t group_bytes = BLOCK_SIZE * bd->blocks;

            memset(bd->start, 0, group_bytes);
        }
        #include "EndPrivate.h"
        ......@@ -5,5 +5,6 @@
        // Run a major GC, clear the unused RTS memory, and then return the
        // result of calling incall() with x.
        HsInt out (HsInt x)
        {
            HsInt result;

            performMajorGC();
            rts_clearMemory();

            result = incall(x);
            return result;
        }
        0% Loading or .
        You are about to add 0 people to the discussion. Proceed with caution.
        Finish editing this message first!
        Please register or to comment