From a586b33f8e8ad60b5c5ef3501c89e9b71794bbed Mon Sep 17 00:00:00 2001
From: Matthew Pickering <matthewtpickering@gmail.com>
Date: Thu, 6 Jun 2019 14:03:50 +0100
Subject: [PATCH] rts: Correct handling of LARGE ARR_WORDS in LDV profiler

This implements the correct fix for #11627 by skipping over the slop
(which is zeroed) rather than adding special case logic for LARGE
ARR_WORDS which runs the risk of not performing a correct census by
ignoring any subsequent blocks.

This approach implements similar logic to that in Sanity.c
---
 includes/Rts.h                       |  8 ++++++++
 includes/rts/storage/ClosureMacros.h | 16 ++++++++++------
 rts/ProfHeap.c                       | 15 ++-------------
 3 files changed, 20 insertions(+), 19 deletions(-)

diff --git a/includes/Rts.h b/includes/Rts.h
index f1f8351298c4..dd60726c39e2 100644
--- a/includes/Rts.h
+++ b/includes/Rts.h
@@ -147,6 +147,14 @@ void _assertFail(const char *filename, unsigned int linenum)
 #define USED_IF_NOT_THREADS
 #endif
 
+#if defined(PROFILING)
+#define USED_IF_PROFILING
+#define USED_IF_NOT_PROFILING STG_UNUSED
+#else
+#define USED_IF_PROFILING STG_UNUSED
+#define USED_IF_NOT_PROFILING
+#endif
+
 #define FMT_SizeT    "zu"
 #define FMT_HexSizeT "zx"
 
diff --git a/includes/rts/storage/ClosureMacros.h b/includes/rts/storage/ClosureMacros.h
index 7a2c5dab8099..478ba1a5d719 100644
--- a/includes/rts/storage/ClosureMacros.h
+++ b/includes/rts/storage/ClosureMacros.h
@@ -542,8 +542,10 @@ void LDV_recordDead (const StgClosure *c, uint32_t size);
 
 EXTERN_INLINE void overwritingClosure_ (StgClosure *p,
                                         uint32_t offset /* in words */,
-                                        uint32_t size /* closure size, in words */);
-EXTERN_INLINE void overwritingClosure_ (StgClosure *p, uint32_t offset, uint32_t size)
+                                        uint32_t size /* closure size, in words */,
+                                        bool prim /* Whether to call LDV_recordDead */
+                                        );
+EXTERN_INLINE void overwritingClosure_ (StgClosure *p, uint32_t offset, uint32_t size, bool prim USED_IF_PROFILING)
 {
 #if ZERO_SLOP_FOR_LDV_PROF && !ZERO_SLOP_FOR_SANITY_CHECK
     // see Note [zeroing slop], also #8402
@@ -552,7 +554,7 @@ EXTERN_INLINE void overwritingClosure_ (StgClosure *p, uint32_t offset, uint32_t
 
     // For LDV profiling, we need to record the closure as dead
 #if defined(PROFILING)
-    LDV_recordDead(p, size);
+    if (!prim) { LDV_recordDead(p, size); };
 #endif
 
     for (uint32_t i = offset; i < size; i++) {
@@ -563,7 +565,7 @@ EXTERN_INLINE void overwritingClosure_ (StgClosure *p, uint32_t offset, uint32_t
 EXTERN_INLINE void overwritingClosure (StgClosure *p);
 EXTERN_INLINE void overwritingClosure (StgClosure *p)
 {
-    overwritingClosure_(p, sizeofW(StgThunkHeader), closure_sizeW(p));
+    overwritingClosure_(p, sizeofW(StgThunkHeader), closure_sizeW(p), false);
 }
 
 // Version of 'overwritingClosure' which overwrites only a suffix of a
@@ -576,12 +578,14 @@ EXTERN_INLINE void overwritingClosure (StgClosure *p)
 EXTERN_INLINE void overwritingClosureOfs (StgClosure *p, uint32_t offset);
 EXTERN_INLINE void overwritingClosureOfs (StgClosure *p, uint32_t offset)
 {
-    overwritingClosure_(p, offset, closure_sizeW(p));
+    // Set prim = true because only called on ARR_WORDS with the
+    // shrinkMutableByteArray# primop
+    overwritingClosure_(p, offset, closure_sizeW(p), true);
 }
 
 // Version of 'overwritingClosure' which takes closure size as argument.
 EXTERN_INLINE void overwritingClosureSize (StgClosure *p, uint32_t size /* in words */);
 EXTERN_INLINE void overwritingClosureSize (StgClosure *p, uint32_t size)
 {
-    overwritingClosure_(p, sizeofW(StgThunkHeader), size);
+    overwritingClosure_(p, sizeofW(StgThunkHeader), size, false);
 }
diff --git a/rts/ProfHeap.c b/rts/ProfHeap.c
index 6fbfb6ea8848..155e4d55a524 100644
--- a/rts/ProfHeap.c
+++ b/rts/ProfHeap.c
@@ -1010,19 +1010,6 @@ heapCensusChain( Census *census, bdescr *bd )
 
         p = bd->start;
 
-        // When we shrink a large ARR_WORDS, we do not adjust the free pointer
-        // of the associated block descriptor, thus introducing slop at the end
-        // of the object.  This slop remains after GC, violating the assumption
-        // of the loop below that all slop has been eliminated (#11627).
-        // Consequently, we handle large ARR_WORDS objects as a special case.
-        if (bd->flags & BF_LARGE
-            && get_itbl((StgClosure *)p)->type == ARR_WORDS) {
-            size = arr_words_sizeW((StgArrBytes *)p);
-            prim = true;
-            heapProfObject(census, (StgClosure *)p, size, prim);
-            continue;
-        }
-
         while (p < bd->free) {
             info = get_itbl((const StgClosure *)p);
             prim = false;
@@ -1172,6 +1159,8 @@ heapCensusChain( Census *census, bdescr *bd )
             heapProfObject(census,(StgClosure*)p,size,prim);
 
             p += size;
+            /* skip over slop */
+            while (p < bd->free && !*p) p++; // skip slop
         }
     }
 }
-- 
GitLab