diff --git a/includes/Rts.h b/includes/Rts.h
index f1f8351298c4deca702c9a69a0c5daacbc7f3a23..dd60726c39e2dcb26afff5b4a8012500c4cdd15e 100644
--- a/includes/Rts.h
+++ b/includes/Rts.h
@@ -147,6 +147,14 @@ void _assertFail(const char *filename, unsigned int linenum)
 #define USED_IF_NOT_THREADS
 #endif
 
+#if defined(PROFILING)
+#define USED_IF_PROFILING
+#define USED_IF_NOT_PROFILING STG_UNUSED
+#else
+#define USED_IF_PROFILING STG_UNUSED
+#define USED_IF_NOT_PROFILING
+#endif
+
 #define FMT_SizeT    "zu"
 #define FMT_HexSizeT "zx"
 
diff --git a/includes/rts/storage/ClosureMacros.h b/includes/rts/storage/ClosureMacros.h
index 7a2c5dab80992658ef326344c617a87dd3b84890..478ba1a5d719166710793561d8d8ea064cee4be3 100644
--- a/includes/rts/storage/ClosureMacros.h
+++ b/includes/rts/storage/ClosureMacros.h
@@ -542,8 +542,10 @@ void LDV_recordDead (const StgClosure *c, uint32_t size);
 
 EXTERN_INLINE void overwritingClosure_ (StgClosure *p,
                                         uint32_t offset /* in words */,
-                                        uint32_t size /* closure size, in words */);
-EXTERN_INLINE void overwritingClosure_ (StgClosure *p, uint32_t offset, uint32_t size)
+                                        uint32_t size /* closure size, in words */,
+                                        bool prim /* Whether to call LDV_recordDead */
+                                        );
+EXTERN_INLINE void overwritingClosure_ (StgClosure *p, uint32_t offset, uint32_t size, bool prim USED_IF_PROFILING)
 {
 #if ZERO_SLOP_FOR_LDV_PROF && !ZERO_SLOP_FOR_SANITY_CHECK
     // see Note [zeroing slop], also #8402
@@ -552,7 +554,7 @@ EXTERN_INLINE void overwritingClosure_ (StgClosure *p, uint32_t offset, uint32_t
 
     // For LDV profiling, we need to record the closure as dead
 #if defined(PROFILING)
-    LDV_recordDead(p, size);
+    if (!prim) { LDV_recordDead(p, size); };
 #endif
 
     for (uint32_t i = offset; i < size; i++) {
@@ -563,7 +565,7 @@ EXTERN_INLINE void overwritingClosure_ (StgClosure *p, uint32_t offset, uint32_t
 EXTERN_INLINE void overwritingClosure (StgClosure *p);
 EXTERN_INLINE void overwritingClosure (StgClosure *p)
 {
-    overwritingClosure_(p, sizeofW(StgThunkHeader), closure_sizeW(p));
+    overwritingClosure_(p, sizeofW(StgThunkHeader), closure_sizeW(p), false);
 }
 
 // Version of 'overwritingClosure' which overwrites only a suffix of a
@@ -576,12 +578,14 @@ EXTERN_INLINE void overwritingClosure (StgClosure *p)
 EXTERN_INLINE void overwritingClosureOfs (StgClosure *p, uint32_t offset);
 EXTERN_INLINE void overwritingClosureOfs (StgClosure *p, uint32_t offset)
 {
-    overwritingClosure_(p, offset, closure_sizeW(p));
+    // Set prim = true because only called on ARR_WORDS with the
+    // shrinkMutableByteArray# primop
+    overwritingClosure_(p, offset, closure_sizeW(p), true);
 }
 
 // Version of 'overwritingClosure' which takes closure size as argument.
 EXTERN_INLINE void overwritingClosureSize (StgClosure *p, uint32_t size /* in words */);
 EXTERN_INLINE void overwritingClosureSize (StgClosure *p, uint32_t size)
 {
-    overwritingClosure_(p, sizeofW(StgThunkHeader), size);
+    overwritingClosure_(p, sizeofW(StgThunkHeader), size, false);
 }
diff --git a/rts/ProfHeap.c b/rts/ProfHeap.c
index 6fbfb6ea8848710e027cd1647fca3c1c330f812f..155e4d55a5248568d2015ada3cd9869a6d99f312 100644
--- a/rts/ProfHeap.c
+++ b/rts/ProfHeap.c
@@ -1010,19 +1010,6 @@ heapCensusChain( Census *census, bdescr *bd )
 
         p = bd->start;
 
-        // When we shrink a large ARR_WORDS, we do not adjust the free pointer
-        // of the associated block descriptor, thus introducing slop at the end
-        // of the object.  This slop remains after GC, violating the assumption
-        // of the loop below that all slop has been eliminated (#11627).
-        // Consequently, we handle large ARR_WORDS objects as a special case.
-        if (bd->flags & BF_LARGE
-            && get_itbl((StgClosure *)p)->type == ARR_WORDS) {
-            size = arr_words_sizeW((StgArrBytes *)p);
-            prim = true;
-            heapProfObject(census, (StgClosure *)p, size, prim);
-            continue;
-        }
-
         while (p < bd->free) {
             info = get_itbl((const StgClosure *)p);
             prim = false;
@@ -1172,6 +1159,8 @@ heapCensusChain( Census *census, bdescr *bd )
             heapProfObject(census,(StgClosure*)p,size,prim);
 
             p += size;
+            /* skip over slop */
+            while (p < bd->free && !*p) p++; // skip slop
         }
     }
 }