From 5e49a9d237e6b8f8077964a23fc1b565d911da3e Mon Sep 17 00:00:00 2001
From: Ben Gamari <ben@smart-cactus.org>
Date: Fri, 30 Nov 2018 12:21:36 -0500
Subject: [PATCH] Turn bdescr.free into an offset

This allows us to shrink it to 32 bits, increasing the density of bdescr.
---
 compiler/GHC/StgToCmm/Foreign.hs | 74 +++++++++++++++++++-------------
 includes/Cmm.h                   |  6 +--
 includes/rts/storage/Block.h     | 26 +++++------
 rts/Arena.c                      |  2 +-
 rts/Capability.h                 |  5 ++-
 rts/Compact.cmm                  |  6 +--
 rts/HeapStackCheck.cmm           |  5 +--
 rts/Printer.c                    |  2 +-
 rts/Schedule.c                   |  2 +-
 rts/Stats.c                      |  2 +-
 rts/sm/BlockAlloc.c              | 31 ++++++-------
 rts/sm/CNF.c                     | 18 ++++----
 rts/sm/GC.c                      | 12 +++---
 rts/sm/GCUtils.c                 |  4 +-
 rts/sm/GCUtils.h                 |  7 +--
 rts/sm/Storage.c                 | 26 +++++------
 rts/sm/Storage.h                 |  4 +-
 utils/deriveConstants/Main.hs    |  2 +-
 18 files changed, 125 insertions(+), 109 deletions(-)

diff --git a/compiler/GHC/StgToCmm/Foreign.hs b/compiler/GHC/StgToCmm/Foreign.hs
index 389e60e35c47..09c0ab59ac5d 100644
--- a/compiler/GHC/StgToCmm/Foreign.hs
+++ b/compiler/GHC/StgToCmm/Foreign.hs
@@ -327,33 +327,36 @@ Closing the nursery corresponds to the following code:
   // check to see whether it has overflowed at this point, that check is
   // made when we run out of space in the current heap block (stg_gc_noregs)
   // and in the scheduler when context switching (schedulePostRunThread).
-  tso->alloc_limit -= Hp + WDS(1) - cn->start;
+  tso->alloc_limit -= Hp + WDS(1) - bdescr_start(cn);
 
-  // Set cn->free to the next unoccupied word in the block
-  cn->free = Hp + WDS(1);
+  // Set cn->free_off to the next unoccupied word in the block
+  cn->free_off = Hp - bdescr_start(cn) + WDS(1);
 @
 -}
 closeNursery :: MonadUnique m => DynFlags -> LocalReg -> m CmmAGraph
 closeNursery df tso = do
   let tsoreg  = CmmLocal tso
   cnreg      <- CmmLocal <$> newTemp (bWord df)
+  startreg   <- CmmLocal <$> newTemp (bWord df)
+
+  -- alloc = (Hp + WDS(1)) - bdescr_start(CurrentNursery);
+  let alloc =
+          cmmSubWord df
+            (cmmOffsetW df hpExpr 1)
+            (CmmReg startreg)
+
   pure $ catAGraphs [
     mkAssign cnreg currentNurseryExpr,
 
-    -- CurrentNursery->free = Hp+1;
-    mkStore (nursery_bdescr_free df cnreg) (cmmOffsetW df hpExpr 1),
+    mkAssign startreg (nursery_bdescr_start df cnreg),
 
-    let alloc =
-           CmmMachOp (mo_wordSub df)
-              [ cmmOffsetW df hpExpr 1
-              , nursery_bdescr_start df cnreg
-              ]
-
-        alloc_limit = cmmOffset df (CmmReg tsoreg) (tso_alloc_limit df)
-    in
+    -- CurrentNursery->free_off = (StgWord32) alloc;
+    mkStore (nursery_bdescr_free_off df cnreg)
+      (CmmMachOp (mo_WordTo32 df) [alloc]),
 
     -- tso->alloc_limit += alloc
-    mkStore alloc_limit (CmmMachOp (MO_Sub W64)
+    let alloc_limit = cmmOffset df (CmmReg tsoreg) (tso_alloc_limit df)
+    in mkStore alloc_limit (CmmMachOp (MO_Sub W64)
                                [ CmmLoad alloc_limit b64
                                , CmmMachOp (mo_WordTo64 df) [alloc] ])
    ]
@@ -410,19 +413,19 @@ Opening the nursery corresponds to the following code:
 @
    tso = CurrentTSO;
    cn = CurrentNursery;
-   bdfree = CurrentNursery->free;
-   bdstart = CurrentNursery->start;
+   bdfree_off = bdescr_free_off(CurrentNursery);
+   bdstart = bdescr_start(CurrentNursery);
 
    // We *add* the currently occupied portion of the nursery block to
    // the allocation limit, because we will subtract it again in
    // closeNursery.
-   tso->alloc_limit += bdfree - bdstart;
+   tso->alloc_limit += bdfree_off;
 
    // Set Hp to the last occupied word of the heap block.  Why not the
    // next unocupied word?  Doing it this way means that we get to use
    // an offset of zero more often, which might lead to slightly smaller
    // code on some architectures.
-   Hp = bdfree - WDS(1);
+   Hp = bdstart + bdfree_off - WDS(1);
 
    // Set HpLim to the end of the current nursery block (note that this block
    // might be a block group, consisting of several adjacent blocks.
@@ -443,15 +446,19 @@ openNursery df tso = do
   pure $ catAGraphs [
      mkAssign cnreg currentNurseryExpr,
 
-     -- free = CurrentNursery->free
-     mkAssign bdfreereg  (CmmLoad (nursery_bdescr_free df cnreg)  (bWord df)),
-
-     -- Hp = CurrentNursery->free - 1;
-     mkAssign hpReg (cmmOffsetW df (CmmReg bdfreereg) (-1)),
+     -- free = CurrentNursery->free_off;
+     mkAssign bdfreereg
+       (CmmMachOp (MO_UU_Conv W32 (wordWidth df))
+                  [CmmLoad (nursery_bdescr_free_off df cnreg) b32]),
 
      -- start = bdescr_start(CurrentNursery)
      mkAssign bdstartreg (nursery_bdescr_start df cnreg),
 
+     -- Hp = CurrentNursery->free - WDS(1);
+     let hp = CmmMachOp (mo_wordAdd df)
+                [CmmReg bdstartreg, CmmReg bdfreereg]
+     in mkAssign hpReg (cmmOffsetW df hp (-1)),
+
      -- HpLim = CurrentNursery->start +
      --              CurrentNursery->blocks*BLOCK_SIZE_W - 1;
      mkAssign hpLimReg
@@ -467,24 +474,26 @@ openNursery df tso = do
              )
          ),
 
-     -- alloc = bd->free - start
-     let alloc =
-           CmmMachOp (mo_wordSub df) [CmmReg bdfreereg, CmmReg bdstartreg]
+     -- alloc = free_off;
+     let alloc = CmmReg bdfreereg
 
          alloc_limit = cmmOffset df (CmmReg tsoreg) (tso_alloc_limit df)
      in
 
-     -- tso->alloc_limit += alloc
+     -- tso->alloc_limit += alloc;
      mkStore alloc_limit (CmmMachOp (MO_Add W64)
                                [ CmmLoad alloc_limit b64
                                , CmmMachOp (mo_WordTo64 df) [alloc] ])
 
    ]
 
-nursery_bdescr_free, nursery_bdescr_start, nursery_bdescr_blocks
-  :: DynFlags -> CmmReg -> CmmExpr
-nursery_bdescr_free   dflags cn =
-  cmmOffset dflags (CmmReg cn) (oFFSET_bdescr_free dflags)
+-- | The address of the @free_off@ field of a nursery block's @bdescr@.
+nursery_bdescr_free_off :: DynFlags -> CmmReg -> CmmExpr
+nursery_bdescr_free_off dflags cn =
+  cmmOffset dflags (CmmReg cn) (oFFSET_bdescr_free_off dflags)
+
+-- | The address of the start of a nursery block.
+nursery_bdescr_start :: DynFlags -> CmmReg -> CmmExpr
 nursery_bdescr_start  dflags cn =
   ((bd `mkAnd` intLit (mBLOCK_MASK dflags))
    `mkShl` intLit (bLOCK_SHIFT dflags - bDESCR_SHIFT dflags))
@@ -497,6 +506,9 @@ nursery_bdescr_start  dflags cn =
     mkOr x y  = CmmMachOp (MO_Or (wordWidth dflags))  [x, y]
     mkAnd x y = CmmMachOp (MO_And (wordWidth dflags)) [x, y]
     mkShl x y = CmmMachOp (MO_Shl (wordWidth dflags)) [x, y]
+
+-- | The address of the @blocks@ field of a nursery block's block descriptor
+nursery_bdescr_blocks :: DynFlags -> CmmReg -> CmmExpr
 nursery_bdescr_blocks dflags cn =
   cmmOffset dflags (CmmReg cn) (oFFSET_bdescr_blocks dflags)
 
diff --git a/includes/Cmm.h b/includes/Cmm.h
index 546e81e8f6b7..4f60b9d418c0 100644
--- a/includes/Cmm.h
+++ b/includes/Cmm.h
@@ -820,8 +820,8 @@
   W_ __bd;                                                              \
   W_ mut_list;                                                          \
   mut_list = Capability_mut_lists(MyCapability()) + WDS(gen);           \
- __bd = W_[mut_list];                                                   \
-  if (bdescr_free(__bd) >= bdescr_start(__bd) + BLOCK_SIZE) {           \
+  __bd = W_[mut_list];                                                  \
+  if (TO_W_(bdescr_free_off(__bd)) >= BLOCK_SIZE) {                     \
       W_ __new_bd;                                                      \
       ("ptr" __new_bd) = foreign "C" allocBlock_lock();                 \
       bdescr_link(__new_bd) = __bd;                                     \
@@ -831,7 +831,7 @@
   W_ free;                                                              \
   free = bdescr_free(__bd);                                             \
   W_[free] = p;                                                         \
-  bdescr_free(__bd) = free + WDS(1);
+  bdescr_free_off(__bd) = %lobits32(TO_W_(bdescr_free_off(__bd)) + WDS(1));
 
 #define recordMutable(p)                                        \
       P_ __p;                                                   \
diff --git a/includes/rts/storage/Block.h b/includes/rts/storage/Block.h
index 3787ec12650a..da7a3b57db38 100644
--- a/includes/rts/storage/Block.h
+++ b/includes/rts/storage/Block.h
@@ -87,17 +87,14 @@
 typedef struct bdescr_ {
 
     union {
-        StgPtr free;               // First free byte of memory.
-                                   // allocGroup() sets this to the value of start.
-                                   // NB. during use this value should lie
-                                   // between start and start + blocks *
-                                   // BLOCK_SIZE.  Values outside this
-                                   // range are reserved for use by the
-                                   // block allocator.  In particular, the
-                                   // value (StgPtr)(-1) is used to
-                                   // indicate that a block is unallocated.
-                                   //
-                                   // Unused by the non-moving allocator.
+        StgWord32 free_off;    // Offset to the first free byte of memory.
+                               // allocGroup() sets this to the value of start.
+                               // NB. during use this value should lie
+                               // between 0 and blocks * BLOCK_SIZE. Values
+                               // outside this range are reserved for use by the
+                               // block allocator. In particular, the value
+                               // (StgPtr)(-1) is used to indicate that a block
+                               // is unallocated.
         struct NonmovingSegmentInfo {
             StgWord8 log_block_size;
             StgWord16 next_free_snap;
@@ -179,6 +176,9 @@ typedef struct bdescr_ {
     ((((bd) & MBLOCK_MASK) << (BLOCK_SHIFT-BDESCR_SHIFT)) \
      | ((bd) & ~MBLOCK_MASK))
 
+#define bdescr_free(bd)                                   \
+    (bdescr_start(bd) + TO_W_(bdescr_free_off(bd)))
+
 #else
 
 EXTERN_INLINE bdescr *Bdescr(StgPtr p);
@@ -202,13 +202,13 @@ EXTERN_INLINE StgPtr bdescr_start(const bdescr *bd)
 EXTERN_INLINE StgPtr bdescr_free(const bdescr *bd);
 EXTERN_INLINE StgPtr bdescr_free(const bdescr *bd)
 {
-    return bd->free;
+    return ((StgPtr) ((W_) bdescr_start(bd) + bd->free_off));
 }
 
 EXTERN_INLINE void bdescr_set_free(bdescr *bd, void *free);
 EXTERN_INLINE void bdescr_set_free(bdescr *bd, void *free)
 {
-    bd->free = free;
+    bd->free_off = (uint8_t *) free - (uint8_t *) bdescr_start(bd);
 }
 
 #endif
diff --git a/rts/Arena.c b/rts/Arena.c
index 8adfdcb2df81..92315bdf23b6 100644
--- a/rts/Arena.c
+++ b/rts/Arena.c
@@ -87,7 +87,7 @@ arenaAlloc( Arena *arena, size_t size )
         bd->gen_no  = 0;
         bd->dest_no = 0;
         bd->flags   = 0;
-        bd->free    = bdescr_start(bd);
+        bd->free_off= 0;
         bd->link    = arena->current;
         arena->current = bd;
         arena->free = bdescr_start(bd) + size_w;
diff --git a/rts/Capability.h b/rts/Capability.h
index 22c102d3932b..7c223cd6cdcb 100644
--- a/rts/Capability.h
+++ b/rts/Capability.h
@@ -415,14 +415,15 @@ recordMutableCap (const StgClosure *p, Capability *cap, uint32_t gen)
     //    ASSERT(cap->running_task == myTask());
     // NO: assertion is violated by performPendingThrowTos()
     bd = cap->mut_lists[gen];
-    if (bdescr_free(bd) >= bdescr_start(bd) + BLOCK_SIZE_W) {
+    if (bd->free_off >= BLOCK_SIZE_W) {
         bdescr *new_bd;
         new_bd = allocBlockOnNode_lock(cap->node);
         new_bd->link = bd;
         bd = new_bd;
         cap->mut_lists[gen] = bd;
     }
-    *bd->free++ = (StgWord)p;
+    *bdescr_free(bd) = (StgWord)p;
+    bd->free_off += sizeof(W_);
 }
 
 EXTERN_INLINE void
diff --git a/rts/Compact.cmm b/rts/Compact.cmm
index bae94a03cd8c..fc1e20bf3a81 100644
--- a/rts/Compact.cmm
+++ b/rts/Compact.cmm
@@ -386,10 +386,10 @@ stg_compactGetFirstBlockzh ( P_ str )
     // We have to save Hp back to the nursery, otherwise the size will
     // be wrong.
     bd = Bdescr(StgCompactNFData_nursery(str));
-    bdescr_free(bd) = StgCompactNFData_hp(str);
+    bdescr_free_off(bd) = StgCompactNFData_hp(str) - bdescr_start(bd);
 
     bd = Bdescr(str);
-    size = bdescr_free(bd) - bdescr_start(bd);
+    size = TO_W_(bdescr_free_off(bd));
     ASSERT(size <= TO_W_(bdescr_blocks(bd)) * BLOCK_SIZE);
 
     return (block, size);
@@ -423,7 +423,7 @@ stg_compactGetNextBlockzh ( P_ str, W_ block )
             StgCompactNFDataBlock_owner(next_block) == NULL);
 
     bd = Bdescr(next_block);
-    size = bdescr_free(bd) - bdescr_start(bd);
+    size = TO_W_(bdescr_free_off(bd));
     ASSERT(size <= TO_W_(bdescr_blocks(bd)) * BLOCK_SIZE);
 
     return (next_block, size);
diff --git a/rts/HeapStackCheck.cmm b/rts/HeapStackCheck.cmm
index 85fb1cbef679..5fab352cf71f 100644
--- a/rts/HeapStackCheck.cmm
+++ b/rts/HeapStackCheck.cmm
@@ -100,10 +100,9 @@ stg_gc_noregs
             CLOSE_NURSERY();
             Capability_total_allocated(MyCapability()) =
               Capability_total_allocated(MyCapability()) +
-              BYTES_TO_WDS(bdescr_free(CurrentNursery) -
-                           bdescr_start(CurrentNursery));
+              BYTES_TO_WDS(TO_W_(bdescr_free_off(CurrentNursery)));
             CurrentNursery = bdescr_link(CurrentNursery);
-            bdescr_free(CurrentNursery) = bdescr_start(CurrentNursery);
+            bdescr_free_off(CurrentNursery) = 0;
             OPEN_NURSERY();
             if (Capability_context_switch(MyCapability()) != 0 :: CInt ||
                 Capability_interrupt(MyCapability())      != 0 :: CInt ||
diff --git a/rts/Printer.c b/rts/Printer.c
index 413fca54fe93..f2baaa933410 100644
--- a/rts/Printer.c
+++ b/rts/Printer.c
@@ -906,7 +906,7 @@ findPtr(P_ p, int follow)
 
 #if 0
   // We can't search the nursery, because we don't know which blocks contain
-  // valid data, because the bd->free pointers in the nursery are only reset
+  // valid data, because the bd->free_off fields in the nursery are only reset
   // just before a block is used.
   for (n = 0; n < n_capabilities; n++) {
       bd = nurseries[i].blocks;
diff --git a/rts/Schedule.c b/rts/Schedule.c
index fc081a6c8f3d..2556db2caff9 100644
--- a/rts/Schedule.c
+++ b/rts/Schedule.c
@@ -1173,7 +1173,7 @@ scheduleHandleHeapOverflow( Capability *cap, StgTSO *t )
                 bdescr *x;
                 for (x = bd; x < bd + blocks; x++) {
                     initBdescr(x,g0,g0);
-                    x->free = bdescr_start(x);
+                    x->free_off = 0;
                     x->flags = 0;
                 }
             }
diff --git a/rts/Stats.c b/rts/Stats.c
index 79474d3fc856..ca8a074b42b0 100644
--- a/rts/Stats.c
+++ b/rts/Stats.c
@@ -1453,7 +1453,7 @@ statDescribeGens(void)
           // Add the pinned object block.
           bd = capabilities[i]->pinned_object_block;
           if (bd != NULL) {
-              gen_live   += bd->free - bdescr_start(bd);
+              gen_live   += bd->free_off / sizeof(W_);
               gen_blocks += bd->blocks;
           }
 
diff --git a/rts/sm/BlockAlloc.c b/rts/sm/BlockAlloc.c
index 37a517f285fa..9ca20173a436 100644
--- a/rts/sm/BlockAlloc.c
+++ b/rts/sm/BlockAlloc.c
@@ -42,10 +42,10 @@ static void  initMBlock(void *mblock, uint32_t node);
    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    bdescr_start(bd) always points to the start of the block.
 
-   bd->free is either:
+   bd->free_off is either:
       - zero for a non-group-head; bd->link points to the head
       - (-1) for the head of a free block group
-      - or it points within the block (group)
+      - or it is an offset within the block (group)
 
    bd->blocks is either:
       - zero for a non-group-head; bd->link points to the head
@@ -218,8 +218,8 @@ tail_of (bdescr *bd)
 STATIC_INLINE void
 initGroup(bdescr *head)
 {
-  head->free   = bdescr_start(head);
-  head->link   = NULL;
+  head->free_off = 0;
+  head->link     = NULL;
 
   // If this is a block group (but not a megablock group), we
   // make the last block of the group point to the head.  This is used
@@ -300,7 +300,7 @@ setup_tail (bdescr *bd)
     tail = tail_of(bd);
     if (tail != bd) {
         tail->blocks = 0;
-        tail->free = 0;
+        tail->free_off = 0;
         tail->link = bd;
     }
 }
@@ -332,7 +332,7 @@ split_block_high (bdescr *bd, W_ n)
 
     bdescr* ret = bd + bd->blocks - n; // take n blocks off the end
     ret->blocks = n;
-    ret->free = bdescr_start(bd) + (bd->blocks - n)*BLOCK_SIZE_W;
+    bdescr_set_free(ret, bdescr_start(bd) + (bd->blocks - n)*BLOCK_SIZE_W);
     ret->link = NULL;
 
     bd->blocks -= n;
@@ -353,7 +353,7 @@ split_block_low (bdescr *bd, W_ n)
 
     bdescr* bd_ = bd + n;
     bd_->blocks = bd->blocks - n;
-    bd_->free = bdescr_start(bd) + (bd->blocks - n)*BLOCK_SIZE_W;
+    bdescr_set_free(bd_,  bdescr_start(bd) + (bd->blocks - n)*BLOCK_SIZE_W);
 
     bd->blocks = n;
 
@@ -786,11 +786,11 @@ freeGroup(bdescr *p)
   // not true in multithreaded GC:
   // ASSERT_SM_LOCK();
 
-  ASSERT(p->free != (P_)-1);
+  ASSERT(p->free_off != (StgWord32) -1);
 
   node = p->node;
 
-  p->free = (void *)-1;  /* indicates that this block is free */
+  p->free_off = (StgWord32) -1;  /* indicates that this block is free */
   p->gen_no = 0;
   /* fill the block group with garbage if sanity checking is on */
   IF_DEBUG(zero_on_gc, memset(bdescr_start(p), 0xaa, (W_)p->blocks * BLOCK_SIZE));
@@ -817,7 +817,8 @@ freeGroup(bdescr *p)
   {
       bdescr *next;
       next = p + p->blocks;
-      if (next <= LAST_BDESCR(MBLOCK_ROUND_DOWN(p)) && next->free == (P_)-1)
+      if (next <= LAST_BDESCR(MBLOCK_ROUND_DOWN(p))
+          && next->free_off == (StgWord32) -1)
       {
           p->blocks += next->blocks;
           ln = log_2(next->blocks);
@@ -838,7 +839,7 @@ freeGroup(bdescr *p)
       prev = p - 1;
       if (prev->blocks == 0) prev = prev->link; // find the head
 
-      if (prev->free == (P_)-1)
+      if (prev->free_off == (StgWord32) -1)
       {
           ln = log_2(prev->blocks);
           dbl_link_remove(prev, &free_list[node][ln]);
@@ -994,7 +995,7 @@ check_tail (bdescr *bd)
     if (tail != bd)
     {
         ASSERT(tail->blocks == 0);
-        ASSERT(tail->free == 0);
+        ASSERT(tail->free_off == 0);
         ASSERT(tail->link == bd);
     }
 }
@@ -1018,7 +1019,7 @@ checkFreeListSanity(void)
                 IF_DEBUG(block_alloc,
                          debugBelch("group at %p, length %ld blocks\n",
                                     bdescr_start(bd), (long)bd->blocks));
-                ASSERT(bd->free == (P_)-1);
+                ASSERT(bd->free_off == (StgWord32) -1);
                 ASSERT(bd->blocks > 0 && bd->blocks < BLOCKS_PER_MBLOCK);
                 ASSERT(bd->blocks >= min && bd->blocks <= (min*2 - 1));
                 ASSERT(bd->link != bd); // catch easy loops
@@ -1036,7 +1037,7 @@ checkFreeListSanity(void)
                     next = bd + bd->blocks;
                     if (next <= LAST_BDESCR(MBLOCK_ROUND_DOWN(bd)))
                     {
-                        ASSERT(next->free != (P_)-1);
+                        ASSERT(next->free_off != (StgWord32) -1);
                     }
                 }
             }
@@ -1117,7 +1118,7 @@ reportUnmarkedBlocks (void)
     for (mblock = getFirstMBlock(&state); mblock != NULL;
          mblock = getNextMBlock(&state, mblock)) {
         for (bd = FIRST_BDESCR(mblock); bd <= LAST_BDESCR(mblock); ) {
-            if (!(bd->flags & BF_KNOWN) && bd->free != (P_)-1) {
+            if (!(bd->flags & BF_KNOWN) && bd->free_off != (StgWord32) -1) {
                 debugBelch("  %p\n",bd);
             }
             if (bd->blocks >= BLOCKS_PER_MBLOCK) {
diff --git a/rts/sm/CNF.c b/rts/sm/CNF.c
index e105aff8b09e..2c160be44b23 100644
--- a/rts/sm/CNF.c
+++ b/rts/sm/CNF.c
@@ -442,8 +442,8 @@ compactResize (Capability *cap, StgCompactNFData *str, StgWord new_size)
 STATIC_INLINE bool
 has_room_for  (bdescr *bd, StgWord sizeW)
 {
-    return (bd->free < bdescr_start(bd) + BLOCK_SIZE_W * BLOCKS_PER_MBLOCK
-            && bd->free + sizeW <= bdescr_start(bd) + BLOCK_SIZE_W * bd->blocks);
+    return (bd->free_off < BLOCK_SIZE * BLOCKS_PER_MBLOCK
+            && bd->free_off + sizeW * sizeof(StgWord) <= BLOCK_SIZE * bd->blocks);
 }
 
 static bool
@@ -494,8 +494,8 @@ allocateForCompact (Capability *cap,
         next_size = BLOCK_ROUND_UP(sizeW*sizeof(W_) + sizeof(StgCompactNFDataBlock));
         block = compactAppendBlock(cap, str, next_size);
         bd = Bdescr((P_)block);
-        to = bd->free;
-        bd->free += sizeW;
+        to = bdescr_free(bd);
+        bd->free_off += sizeW * sizeof(StgWord);
         return to;
     }
 
@@ -519,8 +519,8 @@ allocateForCompact (Capability *cap,
     for (block = str->nursery->next; block != NULL; block = block->next) {
         bd = Bdescr((P_)block);
         if (has_room_for(bd,sizeW)) {
-            to = bd->free;
-            bd->free += sizeW;
+            to = bdescr_free(bd);
+            bd->free_off += sizeW * sizeof(StgWord);
             return to;
         }
     }
@@ -532,8 +532,8 @@ allocateForCompact (Capability *cap,
 
     block = compactAppendBlock(cap, str, next_size);
     bd = Bdescr((P_)block);
-    to = bd->free;
-    bd->free += sizeW;
+    to = bdescr_free(bd);
+    bd->free_off += sizeW * sizeof(StgWord);
     return to;
 }
 
@@ -813,7 +813,7 @@ spew_failing_pointer(StgWord *fixup_table, uint32_t count, StgWord address)
 
         block = (StgCompactNFDataBlock*)value;
         bd = Bdescr((P_)block);
-        size = (W_)bd->free - (W_)bdescr_start(bd);
+        size = bd->free_off;
 
         debugBelch("%" FMT_Word32 ": was 0x%" FMT_HexWord "-0x%" FMT_HexWord
                    ", now 0x%" FMT_HexWord "-0x%" FMT_HexWord "\n", i, key,
diff --git a/rts/sm/GC.c b/rts/sm/GC.c
index 79f827754a13..fe26c4dc5854 100644
--- a/rts/sm/GC.c
+++ b/rts/sm/GC.c
@@ -620,7 +620,7 @@ GarbageCollect (uint32_t collect_gen,
                     }
                     else
                     {
-                        gen->n_words += bdescr_free(bd) - bdescr_start(bd);
+                        gen->n_words += bd->free_off / sizeof(StgWord);
 
                         // NB. this step might not be compacted next
                         // time, so reset the BF_MARKED flags.
@@ -691,7 +691,7 @@ GarbageCollect (uint32_t collect_gen,
         for (bd = gen->scavenged_large_objects; bd; bd = next) {
             next = bd->link;
             dbl_link_onto(bd, &gen->large_objects);
-            gen->n_large_words += bdescr_free(bd) - bdescr_start(bd);
+            gen->n_large_words += bd->free_off / sizeof(StgWord);
         }
 
         // And same for compacts
@@ -996,14 +996,16 @@ new_gc_thread (uint32_t n, gc_thread *t)
         // Hence, allocate a block for todo_bd manually:
         {
             bdescr *bd = allocBlockOnNode(capNoToNumaNode(n));
+            const StgPtr start = bdescr_start(bd);
                 // no lock, locks aren't initialised yet
             initBdescr(bd, ws->gen, ws->gen->to);
             bd->flags = BF_EVACUATED;
-            bd->u.scan = bd->free = bdescr_start(bd);
+            bd->u.scan = start;
+            bd->free_off = 0;
 
             ws->todo_bd = bd;
             ws->todo_free = bdescr_free(bd);
-            ws->todo_lim = bdescr_start(bd) + BLOCK_SIZE_W;
+            ws->todo_lim = start + BLOCK_SIZE_W;
         }
 
         ws->todo_q = newWSDeque(128);
@@ -1643,7 +1645,7 @@ collect_pinned_object_blocks (void)
             for (bdescr *bd = capabilities[n]->pinned_object_blocks; bd != NULL; bd = bd->link) {
                 bd->flags |= BF_NONMOVING;
                 bd->gen_no = oldest_gen->no;
-                oldest_gen->n_large_words += bdescr_free(bd) - bdescr_start(bd);
+                oldest_gen->n_large_words += bd->free_off / sizeof(StgWord);
                 oldest_gen->n_large_blocks += bd->blocks;
                 last = bd;
             }
diff --git a/rts/sm/GCUtils.c b/rts/sm/GCUtils.c
index 29fd5fe0ac9d..35dae6d279f3 100644
--- a/rts/sm/GCUtils.c
+++ b/rts/sm/GCUtils.c
@@ -63,7 +63,7 @@ allocBlocks_sync(uint32_t n, bdescr **hd)
     for (i = 0; i < n; i++) {
         bd[i].blocks = 1;
         bd[i].link = &bd[i+1];
-        bd[i].free = bdescr_start(&bd[i]);
+        bd[i].free_off = 0;
     }
     bd[n-1].link = NULL;
     // We have to hold the lock until we've finished fiddling with the metadata,
@@ -263,7 +263,7 @@ todo_block_full (uint32_t size, gen_workspace *ws)
         // push it on to the scanned list.
         if (bd->u.scan == bdescr_free(bd))
         {
-            if (bd->free == bdescr_start(bd)) {
+            if (bd->free_off == 0) {
                 // Normally the block would not be empty, because then
                 // there would be enough room to copy the current
                 // object.  However, if the object we're copying is
diff --git a/rts/sm/GCUtils.h b/rts/sm/GCUtils.h
index 40d19c6f9f47..3154ac87d8f2 100644
--- a/rts/sm/GCUtils.h
+++ b/rts/sm/GCUtils.h
@@ -48,7 +48,7 @@ bdescr *steal_todo_block       (uint32_t s);
 INLINE_HEADER bool
 isPartiallyFull(bdescr *bd)
 {
-    return (bd->free + WORK_UNIT_WORDS < bdescr_start(bd) + BLOCK_SIZE_W);
+    return bd->free_off + WORK_UNIT_WORDS * sizeof(StgWord) < BLOCK_SIZE;
 }
 
 // Version of recordMutableGen for use during GC.  This uses the
@@ -60,14 +60,15 @@ recordMutableGen_GC (StgClosure *p, uint32_t gen_no)
     bdescr *bd;
 
     bd = gct->mut_lists[gen_no];
-    if (bd->free >= bdescr_start(bd) + BLOCK_SIZE_W) {
+    if (bd->free_off >= BLOCK_SIZE) {
         bdescr *new_bd;
         new_bd = allocBlock_sync();
         new_bd->link = bd;
         bd = new_bd;
         gct->mut_lists[gen_no] = bd;
     }
-    *bd->free++ = (StgWord)p;
+    *bdescr_free(bd) = (StgWord)p;
+    bd->free_off += sizeof(StgWord);
 }
 
 #include "EndPrivate.h"
diff --git a/rts/sm/Storage.c b/rts/sm/Storage.c
index 92e3adcc0adc..aa5cde1c9f66 100644
--- a/rts/sm/Storage.c
+++ b/rts/sm/Storage.c
@@ -675,7 +675,7 @@ allocNursery (uint32_t node, bdescr *tail, W_ blocks)
                 }
             }
 
-            bd[i].free = bdescr_start(&bd[i]);
+            bd[i].free_off = 0;
         }
 
         tail = &bd[0];
@@ -985,7 +985,7 @@ allocateMightFail (Capability *cap, W_ n)
         RELEASE_SM_LOCK;
         initBdescr(bd, g0, g0);
         bd->flags = BF_LARGE;
-        bd->free = bdescr_start(bd) + n;
+        bd->free_off = n * sizeof(W_);
         cap->total_allocated += n;
         return bdescr_start(bd);
     }
@@ -994,7 +994,7 @@ allocateMightFail (Capability *cap, W_ n)
 
     accountAllocation(cap, n);
     bd = cap->r.rCurrentAlloc;
-    if (RTS_UNLIKELY(bd == NULL || bd->free + n > bdescr_start(bd) + BLOCK_SIZE_W)) {
+    if (RTS_UNLIKELY(bd == NULL || (bd->free_off + n*sizeof(W_)) > BLOCK_SIZE)) {
 
         if (bd) finishedNurseryBlock(cap,bd);
 
@@ -1051,8 +1051,8 @@ allocateMightFail (Capability *cap, W_ n)
         cap->r.rCurrentAlloc = bd;
         IF_DEBUG(sanity, checkNurserySanity(cap->r.rNursery));
     }
-    p = bd->free;
-    bd->free += n;
+    p = bdescr_free(bd);
+    bd->free_off += n * sizeof(W_);
 
     IF_DEBUG(sanity, ASSERT(*((StgWord8*)p) == 0xaa));
     return p;
@@ -1105,7 +1105,7 @@ allocatePinned (Capability *cap, W_ n)
 
     // If we don't have a block of pinned objects yet, or the current
     // one isn't large enough to hold the new object, get a new one.
-    if (bd == NULL || (bd->free + n) > (bdescr_start(bd) + BLOCK_SIZE_W)) {
+    if (bd == NULL || (bd->free_off + n*sizeof(W_)) > BLOCK_SIZE) {
 
         // stash the old block on cap->pinned_object_blocks.  On the
         // next GC cycle these objects will be moved to
@@ -1159,8 +1159,8 @@ allocatePinned (Capability *cap, W_ n)
         // live).
     }
 
-    p = bd->free;
-    bd->free += n;
+    p = bdescr_free(bd);
+    bd->free_off += n * sizeof(W_);
     return p;
 }
 
@@ -1396,8 +1396,8 @@ W_ countOccupied (bdescr *bd)
 
     words = 0;
     for (; bd != NULL; bd = bd->link) {
-        ASSERT(bd->free <= bdescr_start(bd) + bd->blocks * BLOCK_SIZE_W);
-        words += bd->free - bdescr_start(bd);
+        ASSERT(bd->free_off <= bd->blocks * BLOCK_SIZE);
+        words += bd->free_off / sizeof(W_);
     }
     return words;
 }
@@ -1686,7 +1686,7 @@ AdjustorWritable allocateExec (W_ bytes, AdjustorExecutable *exec_ret)
     }
 
     if (exec_block == NULL ||
-        exec_block->free + n + 1 > bdescr_start(exec_block) + BLOCK_SIZE_W) {
+        (exec_block->free_off + (n + 1)*sizeof(W_)) > BLOCK_SIZE) {
         bdescr *bd;
         W_ pagesize = getPageSize();
         bd = allocGroup(stg_max(1, pagesize / BLOCK_SIZE));
@@ -1704,7 +1704,7 @@ AdjustorWritable allocateExec (W_ bytes, AdjustorExecutable *exec_ret)
     *bdescr_free(exec_block) = n;  // store the size of this chunk
     exec_block->gen_no += n;  // gen_no stores the number of words allocated
     ret = bdescr_free(exec_block) + 1;
-    exec_block->free += n + 1;
+    exec_block->free_off += (n + 1) * sizeof(W_);
 
     RELEASE_SM_LOCK
     *exec_ret = ret;
@@ -1738,7 +1738,7 @@ void freeExec (void *addr)
             setExecutable(bdescr_start(bd), bd->blocks * BLOCK_SIZE, false);
             freeGroup(bd);
         } else {
-            bd->free = bdescr_start(bd);
+            bd->free_off = 0;
         }
     }
 
diff --git a/rts/sm/Storage.h b/rts/sm/Storage.h
index 6e4533e9137e..49281c4627af 100644
--- a/rts/sm/Storage.h
+++ b/rts/sm/Storage.h
@@ -87,11 +87,11 @@ bool doYouWantToGC(Capability *cap)
 // allocated in cap->total_allocated.
 //
 INLINE_HEADER void finishedNurseryBlock (Capability *cap, bdescr *bd) {
-    cap->total_allocated += bd->free - bdescr_start(bd);
+    cap->total_allocated += bd->free_off / sizeof(W_);
 }
 
 INLINE_HEADER void newNurseryBlock (bdescr *bd) {
-    bd->free = bdescr_start(bd);
+    bd->free_off = 0;
 }
 
 void    updateNurseriesStats (void);
diff --git a/utils/deriveConstants/Main.hs b/utils/deriveConstants/Main.hs
index e0fa1e62cbdf..3a4205ceb9cc 100644
--- a/utils/deriveConstants/Main.hs
+++ b/utils/deriveConstants/Main.hs
@@ -393,7 +393,7 @@ wanteds os = concat
           ,structField C    "Capability" "weak_ptr_list_hd"
           ,structField C    "Capability" "weak_ptr_list_tl"
 
-          ,structField Both "bdescr" "free"
+          ,structField Both "bdescr" "free_off"
           ,structField Both "bdescr" "blocks"
           ,structField C    "bdescr" "gen_no"
           ,structField C    "bdescr" "link"
-- 
GitLab