diff --git a/includes/rts/storage/TSO.h b/includes/rts/storage/TSO.h
index 06056fe716e0130d5ec45af98127b9a3af27bb20..744ab2b519169054ff8a11831040cb2a6fe2cb46 100644
--- a/includes/rts/storage/TSO.h
+++ b/includes/rts/storage/TSO.h
@@ -155,6 +155,9 @@ typedef struct StgTSO_ {
      * This is an integer, because we might update it in a place where
      * it isn't convenient to raise the exception, so we want it to
      * stay negative until we get around to checking it.
+     *
+     * In C code, get/set alloc_limit only via the PK_Int64/ASSIGN_Int64
+     * macros; otherwise you will cause alignment issues on SPARC.
      */
     StgInt64  alloc_limit;     /* in bytes */
 
diff --git a/rts/Schedule.c b/rts/Schedule.c
index f25b37288d24755dc0749c0a0e78b7ce22021bc4..957aa4b9cb967c50ebc78feb1dce44ffd6b36f76 100644
--- a/rts/Schedule.c
+++ b/rts/Schedule.c
@@ -1086,15 +1086,15 @@ schedulePostRunThread (Capability *cap, StgTSO *t)
     // If the current thread's allocation limit has run out, send it
     // the AllocationLimitExceeded exception.
 
-    if (t->alloc_limit < 0 && (t->flags & TSO_ALLOC_LIMIT)) {
+    if (PK_Int64((W_*)&(t->alloc_limit)) < 0 && (t->flags & TSO_ALLOC_LIMIT)) {
         // Use a throwToSelf rather than a throwToSingleThreaded, because
         // it correctly handles the case where the thread is currently
         // inside mask.  Also the thread might be blocked (e.g. on an
         // MVar), and throwToSingleThreaded doesn't unblock it
         // correctly in that case.
         throwToSelf(cap, t, allocationLimitExceeded_closure);
-        t->alloc_limit = (StgInt64)RtsFlags.GcFlags.allocLimitGrace
-            * BLOCK_SIZE;
+        ASSIGN_Int64((W_*)&(t->alloc_limit),
+                     (StgInt64)RtsFlags.GcFlags.allocLimitGrace * BLOCK_SIZE);
     }
 
   /* some statistics gathering in the parallel case */
diff --git a/rts/Threads.c b/rts/Threads.c
index 90efd9ce4e1db99eeacb23bc191d0d2adb06bd5f..99f2be73048af353881429cc5c17c7b645df51c1 100644
--- a/rts/Threads.c
+++ b/rts/Threads.c
@@ -110,7 +110,7 @@ createThread(Capability *cap, W_ size)
     tso->stackobj       = stack;
     tso->tot_stack_size = stack->stack_size;
 
-    tso->alloc_limit = 0;
+    ASSIGN_Int64((W_*)&(tso->alloc_limit), 0);
 
     tso->trec = NO_TREC;
 
@@ -173,12 +173,12 @@ HsInt64 rts_getThreadAllocationCounter(StgPtr tso)
 {
     // NB. doesn't take into account allocation in the current nursery
     // block, so it might be off by up to 4k.
-    return ((StgTSO *)tso)->alloc_limit;
+    return PK_Int64((W_*)&(((StgTSO *)tso)->alloc_limit));
 }
 
 void rts_setThreadAllocationCounter(StgPtr tso, HsInt64 i)
 {
-    ((StgTSO *)tso)->alloc_limit = i;
+    ASSIGN_Int64((W_*)&(((StgTSO *)tso)->alloc_limit), i);
 }
 
 void rts_enableThreadAllocationLimit(StgPtr tso)
diff --git a/rts/sm/Storage.c b/rts/sm/Storage.c
index f02c00591c10453b4d1f6c76347b850594e4e0fa..50926b70b375bb3565ec69d684369348002d02f4 100644
--- a/rts/sm/Storage.c
+++ b/rts/sm/Storage.c
@@ -746,7 +746,10 @@ StgPtr allocate (Capability *cap, W_ n)
     TICK_ALLOC_HEAP_NOCTR(WDS(n));
     CCS_ALLOC(cap->r.rCCCS,n);
     if (cap->r.rCurrentTSO != NULL) {
-        cap->r.rCurrentTSO->alloc_limit -= n*sizeof(W_);
+        // Equivalent to: cap->r.rCurrentTSO->alloc_limit -= n*sizeof(W_);
+        ASSIGN_Int64((W_*)&(cap->r.rCurrentTSO->alloc_limit),
+                     (PK_Int64((W_*)&(cap->r.rCurrentTSO->alloc_limit))
+                      - n*sizeof(W_)));
     }
 
     if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) {
@@ -897,7 +900,10 @@ allocatePinned (Capability *cap, W_ n)
     TICK_ALLOC_HEAP_NOCTR(WDS(n));
     CCS_ALLOC(cap->r.rCCCS,n);
     if (cap->r.rCurrentTSO != NULL) {
-        cap->r.rCurrentTSO->alloc_limit -= n*sizeof(W_);
+        // Equivalent to: cap->r.rCurrentTSO->alloc_limit -= n*sizeof(W_);
+        ASSIGN_Int64((W_*)&(cap->r.rCurrentTSO->alloc_limit),
+                     (PK_Int64((W_*)&(cap->r.rCurrentTSO->alloc_limit))
+                      - n*sizeof(W_)));
     }
 
     bd = cap->pinned_object_block;