Commit 7cf5ba3d authored by Michal Terepeta, committed by Marge Bot

Improve performance of newSmallArray#

This:
- Hoists part of the condition outside of the initialization loop in
  `stg_newSmallArrayzh`.
- Annotates the allocation-failure branch (`arr == NULL`) as unlikely, also
  in `stg_newSmallArrayzh`.
- Adds a couple of annotations to `allocateMightFail` indicating which
  branches are likely to be taken.

Together this gives about 5% improvement.

Signed-off-by: Michal Terepeta <michal.terepeta@gmail.com>
parent 39282422
...@@ -58,7 +58,13 @@ extern "C" { ...@@ -58,7 +58,13 @@ extern "C" {
#if __GNUC__ >= 4 #if __GNUC__ >= 4
#define RTS_UNLIKELY(p) __builtin_expect((p),0) #define RTS_UNLIKELY(p) __builtin_expect((p),0)
#else #else
#define RTS_UNLIKELY(p) p #define RTS_UNLIKELY(p) (p)
#endif
#if __GNUC__ >= 4
#define RTS_LIKELY(p) __builtin_expect(!!(p), 1)
#else
#define RTS_LIKELY(p) (p)
#endif #endif
/* __builtin_unreachable is supported since GNU C 4.5 */ /* __builtin_unreachable is supported since GNU C 4.5 */
......
...@@ -403,7 +403,7 @@ stg_newSmallArrayzh ( W_ n /* words */, gcptr init ) ...@@ -403,7 +403,7 @@ stg_newSmallArrayzh ( W_ n /* words */, gcptr init )
words = BYTES_TO_WDS(SIZEOF_StgSmallMutArrPtrs) + n; words = BYTES_TO_WDS(SIZEOF_StgSmallMutArrPtrs) + n;
("ptr" arr) = ccall allocateMightFail(MyCapability() "ptr",words); ("ptr" arr) = ccall allocateMightFail(MyCapability() "ptr",words);
if (arr == NULL) { if (arr == NULL) (likely: False) {
jump stg_raisezh(base_GHCziIOziException_heapOverflow_closure); jump stg_raisezh(base_GHCziIOziException_heapOverflow_closure);
} }
TICK_ALLOC_PRIM(SIZEOF_StgSmallMutArrPtrs, WDS(n), 0); TICK_ALLOC_PRIM(SIZEOF_StgSmallMutArrPtrs, WDS(n), 0);
...@@ -413,8 +413,11 @@ stg_newSmallArrayzh ( W_ n /* words */, gcptr init ) ...@@ -413,8 +413,11 @@ stg_newSmallArrayzh ( W_ n /* words */, gcptr init )
// Initialise all elements of the array with the value in R2 // Initialise all elements of the array with the value in R2
p = arr + SIZEOF_StgSmallMutArrPtrs; p = arr + SIZEOF_StgSmallMutArrPtrs;
// Avoid the shift for `WDS(n)` in the inner loop
W_ limit;
limit = arr + SIZEOF_StgSmallMutArrPtrs + WDS(n);
for: for:
if (p < arr + SIZEOF_StgSmallMutArrPtrs + WDS(n)) (likely: True) { if (p < limit) (likely: True) {
W_[p] = init; W_[p] = init;
p = p + WDS(1); p = p + WDS(1);
goto for; goto for;
......
...@@ -856,7 +856,7 @@ allocateMightFail (Capability *cap, W_ n) ...@@ -856,7 +856,7 @@ allocateMightFail (Capability *cap, W_ n)
bdescr *bd; bdescr *bd;
StgPtr p; StgPtr p;
if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) { if (RTS_UNLIKELY(n >= LARGE_OBJECT_THRESHOLD/sizeof(W_))) {
// The largest number of words such that // The largest number of words such that
// the computation of req_blocks will not overflow. // the computation of req_blocks will not overflow.
W_ max_words = (HS_WORD_MAX & ~(BLOCK_SIZE-1)) / sizeof(W_); W_ max_words = (HS_WORD_MAX & ~(BLOCK_SIZE-1)) / sizeof(W_);
...@@ -897,7 +897,7 @@ allocateMightFail (Capability *cap, W_ n) ...@@ -897,7 +897,7 @@ allocateMightFail (Capability *cap, W_ n)
accountAllocation(cap, n); accountAllocation(cap, n);
bd = cap->r.rCurrentAlloc; bd = cap->r.rCurrentAlloc;
if (bd == NULL || bd->free + n > bd->start + BLOCK_SIZE_W) { if (RTS_UNLIKELY(bd == NULL || bd->free + n > bd->start + BLOCK_SIZE_W)) {
if (bd) finishedNurseryBlock(cap,bd); if (bd) finishedNurseryBlock(cap,bd);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment