Commit 7cf5ba3d, authored by Michal Terepeta and committed by Marge Bot

Improve performance of newSmallArray#

This:
- Hoists part of the condition outside of the initialization loop in
  `stg_newSmallArrayzh`.
- Annotates one of the unlikely branches as unlikely, also in
  `stg_newSmallArrayzh`.
- Adds a couple of annotations to `allocateMightFail` indicating which
  branches are likely to be taken.

Together this gives about 5% improvement.
Signed-off-by: Michal Terepeta <michal.terepeta@gmail.com>
parent 39282422
Pipeline #4196 passed with stages in 620 minutes and 27 seconds
......@@ -58,7 +58,13 @@ extern "C" {
#if __GNUC__ >= 4
#define RTS_UNLIKELY(p) __builtin_expect((p),0)
#else
#define RTS_UNLIKELY(p) p
#define RTS_UNLIKELY(p) (p)
#endif
#if __GNUC__ >= 4
#define RTS_LIKELY(p) __builtin_expect(!!(p), 1)
#else
#define RTS_LIKELY(p) (p)
#endif
/* __builtin_unreachable is supported since GNU C 4.5 */
......
......@@ -403,7 +403,7 @@ stg_newSmallArrayzh ( W_ n /* words */, gcptr init )
words = BYTES_TO_WDS(SIZEOF_StgSmallMutArrPtrs) + n;
("ptr" arr) = ccall allocateMightFail(MyCapability() "ptr",words);
if (arr == NULL) {
if (arr == NULL) (likely: False) {
jump stg_raisezh(base_GHCziIOziException_heapOverflow_closure);
}
TICK_ALLOC_PRIM(SIZEOF_StgSmallMutArrPtrs, WDS(n), 0);
......@@ -413,8 +413,11 @@ stg_newSmallArrayzh ( W_ n /* words */, gcptr init )
// Initialise all elements of the array with the value in R2
p = arr + SIZEOF_StgSmallMutArrPtrs;
// Avoid the shift for `WDS(n)` in the inner loop
W_ limit;
limit = arr + SIZEOF_StgSmallMutArrPtrs + WDS(n);
for:
if (p < arr + SIZEOF_StgSmallMutArrPtrs + WDS(n)) (likely: True) {
if (p < limit) (likely: True) {
W_[p] = init;
p = p + WDS(1);
goto for;
......
......@@ -856,7 +856,7 @@ allocateMightFail (Capability *cap, W_ n)
bdescr *bd;
StgPtr p;
if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) {
if (RTS_UNLIKELY(n >= LARGE_OBJECT_THRESHOLD/sizeof(W_))) {
// The largest number of words such that
// the computation of req_blocks will not overflow.
W_ max_words = (HS_WORD_MAX & ~(BLOCK_SIZE-1)) / sizeof(W_);
......@@ -897,7 +897,7 @@ allocateMightFail (Capability *cap, W_ n)
accountAllocation(cap, n);
bd = cap->r.rCurrentAlloc;
if (bd == NULL || bd->free + n > bd->start + BLOCK_SIZE_W) {
if (RTS_UNLIKELY(bd == NULL || bd->free + n > bd->start + BLOCK_SIZE_W)) {
if (bd) finishedNurseryBlock(cap,bd);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment