Skip to content

AArch64 NCG calls primops when native instructions exist

Summary

(Making this issue because I'm planning to resolve it, and it says elsewhere that each MR should reference an issue).

There are several places where the AArch64 NCG emits calls to primops written in C even though a single native instruction implements the same operation. Examples include:

  • MO_F32_Sqrt, MO_F64_Sqrt which become the fsqrt instruction

  • MO_BSwap, which becomes one of rev or rev16 (it might be nice to expose these in their 64-bit 'SIMD-ish' forms:

    • rev32 0xAABBCCDD_EEFFGGHH = 0xDDCCBBAA_HHGGFFEE,
    • rev16 0xAABB_CCDD_EEFF_GGHH = 0xBBAA_DDCC_FFEE_HHGG)
  • MO_PopCnt using the (required) NEON instruction cnt - this might be more work, as it needs to use the vector registers. For the C program below, Clang produces the disassembly that follows it:

    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>
    
    int main(int argc, char*argv[]) {
      int i   = atoi(argv[1]);
      int i2  = atoi(argv[2]);
      int ci  = __builtin_popcountll((int64_t) i);
      int ci2 = __builtin_popcount(i2);
      printf("popcountll(%d) = %d\n", i, ci);
      printf("popcount(%d) = %d\n", i2, ci2);
      return 0;
     }
    cnt:
    (__TEXT,__text) section
    _main:
      sub sp, sp, #0x40
      stp x22, x21, [sp, #0x10]
      stp x20, x19, [sp, #0x20]
      stp x29, x30, [sp, #0x30]
      add x29, sp, #0x30
      mov x19, x1
      ldr x0, [x1, #0x8]
      bl  0x100003f68 ; symbol stub for: _atoi
      mov x20, x0
      ldr x0, [x19, #0x10]
      bl  0x100003f68 ; symbol stub for: _atoi
      mov x19, x0
      sxtw    x8, w20
      fmov    d0, x8
      cnt.8b  v0, v0
      uaddlv.8b   h0, v0
      fmov    w8, s0
      mov w9, w0
      fmov    d0, x9
      cnt.8b  v0, v0
      uaddlv.8b   h0, v0
      fmov    w21, s0
      stp x20, x8, [sp]
      adrp    x0, 0 ; 0x100003000
      add x0, x0, #0xf80 ; literal pool for: "popcountll(%d) = %d\n"
      bl  0x100003f74 ; symbol stub for: _printf
      stp x19, x21, [sp]
      adrp    x0, 0 ; 0x100003000
      add x0, x0, #0xf95 ; literal pool for: "popcount(%d) = %d\n"
      bl  0x100003f74 ; symbol stub for: _printf
      mov w0, #0x0
      ldp x29, x30, [sp, #0x30]
      ldp x20, x19, [sp, #0x20]
      ldp x22, x21, [sp, #0x10]
      add sp, sp, #0x40
      ret

I'm planning to add support for more of these instructions, and any others I find.

Edited by Alex Mason
To upload designs, you'll need to enable LFS and have an admin enable hashed storage. More information