Skip to content

Very poor code generation for CoreUnfold.callSiteInline

I happened to glance at the assembly generated for CoreUnfold.callSiteInline and was slightly shocked at what I found. This seemingly harmless, 16-line function produces over 7,000 lines of instructions, nearly all of which are data movement (stack spilling, I think). For instance, here is a taste:

   16ba5:	4c 8b 93 bf 02 00 00 	mov    0x2bf(%rbx),%r10
   16bac:	4c 89 9c 24 b0 02 00 	mov    %r11,0x2b0(%rsp)
   16bb3:	00 
   16bb4:	4c 8b 9b c7 02 00 00 	mov    0x2c7(%rbx),%r11
   16bbb:	4c 89 b4 24 b8 02 00 	mov    %r14,0x2b8(%rsp)
   16bc2:	00 
   16bc3:	4c 8b b3 cf 02 00 00 	mov    0x2cf(%rbx),%r14
   16bca:	48 89 84 24 c0 02 00 	mov    %rax,0x2c0(%rsp)
   16bd1:	00 
   16bd2:	48 8b 83 d7 02 00 00 	mov    0x2d7(%rbx),%rax
   16bd9:	48 89 8c 24 c8 02 00 	mov    %rcx,0x2c8(%rsp)
   16be0:	00 
   16be1:	48 8b 8b df 02 00 00 	mov    0x2df(%rbx),%rcx
   16be8:	48 89 94 24 d0 02 00 	mov    %rdx,0x2d0(%rsp)
   16bef:	00 
   16bf0:	48 8b 93 e7 02 00 00 	mov    0x2e7(%rbx),%rdx
   16bf7:	48 89 b4 24 d8 02 00 	mov    %rsi,0x2d8(%rsp)
   16bfe:	00 
   16bff:	48 8b b3 ef 02 00 00 	mov    0x2ef(%rbx),%rsi
   16c06:	48 89 bc 24 e0 02 00 	mov    %rdi,0x2e0(%rsp)
   16c0d:	00 
   16c0e:	48 8b bb f7 02 00 00 	mov    0x2f7(%rbx),%rdi
   16c15:	4c 89 84 24 e8 02 00 	mov    %r8,0x2e8(%rsp)
   16c1c:	00 
   16c1d:	4c 8b 83 ff 02 00 00 	mov    0x2ff(%rbx),%r8
   16c24:	4c 89 8c 24 f0 02 00 	mov    %r9,0x2f0(%rsp)
   16c2b:	00 
   16c2c:	4c 8b 8b 07 03 00 00 	mov    0x307(%rbx),%r9
   16c33:	4c 89 94 24 f8 02 00 	mov    %r10,0x2f8(%rsp)
   16c3a:	00 
   16c3b:	4c 8b 93 0f 03 00 00 	mov    0x30f(%rbx),%r10
   16c42:	4c 89 9c 24 00 03 00 	mov    %r11,0x300(%rsp)
   16c49:	00 
   16c4a:	4c 8b 9b 17 03 00 00 	mov    0x317(%rbx),%r11
   16c51:	4c 89 b4 24 08 03 00 	mov    %r14,0x308(%rsp)
   16c58:	00 
   16c59:	4c 8b b3 1f 03 00 00 	mov    0x31f(%rbx),%r14
   16c60:	48 89 84 24 10 03 00 	mov    %rax,0x310(%rsp)
   16c67:	00 
   16c68:	48 8b 83 27 03 00 00 	mov    0x327(%rbx),%rax
   16c6f:	48 89 8c 24 18 03 00 	mov    %rcx,0x318(%rsp)
   16c76:	00 
   16c77:	48 8b 8b 2f 03 00 00 	mov    0x32f(%rbx),%rcx
   16c7e:	48 89 94 24 20 03 00 	mov    %rdx,0x320(%rsp)
   16c85:	00 
   16c86:	48 8b 93 37 03 00 00 	mov    0x337(%rbx),%rdx
   16c8d:	48 89 b4 24 28 03 00 	mov    %rsi,0x328(%rsp)
   16c94:	00 
   16c95:	48 8b b3 3f 03 00 00 	mov    0x33f(%rbx),%rsi
   16c9c:	48 89 bc 24 30 03 00 	mov    %rdi,0x330(%rsp)
   16ca3:	00 
   16ca4:	48 8b bb 47 03 00 00 	mov    0x347(%rbx),%rdi
   16cab:	4c 89 84 24 38 03 00 	mov    %r8,0x338(%rsp)
   16cb2:	00 
   16cb3:	4c 8b 83 4f 03 00 00 	mov    0x34f(%rbx),%r8
   16cba:	4c 89 8c 24 40 03 00 	mov    %r9,0x340(%rsp)
   16cc1:	00 
   16cc2:	4c 8b 8b 57 03 00 00 	mov    0x357(%rbx),%r9
   16cc9:	4c 89 94 24 48 03 00 	mov    %r10,0x348(%rsp)
   16cd0:	00 
   16cd1:	4c 8b 93 5f 03 00 00 	mov    0x35f(%rbx),%r10
   16cd8:	4c 89 9c 24 50 03 00 	mov    %r11,0x350(%rsp)
   16cdf:	00 
   16ce0:	4c 8b 9b 67 03 00 00 	mov    0x367(%rbx),%r11
   16ce7:	4c 89 b4 24 58 03 00 	mov    %r14,0x358(%rsp)
   16cee:	00 
```
To upload designs, you'll need to enable LFS and have an admin enable hashed storage. More information