... | ... | @@ -171,42 +171,127 @@ case (x <$# 0#) `orI#` (x >=$# width) `orI#` (y <$# 0#) `orI#` (y >=$# height) o |
|
|
```
|
|
|
|
|
|
|
|
|
Using the LLVM backend this compiles to:
|
|
|
Let's analyze how that code gets compiled by the LLVM backend. For purposes of this analysis I will convert the above case expression to the following function:
|
|
|
|
|
|
|
|
|
```wiki
|
|
|
# BB#0: # %c1oe
|
|
|
movq %rsi, %rax
|
|
|
orq %r14, %rax
|
|
|
shrq $63, %rax
|
|
|
cmpq %rdi, %r14
|
|
|
setge %cl
|
|
|
movzbl %cl, %ecx
|
|
|
orq %rax, %rcx
|
|
|
cmpq %r8, %rsi
|
|
|
setge %al
|
|
|
movzbl %al, %eax
|
|
|
orq %rcx, %rax
|
|
|
jne .LBB2_1
|
|
|
# BB#3: # %c1oP
|
|
|
movq (%rbp), %rax
|
|
|
movl $r1mu_closure+1, %ebx
|
|
|
jmpq *%rax # TAILCALL
|
|
|
.LBB2_1: # %c1oe
|
|
|
cmpq $1, %rax
|
|
|
jne .LBB2_2
|
|
|
# BB#4: # %c1oZ
|
|
|
movq (%rbp), %rax
|
|
|
movl $r1mv_closure+1, %ebx
|
|
|
jmpq *%rax # TAILCALL
|
|
|
.LBB2_2: # %c1oF
|
|
|
movq r1mt_closure(%rip), %rax
|
|
|
movl $r1mt_closure, %ebx
|
|
|
jmpq *%rax # TAILCALL
|
|
|
f :: Int# -> Int# -> Int# -> Int# -> String
|
|
|
f x y width height =
|
|
|
case (x <$# 0#) `orI#` (x >=$# width) `orI#` (y <$# 0#) `orI#` (y >=$# height) of
|
|
|
1# -> "one"
|
|
|
0# -> "zero"
|
|
|
```
|
|
|
|
|
|
|
|
|
By dumping intermediate Cmm representation we can determine how variables are mapped to CPU registers. Arguments are passed to the function using a stack:
|
|
|
|
|
|
```wiki
|
|
|
Main.f_slow() // [R1]
|
|
|
{ info_tbl: []
|
|
|
stack_info: arg_space: 0 updfr_space: Nothing
|
|
|
}
|
|
|
{offset
|
|
|
cWY:
|
|
|
R5 = I64[Sp + 24];
|
|
|
R4 = I64[Sp + 16];
|
|
|
R3 = I64[Sp + 8];
|
|
|
R2 = I64[Sp];
|
|
|
R1 = R1;
|
|
|
Sp = Sp + 32;
|
|
|
call Main.f_info(R5, R4, R3, R2, R1) args: 8, res: 0, upd: 8;
|
|
|
}
|
|
|
}
|
|
|
```
|
|
|
|
|
|
`R2` contains the first argument `x`, `R3` contains `y`, `R4` contains `width` and `R5` contains `height`. We can verify that by looking at the body of `Main.f_info`:
|
|
|
|
|
|
```wiki
|
|
|
_sV9::I64 = R3;
|
|
|
_sV3::I64 = R2;
|
|
|
_sWx::I64 = %MO_S_Lt_W64(_sV3::I64, 0) | %MO_S_Ge_W64(_sV3::I64, R4) |
|
|
|
%MO_S_Lt_W64(_sV9::I64, 0) | %MO_S_Ge_W64(_sV9::I64, R5);
|
|
|
```
|
|
|
|
|
|
|
|
|
Mappings between Cmm's `R(2/3/4/5)` registers and machine registers are defined in `includes/stg/MachRegs.h`:
|
|
|
|
|
|
```wiki
|
|
|
#define REG_R2 r14
|
|
|
#define REG_R3 rsi
|
|
|
#define REG_R4 rdi
|
|
|
#define REG_R5 r8
|
|
|
```
|
|
|
|
|
|
|
|
|
Knowing that we can dump the assembly generated by LLVM backend:
|
|
|
|
|
|
```wiki
|
|
|
Main_f_info:
|
|
|
# BB#0:
|
|
|
movq %rsi, %rax
|
|
|
orq %r14, %rax
|
|
|
shrq $63, %rax
|
|
|
cmpq %rdi, %r14
|
|
|
setge %cl
|
|
|
movzbl %cl, %ecx
|
|
|
orq %rax, %rcx
|
|
|
cmpq %r8, %rsi
|
|
|
setge %al
|
|
|
movzbl %al, %eax
|
|
|
orq %rcx, %rax
|
|
|
jne .LBB4_1
|
|
|
# BB#3:
|
|
|
movq Main_f2_closure(%rip), %rax
|
|
|
movl $Main_f2_closure, %ebx
|
|
|
jmpq *%rax # TAILCALL
|
|
|
.LBB4_1:
|
|
|
cmpq $1, %rax
|
|
|
jne .LBB4_2
|
|
|
# BB#4:
|
|
|
movq Main_f1_closure(%rip), %rax
|
|
|
movl $Main_f1_closure, %ebx
|
|
|
jmpq *%rax # TAILCALL
|
|
|
.LBB4_2:
|
|
|
movq Main_f3_closure(%rip), %rax
|
|
|
movl $Main_f3_closure, %ebx
|
|
|
jmpq *%rax # TAILCALL
|
|
|
```
|
|
|
|
|
|
|
|
|
Let's analyze line by line the part responsible for evaluating the scrutinee:
|
|
|
|
|
|
```wiki
|
|
|
movq %rsi, %rax # load y (stored in %rsi) into %rax register
|
|
|
orq %r14, %rax # perform bitwise OR between y (now in %rax) and x (in %r14)
|
|
|
# and store result in %rax
|
|
|
shrq $63, %rax # shift %rax 63 bits to the right. If both x and y were
|
|
|
# positive numbers, then their sign bits (MSB) were set to
|
|
|
# 0 and so %rax is now 0. If at least one of them was
|
|
|
# negative then its sign bit must have been 1 and so %rax
|
|
|
# is now 1
|
|
|
cmpq %rdi, %r14 # compare width (in %rdi) with x (in %r14) and set the flags
|
|
|
# in the flag register according to the result.
|
|
|
setge %cl # if, based on flags set by cmpq, x was greater or equal to
|
|
|
# width then we set %cl to 1. Otherwise it is set to 0.
|
|
|
movzbl %cl, %ecx # zero the bits of %ecx register except the lowest 8 bits
|
|
|
# containg the result of previous operation
|
|
|
orq %rax, %rcx # perform logical OR on results of the previous test and
|
|
|
# store the result in %rcx. At this point if %rcx is 1
|
|
|
# then either x was negative, or y was negative or
|
|
|
# x was greater or equal to width
|
|
|
cmpq %r8, %rsi # now we are checking whether y is greter or equal to
|
|
|
# height. This is the same as previosly for x and width.
|
|
|
setge %al # This time we set LSB of %al to 1 if y >= height
|
|
|
movzbl %al, %eax # as previously, clear bits of %eax except lowest 8 bits
|
|
|
orq %rcx, %rax # perform logical OR which combines result of previous
|
|
|
# three comparisons with the last one
|
|
|
jne .LBB2_1 # if ZF is not set it this means that either %rcx or %rax
|
|
|
# was not zero, which means that at least one condition
|
|
|
# in the scrutinee was true
|
|
|
```
|
|
|
|
|
|
|
|
|
The assembly does not contain comparisons and branches in the scrutinee of the case expression, but still uses jumps to select an appropriate branch of the case expression.
|
|
|
The assembly does not contain comparisons and branches in the scrutinee of the case expression, but still uses jumps to select an appropriate branch of the case expression.
|
|
|
|
|
|
### Benchmarks
|
|
|
|
... | ... | |