Skip to content
Snippets Groups Projects
Verified Commit 9b65c166 authored by Rodrigo Mesquita's avatar Rodrigo Mesquita :seedling:
Browse files

ncg(aarch64): Implement MO_BSwap using REV

Implements MO_BSwap by producing assembly to do the byte swapping
instead of producing a foreign call a C function.

In `tar`, the hot loop for `deserialise` got almost 4x faster by
avoiding the foreign call which caused spilling live variables to the
stack -- this means the loop did 4x more memory read/writing than
necessary in that particular case!
parent 3dca3b7d
No related tags found
1 merge request!12876ncg(aarch64): Implement MO_BSwap using REV
Pipeline #96733 passed
Pipeline: test-primops

#96758

    ......@@ -1147,10 +1147,12 @@ callishMachOps platform = listToUFM $
    ( "prefetch1", (MO_Prefetch_Data 1,)),
    ( "prefetch2", (MO_Prefetch_Data 2,)),
    ( "prefetch3", (MO_Prefetch_Data 3,))
    ] ++ concat
    [ allWidths "popcnt" MO_PopCnt
    , allWidths "pdep" MO_Pdep
    , allWidths "pext" MO_Pext
    , allWidths "bswap" MO_BSwap
    , allWidths "cmpxchg" MO_Cmpxchg
    , allWidths "xchg" MO_Xchg
    , allWidths "load_relaxed" (\w -> MO_AtomicRead w MemOrderAcquire)
    ......
    ......@@ -1546,7 +1546,7 @@ genCondBranch _ true false expr = do
    -- range within 64bit.
    genCCall
    :: ForeignTarget -- function to call
    :: ForeignTarget -- function to call (or primop)
    -> [CmmFormal] -- where to put the result
    -> [CmmActual] -- arguments (of mixed type)
    -> BlockId -- The block we are in
    ......@@ -2014,7 +2014,15 @@ genCCall target dest_regs arg_regs bid = do
    MO_PopCnt w -> mkCCall (popCntLabel w)
    MO_Pdep w -> mkCCall (pdepLabel w)
    MO_Pext w -> mkCCall (pextLabel w)
    MO_BSwap w -> mkCCall (bSwapLabel w)
    MO_BSwap w
    | [src_reg] <- arg_regs
    , [dst_reg] <- dest_regs -> do
    (src, _fmt_p, code_p) <- getSomeReg src_reg
    platform <- getPlatform
    let dst = getRegisterReg platform (CmmLocal dst_reg)
    code = code_p `snocOL` REV (OpReg w dst) (OpReg w src)
    return (code, Nothing)
    | otherwise -> panic "mal-formed ByteSwap"
    -- -- Atomic read-modify-write.
    MO_AtomicRead w ord
    ......
    ......@@ -102,6 +102,7 @@ regUsageOfInstr platform instr = case instr of
    UXTH dst src -> usage (regOp src, regOp dst)
    CLZ dst src -> usage (regOp src, regOp dst)
    RBIT dst src -> usage (regOp src, regOp dst)
    REV dst src -> usage (regOp src, regOp dst)
    -- 3. Logical and Move Instructions ------------------------------------------
    AND dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
    ASR dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
    ......@@ -238,7 +239,8 @@ patchRegsOfInstr instr env = case instr of
    SXTH o1 o2 -> SXTH (patchOp o1) (patchOp o2)
    UXTH o1 o2 -> UXTH (patchOp o1) (patchOp o2)
    CLZ o1 o2 -> CLZ (patchOp o1) (patchOp o2)
    RBIT o1 o2 -> RBIT (patchOp o1) (patchOp o2)
    RBIT o1 o2 -> RBIT (patchOp o1) (patchOp o2)
    REV o1 o2 -> REV (patchOp o1) (patchOp o2)
    -- 3. Logical and Move Instructions ----------------------------------------
    AND o1 o2 o3 -> AND (patchOp o1) (patchOp o2) (patchOp o3)
    ......@@ -599,6 +601,7 @@ data Instr
    | UBFX Operand Operand Operand Operand -- rd = rn[i,j]
    | CLZ Operand Operand -- rd = countLeadingZeros(rn)
    | RBIT Operand Operand -- rd = reverseBits(rn)
    | REV Operand Operand -- rd = bswap(rn)
    -- 3. Logical and Move Instructions ----------------------------------------
    | AND Operand Operand Operand -- rd = rn & op2
    ......@@ -686,6 +689,7 @@ instrCon i =
    UBFX{} -> "UBFX"
    CLZ{} -> "CLZ"
    RBIT{} -> "RBIT"
    REV{} -> "REV"
    AND{} -> "AND"
    ASR{} -> "ASR"
    EOR{} -> "EOR"
    ......
    ......@@ -397,7 +397,12 @@ pprInstr platform instr = case instr of
    SBFM o1 o2 o3 o4 -> op4 (text "\tsbfm") o1 o2 o3 o4
    UBFM o1 o2 o3 o4 -> op4 (text "\tubfm") o1 o2 o3 o4
    CLZ o1 o2 -> op2 (text "\tclz") o1 o2
    RBIT o1 o2 -> op2 (text "\trbit") o1 o2
    RBIT o1 o2 -> op2 (text "\trbit") o1 o2
    REV (OpReg W8 (RegReal (RealRegSingle i))) _ | i < 32 ->
    {- swapping a single byte is a no-op -} empty
    REV o1@(OpReg W16 (RegReal (RealRegSingle i))) o2 | i < 32 ->
    op2 (text "\trev16") o1 o2
    REV o1 o2 -> op2 (text "\trev") o1 o2
    -- signed and unsigned bitfield extract
    SBFX o1 o2 o3 o4 -> op4 (text "\tsbfx") o1 o2 o3 o4
    UBFX o1 o2 o3 o4 -> op4 (text "\tubfx") o1 o2 o3 o4
    ......
    0% Loading or .
    You are about to add 0 people to the discussion. Proceed with caution.
    Finish editing this message first!
    Please register or to comment