diff --git a/compiler/GHC/CmmToAsm/AArch64/CodeGen.hs b/compiler/GHC/CmmToAsm/AArch64/CodeGen.hs index 9b642ea3cfff77f83ed31b89632a6ece3ec15079..94cb802e700eb7baafe7c0f44a5524a4a8fd9c9a 100644 --- a/compiler/GHC/CmmToAsm/AArch64/CodeGen.hs +++ b/compiler/GHC/CmmToAsm/AArch64/CodeGen.hs @@ -1757,6 +1757,137 @@ genCCall target dest_regs arg_regs bid = do truncateReg W64 w lo , Nothing) | otherwise -> unsupported (MO_U_Mul2 w) + PrimTarget (MO_Clz w) + | w == W64 || w == W32 + , [src] <- arg_regs + , [dst] <- dest_regs + -> do + (reg_a, _format_x, code_x) <- getSomeReg src + let dst_reg = getRegisterReg platform (CmmLocal dst) + return ( + code_x `snocOL` + CLZ (OpReg w dst_reg) (OpReg w reg_a) + , Nothing) + | w == W16 + , [src] <- arg_regs + , [dst] <- dest_regs + -> do + (reg_a, _format_x, code_x) <- getSomeReg src + let dst' = getRegisterReg platform (CmmLocal dst) + r n = OpReg W32 n + imm n = OpImm (ImmInt n) + {- dst = clz(x << 16 | 0x0000_8000) -} + return ( + code_x `appOL` toOL + [ LSL (r dst') (r reg_a) (imm 16) + , ORR (r dst') (r dst') (imm 0x00008000) + , CLZ (r dst') (r dst') + ] + , Nothing) + | w == W8 + , [src] <- arg_regs + , [dst] <- dest_regs + -> do + (reg_a, _format_x, code_x) <- getSomeReg src + let dst' = getRegisterReg platform (CmmLocal dst) + r n = OpReg W32 n + imm n = OpImm (ImmInt n) + {- dst = clz(x << 24 | 0x0080_0000) -} + return ( + code_x `appOL` toOL + [ LSL (r dst') (r reg_a) (imm 24) + , ORR (r dst') (r dst') (imm 0x00800000) + , CLZ (r dst') (r dst') + ] + , Nothing) + | otherwise -> unsupported (MO_Clz w) + PrimTarget (MO_Ctz w) + | w == W64 || w == W32 + , [src] <- arg_regs + , [dst] <- dest_regs + -> do + (reg_a, _format_x, code_x) <- getSomeReg src + let dst_reg = getRegisterReg platform (CmmLocal dst) + return ( + code_x `snocOL` + RBIT (OpReg w dst_reg) (OpReg w reg_a) `snocOL` + CLZ (OpReg w dst_reg) (OpReg w dst_reg) + , Nothing) + | w == W16 + , [src] <- arg_regs + , [dst] <- dest_regs + -> do + (reg_a, _format_x, code_x) <- getSomeReg src + let dst' = getRegisterReg platform (CmmLocal dst) + r n = OpReg W32 n + imm n = OpImm (ImmInt n) + {- dst = clz(reverseBits(x) | 0x0000_8000) -} + return ( + code_x `appOL` toOL + [ RBIT (r dst') (r reg_a) + , ORR (r dst') (r dst') (imm 0x00008000) + , CLZ (r dst') (r dst') + ] + , Nothing) + | w == W8 + , [src] <- arg_regs + , [dst] <- dest_regs + -> do + (reg_a, _format_x, code_x) <- getSomeReg src + let dst' = getRegisterReg platform (CmmLocal dst) + r n = OpReg W32 n + imm n = OpImm (ImmInt n) + {- dst = clz(reverseBits(x) | 0x0080_0000) -} + return ( + code_x `appOL` toOL + [ RBIT (r dst') (r reg_a) + , ORR (r dst') (r dst') (imm 0x00800000) + , CLZ (r dst') (r dst') + ] + , Nothing) + | otherwise -> unsupported (MO_Ctz w) + PrimTarget (MO_BRev w) + | w == W64 || w == W32 + , [src] <- arg_regs + , [dst] <- dest_regs + -> do + (reg_a, _format_x, code_x) <- getSomeReg src + let dst_reg = getRegisterReg platform (CmmLocal dst) + return ( + code_x `snocOL` + RBIT (OpReg w dst_reg) (OpReg w reg_a) + , Nothing) + | w == W16 + , [src] <- arg_regs + , [dst] <- dest_regs + -> do + (reg_a, _format_x, code_x) <- getSomeReg src + let dst' = getRegisterReg platform (CmmLocal dst) + r n = OpReg W32 n + imm n = OpImm (ImmInt n) + {- dst = reverseBits32(x << 16) -} + return ( + code_x `appOL` toOL + [ LSL (r dst') (r reg_a) (imm 16) + , RBIT (r dst') (r dst') + ] + , Nothing) + | w == W8 + , [src] <- arg_regs + , [dst] <- dest_regs + -> do + (reg_a, _format_x, code_x) <- getSomeReg src + let dst' = getRegisterReg platform (CmmLocal dst) + r n = OpReg W32 n + imm n = OpImm (ImmInt n) + {- dst = reverseBits32(x << 24) -} + return ( + code_x `appOL` toOL + [ LSL (r dst') (r reg_a) (imm 24) + , RBIT (r dst') (r dst') + ] + , Nothing) + | otherwise -> unsupported (MO_BRev w) -- or a possibly side-effecting machine operation @@ -1883,10 +2014,7 @@ genCCall target dest_regs arg_regs bid = do MO_PopCnt w -> mkCCall (popCntLabel w) MO_Pdep w -> mkCCall (pdepLabel w) MO_Pext w -> mkCCall (pextLabel w) - MO_Clz w -> mkCCall (clzLabel w) - MO_Ctz w -> mkCCall (ctzLabel w) MO_BSwap w -> mkCCall (bSwapLabel w) - MO_BRev w -> mkCCall (bRevLabel w) -- -- Atomic read-modify-write. MO_AtomicRead w ord diff --git a/compiler/GHC/CmmToAsm/AArch64/Instr.hs b/compiler/GHC/CmmToAsm/AArch64/Instr.hs index 8836ab7d9f7fa05123c65a33c246ca709e0927a9..01443e02bad41a5acd36f1b3bdbd180c67c3847b 100644 --- a/compiler/GHC/CmmToAsm/AArch64/Instr.hs +++ b/compiler/GHC/CmmToAsm/AArch64/Instr.hs @@ -100,6 +100,8 @@ regUsageOfInstr platform instr = case instr of UXTB dst src -> usage (regOp src, regOp dst) SXTH dst src -> usage (regOp src, regOp dst) UXTH dst src -> usage (regOp src, regOp dst) + CLZ dst src -> usage (regOp src, regOp dst) + RBIT dst src -> usage (regOp src, regOp dst) -- 3. Logical and Move Instructions ------------------------------------------ AND dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst) ASR dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst) @@ -140,7 +142,8 @@ regUsageOfInstr platform instr = case instr of FMA _ dst src1 src2 src3 -> usage (regOp src1 ++ regOp src2 ++ regOp src3, regOp dst) - _ -> panic $ "regUsageOfInstr: " ++ instrCon instr + LOCATION{} -> panic $ "regUsageOfInstr: " ++ instrCon instr + NEWBLOCK{} -> panic $ "regUsageOfInstr: " ++ instrCon instr where -- filtering the usage is necessary, otherwise the register @@ -234,6 +237,8 @@ patchRegsOfInstr instr env = case instr of UXTB o1 o2 -> UXTB (patchOp o1) (patchOp o2) SXTH o1 o2 -> SXTH (patchOp o1) (patchOp o2) UXTH o1 o2 -> UXTH (patchOp o1) (patchOp o2) + CLZ o1 o2 -> CLZ (patchOp o1) (patchOp o2) + RBIT o1 o2 -> RBIT (patchOp o1) (patchOp o2) -- 3. Logical and Move Instructions ---------------------------------------- AND o1 o2 o3 -> AND (patchOp o1) (patchOp o2) (patchOp o3) @@ -276,7 +281,8 @@ patchRegsOfInstr instr env = case instr of FMA s o1 o2 o3 o4 -> FMA s (patchOp o1) (patchOp o2) (patchOp o3) (patchOp o4) - _ -> panic $ "patchRegsOfInstr: " ++ instrCon instr + NEWBLOCK{} -> panic $ "patchRegsOfInstr: " ++ instrCon instr + LOCATION{} -> panic $ "patchRegsOfInstr: " ++ instrCon instr where patchOp :: Operand -> Operand patchOp (OpReg w r) = OpReg w (env r) @@ -591,6 +597,8 @@ data Instr -- Signed/Unsigned bitfield extract | SBFX Operand Operand Operand Operand -- rd = rn[i,j] | UBFX Operand Operand Operand Operand -- rd = rn[i,j] + | CLZ Operand Operand -- rd = countLeadingZeros(rn) + | RBIT Operand Operand -- rd = reverseBits(rn) -- 3. Logical and Move Instructions ---------------------------------------- | AND Operand Operand Operand -- rd = rn & op2 @@ -676,6 +684,8 @@ instrCon i = UBFM{} -> "UBFM" SBFX{} -> "SBFX" UBFX{} -> "UBFX" + CLZ{} -> "CLZ" + RBIT{} -> "RBIT" AND{} -> "AND" ASR{} -> "ASR" EOR{} -> "EOR" diff --git a/compiler/GHC/CmmToAsm/AArch64/Ppr.hs b/compiler/GHC/CmmToAsm/AArch64/Ppr.hs index 8c5d08167bfc663fcad575537a7257554d8d1780..7fb0a5351cb1ec73e601074e25ef44c4ce7d4439 100644 --- a/compiler/GHC/CmmToAsm/AArch64/Ppr.hs +++ b/compiler/GHC/CmmToAsm/AArch64/Ppr.hs @@ -396,6 +396,8 @@ pprInstr platform instr = case instr of -- 2. Bit Manipulation Instructions ------------------------------------------ SBFM o1 o2 o3 o4 -> op4 (text "\tsbfm") o1 o2 o3 o4 UBFM o1 o2 o3 o4 -> op4 (text "\tubfm") o1 o2 o3 o4 + CLZ o1 o2 -> op2 (text "\tclz") o1 o2 + RBIT o1 o2 -> op2 (text "\trbit") o1 o2 -- signed and unsigned bitfield extract SBFX o1 o2 o3 o4 -> op4 (text "\tsbfx") o1 o2 o3 o4 UBFX o1 o2 o3 o4 -> op4 (text "\tubfx") o1 o2 o3 o4 diff --git a/testsuite/tests/codeGen/should_run/CtzClz0.hs b/testsuite/tests/codeGen/should_run/CtzClz0.hs new file mode 100644 index 0000000000000000000000000000000000000000..0caad01b1da802051963ec20493d902346bd2e7f --- /dev/null +++ b/testsuite/tests/codeGen/should_run/CtzClz0.hs @@ -0,0 +1,27 @@ +{-# LANGUAGE CPP #-} +{-# LANGUAGE MagicHash #-} + +module Main where + +import GHC.Exts +import Control.Monad + +#include <MachDeps.h> + +{-# OPAQUE x #-} -- needed to avoid triggering constant folding +x :: Word +x = 0 + +main :: IO () +main = do + let !(W# w) = x + + guard (W# (ctz# w) == WORD_SIZE_IN_BITS) + guard (W# (ctz8# w) == 8) + guard (W# (ctz16# w) == 16) + guard (W# (ctz32# w) == 32) + + guard (W# (clz# w) == WORD_SIZE_IN_BITS) + guard (W# (clz8# w) == 8) + guard (W# (clz16# w) == 16) + guard (W# (clz32# w) == 32) diff --git a/testsuite/tests/codeGen/should_run/all.T b/testsuite/tests/codeGen/should_run/all.T index 40ddadecde14bf1e7b91c00eccdf9f0cb8a2507a..264c1877f809a8a048187ce85bafa4aaa2505a14 100644 --- a/testsuite/tests/codeGen/should_run/all.T +++ b/testsuite/tests/codeGen/should_run/all.T @@ -246,3 +246,4 @@ test('T24295a', normal, compile_and_run, ['-O -floopification']) test('T24295b', normal, compile_and_run, ['-O -floopification -fpedantic-bottoms']) test('T24664a', normal, compile_and_run, ['-O']) test('T24664b', normal, compile_and_run, ['-O']) +test('CtzClz0', normal, compile_and_run, [''])