Commit 2d0438f3 authored by tibbe's avatar tibbe Committed by Simon Marlow

Add popCnt# primop

parent 49dbe605
......@@ -448,6 +448,8 @@ data CallishMachOp
| MO_Memcpy
| MO_Memset
| MO_Memmove
| MO_PopCnt Width
deriving (Eq, Show)
pprCallishMachOp :: CallishMachOp -> SDoc
......
......@@ -374,6 +374,12 @@ emitPrimOp [] CopyByteArrayOp [src,src_off,dst,dst_off,n] live =
emitPrimOp [] CopyMutableByteArrayOp [src,src_off,dst,dst_off,n] live =
doCopyMutableByteArrayOp src src_off dst dst_off n live
-- Population count
emitPrimOp [res] PopCnt8Op [w] live = emitPopCntCall res w W8 live
emitPrimOp [res] PopCnt16Op [w] live = emitPopCntCall res w W16 live
emitPrimOp [res] PopCnt32Op [w] live = emitPopCntCall res w W32 live
emitPrimOp [res] PopCnt64Op [w] live = emitPopCntCall res w W64 live
emitPrimOp [res] PopCntOp [w] live = emitPopCntCall res w wordWidth live
-- The rest just translate straightforwardly
emitPrimOp [res] op [arg] _
......@@ -908,3 +914,14 @@ emitAllocateCall res cap n live = do
where
allocate = CmmLit (CmmLabel (mkForeignLabel (fsLit "allocate") Nothing
ForeignLabelInExternalPackage IsFunction))
emitPopCntCall :: LocalReg -> CmmExpr -> Width -> StgLiveVars -> Code
emitPopCntCall res x width live = do
vols <- getVolatileRegs live
emitForeignCall' PlayRisky
[CmmHinted res NoHint]
(CmmPrim (MO_PopCnt width))
[(CmmHinted x NoHint)]
(Just vols)
NoC_SRT -- No SRT b/c we do PlayRisky
CmmMayReturn
......@@ -443,6 +443,13 @@ emitPrimOp [] CopyByteArrayOp [src,src_off,dst,dst_off,n] =
emitPrimOp [] CopyMutableByteArrayOp [src,src_off,dst,dst_off,n] =
doCopyMutableByteArrayOp src src_off dst dst_off n
-- Population count
emitPrimOp [res] PopCnt8Op [w] = emitPopCntCall res w W8
emitPrimOp [res] PopCnt16Op [w] = emitPopCntCall res w W16
emitPrimOp [res] PopCnt32Op [w] = emitPopCntCall res w W32
emitPrimOp [res] PopCnt64Op [w] = emitPopCntCall res w W64
emitPrimOp [res] PopCntOp [w] = emitPopCntCall res w wordWidth
-- The rest just translate straightforwardly
emitPrimOp [res] op [arg]
| nopOp op
......@@ -940,3 +947,10 @@ emitAllocateCall res cap n = do
where
allocate = CmmLit (CmmLabel (mkForeignLabel (fsLit "allocate") Nothing
ForeignLabelInExternalPackage IsFunction))
emitPopCntCall :: LocalReg -> CmmExpr -> Width -> FCode ()
emitPopCntCall res x width = do
emitPrimCall
[ res ]
(MO_PopCnt width)
[ x ]
......@@ -497,6 +497,7 @@ Library
RegClass
PIC
Platform
CPrim
X86.Regs
X86.RegInfo
X86.Instr
......
......@@ -276,6 +276,7 @@ data DynFlag
| Opt_SharedImplib
| Opt_BuildingCabalPackage
| Opt_SSE2
| Opt_SSE4_2
| Opt_GhciSandbox
| Opt_HelpfulErrors
......@@ -1518,6 +1519,7 @@ dynamic_flags = [
, flagA "monly-3-regs" (NoArg (addWarn "The -monly-3-regs flag does nothing; it will be removed in a future GHC release"))
, flagA "monly-4-regs" (NoArg (addWarn "The -monly-4-regs flag does nothing; it will be removed in a future GHC release"))
, flagA "msse2" (NoArg (setDynFlag Opt_SSE2))
, flagA "msse4.2" (NoArg (setDynFlag Opt_SSE4_2))
------ Warning opts -------------------------------------------------
, flagA "W" (NoArg (mapM_ setWarningFlag minusWOpts))
......
-- | Generating C symbol names emitted by the compiler.
module CPrim (popCntLabel) where
import CmmType
import Outputable
popCntLabel :: Width -> String
popCntLabel w = "hs_popcnt" ++ pprWidth w
where
pprWidth W8 = "8"
pprWidth W16 = "16"
pprWidth W32 = "32"
pprWidth W64 = "64"
pprWidth w = pprPanic "popCntLabel: Unsupported word width " (ppr w)
......@@ -28,6 +28,7 @@ where
import PPC.Instr
import PPC.Cond
import PPC.Regs
import CPrim
import NCGMonad
import Instruction
import PIC
......@@ -1142,6 +1143,8 @@ genCCall' gcp target dest_regs argsAndHints
MO_Memset -> (fsLit "memset", False)
MO_Memmove -> (fsLit "memmove", False)
MO_PopCnt w -> (fsLit $ popCntLabel w, False)
other -> pprPanic "genCCall(ppc): unknown callish op"
(pprCallishMachOp other)
......
......@@ -13,6 +13,7 @@ import SPARC.Instr
import SPARC.Imm
import SPARC.Regs
import SPARC.Base
import CPrim
import NCGMonad
import PIC
import Instruction
......@@ -332,5 +333,7 @@ outOfLineMachOp_table mop
MO_Memset -> fsLit "memset"
MO_Memmove -> fsLit "memmove"
MO_PopCnt w -> fsLit $ popCntLabel w
_ -> pprPanic "outOfLineMachOp(sparc): Unknown callish mach op "
(pprCallishMachOp mop)
......@@ -28,6 +28,7 @@ import X86.Instr
import X86.Cond
import X86.Regs
import X86.RegInfo
import CPrim
import Instruction
import PIC
import NCGMonad
......@@ -70,9 +71,14 @@ sse2Enabled = do
-- calling convention specifies the use of xmm regs,
-- and possibly other places.
return True
ArchX86 -> return (dopt Opt_SSE2 dflags)
ArchX86 -> return (dopt Opt_SSE2 dflags || dopt Opt_SSE4_2 dflags)
_ -> panic "sse2Enabled: Not an X86* arch"
sse4_2Enabled :: NatM Bool
sse4_2Enabled = do
dflags <- getDynFlagsNat
return (dopt Opt_SSE4_2 dflags)
if_sse2 :: NatM a -> NatM a -> NatM a
if_sse2 sse2 x87 = do
b <- sse2Enabled
......@@ -1574,6 +1580,26 @@ genCCall (CmmPrim MO_WriteBarrier) _ _ = return nilOL
-- write barrier compiles to no code on x86/x86-64;
-- we keep it this long in order to prevent earlier optimisations.
genCCall (CmmPrim (MO_PopCnt width)) dest_regs@[CmmHinted dst _]
args@[CmmHinted src _] = do
sse4_2 <- sse4_2Enabled
if sse4_2
then do code_src <- getAnyReg src
src_r <- getNewRegNat size
return $ code_src src_r `appOL`
(if width == W8 then
-- The POPCNT instruction doesn't take a r/m8
unitOL (MOVZxL II8 (OpReg src_r) (OpReg src_r)) `appOL`
unitOL (POPCNT II16 (OpReg src_r)
(getRegisterReg False (CmmLocal dst)))
else
unitOL (POPCNT size (OpReg src_r)
(getRegisterReg False (CmmLocal dst))))
else genCCall (CmmCallee (fn width) CCallConv) dest_regs args
where size = intSize width
fn w = CmmLit (CmmLabel (mkForeignLabel (fsLit (popCntLabel w)) Nothing
ForeignLabelInExternalPackage IsFunction))
genCCall target dest_regs args =
do dflags <- getDynFlagsNat
if target32Bit (targetPlatform dflags)
......@@ -1990,6 +2016,8 @@ outOfLineCmmOp mop res args
MO_Memset -> fsLit "memset"
MO_Memmove -> fsLit "memmove"
MO_PopCnt _ -> fsLit "popcnt"
other -> panic $ "outOfLineCmmOp: unmatched op! (" ++ show other ++ ")"
......
......@@ -310,6 +310,8 @@ data Instr
-- call 1f
-- 1: popl %reg
-- SSE4.2
| POPCNT Size Operand Reg -- src, dst
data Operand
= OpReg Reg -- register
......@@ -403,6 +405,8 @@ x86_regUsageOfInstr instr
COMMENT _ -> noUsage
DELTA _ -> noUsage
POPCNT _ src dst -> mkRU (use_R src) [dst]
_other -> panic "regUsage: unrecognised instr"
where
......@@ -539,6 +543,8 @@ x86_patchRegsOfInstr instr env
JXX_GBL _ _ -> instr
CLTD _ -> instr
POPCNT sz src dst -> POPCNT sz (patchOp src) (env dst)
_other -> panic "patchRegs: unrecognised instr"
where
......
......@@ -574,6 +574,8 @@ pprInstr platform (XOR FF32 src dst) = pprOpOp platform (sLit "xorps") FF32 src
pprInstr platform (XOR FF64 src dst) = pprOpOp platform (sLit "xorpd") FF64 src dst
pprInstr platform (XOR size src dst) = pprSizeOpOp platform (sLit "xor") size src dst
pprInstr platform (POPCNT size src dst) = pprOpOp platform (sLit "popcnt") size src (OpReg dst)
pprInstr platform (NOT size op) = pprSizeOp platform (sLit "not") size op
pprInstr platform (NEGI size op) = pprSizeOp platform (sLit "neg") size op
......
......@@ -302,6 +302,22 @@ primop WordNeOp "neWord#" Compare Word# -> Word# -> Bool
primop WordLtOp "ltWord#" Compare Word# -> Word# -> Bool
primop WordLeOp "leWord#" Compare Word# -> Word# -> Bool
primop PopCnt8Op "popCnt8#" Monadic Word# -> Word#
{Count the number of set bits in the lower 8 bits of a word.}
primop PopCnt16Op "popCnt16#" Monadic Word# -> Word#
{Count the number of set bits in the lower 16 bits of a word.}
primop PopCnt32Op "popCnt32#" Monadic Word# -> Word#
{Count the number of set bits in the lower 32 bits of a word.}
#if WORD_SIZE_IN_BITS < 64
primop PopCnt64Op "popCnt64#" Monadic Word64# -> Word#
{Count the number of set bits in a 64-bit word.}
#else
primop PopCnt64Op "popCnt64#" Monadic Word# -> Word#
{Count the number of set bits in a 64-bit word.}
#endif
primop PopCntOp "popCnt#" Monadic Word# -> Word#
{Count the number of set bits in a word.}
------------------------------------------------------------------------
section "Narrowings"
{Explicit narrowing of native-sized ints or words.}
......@@ -1926,6 +1942,3 @@ primop TraceEventOp "traceEvent#" GenPrimOp
------------------------------------------------------------------------
thats_all_folks
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment