Commit 1c5b0511 authored by ian@well-typed.com's avatar ian@well-typed.com

Add support for byte endian swapping for Word 16/32/64.

* Exposes bSwap{,16,32,64}# primops
* Add a new machops MO_BSwap
* Use a Stg implementation (hs_bswap{16,32,64}) for other implementation
  in NCG.
* Generate bswap in X86 NCG for 32 and 64 bits, and for 16 bits, bswap+shr
  instead of using xchg.
* Generate llvm.bswap intrinsics in llvm codegen.

Patch from Vincent Hanquez.
parent 972c044d
......@@ -529,6 +529,7 @@ data CallishMachOp
| MO_Memmove
| MO_PopCnt Width
| MO_BSwap Width
deriving (Eq, Show)
pprCallishMachOp :: CallishMachOp -> SDoc
......
......@@ -738,6 +738,7 @@ pprCallishMachOp_for_C mop
MO_Memcpy -> ptext (sLit "memcpy")
MO_Memset -> ptext (sLit "memset")
MO_Memmove -> ptext (sLit "memmove")
(MO_BSwap w) -> ptext (sLit $ bSwapLabel w)
(MO_PopCnt w) -> ptext (sLit $ popCntLabel w)
(MO_UF_Conv w) -> ptext (sLit $ word2FloatLabel w)
......
......@@ -541,6 +541,11 @@ emitPrimOp _ [] CopyMutableByteArrayOp [src,src_off,dst,dst_off,n] =
emitPrimOp _ [] SetByteArrayOp [ba,off,len,c] =
doSetByteArrayOp ba off len c
emitPrimOp _ [res] BSwap16Op [w] = emitBSwapCall res w W16
emitPrimOp _ [res] BSwap32Op [w] = emitBSwapCall res w W32
emitPrimOp _ [res] BSwap64Op [w] = emitBSwapCall res w W64
emitPrimOp dflags [res] BSwapOp [w] = emitBSwapCall res w (wordWidth dflags)
-- Population count
emitPrimOp _ [res] PopCnt8Op [w] = emitPopCntCall res w W8
emitPrimOp _ [res] PopCnt16Op [w] = emitPopCntCall res w W16
......@@ -1568,6 +1573,13 @@ emitAllocateCall res cap n = do
allocate = CmmLit (CmmLabel (mkForeignLabel (fsLit "allocate") Nothing
ForeignLabelInExternalPackage IsFunction))
emitBSwapCall :: LocalReg -> CmmExpr -> Width -> FCode ()
emitBSwapCall res x width = do
emitPrimCall
[ res ]
(MO_BSwap width)
[ x ]
emitPopCntCall :: LocalReg -> CmmExpr -> Width -> FCode ()
emitPopCntCall res x width = do
emitPrimCall
......
......@@ -220,30 +220,11 @@ genCall env t@(PrimTarget MO_Prefetch_Data) [] args = do
`appOL` trashStmts (getDflags env) `snocOL` call
return (env2, stmts, top1 ++ top2)
-- Handle popcnt function specifically since GHC only really has i32 and i64
-- types and things like Word8 are backed by an i32 and just present a logical
-- i8 range. So we must handle conversions from i32 to i8 explicitly as LLVM
-- is strict about types.
genCall env t@(PrimTarget (MO_PopCnt w)) [dst] args = do
let dflags = getDflags env
width = widthToLlvmInt w
dstTy = cmmToLlvmType $ localRegType dst
funTy = \n -> LMFunction $ LlvmFunctionDecl n ExternallyVisible
CC_Ccc width FixedArgs (tysToParams [width]) Nothing
(env1, dstV, stmts1, top1) = getCmmReg env (CmmLocal dst)
let (_, arg_hints) = foreignTargetHints t
let args_hints = zip args arg_hints
(env2, argsV, stmts2, top2) <- arg_vars env1 args_hints ([], nilOL, [])
(env3, fptr, stmts3, top3) <- getFunPtr env2 funTy t
(argsV', stmts4) <- castVars dflags $ zip argsV [width]
(retV, s1) <- doExpr width $ Call StdCall fptr argsV' []
([retV'], stmts5) <- castVars dflags [(retV,dstTy)]
let s2 = Store retV' dstV
let stmts = stmts1 `appOL` stmts2 `appOL` stmts3 `appOL` stmts4 `snocOL`
s1 `appOL` stmts5 `snocOL` s2
return (env3, stmts, top1 ++ top2 ++ top3)
-- Handle PopCnt and BSwap that need to only convert arg and return types
genCall env t@(PrimTarget (MO_PopCnt w)) dsts args =
genCallSimpleCast env w t dsts args
genCall env t@(PrimTarget (MO_BSwap w)) dsts args =
genCallSimpleCast env w t dsts args
-- Handle memcpy function specifically since llvm's intrinsic version takes
-- some extra parameters.
......@@ -386,6 +367,36 @@ genCall env target res args = do
return (env3, allStmts `snocOL` s2 `snocOL` s3
`appOL` retStmt, top1 ++ top2 ++ top3)
-- Handle simple function call that only need simple type casting, of the form:
-- truncate arg >>= \a -> call(a) >>= zext
--
-- since GHC only really has i32 and i64 types and things like Word8 are backed
-- by an i32 and just present a logical i8 range. So we must handle conversions
-- from i32 to i8 explicitly as LLVM is strict about types.
genCallSimpleCast :: LlvmEnv -> Width -> ForeignTarget -> [CmmFormal] -> [CmmActual]
-> UniqSM StmtData
genCallSimpleCast env w t [dst] args = do
let dflags = getDflags env
width = widthToLlvmInt w
dstTy = cmmToLlvmType $ localRegType dst
funTy = \n -> LMFunction $ LlvmFunctionDecl n ExternallyVisible
CC_Ccc width FixedArgs (tysToParams [width]) Nothing
(env1, dstV, stmts1, top1) = getCmmReg env (CmmLocal dst)
let (_, arg_hints) = foreignTargetHints t
let args_hints = zip args arg_hints
(env2, argsV, stmts2, top2) <- arg_vars env1 args_hints ([], nilOL, [])
(env3, fptr, stmts3, top3) <- getFunPtr env2 funTy t
(argsV', stmts4) <- castVars dflags $ zip argsV [width]
(retV, s1) <- doExpr width $ Call StdCall fptr argsV' []
(retV', stmts5) <- if getVarType retV == dstTy
then return (retV, Nop)
else doExpr dstTy $ Cast LM_Zext retV dstTy
let s2 = Store retV' dstV
let stmts = stmts1 `appOL` stmts2 `appOL` stmts3 `appOL` stmts4 `snocOL`
s1 `appOL` toOL [stmts5] `snocOL` s2
return (env3, stmts, top1 ++ top2 ++ top3)
-- | Create a function pointer from a target.
getFunPtr :: LlvmEnv -> (LMString -> LlvmType) -> ForeignTarget
......@@ -539,6 +550,7 @@ cmmPrimOpFunctions env mop
MO_Memset -> fsLit $ "llvm.memset." ++ intrinTy2
(MO_PopCnt w) -> fsLit $ "llvm.ctpop." ++ show (widthToLlvmInt w)
(MO_BSwap w) -> fsLit $ "llvm.bswap." ++ show (widthToLlvmInt w)
MO_Prefetch_Data -> fsLit "llvm.prefetch"
......
-- | Generating C symbol names emitted by the compiler.
module CPrim
( popCntLabel
, bSwapLabel
, word2FloatLabel
) where
......@@ -16,6 +17,14 @@ popCntLabel w = "hs_popcnt" ++ pprWidth w
pprWidth W64 = "64"
pprWidth w = pprPanic "popCntLabel: Unsupported word width " (ppr w)
bSwapLabel :: Width -> String
bSwapLabel w = "hs_bswap" ++ pprWidth w
where
pprWidth W16 = "16"
pprWidth W32 = "32"
pprWidth W64 = "64"
pprWidth w = pprPanic "bSwapLabel: Unsupported word width " (ppr w)
word2FloatLabel :: Width -> String
word2FloatLabel w = "hs_word2float" ++ pprWidth w
where
......
......@@ -1155,6 +1155,7 @@ genCCall' dflags gcp target dest_regs args0
MO_Memset -> (fsLit "memset", False)
MO_Memmove -> (fsLit "memmove", False)
MO_BSwap w -> (fsLit $ bSwapLabel w, False)
MO_PopCnt w -> (fsLit $ popCntLabel w, False)
MO_S_QuotRem {} -> unsupported
......
......@@ -647,6 +647,7 @@ outOfLineMachOp_table mop
MO_Memset -> fsLit "memset"
MO_Memmove -> fsLit "memmove"
MO_BSwap w -> fsLit $ bSwapLabel w
MO_PopCnt w -> fsLit $ popCntLabel w
MO_S_QuotRem {} -> unsupported
......
......@@ -1658,6 +1658,19 @@ genCCall _ (PrimTarget MO_Touch) _ _ = return nilOL
genCCall _ (PrimTarget MO_Prefetch_Data) _ _ = return nilOL
genCCall _ (PrimTarget (MO_BSwap width)) [dst] [src] = do
dflags <- getDynFlags
let platform = targetPlatform dflags
let dst_r = getRegisterReg platform False (CmmLocal dst)
code_src <- getAnyReg src
case width of
W16 -> return $ code_src dst_r `appOL`
unitOL (BSWAP II32 dst_r) `appOL`
unitOL (SHR II32 (OpImm $ ImmInt 16) (OpReg dst_r))
_ -> return $ code_src dst_r `appOL` unitOL (BSWAP size dst_r)
where
size = intSize width
genCCall is32Bit (PrimTarget (MO_PopCnt width)) dest_regs@[dst]
args@[src] = do
sse4_2 <- sse4_2Enabled
......@@ -2325,6 +2338,7 @@ outOfLineCmmOp mop res args
MO_Memmove -> fsLit "memmove"
MO_PopCnt _ -> fsLit "popcnt"
MO_BSwap _ -> fsLit "bswap"
MO_UF_Conv _ -> unsupported
......
......@@ -208,6 +208,7 @@ data Instr
| XOR Size Operand Operand
| NOT Size Operand
| NEGI Size Operand -- NEG instruction (name clash with Cond)
| BSWAP Size Reg
-- Shifts (amount may be immediate or %cl only)
| SHL Size Operand{-amount-} Operand
......@@ -351,6 +352,7 @@ x86_regUsageOfInstr platform instr
XOR _ src dst -> usageRM src dst
NOT _ op -> usageM op
BSWAP _ reg -> mkRU [reg] [reg]
NEGI _ op -> usageM op
SHL _ imm dst -> usageRM imm dst
SAR _ imm dst -> usageRM imm dst
......@@ -489,6 +491,7 @@ x86_patchRegsOfInstr instr env
OR sz src dst -> patch2 (OR sz) src dst
XOR sz src dst -> patch2 (XOR sz) src dst
NOT sz op -> patch1 (NOT sz) op
BSWAP sz reg -> BSWAP sz (env reg)
NEGI sz op -> patch1 (NEGI sz) op
SHL sz imm dst -> patch1 (SHL sz imm) dst
SAR sz imm dst -> patch1 (SAR sz imm) dst
......
......@@ -578,6 +578,7 @@ pprInstr (XOR size src dst) = pprSizeOpOp (sLit "xor") size src dst
pprInstr (POPCNT size src dst) = pprOpOp (sLit "popcnt") size src (OpReg dst)
pprInstr (NOT size op) = pprSizeOp (sLit "not") size op
pprInstr (BSWAP size op) = pprSizeOp (sLit "bswap") size (OpReg op)
pprInstr (NEGI size op) = pprSizeOp (sLit "neg") size op
pprInstr (SHL size src dst) = pprShift (sLit "shl") size src dst
......
......@@ -363,6 +363,15 @@ primop PopCnt64Op "popCnt64#" GenPrimOp WORD64 -> Word#
primop PopCntOp "popCnt#" Monadic Word# -> Word#
{Count the number of set bits in a word.}
primop BSwap16Op "byteSwap16#" Monadic Word# -> Word#
{Swap bytes in the lower 16 bits of a word. The higher bytes are undefined. }
primop BSwap32Op "byteSwap32#" Monadic Word# -> Word#
{Swap bytes in the lower 32 bits of a word. The higher bytes are undefined. }
primop BSwap64Op "byteSwap64#" GenPrimOp WORD64 -> WORD64
{Swap bytes in a 64 bits of a word.}
primop BSwapOp "byteSwap#" Monadic Word# -> Word#
{Swap bytes in a word.}
------------------------------------------------------------------------
section "Narrowings"
{Explicit narrowing of native-sized ints or words.}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment