Commit 18087a11 authored by thoughtpolice's avatar thoughtpolice

Add support for byte endian swapping for Word 16/32/64.

  * Exposes bSwap{,16,32,64}# primops
  * Add a new machop: MO_BSwap
  * Use a Stg implementation (hs_bswap{16,32,64}) for other implementation
    in NCG.
  * Generate bswap in X86 NCG for 32 and 64 bits, and for 16 bits, bswap+shr
    instead of using xchg.
  * Generate llvm.bswap intrinsics in llvm codegen.
Authored-by: tab's avatarVincent Hanquez <tab@snarc.org>
Signed-off-by: thoughtpolice's avatarAustin Seipp <aseipp@pobox.com>
parent 2f99cdb9
......@@ -529,6 +529,7 @@ data CallishMachOp
| MO_Memmove
| MO_PopCnt Width
| MO_BSwap Width
deriving (Eq, Show)
pprCallishMachOp :: CallishMachOp -> SDoc
......
......@@ -738,6 +738,7 @@ pprCallishMachOp_for_C mop
MO_Memcpy -> ptext (sLit "memcpy")
MO_Memset -> ptext (sLit "memset")
MO_Memmove -> ptext (sLit "memmove")
(MO_BSwap w) -> ptext (sLit $ bSwapLabel w)
(MO_PopCnt w) -> ptext (sLit $ popCntLabel w)
(MO_UF_Conv w) -> ptext (sLit $ word2FloatLabel w)
......
......@@ -541,6 +541,11 @@ emitPrimOp _ [] CopyMutableByteArrayOp [src,src_off,dst,dst_off,n] =
emitPrimOp _ [] SetByteArrayOp [ba,off,len,c] =
doSetByteArrayOp ba off len c
emitPrimOp _ [res] BSwap16Op [w] = emitBSwapCall res w W16
emitPrimOp _ [res] BSwap32Op [w] = emitBSwapCall res w W32
emitPrimOp _ [res] BSwap64Op [w] = emitBSwapCall res w W64
emitPrimOp dflags [res] BSwapOp [w] = emitBSwapCall res w (wordWidth dflags)
-- Population count
emitPrimOp _ [res] PopCnt8Op [w] = emitPopCntCall res w W8
emitPrimOp _ [res] PopCnt16Op [w] = emitPopCntCall res w W16
......@@ -1568,6 +1573,13 @@ emitAllocateCall res cap n = do
allocate = CmmLit (CmmLabel (mkForeignLabel (fsLit "allocate") Nothing
ForeignLabelInExternalPackage IsFunction))
emitBSwapCall :: LocalReg -> CmmExpr -> Width -> FCode ()
emitBSwapCall res x width = do
emitPrimCall
[ res ]
(MO_BSwap width)
[ x ]
emitPopCntCall :: LocalReg -> CmmExpr -> Width -> FCode ()
emitPopCntCall res x width = do
emitPrimCall
......
......@@ -221,30 +221,11 @@ genCall t@(PrimTarget MO_Prefetch_Data) [] args = do
`appOL` trash `snocOL` call
return (stmts, top1 ++ top2)
-- Handle popcnt function specifically since GHC only really has i32 and i64
-- types and things like Word8 are backed by an i32 and just present a logical
-- i8 range. So we must handle conversions from i32 to i8 explicitly as LLVM
-- is strict about types.
genCall t@(PrimTarget op@(MO_PopCnt w)) [dst] args = do
let width = widthToLlvmInt w
dstTy = cmmToLlvmType $ localRegType dst
fname <- cmmPrimOpFunctions op
(fptr, _, top3) <- getInstrinct fname width [width]
dstV <- getCmmReg (CmmLocal dst)
let (_, arg_hints) = foreignTargetHints t
let args_hints = zip args arg_hints
(argsV, stmts2, top2) <- arg_vars args_hints ([], nilOL, [])
(argsV', stmts4) <- castVars $ zip argsV [width]
(retV, s1) <- doExpr width $ Call StdCall fptr argsV' []
([retV'], stmts5) <- castVars [(retV,dstTy)]
let s2 = Store retV' dstV
let stmts = stmts2 `appOL` stmts4 `snocOL`
s1 `appOL` stmts5 `snocOL` s2
return (stmts, top2 ++ top3)
-- Handle PopCnt and BSwap that need to only convert arg and return types
genCall t@(PrimTarget (MO_PopCnt w)) dsts args =
genCallSimpleCast w t dsts args
genCall t@(PrimTarget (MO_BSwap w)) dsts args =
genCallSimpleCast w t dsts args
-- Handle memcpy function specifically since llvm's intrinsic version takes
-- some extra parameters.
......@@ -390,9 +371,36 @@ genCall target res args = do
return (allStmts `snocOL` s2 `snocOL` s3
`appOL` retStmt, top1 ++ top2)
-- Handle simple function call that only need simple type casting, of the form:
-- truncate arg >>= \a -> call(a) >>= zext
--
-- since GHC only really has i32 and i64 types and things like Word8 are backed
-- by an i32 and just present a logical i8 range. So we must handle conversions
-- from i32 to i8 explicitly as LLVM is strict about types.
genCallSimpleCast :: Width -> ForeignTarget -> [CmmFormal] -> [CmmActual]
-> LlvmM StmtData
genCallSimpleCast w t@(PrimTarget op) [dst] args = do
let width = widthToLlvmInt w
dstTy = cmmToLlvmType $ localRegType dst
fname <- cmmPrimOpFunctions op
(fptr, _, top3) <- getInstrinct fname width [width]
dstV <- getCmmReg (CmmLocal dst)
-- genCallSimpleCast _ _ _ dsts _ =
-- panic ("genCallSimpleCast: " ++ show (length dsts) ++ " dsts")
let (_, arg_hints) = foreignTargetHints t
let args_hints = zip args arg_hints
(argsV, stmts2, top2) <- arg_vars args_hints ([], nilOL, [])
(argsV', stmts4) <- castVars $ zip argsV [width]
(retV, s1) <- doExpr width $ Call StdCall fptr argsV' []
([retV'], stmts5) <- castVars [(retV,dstTy)]
let s2 = Store retV' dstV
let stmts = stmts2 `appOL` stmts4 `snocOL`
s1 `appOL` stmts5 `snocOL` s2
return (stmts, top2 ++ top3)
genCallSimpleCast _ _ dsts _ =
panic ("genCallSimpleCast: " ++ show (length dsts) ++ " dsts")
-- | Create a function pointer from a target.
getFunPtr :: (LMString -> LlvmType) -> ForeignTarget
......@@ -534,6 +542,7 @@ cmmPrimOpFunctions mop = do
MO_Memset -> fsLit $ "llvm.memset." ++ intrinTy2
(MO_PopCnt w) -> fsLit $ "llvm.ctpop." ++ showSDoc dflags (ppr $ widthToLlvmInt w)
(MO_BSwap w) -> fsLit $ "llvm.bswap." ++ showSDoc dflags (ppr $ widthToLlvmInt w)
MO_Prefetch_Data -> fsLit "llvm.prefetch"
......
-- | Generating C symbol names emitted by the compiler.
module CPrim
( popCntLabel
, bSwapLabel
, word2FloatLabel
) where
......@@ -16,6 +17,14 @@ popCntLabel w = "hs_popcnt" ++ pprWidth w
pprWidth W64 = "64"
pprWidth w = pprPanic "popCntLabel: Unsupported word width " (ppr w)
bSwapLabel :: Width -> String
bSwapLabel w = "hs_bswap" ++ pprWidth w
where
pprWidth W16 = "16"
pprWidth W32 = "32"
pprWidth W64 = "64"
pprWidth w = pprPanic "bSwapLabel: Unsupported word width " (ppr w)
word2FloatLabel :: Width -> String
word2FloatLabel w = "hs_word2float" ++ pprWidth w
where
......
......@@ -1155,6 +1155,7 @@ genCCall' dflags gcp target dest_regs args0
MO_Memset -> (fsLit "memset", False)
MO_Memmove -> (fsLit "memmove", False)
MO_BSwap w -> (fsLit $ bSwapLabel w, False)
MO_PopCnt w -> (fsLit $ popCntLabel w, False)
MO_S_QuotRem {} -> unsupported
......
......@@ -647,6 +647,7 @@ outOfLineMachOp_table mop
MO_Memset -> fsLit "memset"
MO_Memmove -> fsLit "memmove"
MO_BSwap w -> fsLit $ bSwapLabel w
MO_PopCnt w -> fsLit $ popCntLabel w
MO_S_QuotRem {} -> unsupported
......
......@@ -1658,6 +1658,29 @@ genCCall _ (PrimTarget MO_Touch) _ _ = return nilOL
genCCall _ (PrimTarget MO_Prefetch_Data) _ _ = return nilOL
genCCall is32Bit (PrimTarget (MO_BSwap width)) [dst] [src] = do
dflags <- getDynFlags
let platform = targetPlatform dflags
let dst_r = getRegisterReg platform False (CmmLocal dst)
case width of
W64 | is32Bit -> do
ChildCode64 vcode rlo <- iselExpr64 src
let dst_rhi = getHiVRegFromLo dst_r
rhi = getHiVRegFromLo rlo
return $ vcode `appOL`
toOL [ MOV II32 (OpReg rlo) (OpReg dst_rhi),
MOV II32 (OpReg rhi) (OpReg dst_r),
BSWAP II32 dst_rhi,
BSWAP II32 dst_r ]
W16 -> do code_src <- getAnyReg src
return $ code_src dst_r `appOL`
unitOL (BSWAP II32 dst_r) `appOL`
unitOL (SHR II32 (OpImm $ ImmInt 16) (OpReg dst_r))
_ -> do code_src <- getAnyReg src
return $ code_src dst_r `appOL` unitOL (BSWAP size dst_r)
where
size = intSize width
genCCall is32Bit (PrimTarget (MO_PopCnt width)) dest_regs@[dst]
args@[src] = do
sse4_2 <- sse4_2Enabled
......@@ -2325,6 +2348,7 @@ outOfLineCmmOp mop res args
MO_Memmove -> fsLit "memmove"
MO_PopCnt _ -> fsLit "popcnt"
MO_BSwap _ -> fsLit "bswap"
MO_UF_Conv _ -> unsupported
......
......@@ -208,6 +208,7 @@ data Instr
| XOR Size Operand Operand
| NOT Size Operand
| NEGI Size Operand -- NEG instruction (name clash with Cond)
| BSWAP Size Reg
-- Shifts (amount may be immediate or %cl only)
| SHL Size Operand{-amount-} Operand
......@@ -351,6 +352,7 @@ x86_regUsageOfInstr platform instr
XOR _ src dst -> usageRM src dst
NOT _ op -> usageM op
BSWAP _ reg -> mkRU [reg] [reg]
NEGI _ op -> usageM op
SHL _ imm dst -> usageRM imm dst
SAR _ imm dst -> usageRM imm dst
......@@ -489,6 +491,7 @@ x86_patchRegsOfInstr instr env
OR sz src dst -> patch2 (OR sz) src dst
XOR sz src dst -> patch2 (XOR sz) src dst
NOT sz op -> patch1 (NOT sz) op
BSWAP sz reg -> BSWAP sz (env reg)
NEGI sz op -> patch1 (NEGI sz) op
SHL sz imm dst -> patch1 (SHL sz imm) dst
SAR sz imm dst -> patch1 (SAR sz imm) dst
......
......@@ -578,6 +578,7 @@ pprInstr (XOR size src dst) = pprSizeOpOp (sLit "xor") size src dst
pprInstr (POPCNT size src dst) = pprOpOp (sLit "popcnt") size src (OpReg dst)
pprInstr (NOT size op) = pprSizeOp (sLit "not") size op
pprInstr (BSWAP size op) = pprSizeOp (sLit "bswap") size (OpReg op)
pprInstr (NEGI size op) = pprSizeOp (sLit "neg") size op
pprInstr (SHL size src dst) = pprShift (sLit "shl") size src dst
......
......@@ -363,6 +363,15 @@ primop PopCnt64Op "popCnt64#" GenPrimOp WORD64 -> Word#
primop PopCntOp "popCnt#" Monadic Word# -> Word#
{Count the number of set bits in a word.}
primop BSwap16Op "byteSwap16#" Monadic Word# -> Word#
{Swap bytes in the lower 16 bits of a word. The higher bytes are undefined. }
primop BSwap32Op "byteSwap32#" Monadic Word# -> Word#
{Swap bytes in the lower 32 bits of a word. The higher bytes are undefined. }
primop BSwap64Op "byteSwap64#" Monadic WORD64 -> WORD64
{Swap bytes in a 64 bits of a word.}
primop BSwapOp "byteSwap#" Monadic Word# -> Word#
{Swap bytes in a word.}
------------------------------------------------------------------------
section "Narrowings"
{Explicit narrowing of native-sized ints or words.}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment