Commit 76735615 authored by alexbiehl's avatar alexbiehl Committed by Ben Gamari

Turn `compareByteArrays#` out-of-line primop into inline primop

Depends on D4090

Reviewers: austin, bgamari, erikd, simonmar, alexbiehl

Reviewed By: bgamari

Subscribers: rwbarton, thomie

Differential Revision: https://phabricator.haskell.org/D4091
parent cecd2f2d
...@@ -584,6 +584,7 @@ data CallishMachOp ...@@ -584,6 +584,7 @@ data CallishMachOp
| MO_Memcpy Int | MO_Memcpy Int
| MO_Memset Int | MO_Memset Int
| MO_Memmove Int | MO_Memmove Int
| MO_Memcmp Int
| MO_PopCnt Width | MO_PopCnt Width
| MO_Clz Width | MO_Clz Width
...@@ -616,6 +617,7 @@ callishMachOpHints op = case op of ...@@ -616,6 +617,7 @@ callishMachOpHints op = case op of
MO_Memcpy _ -> ([], [AddrHint,AddrHint,NoHint]) MO_Memcpy _ -> ([], [AddrHint,AddrHint,NoHint])
MO_Memset _ -> ([], [AddrHint,NoHint,NoHint]) MO_Memset _ -> ([], [AddrHint,NoHint,NoHint])
MO_Memmove _ -> ([], [AddrHint,AddrHint,NoHint]) MO_Memmove _ -> ([], [AddrHint,AddrHint,NoHint])
MO_Memcmp _ -> ([], [AddrHint, AddrHint, NoHint])
_ -> ([],[]) _ -> ([],[])
-- empty lists indicate NoHint -- empty lists indicate NoHint
...@@ -625,4 +627,5 @@ machOpMemcpyishAlign op = case op of ...@@ -625,4 +627,5 @@ machOpMemcpyishAlign op = case op of
MO_Memcpy align -> Just align MO_Memcpy align -> Just align
MO_Memset align -> Just align MO_Memset align -> Just align
MO_Memmove align -> Just align MO_Memmove align -> Just align
MO_Memcmp align -> Just align
_ -> Nothing _ -> Nothing
...@@ -994,6 +994,7 @@ callishMachOps = listToUFM $ ...@@ -994,6 +994,7 @@ callishMachOps = listToUFM $
( "memcpy", memcpyLikeTweakArgs MO_Memcpy ), ( "memcpy", memcpyLikeTweakArgs MO_Memcpy ),
( "memset", memcpyLikeTweakArgs MO_Memset ), ( "memset", memcpyLikeTweakArgs MO_Memset ),
( "memmove", memcpyLikeTweakArgs MO_Memmove ), ( "memmove", memcpyLikeTweakArgs MO_Memmove ),
( "memcmp", memcpyLikeTweakArgs MO_Memcmp ),
("prefetch0", (,) $ MO_Prefetch_Data 0), ("prefetch0", (,) $ MO_Prefetch_Data 0),
("prefetch1", (,) $ MO_Prefetch_Data 1), ("prefetch1", (,) $ MO_Prefetch_Data 1),
......
...@@ -786,6 +786,7 @@ pprCallishMachOp_for_C mop ...@@ -786,6 +786,7 @@ pprCallishMachOp_for_C mop
MO_Memcpy _ -> text "memcpy" MO_Memcpy _ -> text "memcpy"
MO_Memset _ -> text "memset" MO_Memset _ -> text "memset"
MO_Memmove _ -> text "memmove" MO_Memmove _ -> text "memmove"
MO_Memcmp _ -> text "memcmp"
(MO_BSwap w) -> ptext (sLit $ bSwapLabel w) (MO_BSwap w) -> ptext (sLit $ bSwapLabel w)
(MO_PopCnt w) -> ptext (sLit $ popCntLabel w) (MO_PopCnt w) -> ptext (sLit $ popCntLabel w)
(MO_Clz w) -> ptext (sLit $ clzLabel w) (MO_Clz w) -> ptext (sLit $ clzLabel w)
......
...@@ -47,7 +47,7 @@ import Outputable ...@@ -47,7 +47,7 @@ import Outputable
import Util import Util
import Data.Bits ((.&.), bit) import Data.Bits ((.&.), bit)
import Control.Monad (liftM, when) import Control.Monad (liftM, when, unless)
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- Primitive operations and foreign calls -- Primitive operations and foreign calls
...@@ -568,6 +568,10 @@ emitPrimOp _ [] CopyAddrToByteArrayOp [src,dst,dst_off,n] = ...@@ -568,6 +568,10 @@ emitPrimOp _ [] CopyAddrToByteArrayOp [src,dst,dst_off,n] =
emitPrimOp _ [] SetByteArrayOp [ba,off,len,c] = emitPrimOp _ [] SetByteArrayOp [ba,off,len,c] =
doSetByteArrayOp ba off len c doSetByteArrayOp ba off len c
-- Comparing byte arrays
emitPrimOp _ [res] CompareByteArraysOp [ba1,ba1_off,ba2,ba2_off,n] =
doCompareByteArraysOp res ba1 ba1_off ba2 ba2_off n
emitPrimOp _ [res] BSwap16Op [w] = emitBSwapCall res w W16 emitPrimOp _ [res] BSwap16Op [w] = emitBSwapCall res w W16
emitPrimOp _ [res] BSwap32Op [w] = emitBSwapCall res w W32 emitPrimOp _ [res] BSwap32Op [w] = emitBSwapCall res w W32
emitPrimOp _ [res] BSwap64Op [w] = emitBSwapCall res w W64 emitPrimOp _ [res] BSwap64Op [w] = emitBSwapCall res w W64
...@@ -1719,6 +1723,17 @@ doNewByteArrayOp res_r n = do ...@@ -1719,6 +1723,17 @@ doNewByteArrayOp res_r n = do
emit $ mkAssign (CmmLocal res_r) base emit $ mkAssign (CmmLocal res_r) base
-- ----------------------------------------------------------------------------
-- Comparing byte arrays
doCompareByteArraysOp :: LocalReg -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
-> FCode ()
doCompareByteArraysOp res ba1 ba1_off ba2 ba2_off n = do
dflags <- getDynFlags
ba1_p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags ba1 (arrWordsHdrSize dflags)) ba1_off
ba2_p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags ba2 (arrWordsHdrSize dflags)) ba2_off
emitMemcmpCall res ba1_p ba2_p n 1
-- ---------------------------------------------------------------------------- -- ----------------------------------------------------------------------------
-- Copying byte arrays -- Copying byte arrays
...@@ -2213,6 +2228,30 @@ emitMemsetCall dst c n align = do ...@@ -2213,6 +2228,30 @@ emitMemsetCall dst c n align = do
(MO_Memset align) (MO_Memset align)
[ dst, c, n ] [ dst, c, n ]
emitMemcmpCall :: LocalReg -> CmmExpr -> CmmExpr -> CmmExpr -> Int -> FCode ()
emitMemcmpCall res ptr1 ptr2 n align = do
-- 'MO_Memcmp' is assumed to return an 32bit 'CInt' because all
-- code-gens currently call out to the @memcmp(3)@ C function.
-- This was easier than moving the sign-extensions into
-- all the code-gens.
dflags <- getDynFlags
let is32Bit = typeWidth (localRegType res) == W32
cres <- if is32Bit
then return res
else newTemp b32
emitPrimCall
[ cres ]
(MO_Memcmp align)
[ ptr1, ptr2, n ]
unless is32Bit $ do
emit $ mkAssign (CmmLocal res)
(CmmMachOp
(mo_s_32ToWord dflags)
[(CmmReg (CmmLocal cres))])
emitBSwapCall :: LocalReg -> CmmExpr -> Width -> FCode () emitBSwapCall :: LocalReg -> CmmExpr -> Width -> FCode ()
emitBSwapCall res x width = do emitBSwapCall res x width = do
emitPrimCall emitPrimCall
......
...@@ -728,6 +728,7 @@ cmmPrimOpFunctions mop = do ...@@ -728,6 +728,7 @@ cmmPrimOpFunctions mop = do
MO_Memcpy _ -> fsLit $ "llvm.memcpy." ++ intrinTy1 MO_Memcpy _ -> fsLit $ "llvm.memcpy." ++ intrinTy1
MO_Memmove _ -> fsLit $ "llvm.memmove." ++ intrinTy1 MO_Memmove _ -> fsLit $ "llvm.memmove." ++ intrinTy1
MO_Memset _ -> fsLit $ "llvm.memset." ++ intrinTy2 MO_Memset _ -> fsLit $ "llvm.memset." ++ intrinTy2
MO_Memcmp _ -> fsLit $ "memcmp"
(MO_PopCnt w) -> fsLit $ "llvm.ctpop." ++ showSDoc dflags (ppr $ widthToLlvmInt w) (MO_PopCnt w) -> fsLit $ "llvm.ctpop." ++ showSDoc dflags (ppr $ widthToLlvmInt w)
(MO_BSwap w) -> fsLit $ "llvm.bswap." ++ showSDoc dflags (ppr $ widthToLlvmInt w) (MO_BSwap w) -> fsLit $ "llvm.bswap." ++ showSDoc dflags (ppr $ widthToLlvmInt w)
......
...@@ -1905,6 +1905,7 @@ genCCall' dflags gcp target dest_regs args ...@@ -1905,6 +1905,7 @@ genCCall' dflags gcp target dest_regs args
MO_Memcpy _ -> (fsLit "memcpy", False) MO_Memcpy _ -> (fsLit "memcpy", False)
MO_Memset _ -> (fsLit "memset", False) MO_Memset _ -> (fsLit "memset", False)
MO_Memmove _ -> (fsLit "memmove", False) MO_Memmove _ -> (fsLit "memmove", False)
MO_Memcmp _ -> (fsLit "memcmp", False)
MO_BSwap w -> (fsLit $ bSwapLabel w, False) MO_BSwap w -> (fsLit $ bSwapLabel w, False)
MO_PopCnt w -> (fsLit $ popCntLabel w, False) MO_PopCnt w -> (fsLit $ popCntLabel w, False)
......
...@@ -650,6 +650,7 @@ outOfLineMachOp_table mop ...@@ -650,6 +650,7 @@ outOfLineMachOp_table mop
MO_Memcpy _ -> fsLit "memcpy" MO_Memcpy _ -> fsLit "memcpy"
MO_Memset _ -> fsLit "memset" MO_Memset _ -> fsLit "memset"
MO_Memmove _ -> fsLit "memmove" MO_Memmove _ -> fsLit "memmove"
MO_Memcmp _ -> fsLit "memcmp"
MO_BSwap w -> fsLit $ bSwapLabel w MO_BSwap w -> fsLit $ bSwapLabel w
MO_PopCnt w -> fsLit $ popCntLabel w MO_PopCnt w -> fsLit $ popCntLabel w
......
...@@ -2682,6 +2682,7 @@ outOfLineCmmOp mop res args ...@@ -2682,6 +2682,7 @@ outOfLineCmmOp mop res args
MO_Memcpy _ -> fsLit "memcpy" MO_Memcpy _ -> fsLit "memcpy"
MO_Memset _ -> fsLit "memset" MO_Memset _ -> fsLit "memset"
MO_Memmove _ -> fsLit "memmove" MO_Memmove _ -> fsLit "memmove"
MO_Memcmp _ -> fsLit "memcmp"
MO_PopCnt _ -> fsLit "popcnt" MO_PopCnt _ -> fsLit "popcnt"
MO_BSwap _ -> fsLit "bswap" MO_BSwap _ -> fsLit "bswap"
......
...@@ -1410,7 +1410,6 @@ primop CompareByteArraysOp "compareByteArrays#" GenPrimOp ...@@ -1410,7 +1410,6 @@ primop CompareByteArraysOp "compareByteArrays#" GenPrimOp
respectively, to be byte-wise lexicographically less than, to respectively, to be byte-wise lexicographically less than, to
match, or be greater than the second range.} match, or be greater than the second range.}
with with
out_of_line = True
can_fail = True can_fail = True
primop CopyByteArrayOp "copyByteArray#" GenPrimOp primop CopyByteArrayOp "copyByteArray#" GenPrimOp
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment