Commit 33173a51 authored by Alexandre R. Baldé's avatar Alexandre R. Baldé Committed by Marge Bot

Add support for bitreverse primop

    This commit includes the necessary changes in code and
    documentation to support a primop that reverses a word's
    bits. It also includes a test.
parent 6f7115df
......@@ -617,6 +617,7 @@ data CallishMachOp
| MO_Ctz Width
| MO_BSwap Width
| MO_BRev Width
-- Atomic read-modify-write.
| MO_AtomicRMW Width AtomicMachOp
......
......@@ -814,6 +814,7 @@ pprCallishMachOp_for_C mop
MO_Memmove _ -> text "memmove"
MO_Memcmp _ -> text "memcmp"
(MO_BSwap w) -> ptext (sLit $ bSwapLabel w)
(MO_BRev w) -> ptext (sLit $ bRevLabel w)
(MO_PopCnt w) -> ptext (sLit $ popCntLabel w)
(MO_Pext w) -> ptext (sLit $ pextLabel w)
(MO_Pdep w) -> ptext (sLit $ pdepLabel w)
......
......@@ -619,6 +619,12 @@ emitPrimOp _ [res] BSwap32Op [w] = emitBSwapCall res w W32
emitPrimOp _ [res] BSwap64Op [w] = emitBSwapCall res w W64
emitPrimOp dflags [res] BSwapOp [w] = emitBSwapCall res w (wordWidth dflags)
emitPrimOp _ [res] BRev8Op [w] = emitBRevCall res w W8
emitPrimOp _ [res] BRev16Op [w] = emitBRevCall res w W16
emitPrimOp _ [res] BRev32Op [w] = emitBRevCall res w W32
emitPrimOp _ [res] BRev64Op [w] = emitBRevCall res w W64
emitPrimOp dflags [res] BRevOp [w] = emitBRevCall res w (wordWidth dflags)
-- Population count
emitPrimOp _ [res] PopCnt8Op [w] = emitPopCntCall res w W8
emitPrimOp _ [res] PopCnt16Op [w] = emitPopCntCall res w W16
......@@ -2511,6 +2517,13 @@ emitBSwapCall res x width = do
(MO_BSwap width)
[ x ]
emitBRevCall :: LocalReg -> CmmExpr -> Width -> FCode ()
emitBRevCall res x width = do
emitPrimCall
[ res ]
(MO_BRev width)
[ x ]
emitPopCntCall :: LocalReg -> CmmExpr -> Width -> FCode ()
emitPopCntCall res x width = do
emitPrimCall
......
......@@ -230,6 +230,8 @@ genCall t@(PrimTarget (MO_Ctz w)) dsts args =
genCallSimpleCast w t dsts args
genCall t@(PrimTarget (MO_BSwap w)) dsts args =
genCallSimpleCast w t dsts args
genCall t@(PrimTarget (MO_BRev w)) dsts args =
genCallSimpleCast w t dsts args
genCall (PrimTarget (MO_AtomicRMW width amop)) [dst] [addr, n] = runStmtsDecls $ do
addrVar <- exprToVarW addr
......@@ -791,10 +793,11 @@ cmmPrimOpFunctions mop = do
MO_Memset _ -> fsLit $ "llvm.memset." ++ intrinTy2
MO_Memcmp _ -> fsLit $ "memcmp"
(MO_PopCnt w) -> fsLit $ "llvm.ctpop." ++ showSDoc dflags (ppr $ widthToLlvmInt w)
(MO_BSwap w) -> fsLit $ "llvm.bswap." ++ showSDoc dflags (ppr $ widthToLlvmInt w)
(MO_Clz w) -> fsLit $ "llvm.ctlz." ++ showSDoc dflags (ppr $ widthToLlvmInt w)
(MO_Ctz w) -> fsLit $ "llvm.cttz." ++ showSDoc dflags (ppr $ widthToLlvmInt w)
(MO_PopCnt w) -> fsLit $ "llvm.ctpop." ++ showSDoc dflags (ppr $ widthToLlvmInt w)
(MO_BSwap w) -> fsLit $ "llvm.bswap." ++ showSDoc dflags (ppr $ widthToLlvmInt w)
(MO_BRev w) -> fsLit $ "llvm.bitreverse." ++ showSDoc dflags (ppr $ widthToLlvmInt w)
(MO_Clz w) -> fsLit $ "llvm.ctlz." ++ showSDoc dflags (ppr $ widthToLlvmInt w)
(MO_Ctz w) -> fsLit $ "llvm.cttz." ++ showSDoc dflags (ppr $ widthToLlvmInt w)
(MO_Pdep w) -> let w' = showSDoc dflags (ppr $ widthInBits w)
in if isBmi2Enabled dflags
......
......@@ -8,6 +8,7 @@ module CPrim
, pdepLabel
, pextLabel
, bSwapLabel
, bRevLabel
, clzLabel
, ctzLabel
, word2FloatLabel
......@@ -54,6 +55,15 @@ bSwapLabel w = "hs_bswap" ++ pprWidth w
pprWidth W64 = "64"
pprWidth w = pprPanic "bSwapLabel: Unsupported word width " (ppr w)
bRevLabel :: Width -> String
bRevLabel w = "hs_bitrev" ++ pprWidth w
where
pprWidth W8 = "8"
pprWidth W16 = "16"
pprWidth W32 = "32"
pprWidth W64 = "64"
pprWidth w = pprPanic "bRevLabel: Unsupported word width " (ppr w)
clzLabel :: Width -> String
clzLabel w = "hs_clz" ++ pprWidth w
where
......
......@@ -2007,6 +2007,7 @@ genCCall' dflags gcp target dest_regs args
MO_Memcmp _ -> (fsLit "memcmp", False)
MO_BSwap w -> (fsLit $ bSwapLabel w, False)
MO_BRev w -> (fsLit $ bRevLabel w, False)
MO_PopCnt w -> (fsLit $ popCntLabel w, False)
MO_Pdep w -> (fsLit $ pdepLabel w, False)
MO_Pext w -> (fsLit $ pextLabel w, False)
......
......@@ -667,6 +667,7 @@ outOfLineMachOp_table mop
MO_Memcmp _ -> fsLit "memcmp"
MO_BSwap w -> fsLit $ bSwapLabel w
MO_BRev w -> fsLit $ bRevLabel w
MO_PopCnt w -> fsLit $ popCntLabel w
MO_Pdep w -> fsLit $ pdepLabel w
MO_Pext w -> fsLit $ pextLabel w
......
......@@ -531,7 +531,7 @@ getRegister' dflags is32Bit (CmmRegOff r n)
getRegister' dflags is32Bit (CmmMachOp (MO_AlignmentCheck align _) [e])
= addAlignmentCheck align <$> getRegister' dflags is32Bit e
-- for 32-bit architectuers, support some 64 -> 32 bit conversions:
-- for 32-bit architectures, support some 64 -> 32 bit conversions:
-- TO_W_(x), TO_W_(x >> 32)
getRegister' _ is32Bit (CmmMachOp (MO_UU_Conv W64 W32)
......@@ -2936,6 +2936,10 @@ outOfLineCmmOp bid mop res args
MO_PopCnt _ -> fsLit "popcnt"
MO_BSwap _ -> fsLit "bswap"
{- Here the C implementation is used as there is no x86
instruction to reverse a word's bit order.
-}
MO_BRev w -> fsLit $ bRevLabel w
MO_Clz w -> fsLit $ clzLabel w
MO_Ctz _ -> unsupported
......
......@@ -655,6 +655,17 @@ primop BSwap64Op "byteSwap64#" Monadic WORD64 -> WORD64
primop BSwapOp "byteSwap#" Monadic Word# -> Word#
{Swap bytes in a word.}
primop BRev8Op "bitReverse8#" Monadic Word# -> Word#
{Reverse the order of the bits in a 8-bit word.}
primop BRev16Op "bitReverse16#" Monadic Word# -> Word#
{Reverse the order of the bits in a 16-bit word.}
primop BRev32Op "bitReverse32#" Monadic Word# -> Word#
{Reverse the order of the bits in a 32-bit word.}
primop BRev64Op "bitReverse64#" Monadic WORD64 -> WORD64
{Reverse the order of the bits in a 64-bit word.}
primop BRevOp "bitReverse#" Monadic Word# -> Word#
{Reverse the order of the bits in a word.}
------------------------------------------------------------------------
section "Narrowings"
{Explicit narrowing of native-sized ints or words.}
......
......@@ -70,6 +70,10 @@ Template Haskell
``ghc-prim`` library
~~~~~~~~~~~~~~~~~~~~
- Add new `bitReverse#` primops that, for a `Word` of 8, 16, 32 or 64 bits,
reverse the order of its bits e.g. `0b110001` becomes `0b100011`.
These primitives use optimized machine instructions when available.
``ghc`` library
~~~~~~~~~~~~~~~
......
......@@ -56,6 +56,14 @@ StgWord16 hs_bswap16(StgWord16 x);
StgWord32 hs_bswap32(StgWord32 x);
StgWord64 hs_bswap64(StgWord64 x);
/* libraries/ghc-prim/cbits/bitrev.c
This was done as part of issue #16164.
See Note [Bit reversal primop] for more details about the implementation.*/
StgWord hs_bitrev8(StgWord x);
StgWord16 hs_bitrev16(StgWord16 x);
StgWord32 hs_bitrev32(StgWord32 x);
StgWord64 hs_bitrev64(StgWord64 x);
/* TODO: longlong.c */
/* libraries/ghc-prim/cbits/pdep.c */
......
......@@ -25,6 +25,9 @@ module Data.Word
-- * byte swapping
byteSwap16, byteSwap32, byteSwap64,
-- * bit reversal
bitReverse8, bitReverse16, bitReverse32, bitReverse64
-- * Notes
-- $notes
......
......@@ -31,6 +31,12 @@ module GHC.Word (
byteSwap32,
byteSwap64,
-- * Bit reversal
bitReverse8,
bitReverse16,
bitReverse32,
bitReverse64,
-- * Equality operators
-- | See GHC.Classes#matching_overloaded_methods_in_rules
eqWord, neWord, gtWord, geWord, ltWord, leWord,
......@@ -1006,6 +1012,35 @@ byteSwap64 :: Word64 -> Word64
byteSwap64 (W64# w#) = W64# (byteSwap# w#)
#endif
-- | Reverse the order of the bits in a 'Word8'.
--
-- @since 4.12.0.0
bitReverse8 :: Word8 -> Word8
bitReverse8 (W8# w#) = W8# (narrow8Word# (bitReverse8# w#))
-- | Reverse the order of the bits in a 'Word16'.
--
-- @since 4.12.0.0
bitReverse16 :: Word16 -> Word16
bitReverse16 (W16# w#) = W16# (narrow16Word# (bitReverse16# w#))
-- | Reverse the order of the bits in a 'Word32'.
--
-- @since 4.12.0.0
bitReverse32 :: Word32 -> Word32
bitReverse32 (W32# w#) = W32# (narrow32Word# (bitReverse32# w#))
-- | Reverse the order of the bits in a 'Word64'.
--
-- @since 4.12.0.0
#if WORD_SIZE_IN_BITS < 64
bitReverse64 :: Word64 -> Word64
bitReverse64 (W64# w#) = W64# (bitReverse64# w#)
#else
bitReverse64 :: Word64 -> Word64
bitReverse64 (W64# w#) = W64# (bitReverse# w#)
#endif
-------------------------------------------------------------------------------
{-# RULES
......
#include "Rts.h"
/*
Note [Bit reversal primop]
~~~~~~~~~~~~~~~~~~~~~~~~~~
There are two main ways of reversing the bit order of a word: bit twiddling
and using a lookup table.
See [this excellent](https://stackoverflow.com/questions/746171/most-efficient-algorithm-for-bit-reversal-from-msb-lsb-to-lsb-msb-in-c this)
Stack Overflow answer about bit order reversal for (much) more detail.
(Link valid as of March 2019.)
To summarize,
* the lookup table is faster, but much more memory-heavy e.g.
doing it for 64-bit words can take 64KB if only 16-bits are reversed at
a time.
* working directly with bits is slower (roughly on the same order of
magnitude as the previous alternative), but uses much less memory as
bit-wise operators aren't space-onerous.
The code below uses the latter option. If in the future the performance of this
primop must be improved, the information provided in this comment should be
useful in making the decision of which changes to make.
For more information on how the below bit-twiddling functions came to be, see
[this](http://graphics.stanford.edu/~seander/bithacks.html#ReverseParallel)
page.
*/
extern StgWord hs_bitrev8(StgWord x);
StgWord
hs_bitrev8(StgWord x)
{
x = ((x >> 1) & 0x55) | ((x & 0x55) << 1 );
x = ((x >> 2) & 0x33) | ((x & 0x33) << 2 );
x = ((x >> 4) & 0x0F) | ((x & 0x0F) << 4 );
return x;
}
extern StgWord16 hs_bitrev16(StgWord16 x);
StgWord16
hs_bitrev16(StgWord16 x)
{
x = ((x >> 1) & 0x5555) | ((x & 0x5555) << 1 );
x = ((x >> 2) & 0x3333) | ((x & 0x3333) << 2 );
x = ((x >> 4) & 0x0F0F) | ((x & 0x0F0F) << 4 );
x = ((x >> 8) & 0x00FF) | ((x & 0x00FF) << 8 );
return x;
}
extern StgWord32 hs_bitrev32(StgWord32 x);
StgWord32
hs_bitrev32(StgWord32 x)
{
x = ((x >> 1) & 0x55555555) | ((x & 0x55555555) << 1 );
x = ((x >> 2) & 0x33333333) | ((x & 0x33333333) << 2 );
x = ((x >> 4) & 0x0F0F0F0F) | ((x & 0x0F0F0F0F) << 4 );
x = ((x >> 8) & 0x00FF00FF) | ((x & 0x00FF00FF) << 8 );
x = ( x >> 16 ) | ( x << 16 );
return x;
}
extern StgWord64 hs_bitrev64(StgWord64 x);
StgWord64
hs_bitrev64(StgWord64 x)
{
// swap odd and even bits
x = ((x >> 1) & 0x5555555555555555) | ((x & 0x5555555555555555) << 1 );
// swap consecutive pairs of bits
x = ((x >> 2) & 0x3333333333333333) | ((x & 0x3333333333333333) << 2 );
// swap consecutive pairs of nibbles (a nibble is 4 bits)
x = ((x >> 4) & 0x0F0F0F0F0F0F0F0F) | ((x & 0x0F0F0F0F0F0F0F0F) << 4 );
// swap consecutive pairs of bytes
x = ((x >> 8) & 0x00FF00FF00FF00FF) | ((x & 0x00FF00FF00FF00FF) << 8 );
// swap consecutive pairs of 16-bit words
x = ((x >> 16) & 0x0000FFFF0000FFFF) | ((x & 0x0000FFFF0000FFFF) << 16);
// swap 32-bit long pairs
x = ( x >> 32 ) | ( x << 32 );
return x;
}
\ No newline at end of file
## 0.6.1
## 0.6.1 (edit as necessary)
- Shipped with GHC 8.10.1
......@@ -6,6 +6,17 @@
closureSize# :: a -> Int#
- Added to `GHC.Prim`:
bitReverse# :: Word# -> Word#
bitReverse8# :: Word# -> Word#
bitReverse16# :: Word# -> Word#
bitReverse32# :: Word# -> Word#
bitReverse64# :: Word# -> Word#
`bitReverse#` is a primop that, for a `Word` of 8, 16, 32 or 64 bits,
reverses the order of its bits e.g. `0b110001` becomes `0b100011`.
These primitives use optimized machine instructions when available.
## 0.6.0
- Shipped with GHC 8.8.1
......@@ -14,7 +25,7 @@
traceBinaryEvent# :: Addr# -> Int# -> State# s -> State# s
## 0.5.3 (edit as necessary)
## 0.5.3
- Shipped with GHC 8.6.1
......
......@@ -69,6 +69,7 @@ Library
c-sources:
cbits/atomic.c
cbits/bswap.c
cbits/bitrev.c
cbits/clz.c
cbits/ctz.c
cbits/debug.c
......
......@@ -1277,6 +1277,7 @@ mk/ghcconfig*_test___spaces_ghc*.exe.mk
/tests/primops/should_run/T6135
/tests/primops/should_run/T7689
/tests/primops/should_run/T9430
/tests/primops/should_run/T16164
/tests/profiling/should_compile/prof001
/tests/profiling/should_compile/prof002
/tests/profiling/should_run/2592
......
import Data.Bits (FiniteBits (..), unsafeShiftL, unsafeShiftR, (.&.),
(.|.))
import Data.Char (intToDigit)
import Data.Word (Word8, Word16, Word32, Word64, bitReverse8,
bitReverse16, bitReverse32, bitReverse64)
import Numeric (showIntAtBase)
-- | Given a word, check:
--
-- * if the reverse of its @String@ representation in base 2 matches the
-- @String@ representation of that word with its bit order reversed.
-- order reversed, and
-- * if reversing its bits and then reverse the resulting word's bits again
-- yields the same word.
-- Takes the bit reversion function as an argument so different word types
-- can be used with their own functions.
test :: (FiniteBits a, Integral a, Show a) => (a -> a) -> a -> Bool
test bitReverter x =
let -- These zeroes are to left-pad the base-2 representation of
-- @x@ so that the string has one ASCII character per bit in the
-- word type e.g. @Word8@s produce strings with 8 characters.
leftPad = countLeadingZeros x
-- These zeroes are to left-pad the base-2 representation of
-- bit-reversed @x@ so that the string has one ASCII character per bit
-- in the word type e.g. @Word8@s produce strings with 8 characters.
reverseLeftPad = countTrailingZeros x
toBinaryString a = showIntAtBase 2 intToDigit a ""
binaryX = replicate leftPad '0' ++ toBinaryString x
revX = bitReverter x
binaryRevX = replicate reverseLeftPad '0' ++ toBinaryString revX
revRevX = bitReverter revX
in (x == revRevX) && (reverse binaryX == binaryRevX)
word8s :: [Word8]
word8s = [29, 31, 61, 102, 129, 129, 153, 213, 241, 246]
word16s :: [Word16]
word16s = [555, 3298, 4548, 12557, 16464, 16481, 40722, 51736, 55009, 62554]
word32s :: [Word32]
word32s = [6585, 10944, 21639, 25202, 27228, 836732395, 848624442, 3798715760, 3909052537, 4224371164]
word64s :: [Word64]
word64s = [2451351, 5096456, 8248539, 13039372, 15656413, 367814400638368418, 15152819454280096771, 15184978641026131315, 16329695467052396714, 17634654963076276082]
main :: IO ()
main = do
let printer f = mapM_ (print . test f)
printer bitReverse8 word8s
printer bitReverse16 word16s
printer bitReverse32 word32s
printer bitReverse64 word64s
\ No newline at end of file
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
\ No newline at end of file
......@@ -13,6 +13,7 @@ test('T10678',
compile_and_run, ['-O'])
test('T11296', normal, compile_and_run, [''])
test('T13825-compile', normal, compile_and_run, [''])
test('T16164', normal, compile_and_run, [''])
test('ArithInt8', omit_ways(['ghci']), compile_and_run, [''])
test('ArithWord8', omit_ways(['ghci']), compile_and_run, [''])
test('CmpInt8', normal, compile_and_run, [''])
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment