Commit ec962cb9 authored by Andreas Klebinger's avatar Andreas Klebinger

Backport fixes from !1953 fixing #17334.

parent dde5c06a
......@@ -550,6 +550,10 @@ cmmNativeGen dflags this_mod modLoc ncgImpl us fileIds dbgMap cmm count
= do
let platform = targetPlatform dflags
let proc_name = case cmm of
(CmmProc _ entry_label _ _) -> ppr entry_label
_ -> text "DataChunk"
-- rewrite assignments to global regs
let fixed_cmm =
{-# SCC "fixStgRegisters" #-}
......@@ -579,12 +583,11 @@ cmmNativeGen dflags this_mod modLoc ncgImpl us fileIds dbgMap cmm count
Opt_D_dump_asm_native "Native code"
(vcat $ map (pprNatCmmDecl ncgImpl) native)
dumpIfSet_dyn dflags
Opt_D_dump_cfg_weights "CFG Weights"
(pprEdgeWeights nativeCfgWeights)
maybeDumpCfg dflags (Just nativeCfgWeights) "CFG Weights - Native" proc_name
-- tag instructions with register liveness information
-- also drops dead code
-- also drops dead code. We don't keep the cfg in sync on
-- some backends, so don't use it there.
let livenessCfg = if (backendMaintainsCfg dflags)
then Just nativeCfgWeights
else Nothing
......@@ -697,12 +700,13 @@ cmmNativeGen dflags this_mod modLoc ncgImpl us fileIds dbgMap cmm count
cfgRegAllocUpdates = (concatMap Linear.ra_fixupList raStats)
let cfgWithFixupBlks =
addNodesBetween nativeCfgWeights cfgRegAllocUpdates
pure addNodesBetween <*> livenessCfg <*> pure cfgRegAllocUpdates
-- Insert stack update blocks
let postRegCFG =
foldl' (\m (from,to) -> addImmediateSuccessor from to m )
cfgWithFixupBlks stack_updt_blks
let postRegCFG :: Maybe CFG
postRegCFG =
pure (foldl' (\m (from,to) -> addImmediateSuccessor from to m )) <*>
cfgWithFixupBlks <*> pure stack_updt_blks
---- x86fp_kludge. This pass inserts ffree instructions to clear
---- the FPU stack on x86. The x86 ABI requires that the FPU stack
......@@ -729,11 +733,9 @@ cmmNativeGen dflags this_mod modLoc ncgImpl us fileIds dbgMap cmm count
shortcutBranches dflags ncgImpl tabled postRegCFG
let optimizedCFG =
optimizeCFG (cfgWeightInfo dflags) cmm postShortCFG
optimizeCFG (cfgWeightInfo dflags) cmm <$> postShortCFG
dumpIfSet_dyn dflags
Opt_D_dump_cfg_weights "CFG Final Weights"
( pprEdgeWeights optimizedCFG )
maybeDumpCfg dflags optimizedCFG "CFG Weights - Final" proc_name
--TODO: Partially check validity of the cfg.
let getBlks (CmmProc _info _lbl _live (ListGraph blocks)) = blocks
......@@ -743,8 +745,8 @@ cmmNativeGen dflags this_mod modLoc ncgImpl us fileIds dbgMap cmm count
(gopt Opt_DoAsmLinting dflags || debugIsOn )) $ do
let blocks = concatMap getBlks shorted
let labels = setFromList $ fmap blockId blocks :: LabelSet
return $! seq (sanityCheckCfg optimizedCFG labels $
text "cfg not in lockstep") ()
return $! seq (pure sanityCheckCfg <*> optimizedCFG <*> pure labels <*>
pure (text "cfg not in lockstep")) ()
---- sequence blocks
let sequenced :: [NatCmmDecl statics instr]
......@@ -761,6 +763,8 @@ cmmNativeGen dflags this_mod modLoc ncgImpl us fileIds dbgMap cmm count
{-# SCC "invertCondBranches" #-}
map invert sequenced
where
invertConds :: LabelMap CmmStatics -> [NatBasicBlock instr]
-> [NatBasicBlock instr]
invertConds = (invertCondBranches ncgImpl) optimizedCFG
invert top@CmmData {} = top
invert (CmmProc info lbl live (ListGraph blocks)) =
......@@ -793,6 +797,15 @@ cmmNativeGen dflags this_mod modLoc ncgImpl us fileIds dbgMap cmm count
, ppr_raStatsLinear
, unwinds )
maybeDumpCfg :: DynFlags -> Maybe CFG -> String -> SDoc -> IO ()
maybeDumpCfg _dflags Nothing _ _ = return ()
maybeDumpCfg dflags (Just cfg) msg proc_name
| null cfg = return ()
| otherwise
= dumpIfSet_dyn
dflags Opt_D_dump_cfg_weights msg
(proc_name <> char ':' $$ pprEdgeWeights cfg)
-- | Make sure all blocks we want the layout algorithm to place have been placed.
checkLayout :: [NatCmmDecl statics instr] -> [NatCmmDecl statics instr]
-> [NatCmmDecl statics instr]
......@@ -917,13 +930,13 @@ shortcutBranches
:: forall statics instr jumpDest. (Outputable jumpDest) => DynFlags
-> NcgImpl statics instr jumpDest
-> [NatCmmDecl statics instr]
-> CFG
-> ([NatCmmDecl statics instr],CFG)
-> Maybe CFG
-> ([NatCmmDecl statics instr],Maybe CFG)
shortcutBranches dflags ncgImpl tops weights
| gopt Opt_AsmShortcutting dflags
= ( map (apply_mapping ncgImpl mapping) tops'
, shortcutWeightMap weights mappingBid )
, shortcutWeightMap mappingBid <$!> weights )
| otherwise
= (tops, weights)
where
......
......@@ -639,29 +639,31 @@ dropJumps info ((BasicBlock lbl ins):todo)
sequenceTop
:: (Instruction instr, Outputable instr)
=> DynFlags --Use new layout code
-> NcgImpl statics instr jumpDest -> CFG
-> NatCmmDecl statics instr -> NatCmmDecl statics instr
=> DynFlags -- Determine which layout algo to use
-> NcgImpl statics instr jumpDest
-> Maybe CFG -- ^ CFG if we have one.
-> NatCmmDecl statics instr -- ^ Function to serialize
-> NatCmmDecl statics instr
sequenceTop _ _ _ top@(CmmData _ _) = top
sequenceTop dflags ncgImpl edgeWeights
(CmmProc info lbl live (ListGraph blocks))
| (gopt Opt_CfgBlocklayout dflags) && backendMaintainsCfg dflags
--Use chain based algorithm
, Just cfg <- edgeWeights
= CmmProc info lbl live ( ListGraph $ ncgMakeFarBranches ncgImpl info $
sequenceChain info edgeWeights blocks )
{-# SCC layoutBlocks #-}
sequenceChain info cfg blocks )
| otherwise
--Use old algorithm
= CmmProc info lbl live ( ListGraph $ ncgMakeFarBranches ncgImpl info $
sequenceBlocks cfg info blocks)
= let cfg = if dontUseCfg then Nothing else edgeWeights
in CmmProc info lbl live ( ListGraph $ ncgMakeFarBranches ncgImpl info $
{-# SCC layoutBlocks #-}
sequenceBlocks cfg info blocks)
where
cfg
| (gopt Opt_WeightlessBlocklayout dflags) ||
(not $ backendMaintainsCfg dflags)
-- Don't make use of cfg in the old algorithm
= Nothing
-- Use cfg in the old algorithm
| otherwise = Just edgeWeights
dontUseCfg = gopt Opt_WeightlessBlocklayout dflags ||
(not $ backendMaintainsCfg dflags)
-- The old algorithm:
-- It is very simple (and stupid): We make a graph out of
......
......@@ -61,8 +61,6 @@ import qualified DynFlags as D
import Data.List
-- import qualified Data.IntMap.Strict as M --TODO: LabelMap
type Edge = (BlockId, BlockId)
type Edges = [Edge]
......@@ -76,6 +74,13 @@ instance Outputable EdgeWeight where
type EdgeInfoMap edgeInfo = LabelMap (LabelMap edgeInfo)
-- | A control flow graph where edges have been annotated with a weight.
-- Implemented as IntMap (IntMap <edgeData>)
-- We must uphold the invariant that for each edge A -> B we must have:
-- A entry B in the outer map.
-- A entry B in the map we get when looking up A.
-- Maintaining this invariant is useful as any failed lookup now indicates
-- an actual error in code which might go unnoticed for a while
-- otherwise.
type CFG = EdgeInfoMap EdgeInfo
data CfgEdge
......@@ -144,11 +149,20 @@ adjustEdgeWeight cfg f from to
= addEdge from to (info { edgeWeight = f weight}) cfg
| otherwise = cfg
getCfgNodes :: CFG -> LabelSet
getCfgNodes m = mapFoldMapWithKey (\k v -> setFromList (k:mapKeys v)) m
-- | Is this block part of this graph?
hasNode :: CFG -> BlockId -> Bool
hasNode m node = mapMember node m || any (mapMember node) m
hasNode m node =
-- Check the invariant that each node must exist in the first map or not at all.
ASSERT( found || not (any (mapMember node) m))
found
where
found = mapMember node m
-- | Check if the nodes in the cfg and the set of blocks are the same.
-- In a case of a missmatch we panic and show the difference.
......@@ -160,7 +174,7 @@ sanityCheckCfg m blockSet msg
pprPanic "Block list and cfg nodes don't match" (
text "difference:" <+> ppr diff $$
text "blocks:" <+> ppr blockSet $$
text "cfg:" <+> ppr m $$
text "cfg:" <+> pprEdgeWeights m $$
msg )
False
where
......@@ -224,8 +238,8 @@ This function (shortcutWeightMap) takes the same mapping and
applies the mapping to the CFG in the way layed out above.
-}
shortcutWeightMap :: CFG -> LabelMap (Maybe BlockId) -> CFG
shortcutWeightMap cfg cuts =
shortcutWeightMap :: LabelMap (Maybe BlockId) -> CFG -> CFG
shortcutWeightMap cuts cfg =
foldl' applyMapping cfg $ mapToList cuts
where
-- takes the tuple (B,C) from the notation in [Updating the CFG during shortcutting]
......@@ -259,7 +273,7 @@ shortcutWeightMap cfg cuts =
-- \ \
-- -> C => -> C
--
addImmediateSuccessor :: BlockId -> BlockId -> CFG -> CFG
addImmediateSuccessor :: HasDebugCallStack => BlockId -> BlockId -> CFG -> CFG
addImmediateSuccessor node follower cfg
= updateEdges . addWeightEdge node follower uncondWeight $ cfg
where
......@@ -275,10 +289,16 @@ addImmediateSuccessor node follower cfg
-- | Adds a new edge, overwrites existing edges if present
addEdge :: BlockId -> BlockId -> EdgeInfo -> CFG -> CFG
addEdge from to info cfg =
mapAlter addDest from cfg
mapAlter addFromToEdge from $
mapAlter addDestNode to cfg
where
addDest Nothing = Just $ mapSingleton to info
addDest (Just wm) = Just $ mapInsert to info wm
-- Simply insert the edge into the edge list.
addFromToEdge Nothing = Just $ mapSingleton to info
addFromToEdge (Just wm) = Just $ mapInsert to info wm
-- We must add the destination node explicitly as well
addDestNode Nothing = Just $ mapEmpty
addDestNode n@(Just _) = n
-- | Adds a edge with the given weight to the cfg
-- If there already existed an edge it is overwritten.
......@@ -304,8 +324,11 @@ getSuccEdgesSorted m bid =
sortedEdges
-- | Get successors of a given node with edge weights.
getSuccessorEdges :: CFG -> BlockId -> [(BlockId,EdgeInfo)]
getSuccessorEdges m bid = maybe [] mapToList $ mapLookup bid m
getSuccessorEdges :: HasDebugCallStack => CFG -> BlockId -> [(BlockId,EdgeInfo)]
getSuccessorEdges m bid = maybe lookupError mapToList (mapLookup bid m)
where
lookupError = pprPanic "getSuccessorEdges: Block does not exist" $
ppr bid $$ text "CFG:" <+> pprEdgeWeights m
getEdgeInfo :: BlockId -> BlockId -> CFG -> Maybe EdgeInfo
getEdgeInfo from to m
......@@ -316,12 +339,13 @@ getEdgeInfo from to m
= Nothing
reverseEdges :: CFG -> CFG
reverseEdges cfg = foldr add mapEmpty flatElems
reverseEdges cfg = mapFoldlWithKey (\cfg from toMap -> go (addNode cfg from) from toMap) mapEmpty cfg
where
elems = mapToList $ fmap mapToList cfg :: [(BlockId,[(BlockId,EdgeInfo)])]
flatElems =
concatMap (\(from,ws) -> map (\(to,info) -> (to,from,info)) ws ) elems
add (to,from,info) m = addEdge to from info m
-- We preserve nodes without outgoing edges!
addNode :: CFG -> BlockId -> CFG
addNode cfg b = mapInsertWith mapUnion b mapEmpty cfg
go :: CFG -> BlockId -> (LabelMap EdgeInfo) -> CFG
go cfg from toMap = mapFoldlWithKey (\cfg to info -> addEdge to from info cfg) cfg toMap :: CFG
-- | Returns a unordered list of all edges with info
infoEdgeList :: CFG -> [CfgEdge]
......@@ -347,11 +371,14 @@ edgeList m =
mapFoldMapWithKey (\from toMap -> fmap (from,) (mapKeys toMap)) m
-- | Get successors of a given node without edge weights.
getSuccessors :: CFG -> BlockId -> [BlockId]
getSuccessors :: HasDebugCallStack => CFG -> BlockId -> [BlockId]
getSuccessors m bid
| Just wm <- mapLookup bid m
= mapKeys wm
| otherwise = []
| otherwise = lookupError
where
lookupError = pprPanic "getSuccessors: Block does not exist" $
ppr bid <+> pprEdgeWeights m
pprEdgeWeights :: CFG -> SDoc
pprEdgeWeights m =
......@@ -375,6 +402,7 @@ pprEdgeWeights m =
text "}\n"
{-# INLINE updateEdgeWeight #-} --Allows eliminating the tuple when possible
-- | Invariant: The edge **must** exist already in the graph.
updateEdgeWeight :: (EdgeWeight -> EdgeWeight) -> Edge -> CFG -> CFG
updateEdgeWeight f (from, to) cfg
| Just oldInfo <- getEdgeInfo from to cfg
......@@ -422,7 +450,8 @@ addNodesBetween m updates =
| otherwise
= pprPanic "Can't find weight for edge that should have one" (
text "triple" <+> ppr (from,between,old) $$
text "updates" <+> ppr updates )
text "updates" <+> ppr updates $$
text "cfg:" <+> pprEdgeWeights m )
updateWeight :: CFG -> (BlockId,BlockId,BlockId,EdgeInfo) -> CFG
updateWeight m (from,between,old,edgeInfo)
= addEdge from between edgeInfo .
......@@ -550,7 +579,7 @@ getCfg weights graph =
blocks = revPostorder graph :: [CmmBlock]
--Find back edges by BFS
findBackEdges :: BlockId -> CFG -> Edges
findBackEdges :: HasDebugCallStack => BlockId -> CFG -> Edges
findBackEdges root cfg =
--pprTraceIt "Backedges:" $
map fst .
......@@ -562,7 +591,7 @@ findBackEdges root cfg =
classifyEdges root getSuccs edges :: [((BlockId,BlockId),EdgeType)]
optimizeCFG :: D.CfgWeights -> RawCmmDecl -> CFG -> CFG
optimizeCFG :: HasDebugCallStack => D.CfgWeights -> RawCmmDecl -> CFG -> CFG
optimizeCFG _ (CmmData {}) cfg = cfg
optimizeCFG weights (CmmProc info _lab _live graph) cfg =
favourFewerPreds .
......@@ -641,16 +670,17 @@ optimizeCFG weights (CmmProc info _lab _live graph) cfg =
-- | Determine loop membership of blocks based on SCC analysis
-- Ideally we would replace this with a variant giving us loop
-- levels instead but the SCC code will do for now.
loopMembers :: CFG -> LabelMap Bool
loopMembers :: HasDebugCallStack => CFG -> LabelMap Bool
loopMembers cfg =
foldl' (flip setLevel) mapEmpty sccs
where
mkNode :: BlockId -> Node BlockId BlockId
mkNode bid = DigraphNode bid bid (getSuccessors cfg bid)
nodes = map mkNode (setElems $ getCfgNodes cfg)
nodes = map mkNode $ setElems (getCfgNodes cfg)
sccs = stronglyConnCompFromEdgedVerticesOrd nodes
setLevel :: SCC BlockId -> LabelMap Bool -> LabelMap Bool
setLevel (AcyclicSCC bid) m = mapInsert bid False m
setLevel (CyclicSCC bids) m = foldl' (\m k -> mapInsert k True m) m bids
{-# LANGUAGE CPP #-}
{-# LANGUAGE DeriveFunctor #-}
{-# LANGUAGE BangPatterns #-}
-- -----------------------------------------------------------------------------
--
......@@ -18,7 +20,7 @@ module NCGMonad (
addNodeBetweenNat,
addImmediateSuccessorNat,
updateCfgNat,
getUniqueNat,
getUniqueNat, getCfgNat,
mapAccumLNat,
setDeltaNat,
getDeltaNat,
......@@ -65,6 +67,7 @@ import Instruction
import Outputable (SDoc, pprPanic, ppr)
import Cmm (RawCmmDecl, CmmStatics)
import CFG
import Util
data NcgImpl statics instr jumpDest = NcgImpl {
cmmTopCodeGen :: RawCmmDecl -> NatM [NatCmmDecl statics instr],
......@@ -88,7 +91,7 @@ data NcgImpl statics instr jumpDest = NcgImpl {
-- the block's 'UnwindPoint's
-- See Note [What is this unwinding business?] in Debug
-- and Note [Unwinding information in the NCG] in this module.
invertCondBranches :: CFG -> LabelMap CmmStatics -> [NatBasicBlock instr]
invertCondBranches :: Maybe CFG -> LabelMap CmmStatics -> [NatBasicBlock instr]
-> [NatBasicBlock instr]
-- ^ Turn the sequence of `jcc l1; jmp l2` into `jncc l2; <block_l1>`
-- when possible.
......@@ -206,7 +209,11 @@ addImportNat imp
updateCfgNat :: (CFG -> CFG) -> NatM ()
updateCfgNat f
= NatM $ \ st -> ((), st { natm_cfg = f (natm_cfg st) })
= NatM $ \ st -> let !cfg' = f (natm_cfg st)
in ((), st { natm_cfg = cfg'})
getCfgNat :: NatM CFG
getCfgNat = NatM $ \ st -> (natm_cfg st, st)
-- | Record that we added a block between `from` and `old`.
addNodeBetweenNat :: BlockId -> BlockId -> BlockId -> NatM ()
......@@ -231,7 +238,7 @@ addNodeBetweenNat from between to
-- | Place `succ` after `block` and change any edges
-- block -> X to `succ` -> X
addImmediateSuccessorNat :: BlockId -> BlockId -> NatM ()
addImmediateSuccessorNat :: HasDebugCallStack => BlockId -> BlockId -> NatM ()
addImmediateSuccessorNat block succ
= updateCfgNat (addImmediateSuccessor block succ)
......
......@@ -705,7 +705,7 @@ sccBlocks blocks entries mcfg = map (fmap node_payload) sccs
reachable :: LabelSet
reachable
| Just cfg <- mcfg
-- Our CFG only contains reachable nodes by construction.
-- Our CFG only contains reachable nodes by construction at this point.
= getCfgNodes cfg
| otherwise
= setFromList $ [ node_key node | node <- reachablesG g1 roots ]
......
This diff is collapsed.
......@@ -325,7 +325,9 @@ data Instr
[Maybe JumpDest] -- Targets of the jump table
Section -- Data section jump table should be put in
CLabel -- Label of jump table
| CALL (Either Imm Reg) [Reg]
-- | X86 call instruction
| CALL (Either Imm Reg) -- ^ Jump target
[Reg] -- ^ Arguments (required for register allocation)
-- Other things.
| CLTD Format -- sign extend %eax into %edx:%eax
......
-- Reproducer for T17334
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE MagicHash #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE UnboxedTuples #-}
--Reproducer uses 64bit literals in reverseWord.
--It's ok to truncate those in x86
{-# OPTIONS_GHC -Wno-overflowed-literals #-}
module Bug (reverseInPlace) where
import Control.Monad.ST
import Data.Bits
import GHC.Exts
import GHC.ST (ST(..))
import Data.Kind
reverseInPlace :: PrimMonad m => UMVector (PrimState m) Bit -> m ()
reverseInPlace xs | len == 0 = pure ()
| otherwise = loop 0
where
len = ulength xs
loop !i
| i' <= j' = do
x <- readWord xs i
y <- readWord xs j'
writeWord xs i (reverseWord y)
writeWord xs j' (reverseWord x)
loop i'
| i' < j = do
let w = (j - i) `shiftR` 1
k = j - w
x <- readWord xs i
y <- readWord xs k
writeWord xs i (meld w (reversePartialWord w y) x)
writeWord xs k (meld w (reversePartialWord w x) y)
loop i'
| otherwise = do
let w = j - i
x <- readWord xs i
writeWord xs i (meld w (reversePartialWord w x) x)
where
!j = len - i
!i' = i + wordSize
!j' = j - wordSize
{-# SPECIALIZE reverseInPlace :: UMVector s Bit -> ST s () #-}
newtype Bit = Bit { unBit :: Bool }
instance Unbox Bit
data instance UMVector s Bit = BitMVec !Int !Int !(MutableByteArray s)
data instance UVector Bit = BitVec !Int !Int !ByteArray
readWord :: PrimMonad m => UMVector (PrimState m) Bit -> Int -> m Word
readWord !(BitMVec _ 0 _) _ = pure 0
readWord !(BitMVec off len' arr) !i' = do
let len = off + len'
i = off + i'
nMod = modWordSize i
loIx = divWordSize i
loWord <- readByteArray arr loIx
if nMod == 0
then pure loWord
else if loIx == divWordSize (len - 1)
then pure (loWord `unsafeShiftR` nMod)
else do
hiWord <- readByteArray arr (loIx + 1)
pure
$ (loWord `unsafeShiftR` nMod)
.|. (hiWord `unsafeShiftL` (wordSize - nMod))
{-# SPECIALIZE readWord :: UMVector s Bit -> Int -> ST s Word #-}
{-# INLINE readWord #-}
writeWord :: PrimMonad m => UMVector (PrimState m) Bit -> Int -> Word -> m ()
writeWord !(BitMVec _ 0 _) _ _ = pure ()
writeWord !(BitMVec off len' arr@(MutableByteArray mba)) !i' !x@(W# x#) = do
let len = off + len'
lenMod = modWordSize len
i = off + i'
nMod = modWordSize i
loIx@(I# loIx#) = divWordSize i
if nMod == 0
then if len >= i + wordSize
then primitive $ \state ->
(# atomicWriteIntArray# mba loIx# (word2Int# x#) state, () #)
else do
let W# andMask# = hiMask lenMod
W# orMask# = x .&. loMask lenMod
primitive $ \state ->
let !(# state', _ #) = fetchAndIntArray# mba loIx# (word2Int# andMask#) state in
let !(# state'', _ #) = fetchOrIntArray# mba loIx# (word2Int# orMask#) state' in
(# state'', () #)
else if loIx == divWordSize (len - 1)
then do
loWord <- readByteArray arr loIx
if lenMod == 0
then
writeByteArray arr loIx
$ (loWord .&. loMask nMod)
.|. (x `unsafeShiftL` nMod)
else
writeByteArray arr loIx
$ (loWord .&. (loMask nMod .|. hiMask lenMod))
.|. ((x `unsafeShiftL` nMod) .&. loMask lenMod)
else do
loWord <- readByteArray arr loIx
writeByteArray arr loIx
$ (loWord .&. loMask nMod)
.|. (x `unsafeShiftL` nMod)
hiWord <- readByteArray arr (loIx + 1)
writeByteArray arr (loIx + 1)
$ (hiWord .&. hiMask nMod)
.|. (x `unsafeShiftR` (wordSize - nMod))
{-# SPECIALIZE writeWord :: UMVector s Bit -> Int -> Word -> ST s () #-}
{-# INLINE writeWord #-}
instance GMVector UMVector Bit where
{-# INLINE basicLength #-}
basicLength (BitMVec _ n _) = n
instance GVector UVector Bit where
wordSize :: Int
wordSize = finiteBitSize (0 :: Word)
lgWordSize :: Int
lgWordSize = case wordSize of
32 -> 5
64 -> 6
_ -> error "wordsToBytes: unknown architecture"
divWordSize :: Bits a => a -> a
divWordSize x = unsafeShiftR x lgWordSize
{-# INLINE divWordSize #-}
modWordSize :: Int -> Int
modWordSize x = x .&. (wordSize - 1)
{-# INLINE modWordSize #-}
mask :: Int -> Word
mask b = m
where
m | b >= finiteBitSize m = complement 0
| b < 0 = 0
| otherwise = bit b - 1
meld :: Int -> Word -> Word -> Word
meld b lo hi = (lo .&. m) .|. (hi .&. complement m) where m = mask b
{-# INLINE meld #-}
reverseWord :: Word -> Word
reverseWord x0 = x6
where
x1 = ((x0 .&. 0x5555555555555555) `shiftL` 1) .|. ((x0 .&. 0xAAAAAAAAAAAAAAAA) `shiftR` 1)
x2 = ((x1 .&. 0x3333333333333333) `shiftL` 2) .|. ((x1 .&. 0xCCCCCCCCCCCCCCCC) `shiftR` 2)
x3 = ((x2 .&. 0x0F0F0F0F0F0F0F0F) `shiftL` 4) .|. ((x2 .&. 0xF0F0F0F0F0F0F0F0) `shiftR` 4)
x4 = ((x3 .&. 0x00FF00FF00FF00FF) `shiftL` 8) .|. ((x3 .&. 0xFF00FF00FF00FF00) `shiftR` 8)
x5 = ((x4 .&. 0x0000FFFF0000FFFF) `shiftL` 16) .|. ((x4 .&. 0xFFFF0000FFFF0000) `shiftR` 16)
x6 = ((x5 .&. 0x00000000FFFFFFFF) `shiftL` 32) .|. ((x5 .&. 0xFFFFFFFF00000000) `shiftR` 32)
reversePartialWord :: Int -> Word -> Word
reversePartialWord n w | n >= wordSize = reverseWord w
| otherwise = reverseWord w `shiftR` (wordSize - n)
loMask :: Int -> Word
loMask n = 1 `unsafeShiftL` n - 1
{-# INLINE loMask #-}
hiMask :: Int -> Word