Commits on Source (3)
@@ -26,7 +26,7 @@ module BasicTypes(
        Arity, RepArity, JoinArity,
-       Alignment,
+       Alignment, mkAlignment, alignmentOf, alignmentBytes,
        PromotionFlag(..), isPromoted,
        FunctionOrData(..),
@@ -116,6 +116,7 @@ import Outputable
import SrcLoc ( Located,unLoc )
import Data.Data hiding (Fixity, Prefix, Infix)
import Data.Function (on)
+import Data.Bits
{-
************************************************************************
@@ -196,8 +197,39 @@ fIRST_TAG = 1
************************************************************************
-}
-type Alignment = Int -- align to next N-byte boundary (N must be a power of 2).
+-- | A power-of-two alignment
+newtype Alignment = Alignment { alignmentBytes :: Int } deriving (Eq, Ord)
+-- Builds an alignment, throws on non power of 2 input. This is not
+-- ideal, but convenient for internal use and better than silently
+-- passing incorrect data.
+mkAlignment :: Int -> Alignment
+mkAlignment n
+  | n == 1 = Alignment 1
+  | n == 2 = Alignment 2
+  | n == 4 = Alignment 4
+  | n == 8 = Alignment 8
+  | n == 16 = Alignment 16
+  | n == 32 = Alignment 32
+  | n == 64 = Alignment 64
+  | n == 128 = Alignment 128
+  | n == 256 = Alignment 256
+  | n == 512 = Alignment 512
+  | otherwise = panic "mkAlignment: received either a non power of 2 argument or > 512"
+-- Calculates the alignment of a number: x is aligned at N bytes when
+-- the remainder of x / N is zero. Currently we only care about N <= 8,
+-- but this can be expanded to N <= 16 or N <= 32 if used in an SSE or
+-- AVX context.
+alignmentOf :: Int -> Alignment
+alignmentOf x = case x .&. 7 of
+  0 -> Alignment 8
+  4 -> Alignment 4
+  2 -> Alignment 2
+  _ -> Alignment 1
+
+instance Outputable Alignment where
+  ppr (Alignment m) = ppr m
{-
************************************************************************
*                                                                      *
......
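Because Alignment derives Ord on its byte count, `min` combines two alignments by picking the weaker guarantee, which is how the call sites below merge a known base alignment with an offset-derived one. The following is a self-contained sketch (not GHC code: it restates the definitions above and substitutes errorWithoutStackTrace for GHC's panic) showing the intended behaviour:

import Data.Bits ((.&.))

-- Stand-in for GHC's panic (from the Panic module), for this sketch only.
panic :: String -> a
panic = errorWithoutStackTrace

newtype Alignment = Alignment { alignmentBytes :: Int } deriving (Eq, Ord, Show)

mkAlignment :: Int -> Alignment
mkAlignment n
  | n `elem` [1, 2, 4, 8, 16, 32, 64, 128, 256, 512] = Alignment n
  | otherwise = panic "mkAlignment: non power of 2 argument or > 512"

alignmentOf :: Int -> Alignment
alignmentOf x = case x .&. 7 of
  0 -> Alignment 8
  4 -> Alignment 4
  2 -> Alignment 2
  _ -> Alignment 1

main :: IO ()
main = do
  print (alignmentOf 16)                       -- Alignment {alignmentBytes = 8}
  print (alignmentOf 12)                       -- Alignment {alignmentBytes = 4}
  print (min (mkAlignment 8) (alignmentOf 6))  -- Alignment {alignmentBytes = 1}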
@@ -2073,10 +2073,17 @@ doCopyAddrToByteArrayOp src_p dst dst_off bytes = do
-- character.
doSetByteArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
  -> FCode ()
-doSetByteArrayOp ba off len c
-    = do dflags <- getDynFlags
-         p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags ba (arrWordsHdrSize dflags)) off
-         emitMemsetCall p c len 1
+doSetByteArrayOp ba off len c = do
+    dflags <- getDynFlags
+    let byteArrayAlignment = wordAlignment dflags -- known since BA is allocated on heap
+        offsetAlignment = case off of
+            CmmLit (CmmInt intOff _) -> alignmentOf (fromInteger intOff)
+            _                        -> mkAlignment 1
+        align = min byteArrayAlignment offsetAlignment
+    p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags ba (arrWordsHdrSize dflags)) off
+    emitMemsetCall p c len align
-- ----------------------------------------------------------------------------
-- Allocating arrays
@@ -2347,7 +2354,7 @@ emitSetCards dst_start dst_cards_start n = do
    emitMemsetCall (cmmAddWord dflags dst_cards_start start_card)
        (mkIntExpr dflags 1)
        (cmmAddWord dflags (cmmSubWord dflags end_card start_card) (mkIntExpr dflags 1))
-       1 -- no alignment (1 byte)
+       (mkAlignment 1) -- no alignment (1 byte)
-- Convert an element index to a card index
cardCmm :: DynFlags -> CmmExpr -> CmmExpr
@@ -2473,11 +2480,11 @@ emitMemmoveCall dst src n align = do
-- | Emit a call to @memset@. The second argument must fit inside an
-- unsigned char.
-emitMemsetCall :: CmmExpr -> CmmExpr -> CmmExpr -> Int -> FCode ()
+emitMemsetCall :: CmmExpr -> CmmExpr -> CmmExpr -> Alignment -> FCode ()
emitMemsetCall dst c n align = do
    emitPrimCall
        [ {- no results -} ]
-       (MO_Memset align)
+       (MO_Memset (alignmentBytes align))
        [ dst, c, n ]
emitMemcmpCall :: LocalReg -> CmmExpr -> CmmExpr -> CmmExpr -> Int -> FCode ()
......
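At the source level, the offsetAlignment case in doSetByteArrayOp means that only calls whose byte offset is a literal can keep the stronger word alignment. A hypothetical example (module and function names invented for this sketch, not part of the patch):

{-# LANGUAGE MagicHash #-}

module SetAligned (zeroAll, zeroFromOne) where

import GHC.Exts

-- Offset literal 0# is word-aligned, so the emitted memset keeps the
-- byte array's word alignment and can use wide moves.
zeroAll :: MutableByteArray# s -> Int# -> State# s -> State# s
zeroAll mba len s = setByteArray# mba 0# len 0# s

-- Offset literal 1# (or any non-literal offset) drops to the
-- conservative mkAlignment 1 path above.
zeroFromOne :: MutableByteArray# s -> Int# -> State# s -> State# s
zeroFromOne mba len s = setByteArray# mba 1# (len -# 1#) 0# s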
@@ -147,6 +147,7 @@ module DynFlags (
#include "GHCConstantsHaskellExports.hs"
        bLOCK_SIZE_W,
        wORD_SIZE_IN_BITS,
+       wordAlignment,
        tAG_MASK,
        mAX_PTR_TAG,
        tARGET_MIN_INT, tARGET_MAX_INT, tARGET_MAX_WORD,
@@ -205,7 +206,7 @@ import Maybes
import MonadUtils
import qualified Pretty
import SrcLoc
-import BasicTypes ( IntWithInf, treatZeroAsInf )
+import BasicTypes ( Alignment, alignmentOf, IntWithInf, treatZeroAsInf )
import FastString
import Fingerprint
import Outputable
@@ -5661,6 +5662,9 @@ bLOCK_SIZE_W dflags = bLOCK_SIZE dflags `quot` wORD_SIZE dflags
wORD_SIZE_IN_BITS :: DynFlags -> Int
wORD_SIZE_IN_BITS dflags = wORD_SIZE dflags * 8
+wordAlignment :: DynFlags -> Alignment
+wordAlignment dflags = alignmentOf (wORD_SIZE dflags)
tAG_MASK :: DynFlags -> Int
tAG_MASK dflags = (1 `shiftL` tAG_BITS dflags) - 1
......
@@ -128,7 +128,7 @@ cmmTopCodeGen (CmmProc info lab live graph) = do
      Nothing -> return tops

cmmTopCodeGen (CmmData sec dat) = do
-  return [CmmData sec (1, dat)] -- no translation, we just use CmmStatic
+  return [CmmData sec (mkAlignment 1, dat)] -- no translation, we just use CmmStatic

basicBlockCodeGen
@@ -569,7 +569,7 @@ getRegister' _ _ (CmmLit lit@(CmmFloat f w)) =
        return (Any format code)
    | otherwise = do
-        Amode addr code <- memConstant (widthInBytes w) lit
+        Amode addr code <- memConstant (mkAlignment $ widthInBytes w) lit
        loadFloatAmode True w addr code

  float_const_x87 = case w of
@@ -583,7 +583,7 @@ getRegister' _ _ (CmmLit lit@(CmmFloat f w)) =
            in return (Any FF80 code)
        _otherwise -> do
-            Amode addr code <- memConstant (widthInBytes w) lit
+            Amode addr code <- memConstant (mkAlignment $ widthInBytes w) lit
            loadFloatAmode False w addr code

-- catch simple cases of zero- or sign-extended load
@@ -1247,7 +1247,7 @@ getNonClobberedOperand (CmmLit lit) = do
  if use_sse2 && isSuitableFloatingPointLit lit
    then do
      let CmmFloat _ w = lit
-      Amode addr code <- memConstant (widthInBytes w) lit
+      Amode addr code <- memConstant (mkAlignment $ widthInBytes w) lit
      return (OpAddr addr, code)
    else do
@@ -1303,7 +1303,7 @@ getOperand (CmmLit lit) = do
  if (use_sse2 && isSuitableFloatingPointLit lit)
    then do
      let CmmFloat _ w = lit
-      Amode addr code <- memConstant (widthInBytes w) lit
+      Amode addr code <- memConstant (mkAlignment $ widthInBytes w) lit
      return (OpAddr addr, code)
    else do
@@ -1351,7 +1351,7 @@ addAlignmentCheck align reg =
        , JXX_GBL NE $ ImmCLbl mkBadAlignmentLabel
        ]

-memConstant :: Int -> CmmLit -> NatM Amode
+memConstant :: Alignment -> CmmLit -> NatM Amode
memConstant align lit = do
  lbl <- getNewLabelNat
  let rosection = Section ReadOnlyData lbl
@@ -1848,17 +1848,25 @@ genCCall dflags _ (PrimTarget (MO_Memset align)) _
     CmmLit (CmmInt c _),
     CmmLit (CmmInt n _)]
    _
-    | fromInteger insns <= maxInlineMemsetInsns dflags && align .&. 3 == 0 = do
+    | fromInteger insns <= maxInlineMemsetInsns dflags = do
        code_dst <- getAnyReg dst
        dst_r <- getNewRegNat format
-        return $ code_dst dst_r `appOL` go dst_r (fromInteger n)
+        if format == II64 && n >= 8 then do
+          code_imm8byte <- getAnyReg (CmmLit (CmmInt c8 W64))
+          imm8byte_r <- getNewRegNat II64
+          return $ code_dst dst_r `appOL`
+                   code_imm8byte imm8byte_r `appOL`
+                   go8 dst_r imm8byte_r (fromInteger n)
+        else
+          return $ code_dst dst_r `appOL`
+                   go4 dst_r (fromInteger n)
  where
-    (format, val) = case align .&. 3 of
-        2 -> (II16, c2)
-        0 -> (II32, c4)
-        _ -> (II8, c)
+    maxAlignment = wordAlignment dflags -- only machine word wide MOVs are supported
+    effectiveAlignment = min (alignmentOf align) maxAlignment
+    format = intFormat . widthFromBytes $ alignmentBytes effectiveAlignment
    c2 = c `shiftL` 8 .|. c
    c4 = c2 `shiftL` 16 .|. c2
+    c8 = c4 `shiftL` 32 .|. c4

    -- The number of instructions we will generate (approx). We need 1
    -- instructions per move.
@@ -1868,25 +1876,45 @@ genCCall dflags _ (PrimTarget (MO_Memset align)) _
    sizeBytes :: Integer
    sizeBytes = fromIntegral (formatInBytes format)

-    go :: Reg -> Integer -> OrdList Instr
-    go dst i
-        -- TODO: Add movabs instruction and support 64-bit sets.
-        | i >= sizeBytes = -- This might be smaller than the below sizes
-            unitOL (MOV format (OpImm (ImmInteger val)) (OpAddr dst_addr)) `appOL`
-            go dst (i - sizeBytes)
-        | i >= 4 = -- Will never happen on 32-bit
-            unitOL (MOV II32 (OpImm (ImmInteger c4)) (OpAddr dst_addr)) `appOL`
-            go dst (i - 4)
-        | i >= 2 =
-            unitOL (MOV II16 (OpImm (ImmInteger c2)) (OpAddr dst_addr)) `appOL`
-            go dst (i - 2)
-        | i >= 1 =
-            unitOL (MOV II8 (OpImm (ImmInteger c)) (OpAddr dst_addr)) `appOL`
-            go dst (i - 1)
-        | otherwise = nilOL
-      where
-        dst_addr = AddrBaseIndex (EABaseReg dst) EAIndexNone
-                   (ImmInteger (n - i))
+    -- Depending on size returns the widest MOV instruction and its
+    -- width.
+    gen4 :: AddrMode -> Integer -> (InstrBlock, Integer)
+    gen4 addr size
+        | size >= 4 =
+            (unitOL (MOV II32 (OpImm (ImmInteger c4)) (OpAddr addr)), 4)
+        | size >= 2 =
+            (unitOL (MOV II16 (OpImm (ImmInteger c2)) (OpAddr addr)), 2)
+        | size >= 1 =
+            (unitOL (MOV II8 (OpImm (ImmInteger c)) (OpAddr addr)), 1)
+        | otherwise = (nilOL, 0)
+
+    -- Generates a 64-bit wide MOV instruction from REG to MEM.
+    gen8 :: AddrMode -> Reg -> InstrBlock
+    gen8 addr reg8byte =
+        unitOL (MOV format (OpReg reg8byte) (OpAddr addr))
+
+    -- Unrolls memset when the widest MOV is <= 4 bytes.
+    go4 :: Reg -> Integer -> InstrBlock
+    go4 dst left =
+        if left <= 0 then nilOL
+        else curMov `appOL` go4 dst (left - curWidth)
+      where
+        possibleWidth = minimum [left, sizeBytes]
+        dst_addr = AddrBaseIndex (EABaseReg dst) EAIndexNone (ImmInteger (n - left))
+        (curMov, curWidth) = gen4 dst_addr possibleWidth
+
+    -- Unrolls memset when the widest MOV is 8 bytes (thus another Reg
+    -- argument). Falls back to go4 when all 8 byte moves are
+    -- exhausted.
+    go8 :: Reg -> Reg -> Integer -> InstrBlock
+    go8 dst reg8byte left =
+        if possibleWidth >= 8 then
+            let curMov = gen8 dst_addr reg8byte
+            in curMov `appOL` go8 dst reg8byte (left - 8)
+        else go4 dst left
+      where
+        possibleWidth = minimum [left, sizeBytes]
+        dst_addr = AddrBaseIndex (EABaseReg dst) EAIndexNone (ImmInteger (n - left))
genCCall _ _ (PrimTarget MO_WriteBarrier) _ _ _ = return nilOL
        -- write barrier compiles to no code on x86/x86-64;
@@ -2322,7 +2350,7 @@ genCCall _ is32Bit target dest_regs args bid = do
        let
          const | FF32 <- fmt = CmmInt 0x7fffffff W32
                | otherwise = CmmInt 0x7fffffffffffffff W64
-        Amode amode amode_code <- memConstant (widthInBytes w) const
+        Amode amode amode_code <- memConstant (mkAlignment $ widthInBytes w) const
        tmp <- getNewRegNat fmt
        let
          code dst = x_code dst `appOL` amode_code `appOL` toOL [
@@ -3051,7 +3079,7 @@ createJumpTable dflags ids section lbl
            where blockLabel = blockLbl blockid
        in map jumpTableEntryRel ids
    | otherwise = map (jumpTableEntry dflags) ids
-    in CmmData section (1, Statics lbl jumpTable)
+    in CmmData section (mkAlignment 1, Statics lbl jumpTable)

extractUnwindPoints :: [Instr] -> [UnwindPoint]
extractUnwindPoints instrs =
@@ -3418,7 +3446,7 @@ sse2NegCode w x = do
        x@FF80 -> wrongFmt x
      where
        wrongFmt x = panic $ "sse2NegCode: " ++ show x
-  Amode amode amode_code <- memConstant (widthInBytes w) const
+  Amode amode amode_code <- memConstant (mkAlignment $ widthInBytes w) const
  tmp <- getNewRegNat fmt
  let
    code dst = x_code dst `appOL` amode_code `appOL` toOL [
......
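To make the shape of the generated code easier to see, here is a small standalone model of the schedule that go8/go4 above produce (memsetPlan is a name invented for this sketch, not GHC code): given the effective alignment in bytes and the number of bytes to set, it lists the (offset, width) of each MOV, using 8-byte moves while at least 8 bytes remain and then falling back to 4/2/1-byte moves.

-- (offset, width) of each move, widest first, capped by the alignment.
memsetPlan :: Integer -> Integer -> [(Integer, Integer)]
memsetPlan alignBytes n = go 0 n
  where
    go off left = case [w | w <- [8, 4, 2, 1], w <= min left alignBytes] of
      (w:_) -> (off, w) : go (off + w) (left - w)
      []    -> []

main :: IO ()
main = do
  print (memsetPlan 8 23)  -- [(0,8),(8,8),(16,4),(20,2),(22,1)]
  print (memsetPlan 4 23)  -- [(0,4),(4,4),(8,4),(12,4),(16,4),(20,2),(22,1)]
  print (memsetPlan 1 3)   -- [(0,1),(1,1),(2,1)]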
@@ -36,7 +36,7 @@ import PprBase
import Hoopl.Collections
import Hoopl.Label

-import BasicTypes (Alignment)
+import BasicTypes (Alignment, mkAlignment, alignmentBytes)
import DynFlags
import Cmm hiding (topInfoTable)
import BlockId
@@ -72,7 +72,7 @@ import Data.Bits
pprProcAlignment :: SDoc
pprProcAlignment = sdocWithDynFlags $ \dflags ->
-  (maybe empty pprAlign . cmmProcAlignment $ dflags)
+  (maybe empty (pprAlign . mkAlignment) (cmmProcAlignment dflags))

pprNatCmmDecl :: NatCmmDecl (Alignment, CmmStatics) Instr -> SDoc
pprNatCmmDecl (CmmData section dats) =
@@ -236,14 +236,15 @@ pprLabel lbl = pprGloblDecl lbl
           $$ pprTypeDecl lbl
           $$ (ppr lbl <> char ':')

-pprAlign :: Int -> SDoc
-pprAlign bytes
+pprAlign :: Alignment -> SDoc
+pprAlign alignment
  = sdocWithPlatform $ \platform ->
-    text ".align " <> int (alignment platform)
+    text ".align " <> int (alignmentOn platform)
  where
-    alignment platform = if platformOS platform == OSDarwin
-                         then log2 bytes
-                         else bytes
+    bytes = alignmentBytes alignment
+    alignmentOn platform = if platformOS platform == OSDarwin
+                           then log2 bytes
+                           else bytes

log2 :: Int -> Int -- cache the common ones
log2 1 = 0
......
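The pprAlign change keeps the pre-existing Darwin special case: Mach-O assemblers read the .align operand as a power-of-two exponent, while the x86 ELF targets handled here take a byte count, so an 8-byte alignment prints as .align 3 on Darwin and .align 8 elsewhere. A tiny sketch of that conversion (alignOperand is a name invented here; log2 mirrors the helper above and assumes a power-of-two input):

alignOperand :: Bool -> Int -> Int
alignOperand isDarwin bytes
  | isDarwin  = log2 bytes  -- Mach-O: exponent, e.g. ".align 3" for 8 bytes
  | otherwise = bytes       -- x86 ELF: byte count, e.g. ".align 8"

log2 :: Int -> Int
log2 1 = 0
log2 n = 1 + log2 (n `div` 2)

main :: IO ()
main = mapM_ (print . alignOperand True) [1, 2, 4, 8, 16]  -- 0, 1, 2, 3, 4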
@@ -1149,7 +1149,6 @@ exactLog2 x
  pow2 x | x == 1 = 0
         | otherwise = 1 + pow2 (x `shiftR` 1)
{-
-- -----------------------------------------------------------------------------
-- Floats
......
@@ -61,6 +61,11 @@ Compiler
  :ghc-flag:`-Wredundant-record-wildcards` which warn users when they have
  redundant or unused uses of a record wildcard match.

+- Calls to `memset` are now unrolled more aggressively, and the
+  produced code is more efficient on `x86_64`, with added support for
+  64-bit `MOV`s. In particular, `setByteArray#` calls that were not
+  optimized before will now be. See :ghc-ticket:`16052`.
Runtime system
~~~~~~~~~~~~~~
......
@@ -1131,9 +1131,9 @@ def do_compile(name, way, should_fail, top_mod, extra_mods, extra_hc_opts, **kwa
    # no problems found, this test passed
    return passed()

-def compile_cmp_asm( name, way, extra_hc_opts ):
+def compile_cmp_asm( name, way, ext, extra_hc_opts ):
    print('Compile only, extra args = ', extra_hc_opts)
-    result = simple_build(name + '.cmm', way, '-keep-s-files -O ' + extra_hc_opts, 0, '', 0, 0)
+    result = simple_build(name + '.' + ext, way, '-keep-s-files -O ' + extra_hc_opts, 0, '', 0, 0)
    if badResult(result):
        return result
@@ -1153,6 +1153,24 @@ def compile_cmp_asm( name, way, extra_hc_opts ):
    # no problems found, this test passed
    return passed()
+def compile_grep_asm( name, way, ext, is_substring, extra_hc_opts ):
+    print('Compile only, extra args = ', extra_hc_opts)
+    result = simple_build(name + '.' + ext, way, '-keep-s-files -O ' + extra_hc_opts, 0, '', 0, 0)
+    if badResult(result):
+        return result
+
+    expected_pat_file = find_expected_file(name, 'asm')
+    actual_asm_file = add_suffix(name, 's')
+    if not grep_output(join_normalisers(normalise_errmsg),
+                       expected_pat_file, actual_asm_file,
+                       is_substring):
+        return failBecause('asm mismatch')
+
+    # no problems found, this test passed
+    return passed()
# -----------------------------------------------------------------------------
# Compile-and-run tests
@@ -1735,6 +1753,43 @@ def compare_outputs(way, kind, normaliser, expected_file, actual_file, diff_file
    else:
        return False
+# Checks that each line from pattern_file is present in actual_file as
+# a substring or regex pattern depending on is_substring.
+def grep_output(normaliser, pattern_file, actual_file, is_substring=True):
+    expected_path = in_srcdir(pattern_file)
+    actual_path = in_testdir(actual_file)
+
+    expected_patterns = read_no_crs(expected_path).strip().split('\n')
+    actual_raw = read_no_crs(actual_path)
+    actual_str = normaliser(actual_raw)
+
+    success = True
+    failed_patterns = []
+
+    def regex_match(pat, actual):
+        return re.search(pat, actual) is not None
+
+    def substring_match(pat, actual):
+        return pat in actual
+
+    def is_match(pat, actual):
+        if is_substring:
+            return substring_match(pat, actual)
+        else:
+            return regex_match(pat, actual)
+
+    for pat in expected_patterns:
+        if not is_match(pat, actual_str):
+            success = False
+            failed_patterns.append(pat)
+
+    if not success:
+        print('Actual output does not contain the following patterns:')
+        for pat in failed_patterns:
+            print(pat)
+
+    return success
# Note [Output comparison]
#
# We do two types of output comparison:
......
@@ -3,7 +3,8 @@ is_amd64_codegen = [
    when(unregisterised(), skip),
]

-test('memcpy', is_amd64_codegen, compile_cmp_asm, [''])
-test('memcpy-unroll', is_amd64_codegen, compile_cmp_asm, [''])
-test('memcpy-unroll-conprop', is_amd64_codegen, compile_cmp_asm, [''])
-test('memset-unroll', is_amd64_codegen, compile_cmp_asm, [''])
+test('memcpy', is_amd64_codegen, compile_cmp_asm, ['cmm', ''])
+test('memcpy-unroll', is_amd64_codegen, compile_cmp_asm, ['cmm', ''])
+test('memcpy-unroll-conprop', is_amd64_codegen, compile_cmp_asm, ['cmm', ''])
+test('memset-unroll', is_amd64_codegen, compile_cmp_asm, ['cmm', ''])
+test('bytearray-memset-unroll', is_amd64_codegen, compile_grep_asm, ['hs', True, ''])
movq $72340172838076673,%rcx
movq %rcx,0(%rbx)
movq %rcx,8(%rbx)
movl $16843009,16(%rbx)
movw $257,20(%rbx)
movb $1,22(%rbx)
{-# language MagicHash #-}
{-# language UnboxedTuples #-}

module FillArray
  ( fill
  ) where

import GHC.Exts
import GHC.IO

data ByteArray = ByteArray ByteArray#

fill :: IO ByteArray
fill = IO $ \s0 -> case newByteArray# 24# s0 of
  (# s1, m #) -> case setByteArray# m 0# 23# 1# s1 of
    s2 -> case unsafeFreezeByteArray# m s2 of
      (# s3, r #) -> (# s3, ByteArray r #)
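Tying the two new test files together: FillArray's setByteArray# m 0# 23# 1# fills 23 bytes with the byte 0x01 at a word-aligned offset, and the expected assembly above is its 8+8+4+2+1 unrolling, with the fill byte replicated across each move's immediate (72340172838076673 = 0x0101010101010101, 16843009 = 0x01010101, 257 = 0x0101). A quick standalone check of those constants, mirroring the c2/c4/c8 definitions in the code generator:

import Data.Bits (shiftL, (.|.))

main :: IO ()
main = do
  let c  = 0x01 :: Integer        -- the fill byte passed to setByteArray#
      c2 = c  `shiftL` 8  .|. c   -- 257                -> movw immediate
      c4 = c2 `shiftL` 16 .|. c2  -- 16843009           -> movl immediate
      c8 = c4 `shiftL` 32 .|. c4  -- 72340172838076673  -> movq immediate
  mapM_ print [c8, c4, c2, c]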