Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in / Register
Toggle navigation
Glasgow Haskell Compiler
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Locked Files
Issues
0
Issues
0
List
Boards
Labels
Service Desk
Milestones
Iterations
Merge Requests
0
Merge Requests
0
Requirements
Requirements
List
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Security & Compliance
Security & Compliance
Dependency List
License Compliance
Operations
Operations
Incidents
Environments
Packages & Registries
Packages & Registries
Package Registry
Container Registry
Analytics
Analytics
CI / CD
Code Review
Insights
Issue
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Shayne Fletcher
Glasgow Haskell Compiler
Commits
edcef7b3
Commit
edcef7b3
authored
Apr 11, 2019
by
Artem Pyanykh
Committed by
Marge Bot
Apr 14, 2019
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
codegen: unroll memcpy calls for small bytearrays
parent
6febc444
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
70 additions
and
30 deletions
+70
-30
compiler/cmm/CmmExpr.hs
compiler/cmm/CmmExpr.hs
+10
-1
compiler/codeGen/StgCmmPrim.hs
compiler/codeGen/StgCmmPrim.hs
+26
-24
compiler/nativeGen/X86/CodeGen.hs
compiler/nativeGen/X86/CodeGen.hs
+6
-5
testsuite/tests/codeGen/should_gen_asm/all.T
testsuite/tests/codeGen/should_gen_asm/all.T
+1
-0
testsuite/tests/codeGen/should_gen_asm/bytearray-memcpy-unroll.asm
.../tests/codeGen/should_gen_asm/bytearray-memcpy-unroll.asm
+8
-0
testsuite/tests/codeGen/should_gen_asm/bytearray-memcpy-unroll.hs
...e/tests/codeGen/should_gen_asm/bytearray-memcpy-unroll.hs
+19
-0
No files found.
compiler/cmm/CmmExpr.hs
View file @
edcef7b3
...
...
@@ -5,7 +5,7 @@
{-# LANGUAGE UndecidableInstances #-}
module
CmmExpr
(
CmmExpr
(
..
),
cmmExprType
,
cmmExprWidth
,
maybeInvertCmmExpr
(
CmmExpr
(
..
),
cmmExprType
,
cmmExprWidth
,
cmmExprAlignment
,
maybeInvertCmmExpr
,
CmmReg
(
..
),
cmmRegType
,
cmmRegWidth
,
CmmLit
(
..
),
cmmLitType
,
LocalReg
(
..
),
localRegType
...
...
@@ -43,6 +43,8 @@ import Unique
import
Data.Set
(
Set
)
import
qualified
Data.Set
as
Set
import
BasicTypes
(
Alignment
,
mkAlignment
,
alignmentOf
)
-----------------------------------------------------------------------------
-- CmmExpr
-- An expression. Expressions have no side effects.
...
...
@@ -239,6 +241,13 @@ cmmLabelType dflags lbl
cmmExprWidth
::
DynFlags
->
CmmExpr
->
Width
cmmExprWidth
dflags
e
=
typeWidth
(
cmmExprType
dflags
e
)
-- | Returns an alignment in bytes of a CmmExpr when it's a statically
-- known integer constant, otherwise returns an alignment of 1 byte.
-- The caller is responsible for using with a sensible CmmExpr
-- argument.
cmmExprAlignment
::
CmmExpr
->
Alignment
cmmExprAlignment
(
CmmLit
(
CmmInt
intOff
_
))
=
alignmentOf
(
fromInteger
intOff
)
cmmExprAlignment
_
=
mkAlignment
1
--------
--- Negation for conditional branches
...
...
compiler/codeGen/StgCmmPrim.hs
View file @
edcef7b3
...
...
@@ -2035,8 +2035,8 @@ doCopyByteArrayOp = emitCopyByteArray copy
where
-- Copy data (we assume the arrays aren't overlapping since
-- they're of different types)
copy
_src
_dst
dst_p
src_p
bytes
=
emitMemcpyCall
dst_p
src_p
bytes
1
copy
_src
_dst
dst_p
src_p
bytes
align
=
emitMemcpyCall
dst_p
src_p
bytes
align
-- | Takes a source 'MutableByteArray#', an offset in the source
-- array, a destination 'MutableByteArray#', an offset into the
...
...
@@ -2050,22 +2050,26 @@ doCopyMutableByteArrayOp = emitCopyByteArray copy
-- The only time the memory might overlap is when the two arrays
-- we were provided are the same array!
-- TODO: Optimize branch for common case of no aliasing.
copy
src
dst
dst_p
src_p
bytes
=
do
copy
src
dst
dst_p
src_p
bytes
align
=
do
dflags
<-
getDynFlags
(
moveCall
,
cpyCall
)
<-
forkAltPair
(
getCode
$
emitMemmoveCall
dst_p
src_p
bytes
1
)
(
getCode
$
emitMemcpyCall
dst_p
src_p
bytes
1
)
(
getCode
$
emitMemmoveCall
dst_p
src_p
bytes
align
)
(
getCode
$
emitMemcpyCall
dst_p
src_p
bytes
align
)
emit
=<<
mkCmmIfThenElse
(
cmmEqWord
dflags
src
dst
)
moveCall
cpyCall
emitCopyByteArray
::
(
CmmExpr
->
CmmExpr
->
CmmExpr
->
CmmExpr
->
CmmExpr
->
FCode
()
)
->
Alignment
->
FCode
()
)
->
CmmExpr
->
CmmExpr
->
CmmExpr
->
CmmExpr
->
CmmExpr
->
FCode
()
emitCopyByteArray
copy
src
src_off
dst
dst_off
n
=
do
dflags
<-
getDynFlags
let
byteArrayAlignment
=
wordAlignment
dflags
srcOffAlignment
=
cmmExprAlignment
src_off
dstOffAlignment
=
cmmExprAlignment
dst_off
align
=
minimum
[
byteArrayAlignment
,
srcOffAlignment
,
dstOffAlignment
]
dst_p
<-
assignTempE
$
cmmOffsetExpr
dflags
(
cmmOffsetB
dflags
dst
(
arrWordsHdrSize
dflags
))
dst_off
src_p
<-
assignTempE
$
cmmOffsetExpr
dflags
(
cmmOffsetB
dflags
src
(
arrWordsHdrSize
dflags
))
src_off
copy
src
dst
dst_p
src_p
n
copy
src
dst
dst_p
src_p
n
align
-- | Takes a source 'ByteArray#', an offset in the source array, a
-- destination 'Addr#', and the number of bytes to copy. Copies the given
...
...
@@ -2075,7 +2079,7 @@ doCopyByteArrayToAddrOp src src_off dst_p bytes = do
-- Use memcpy (we are allowed to assume the arrays aren't overlapping)
dflags
<-
getDynFlags
src_p
<-
assignTempE
$
cmmOffsetExpr
dflags
(
cmmOffsetB
dflags
src
(
arrWordsHdrSize
dflags
))
src_off
emitMemcpyCall
dst_p
src_p
bytes
1
emitMemcpyCall
dst_p
src_p
bytes
(
mkAlignment
1
)
-- | Takes a source 'MutableByteArray#', an offset in the source array, a
-- destination 'Addr#', and the number of bytes to copy. Copies the given
...
...
@@ -2092,7 +2096,7 @@ doCopyAddrToByteArrayOp src_p dst dst_off bytes = do
-- Use memcpy (we are allowed to assume the arrays aren't overlapping)
dflags
<-
getDynFlags
dst_p
<-
assignTempE
$
cmmOffsetExpr
dflags
(
cmmOffsetB
dflags
dst
(
arrWordsHdrSize
dflags
))
dst_off
emitMemcpyCall
dst_p
src_p
bytes
1
emitMemcpyCall
dst_p
src_p
bytes
(
mkAlignment
1
)
-- ----------------------------------------------------------------------------
...
...
@@ -2107,9 +2111,7 @@ doSetByteArrayOp ba off len c = do
dflags
<-
getDynFlags
let
byteArrayAlignment
=
wordAlignment
dflags
-- known since BA is allocated on heap
offsetAlignment
=
case
off
of
CmmLit
(
CmmInt
intOff
_
)
->
alignmentOf
(
fromInteger
intOff
)
_
->
mkAlignment
1
offsetAlignment
=
cmmExprAlignment
off
align
=
min
byteArrayAlignment
offsetAlignment
p
<-
assignTempE
$
cmmOffsetExpr
dflags
(
cmmOffsetB
dflags
ba
(
arrWordsHdrSize
dflags
))
off
...
...
@@ -2180,7 +2182,7 @@ doCopyArrayOp = emitCopyArray copy
copy
_src
_dst
dst_p
src_p
bytes
=
do
dflags
<-
getDynFlags
emitMemcpyCall
dst_p
src_p
(
mkIntExpr
dflags
bytes
)
(
w
ORD_SIZE
dflags
)
(
w
ordAlignment
dflags
)
-- | Takes a source 'MutableArray#', an offset in the source array, a
...
...
@@ -2198,9 +2200,9 @@ doCopyMutableArrayOp = emitCopyArray copy
dflags
<-
getDynFlags
(
moveCall
,
cpyCall
)
<-
forkAltPair
(
getCode
$
emitMemmoveCall
dst_p
src_p
(
mkIntExpr
dflags
bytes
)
(
w
ORD_SIZE
dflags
))
(
w
ordAlignment
dflags
))
(
getCode
$
emitMemcpyCall
dst_p
src_p
(
mkIntExpr
dflags
bytes
)
(
w
ORD_SIZE
dflags
))
(
w
ordAlignment
dflags
))
emit
=<<
mkCmmIfThenElse
(
cmmEqWord
dflags
src
dst
)
moveCall
cpyCall
emitCopyArray
::
(
CmmExpr
->
CmmExpr
->
CmmExpr
->
CmmExpr
->
ByteOff
...
...
@@ -2247,7 +2249,7 @@ doCopySmallArrayOp = emitCopySmallArray copy
copy
_src
_dst
dst_p
src_p
bytes
=
do
dflags
<-
getDynFlags
emitMemcpyCall
dst_p
src_p
(
mkIntExpr
dflags
bytes
)
(
w
ORD_SIZE
dflags
)
(
w
ordAlignment
dflags
)
doCopySmallMutableArrayOp
::
CmmExpr
->
CmmExpr
->
CmmExpr
->
CmmExpr
->
WordOff
...
...
@@ -2261,9 +2263,9 @@ doCopySmallMutableArrayOp = emitCopySmallArray copy
dflags
<-
getDynFlags
(
moveCall
,
cpyCall
)
<-
forkAltPair
(
getCode
$
emitMemmoveCall
dst_p
src_p
(
mkIntExpr
dflags
bytes
)
(
w
ORD_SIZE
dflags
))
(
w
ordAlignment
dflags
))
(
getCode
$
emitMemcpyCall
dst_p
src_p
(
mkIntExpr
dflags
bytes
)
(
w
ORD_SIZE
dflags
))
(
w
ordAlignment
dflags
))
emit
=<<
mkCmmIfThenElse
(
cmmEqWord
dflags
src
dst
)
moveCall
cpyCall
emitCopySmallArray
::
(
CmmExpr
->
CmmExpr
->
CmmExpr
->
CmmExpr
->
ByteOff
...
...
@@ -2328,7 +2330,7 @@ emitCloneArray info_p res_r src src_off n = do
(
mkIntExpr
dflags
(
arrPtrsHdrSizeW
dflags
))
src_off
)
emitMemcpyCall
dst_p
src_p
(
mkIntExpr
dflags
(
wordsToBytes
dflags
n
))
(
w
ORD_SIZE
dflags
)
(
w
ordAlignment
dflags
)
emit
$
mkAssign
(
CmmLocal
res_r
)
(
CmmReg
arr
)
...
...
@@ -2365,7 +2367,7 @@ emitCloneSmallArray info_p res_r src src_off n = do
(
mkIntExpr
dflags
(
smallArrPtrsHdrSizeW
dflags
))
src_off
)
emitMemcpyCall
dst_p
src_p
(
mkIntExpr
dflags
(
wordsToBytes
dflags
n
))
(
w
ORD_SIZE
dflags
)
(
w
ordAlignment
dflags
)
emit
$
mkAssign
(
CmmLocal
res_r
)
(
CmmReg
arr
)
...
...
@@ -2493,19 +2495,19 @@ doCasByteArray res mba idx idx_ty old new = do
-- Helpers for emitting function calls
-- | Emit a call to @memcpy@.
emitMemcpyCall
::
CmmExpr
->
CmmExpr
->
CmmExpr
->
I
nt
->
FCode
()
emitMemcpyCall
::
CmmExpr
->
CmmExpr
->
CmmExpr
->
Alignme
nt
->
FCode
()
emitMemcpyCall
dst
src
n
align
=
do
emitPrimCall
[
{-no results-}
]
(
MO_Memcpy
align
)
(
MO_Memcpy
(
alignmentBytes
align
)
)
[
dst
,
src
,
n
]
-- | Emit a call to @memmove@.
emitMemmoveCall
::
CmmExpr
->
CmmExpr
->
CmmExpr
->
I
nt
->
FCode
()
emitMemmoveCall
::
CmmExpr
->
CmmExpr
->
CmmExpr
->
Alignme
nt
->
FCode
()
emitMemmoveCall
dst
src
n
align
=
do
emitPrimCall
[
{- no results -}
]
(
MO_Memmove
align
)
(
MO_Memmove
(
alignmentBytes
align
)
)
[
dst
,
src
,
n
]
-- | Emit a call to @memset@. The second argument must fit inside an
...
...
compiler/nativeGen/X86/CodeGen.hs
View file @
edcef7b3
...
...
@@ -1767,12 +1767,11 @@ genCCall
-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-- Unroll memcpy calls if the source and destination pointers are at
-- least DWORD aligned and the number of bytes to copy isn't too
-- Unroll memcpy calls if the number of bytes to copy isn't too
-- large. Otherwise, call C's memcpy.
genCCall
dflags
is32Bit
(
PrimTarget
(
MO_Memcpy
align
))
_
genCCall
dflags
_
(
PrimTarget
(
MO_Memcpy
align
))
_
[
dst
,
src
,
CmmLit
(
CmmInt
n
_
)]
_
|
fromInteger
insns
<=
maxInlineMemcpyInsns
dflags
&&
align
.&.
3
==
0
=
do
|
fromInteger
insns
<=
maxInlineMemcpyInsns
dflags
=
do
code_dst
<-
getAnyReg
dst
dst_r
<-
getNewRegNat
format
code_src
<-
getAnyReg
src
...
...
@@ -1785,7 +1784,9 @@ genCCall dflags is32Bit (PrimTarget (MO_Memcpy align)) _
-- instructions per move.
insns
=
2
*
((
n
+
sizeBytes
-
1
)
`
div
`
sizeBytes
)
format
=
if
align
.&.
4
/=
0
then
II32
else
(
archWordFormat
is32Bit
)
maxAlignment
=
wordAlignment
dflags
-- only machine word wide MOVs are supported
effectiveAlignment
=
min
(
alignmentOf
align
)
maxAlignment
format
=
intFormat
.
widthFromBytes
$
alignmentBytes
effectiveAlignment
-- The size of each move, in bytes.
sizeBytes
::
Integer
...
...
testsuite/tests/codeGen/should_gen_asm/all.T
View file @
edcef7b3
...
...
@@ -8,3 +8,4 @@ test('memcpy-unroll', is_amd64_codegen, compile_cmp_asm, ['cmm', ''])
test
('
memcpy-unroll-conprop
',
is_amd64_codegen
,
compile_cmp_asm
,
['
cmm
',
''])
test
('
memset-unroll
',
is_amd64_codegen
,
compile_cmp_asm
,
['
cmm
',
''])
test
('
bytearray-memset-unroll
',
is_amd64_codegen
,
compile_grep_asm
,
['
hs
',
True
,
''])
test
('
bytearray-memcpy-unroll
',
is_amd64_codegen
,
compile_grep_asm
,
['
hs
',
True
,
''])
testsuite/tests/codeGen/should_gen_asm/bytearray-memcpy-unroll.asm
0 → 100644
View file @
edcef7b3
movw
0
(
%
rax
),
%
dx
movw
%
dx
,
0
(
%
rcx
)
movw
2
(
%
rax
),
%
dx
movw
%
dx
,
2
(
%
rcx
)
movw
4
(
%
rax
),
%
dx
movw
%
dx
,
4
(
%
rcx
)
movw
6
(
%
rax
),
%
ax
movw
%
ax
,
6
(
%
rcx
)
testsuite/tests/codeGen/should_gen_asm/bytearray-memcpy-unroll.hs
0 → 100644
View file @
edcef7b3
{-# language MagicHash #-}
{-# language UnboxedTuples #-}
module
CopyArray
(
smallCopy
)
where
import
GHC.Exts
import
GHC.IO
data
ByteArray
=
ByteArray
ByteArray
#
-- Does an 8 byte copy with sub-word (2 bytes) alignment
-- Should be unrolled into 4 aligned stores (MOVWs)
smallCopy
::
ByteArray
->
IO
ByteArray
smallCopy
(
ByteArray
ba
)
=
IO
$
\
s0
->
case
newByteArray
#
8
#
s0
of
(
#
s1
,
mut
#
)
->
case
copyByteArray
#
ba
2
#
mut
0
#
8
#
s1
of
s2
->
case
unsafeFreezeByteArray
#
mut
s2
of
(
#
s3
,
frozen
#
)
->
(
#
s3
,
ByteArray
frozen
#
)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment