From 015886ec78e598f850c4202efdee239bac63b8c7 Mon Sep 17 00:00:00 2001
From: ARATA Mizuki <minorinoki@gmail.com>
Date: Tue, 16 May 2023 21:06:31 +0900
Subject: [PATCH] Support 128-bit SIMD on AArch64 via LLVM backend

---
Review notes (free-form text in this position is ignored by `git am`):

  * CodeGen.Platform.h: adds the register names q0..q31, numbered 32..63.
    The visible context lines give d29..d31 the same numbers (61..63).
  * CallConv.hs: realXmmRegNos now yields the XMM register list when the
    platform is AArch64, in addition to the existing SSE2-enabled case.
  * Prim.hs: checkVecCompatibility dispatches on the architecture. x86 and
    x86_64 keep the existing SSE/AVX flag checks (renamed to checkX86),
    AArch64 rejects only 256- and 512-bit vectors, and every other
    architecture is rejected outright.
  * MachRegs.h: defines REG_XMM1/REG_XMM2 as q4/q5 together with the
    matching CALLER_SAVES_XMM1/CALLER_SAVES_XMM2 macros.
  * Testsuite: T22296 and T22187_run are no longer skipped on aarch64.
  * This copy of the patch had its line structure restored from a
    whitespace-collapsed version. The indentation and blank lines of
    context lines are a best-effort reconstruction; confirm them against
    the tree (or apply with `git apply --ignore-whitespace`).
  * The redundant `$` before the string literals in checkAArch64 was
    dropped; the messages themselves are unchanged.

 compiler/CodeGen.Platform.h              | 33 ++++++++++++++++++++++++
 compiler/GHC/Cmm/CallConv.hs             |  6 +++--
 compiler/GHC/StgToCmm/Prim.hs            | 31 +++++++++++++++-------
 rts/include/stg/MachRegs.h               |  6 +++++
 testsuite/tests/codeGen/should_run/all.T |  2 +-
 testsuite/tests/unboxedsums/all.T        |  2 +-
 6 files changed, 66 insertions(+), 14 deletions(-)

diff --git a/compiler/CodeGen.Platform.h b/compiler/CodeGen.Platform.h
index fb006c9f1a5d..1c3bee7eb3d3 100644
--- a/compiler/CodeGen.Platform.h
+++ b/compiler/CodeGen.Platform.h
@@ -203,6 +203,39 @@ import GHC.Platform.Reg
 # define d29 61
 # define d30 62
 # define d31 63
+
+# define q0 32
+# define q1 33
+# define q2 34
+# define q3 35
+# define q4 36
+# define q5 37
+# define q6 38
+# define q7 39
+# define q8 40
+# define q9 41
+# define q10 42
+# define q11 43
+# define q12 44
+# define q13 45
+# define q14 46
+# define q15 47
+# define q16 48
+# define q17 49
+# define q18 50
+# define q19 51
+# define q20 52
+# define q21 53
+# define q22 54
+# define q23 55
+# define q24 56
+# define q25 57
+# define q26 58
+# define q27 59
+# define q28 60
+# define q29 61
+# define q30 62
+# define q31 63
 #endif
 
 # if defined(MACHREGS_darwin)
diff --git a/compiler/GHC/Cmm/CallConv.hs b/compiler/GHC/Cmm/CallConv.hs
index 14f3672a4b32..ddc38e2116dd 100644
--- a/compiler/GHC/Cmm/CallConv.hs
+++ b/compiler/GHC/Cmm/CallConv.hs
@@ -193,8 +193,10 @@ realLongRegs platform = map LongReg $ regList (pc_MAX_Real_Long_REG (pl
 
 realXmmRegNos :: Platform -> [Int]
 realXmmRegNos platform
-    | isSse2Enabled platform = regList (pc_MAX_Real_XMM_REG (platformConstants platform))
-    | otherwise              = []
+    | isSse2Enabled platform || platformArch platform == ArchAArch64
+    = regList (pc_MAX_Real_XMM_REG (platformConstants platform))
+    | otherwise
+    = []
 
 regList :: Int -> [Int]
 regList n = [1 .. n]
diff --git a/compiler/GHC/StgToCmm/Prim.hs b/compiler/GHC/StgToCmm/Prim.hs
index 1c8663ebf9f1..6f038d0dc47d 100644
--- a/compiler/GHC/StgToCmm/Prim.hs
+++ b/compiler/GHC/StgToCmm/Prim.hs
@@ -2303,7 +2303,7 @@ vecCmmCat FloatVec = cmmFloat
 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 -- Check to make sure that we can generate code for the specified vector type
 -- given the current set of dynamic flags.
--- Currently these checks are specific to x86 and x86_64 architecture.
+-- Currently these checks are specific to x86, x86_64 and AArch64 architectures.
 -- This should be fixed!
 -- In particular,
 -- 1) Add better support for other architectures! (this may require a redesign)
@@ -2334,27 +2334,38 @@ vecCmmCat FloatVec = cmmFloat
 checkVecCompatibility :: StgToCmmConfig -> PrimOpVecCat -> Length -> Width -> FCode ()
 checkVecCompatibility cfg vcat l w =
   case stgToCmmVecInstrsErr cfg of
-    Nothing  -> check vecWidth vcat l w  -- We are in a compatible backend
-    Just err -> sorry err                -- incompatible backend, do panic
+    Nothing | isX86 -> checkX86 vecWidth vcat l w  -- compatible backend: per-architecture checks
+            | platformArch platform == ArchAArch64 -> checkAArch64 vecWidth
+            | otherwise -> sorry "SIMD vector instructions are not supported on this architecture."
+    Just err -> sorry err  -- incompatible backend, do panic
   where
     platform = stgToCmmPlatform cfg
-    check :: Width -> PrimOpVecCat -> Length -> Width -> FCode ()
-    check W128 FloatVec 4 W32 | not (isSseEnabled platform) =
+    isX86 = case platformArch platform of  -- True for ArchX86_64 and ArchX86 only
+      ArchX86_64 -> True
+      ArchX86 -> True
+      _ -> False
+    checkX86 :: Width -> PrimOpVecCat -> Length -> Width -> FCode ()
+    checkX86 W128 FloatVec 4 W32 | not (isSseEnabled platform) =
         sorry $ "128-bit wide single-precision floating point " ++
                 "SIMD vector instructions require at least -msse."
-    check W128 _ _ _ | not (isSse2Enabled platform) =
+    checkX86 W128 _ _ _ | not (isSse2Enabled platform) =
         sorry $ "128-bit wide integer and double precision " ++
                 "SIMD vector instructions require at least -msse2."
-    check W256 FloatVec _ _ | not (stgToCmmAvx cfg) =
+    checkX86 W256 FloatVec _ _ | not (stgToCmmAvx cfg) =
         sorry $ "256-bit wide floating point " ++
                 "SIMD vector instructions require at least -mavx."
-    check W256 _ _ _ | not (stgToCmmAvx2 cfg) =
+    checkX86 W256 _ _ _ | not (stgToCmmAvx2 cfg) =
         sorry $ "256-bit wide integer " ++
                 "SIMD vector instructions require at least -mavx2."
-    check W512 _ _ _ | not (stgToCmmAvx512f cfg) =
+    checkX86 W512 _ _ _ | not (stgToCmmAvx512f cfg) =
         sorry $ "512-bit wide " ++
                 "SIMD vector instructions require -mavx512f."
-    check _ _ _ _ = return ()
+    checkX86 _ _ _ _ = return ()
+
+    checkAArch64 :: Width -> FCode ()  -- rejects only 256- and 512-bit vector widths
+    checkAArch64 W256 = sorry "256-bit wide SIMD vector instructions are not supported."
+    checkAArch64 W512 = sorry "512-bit wide SIMD vector instructions are not supported."
+    checkAArch64 _ = return ()  -- every other width is accepted
 
     vecWidth = typeWidth (vecVmmType vcat l w)
 
diff --git a/rts/include/stg/MachRegs.h b/rts/include/stg/MachRegs.h
index f0253865ea3b..239f505c7965 100644
--- a/rts/include/stg/MachRegs.h
+++ b/rts/include/stg/MachRegs.h
@@ -457,6 +457,12 @@ the stack. See Note [Overlapping global registers] for implications.
 #define REG_D3          d14
 #define REG_D4          d15
 
+#define REG_XMM1        q4
+#define REG_XMM2        q5
+
+#define CALLER_SAVES_XMM1
+#define CALLER_SAVES_XMM2
+
 /* -----------------------------------------------------------------------------
    The s390x register mapping
 
diff --git a/testsuite/tests/codeGen/should_run/all.T b/testsuite/tests/codeGen/should_run/all.T
index bf1110ac7e91..512a9850b1cd 100644
--- a/testsuite/tests/codeGen/should_run/all.T
+++ b/testsuite/tests/codeGen/should_run/all.T
@@ -222,7 +222,7 @@ test('T21186', normal, compile_and_run, [''])
 test('T20640a', normal, compile_and_run, [''])
 test('T20640b', normal, compile_and_run, [''])
 test('T22296',[only_ways(llvm_ways)
-              ,unless(arch('x86_64'), skip)],compile_and_run,[''])
+              ,unless(arch('x86_64') or arch('aarch64'), skip)],compile_and_run,[''])
 test('T22798', normal, compile_and_run, ['-fregs-graph'])
 test('CheckBoundsOK', normal, compile_and_run, ['-fcheck-prim-bounds'])
 test('OrigThunkInfo', normal, compile_and_run, ['-forig-thunk-info'])
diff --git a/testsuite/tests/unboxedsums/all.T b/testsuite/tests/unboxedsums/all.T
index 300d7892e13c..dc2898702194 100644
--- a/testsuite/tests/unboxedsums/all.T
+++ b/testsuite/tests/unboxedsums/all.T
@@ -38,7 +38,7 @@ test('T20859', normal, compile, [''])
 
 test('T22187',[only_ways(llvm_ways)],compile,[''])
 test('T22187_run',[only_ways(llvm_ways)
-                  ,unless(arch('x86_64'), skip)],compile_and_run,[''])
+                  ,unless(arch('x86_64') or arch('aarch64'), skip)],compile_and_run,[''])
 
 test('unpack_sums_1', normal, compile_and_run, ['-O'])
 test('unpack_sums_2', normal, compile, ['-O'])
-- 
GitLab