Commit d87fa343 authored by lukexi, committed by Austin Seipp

arm64: 64bit iOS and SMP support (#7942)

Signed-off-by: Austin Seipp <austin@well-typed.com>
parent 80f6fc17
......@@ -25,7 +25,7 @@ AC_DEFUN([GHC_SELECT_FILE_EXTENSIONS],
x86_64-apple-darwin)
$3='.dylib'
;;
arm-apple-darwin10|i386-apple-darwin11)
arm-apple-darwin10|i386-apple-darwin11|aarch64-apple-darwin14)
$2='.a'
$3='.dylib'
;;
......@@ -1960,7 +1960,7 @@ AC_DEFUN([GHC_CONVERT_VENDOR],[
# converts os from gnu to ghc naming, and assigns the result to $target_var
AC_DEFUN([GHC_CONVERT_OS],[
case "$1-$2" in
darwin10-arm|darwin11-i386)
darwin10-arm|darwin11-i386|darwin14-aarch64)
$3="ios"
;;
*)
......
......@@ -1088,6 +1088,7 @@ cLoad expr rep
bewareLoadStoreAlignment ArchMipseb = True
bewareLoadStoreAlignment ArchMipsel = True
bewareLoadStoreAlignment (ArchARM {}) = True
bewareLoadStoreAlignment ArchARM64 = True
-- Pessimistically assume that they will also cause problems
-- on unknown arches
bewareLoadStoreAlignment ArchUnknown = True
......
......@@ -9,6 +9,7 @@ import Platform
import Reg
import qualified CodeGen.Platform.ARM as ARM
import qualified CodeGen.Platform.ARM64 as ARM64
import qualified CodeGen.Platform.PPC as PPC
import qualified CodeGen.Platform.PPC_Darwin as PPC_Darwin
import qualified CodeGen.Platform.SPARC as SPARC
......@@ -28,6 +29,7 @@ callerSaves platform
ArchX86_64 -> X86_64.callerSaves
ArchSPARC -> SPARC.callerSaves
ArchARM {} -> ARM.callerSaves
ArchARM64 -> ARM64.callerSaves
arch
| arch `elem` [ArchPPC, ArchPPC_64] ->
case platformOS platform of
......@@ -50,6 +52,7 @@ activeStgRegs platform
ArchX86_64 -> X86_64.activeStgRegs
ArchSPARC -> SPARC.activeStgRegs
ArchARM {} -> ARM.activeStgRegs
ArchARM64 -> ARM64.activeStgRegs
arch
| arch `elem` [ArchPPC, ArchPPC_64] ->
case platformOS platform of
......@@ -67,6 +70,7 @@ haveRegBase platform
ArchX86_64 -> X86_64.haveRegBase
ArchSPARC -> SPARC.haveRegBase
ArchARM {} -> ARM.haveRegBase
ArchARM64 -> ARM64.haveRegBase
arch
| arch `elem` [ArchPPC, ArchPPC_64] ->
case platformOS platform of
......@@ -84,6 +88,7 @@ globalRegMaybe platform
ArchX86_64 -> X86_64.globalRegMaybe
ArchSPARC -> SPARC.globalRegMaybe
ArchARM {} -> ARM.globalRegMaybe
ArchARM64 -> ARM64.globalRegMaybe
arch
| arch `elem` [ArchPPC, ArchPPC_64] ->
case platformOS platform of
......@@ -101,6 +106,7 @@ freeReg platform
ArchX86_64 -> X86_64.freeReg
ArchSPARC -> SPARC.freeReg
ArchARM {} -> ARM.freeReg
ArchARM64 -> ARM64.freeReg
arch
| arch `elem` [ArchPPC, ArchPPC_64] ->
case platformOS platform of
......
{-# LANGUAGE CPP #-}
-- Platform register description for the ARM64 (aarch64) target.
--
-- This module contains no definitions of its own. It sets the MACHREGS_*
-- selector macros and then textually includes the shared
-- CodeGen.Platform.hs template; the names this module exports
-- presumably all come from that include.
module CodeGen.Platform.ARM64 where
-- Selector macros read by the included template: NO_REGS is off and the
-- aarch64 branches are switched on.
#define MACHREGS_NO_REGS 0
#define MACHREGS_aarch64 1
#include "../../../../includes/CodeGen.Platform.hs"
......@@ -219,6 +219,7 @@ Library
Bitmap
CodeGen.Platform
CodeGen.Platform.ARM
CodeGen.Platform.ARM64
CodeGen.Platform.NoRegs
CodeGen.Platform.PPC
CodeGen.Platform.PPC_Darwin
......
......@@ -614,6 +614,7 @@ compiler_stage2_dll0_MODULES += \
CmmUtils \
CodeGen.Platform \
CodeGen.Platform.ARM \
CodeGen.Platform.ARM64 \
CodeGen.Platform.NoRegs \
CodeGen.Platform.PPC \
CodeGen.Platform.PPC_Darwin \
......
......@@ -80,7 +80,7 @@ ppLlvmGlobal (LMGlobal var@(LMGlobalVar _ _ link x a c) dat) =
const_link = case c of
Global -> ppr link <+> text "global"
Constant -> ppr link <+> text "constant"
Alias -> text "alias" <+> ppr link
Alias -> ppr link <+> text "alias"
in ppAssignment var $ const_link <+> rhs <> sect <> align
$+$ newLine
......
......@@ -65,6 +65,9 @@ moduleLayout = sdocWithPlatform $ \platform ->
Platform { platformArch = ArchX86, platformOS = OSiOS } ->
text "target datalayout = \"e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32\""
$+$ text "target triple = \"i386-apple-darwin11\""
Platform { platformArch = ArchARM64, platformOS = OSiOS } ->
text "target datalayout = \"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n32:64-S128\""
$+$ text "target triple = \"arm64-apple-ios7.0.0\""
_ ->
-- FIX: Other targets
empty
......
......@@ -1934,6 +1934,7 @@ linkBinary' staticLink dflags o_files dep_packages = do
ArchX86 -> True
ArchX86_64 -> True
ArchARM {} -> True
ArchARM64 -> True
_ -> False
then ["-Wl,-no_compact_unwind"]
else [])
......
......@@ -61,6 +61,7 @@ data Arch
isARM :: Arch -> Bool
isARM (ArchARM {}) = True
isARM ArchARM64 = True
isARM _ = False
-- | Operating systems that the native code generator knows about.
......
......@@ -99,7 +99,12 @@ import Reg
# define zmm14 70
# define zmm15 71
#elif MACHREGS_powerpc
-- Note: these are only needed for ARM/ARM64 because globalRegMaybe is now used in CmmSink.hs.
-- Since the result is only checked with 'isJust', the actual values don't matter, and
-- these numberings have not been verified to be correct.
-- Normally, the register names are just stringified as part of the REG() macro.
#elif MACHREGS_powerpc || MACHREGS_arm || MACHREGS_aarch64
# define r0 0
# define r1 1
......@@ -134,6 +139,76 @@ import Reg
# define r30 30
# define r31 31
-- See note above. These aren't actually used for anything except satisfying the compiler for globalRegMaybe
-- so I'm unsure if they're the correct numberings, should they ever be attempted to be used in the NCG.
#if MACHREGS_aarch64 || MACHREGS_arm
# define s0 32
# define s1 33
# define s2 34
# define s3 35
# define s4 36
# define s5 37
# define s6 38
# define s7 39
# define s8 40
# define s9 41
# define s10 42
# define s11 43
# define s12 44
# define s13 45
# define s14 46
# define s15 47
# define s16 48
# define s17 49
# define s18 50
# define s19 51
# define s20 52
# define s21 53
# define s22 54
# define s23 55
# define s24 56
# define s25 57
# define s26 58
# define s27 59
# define s28 60
# define s29 61
# define s30 62
# define s31 63
# define d0 32
# define d1 33
# define d2 34
# define d3 35
# define d4 36
# define d5 37
# define d6 38
# define d7 39
# define d8 40
# define d9 41
# define d10 42
# define d11 43
# define d12 44
# define d13 45
# define d14 46
# define d15 47
# define d16 48
# define d17 49
# define d18 50
# define d19 51
# define d20 52
# define d21 53
# define d22 54
# define d23 55
# define d24 56
# define d25 57
# define d26 58
# define d27 59
# define d28 60
# define d29 61
# define d30 62
# define d31 63
#endif
# if MACHREGS_darwin
# define f0 32
# define f1 33
......@@ -557,7 +632,7 @@ haveRegBase = False
-- in a real machine register, otherwise returns @'Just' reg@, where
-- reg is the machine register it is stored in.
globalRegMaybe :: GlobalReg -> Maybe RealReg
#if MACHREGS_i386 || MACHREGS_x86_64 || MACHREGS_sparc || MACHREGS_powerpc
#if MACHREGS_i386 || MACHREGS_x86_64 || MACHREGS_sparc || MACHREGS_powerpc || MACHREGS_arm || MACHREGS_aarch64
# ifdef REG_Base
globalRegMaybe BaseReg = Just (RealRegSingle REG_Base)
# endif
......
......@@ -585,6 +585,8 @@
#define REG_D1 d12
#define REG_D2 d13
#define REG_D3 d14
#define REG_D4 d15
#else
......
......@@ -44,6 +44,7 @@
#define MACHREGS_powerpc (powerpc_HOST_ARCH || powerpc64_HOST_ARCH || rs6000_HOST_ARCH)
#define MACHREGS_sparc sparc_HOST_ARCH
#define MACHREGS_arm arm_HOST_ARCH
#define MACHREGS_aarch64 aarch64_HOST_ARCH
#define MACHREGS_darwin darwin_HOST_OS
#endif
......
......@@ -157,6 +157,19 @@ xchg(StgPtr p, StgWord w)
: "r" (w), "r" (p)
: "memory"
);
#elif aarch64_HOST_ARCH
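// NOTE(review): tmp (%1) is never referenced by the asm below. The stlxr
// status is written to %w0, the same register that ldaxr loaded the old
// value into (result), so result is overwritten by the status flag.
// The status should probably be written to %w1 (tmp) instead -- confirm.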
StgWord tmp;
__asm__ __volatile__ (
"1: ldaxr %0, [%3]\n"
" stlxr %w0, %2, [%3]\n"
" cbnz %w0, 1b\n"
" dmb sy\n"
: "=&r" (result), "=&r" (tmp)
: "r" (w), "r" (p)
: "memory"
);
#else
#error xchg() unimplemented on this architecture
#endif
......@@ -231,6 +244,24 @@ cas(StgVolatilePtr p, StgWord o, StgWord n)
: "r"(p), "r"(o), "r"(n)
: "cc","memory");
return result;
#elif aarch64_HOST_ARCH
// tmp (%0) receives the stxr status flag (and is zeroed before the
// compare); result (%1) receives the value loaded from p.
StgWord result,tmp;
__asm__ __volatile__(
"1: ldxr %1, [%2]\n"
" mov %w0, #0\n"
" cmp %1, %3\n"
" b.ne 2f\n"
" stxr %w0, %4, [%2]\n"
" cbnz %w0, 1b\n"
"2: dmb sy\n"
: "=&r"(tmp), "=&r"(result)
: "r"(p), "r"(o), "r"(n)
: "cc","memory");
return result;
#else
#error cas() unimplemented on this architecture
......@@ -313,7 +344,7 @@ write_barrier(void) {
__asm__ __volatile__ ("" : : : "memory");
#elif arm_HOST_ARCH && defined(arm_HOST_ARCH_PRE_ARMv7)
__asm__ __volatile__ ("" : : : "memory");
#elif arm_HOST_ARCH && !defined(arm_HOST_ARCH_PRE_ARMv7)
#elif (arm_HOST_ARCH && !defined(arm_HOST_ARCH_PRE_ARMv7)) || aarch64_HOST_ARCH
__asm__ __volatile__ ("dmb st" : : : "memory");
#else
#error memory barriers unimplemented on this architecture
......@@ -334,6 +365,8 @@ store_load_barrier(void) {
__asm__ __volatile__ ("membar #StoreLoad" : : : "memory");
#elif arm_HOST_ARCH && !defined(arm_HOST_ARCH_PRE_ARMv7)
__asm__ __volatile__ ("dmb" : : : "memory");
#elif aarch64_HOST_ARCH
__asm__ __volatile__ ("dmb sy" : : : "memory");
#else
#error memory barriers unimplemented on this architecture
#endif
......@@ -354,6 +387,8 @@ load_load_barrier(void) {
__asm__ __volatile__ ("" : : : "memory");
#elif arm_HOST_ARCH && !defined(arm_HOST_ARCH_PRE_ARMv7)
__asm__ __volatile__ ("dmb" : : : "memory");
#elif aarch64_HOST_ARCH
__asm__ __volatile__ ("dmb sy" : : : "memory");
#else
#error memory barriers unimplemented on this architecture
#endif
......
......@@ -756,22 +756,28 @@ StgRun(StgFunPtr f, StgRegTable *basereg) {
__asm__ volatile (
/*
* save callee-saves registers on behalf of the STG code.
* floating point registers only need the bottom 64 bits preserved.
* x16 and x17 are ip0 and ip1, but we can't refer to them by that name with clang.
*/
"stp fp, lr, [sp, #-16]!\n\t"
"mov fp, sp\n\t"
"stp x16, x17, [sp, #-16]!\n\t"
"stp x19, x20, [sp, #-16]!\n\t"
"stp x21, x22, [sp, #-16]!\n\t"
"stp x23, x24, [sp, #-16]!\n\t"
"stp x25, x26, [sp, #-16]!\n\t"
"stp x27, x28, [sp, #-16]!\n\t"
"stp ip0, ip1, [sp, #-16]!\n\t"
"str lr, [sp, #-8]!\n\t"
"stp d8, d9, [sp, #-16]!\n\t"
"stp d10, d11, [sp, #-16]!\n\t"
"stp d12, d13, [sp, #-16]!\n\t"
"stp d14, d15, [sp, #-16]!\n\t"
/*
* allocate some space for Stg machine's temporary storage.
* Note: RESERVED_C_STACK_BYTES has to be a round number here or
* the assembler can't assemble it.
*/
"str lr, [sp, %3]"
/* "sub sp, sp, %3\n\t" */
"sub sp, sp, %3\n\t"
/*
* Set BaseReg
*/
......@@ -779,16 +785,17 @@ StgRun(StgFunPtr f, StgRegTable *basereg) {
/*
* Jump to function argument.
*/
"bx %1\n\t"
"br %1\n\t"
".globl " STG_RETURN "\n\t"
#if !defined(ios_HOST_OS)
".type " STG_RETURN ", %%function\n"
#endif
STG_RETURN ":\n\t"
/*
* Free the space we allocated
*/
"ldr lr, [sp], %3\n\t"
/* "add sp, sp, %3\n\t" */
"add sp, sp, %3\n\t"
/*
* Return the new register table, taking it from Stg's R1 (ARM64's R22).
*/
......@@ -796,18 +803,23 @@ StgRun(StgFunPtr f, StgRegTable *basereg) {
/*
* restore callee-saves registers.
*/
"ldr lr, [sp], #8\n\t"
"ldp ip0, ip1, [sp], #16\n\t"
"ldp d14, d15, [sp], #16\n\t"
"ldp d12, d13, [sp], #16\n\t"
"ldp d10, d11, [sp], #16\n\t"
"ldp d8, d9, [sp], #16\n\t"
"ldp x27, x28, [sp], #16\n\t"
"ldp x25, x26, [sp], #16\n\t"
"ldp x23, x24, [sp], #16\n\t"
"ldp x21, x22, [sp], #16\n\t"
"ldp x19, x20, [sp], #16\n\t"
"ldp x16, x17, [sp], #16\n\t"
"ldp fp, lr, [sp], #16\n\t"
: "=r" (r)
: "r" (f), "r" (basereg), "i" (RESERVED_C_STACK_BYTES)
: "%x19", "%x20", "%x21", "%x22", "%x23", "%x24", "%x25", "%x26", "%x27", "%x28",
"%ip0", "%ip1", "%lr"
"%x16", "%x17", "%lr"
);
return r;
}
......
......@@ -32,7 +32,7 @@
#include <errno.h>
#if darwin_HOST_OS
#if darwin_HOST_OS || ios_HOST_OS
#include <mach/mach.h>
#include <mach/vm_map.h>
#include <sys/sysctl.h>
......
......@@ -1168,7 +1168,7 @@ calcNeeded (rtsBool force_major, memcount *blocks_needed)
should be modified to use allocateExec instead of VirtualAlloc.
------------------------------------------------------------------------- */
#if defined(arm_HOST_ARCH) && defined(ios_HOST_OS)
#if (defined(arm_HOST_ARCH) || defined(aarch64_HOST_ARCH)) && defined(ios_HOST_OS)
void sys_icache_invalidate(void *start, size_t len);
#endif
......@@ -1180,7 +1180,7 @@ void flushExec (W_ len, AdjustorExecutable exec_addr)
/* x86 doesn't need to do anything, so just suppress some warnings. */
(void)len;
(void)exec_addr;
#elif defined(arm_HOST_ARCH) && defined(ios_HOST_OS)
#elif (defined(arm_HOST_ARCH) || defined(aarch64_HOST_ARCH)) && defined(ios_HOST_OS)
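/* On iOS we need to use the special 'sys_icache_invalidate' call.
 * NOTE(review): its second parameter is declared above as 'size_t len',
 * but the call below passes an end pointer (exec_addr + len) rather than
 * len -- confirm and fix. */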
sys_icache_invalidate(exec_addr, ((unsigned char*)exec_addr)+len);
#elif defined(__GNUC__)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment