Commit d3c1dda6 authored by Peter Trommler's avatar Peter Trommler 🥁 Committed by Ben Gamari

Implement PowerPC 64-bit native code backend for Linux

Extend the PowerPC 32-bit native code generator for "64-bit
PowerPC ELF Application Binary Interface Supplement 1.9" by
Ian Lance Taylor and "Power Architecture 64-Bit ELF V2 ABI Specification --
OpenPOWER ABI for Linux Supplement" by IBM.
The latter ABI is mainly used on POWER7/7+ and POWER8
Linux systems running in little-endian mode. The code generator
supports both static and dynamic linking. PowerPC 64-bit
code for ELF ABI 1.9 and 2 is mostly position independent
anyway, and thus so is all the code emitted by the code
generator. In other words, -fPIC does not make a difference.

rts/stg/SMP.h support is implemented.

Following the spirit of the introductory comment in
PPC/CodeGen.hs, the rest of the code is a straightforward
extension of the 32-bit implementation.

Limitations:
* Code is generated only in the medium code model, which
  is also gcc's default
* Local symbols are not accessed directly, which seems to
  also be the case for 32-bit
* LLVM does not work, but this does not work on 32-bit either
* Must use the system runtime linker in GHCi, because the
  GHC linker for "static" object files (rts/Linker.c) for
  PPC 64-bit is not implemented. The system runtime
  (dynamic) linker works.
* The handling of the system stack (register 1) is not ELF-
  compliant so stack traces break. Instead of allocating a new
  stack frame, spill code should use the "official" spill area
  in the current stack frame and deallocation code should restore
  the back chain
* DWARF support is missing

Fixes #9863

Test Plan: validate (on powerpc, too)

Reviewers: simonmar, trofi, erikd, austin

Reviewed By: trofi

Subscribers: bgamari, arnons1, kgardas, thomie

Differential Revision: https://phabricator.haskell.org/D629

GHC Trac Issues: #9863
parent bdf7f133
......@@ -188,7 +188,10 @@ AC_DEFUN([FPTOOLS_SET_HASKELL_PLATFORM_VARS],
test -z "[$]2" || eval "[$]2=ArchPPC"
;;
powerpc64)
test -z "[$]2" || eval "[$]2=ArchPPC_64"
test -z "[$]2" || eval "[$]2=\"ArchPPC_64 {ppc_64ABI = ELF_V1}\""
;;
powerpc64le)
test -z "[$]2" || eval "[$]2=\"ArchPPC_64 {ppc_64ABI = ELF_V2}\""
;;
sparc)
test -z "[$]2" || eval "[$]2=ArchSPARC"
......@@ -209,7 +212,7 @@ AC_DEFUN([FPTOOLS_SET_HASKELL_PLATFORM_VARS],
mipsel)
test -z "[$]2" || eval "[$]2=ArchMipsel"
;;
hppa|hppa1_1|ia64|m68k|powerpc64le|rs6000|s390|s390x|sparc64|vax)
hppa|hppa1_1|ia64|m68k|rs6000|s390|s390x|sparc64|vax)
test -z "[$]2" || eval "[$]2=ArchUnknown"
;;
*)
......
......@@ -1177,16 +1177,24 @@ pprDynamicLinkerAsmLabel platform dllInfo lbl
else if osElfTarget (platformOS platform)
then if platformArch platform == ArchPPC
then case dllInfo of
CodeStub -> -- See Note [.LCTOC1 in PPC PIC code]
ppr lbl <> text "+32768@plt"
SymbolPtr -> text ".LC_" <> ppr lbl
_ -> panic "pprDynamicLinkerAsmLabel"
CodeStub -> -- See Note [.LCTOC1 in PPC PIC code]
ppr lbl <> text "+32768@plt"
SymbolPtr -> text ".LC_" <> ppr lbl
_ -> panic "pprDynamicLinkerAsmLabel"
else if platformArch platform == ArchX86_64
then case dllInfo of
CodeStub -> ppr lbl <> text "@plt"
GotSymbolPtr -> ppr lbl <> text "@gotpcrel"
GotSymbolOffset -> ppr lbl
SymbolPtr -> text ".LC_" <> ppr lbl
else if platformArch platform == ArchPPC_64 ELF_V1
|| platformArch platform == ArchPPC_64 ELF_V2
then case dllInfo of
GotSymbolPtr -> text ".LC_" <> ppr lbl
<> text "@toc"
GotSymbolOffset -> ppr lbl
SymbolPtr -> text ".LC_" <> ppr lbl
_ -> panic "pprDynamicLinkerAsmLabel"
else case dllInfo of
CodeStub -> ppr lbl <> text "@plt"
SymbolPtr -> text ".LC_" <> ppr lbl
......
......@@ -31,7 +31,7 @@ callerSaves platform
ArchARM {} -> ARM.callerSaves
ArchARM64 -> ARM64.callerSaves
arch
| arch `elem` [ArchPPC, ArchPPC_64] ->
| arch `elem` [ArchPPC, ArchPPC_64 ELF_V1, ArchPPC_64 ELF_V2] ->
case platformOS platform of
OSDarwin -> PPC_Darwin.callerSaves
_ -> PPC.callerSaves
......@@ -54,7 +54,7 @@ activeStgRegs platform
ArchARM {} -> ARM.activeStgRegs
ArchARM64 -> ARM64.activeStgRegs
arch
| arch `elem` [ArchPPC, ArchPPC_64] ->
| arch `elem` [ArchPPC, ArchPPC_64 ELF_V1, ArchPPC_64 ELF_V2] ->
case platformOS platform of
OSDarwin -> PPC_Darwin.activeStgRegs
_ -> PPC.activeStgRegs
......@@ -72,7 +72,7 @@ haveRegBase platform
ArchARM {} -> ARM.haveRegBase
ArchARM64 -> ARM64.haveRegBase
arch
| arch `elem` [ArchPPC, ArchPPC_64] ->
| arch `elem` [ArchPPC, ArchPPC_64 ELF_V1, ArchPPC_64 ELF_V2] ->
case platformOS platform of
OSDarwin -> PPC_Darwin.haveRegBase
_ -> PPC.haveRegBase
......@@ -90,7 +90,7 @@ globalRegMaybe platform
ArchARM {} -> ARM.globalRegMaybe
ArchARM64 -> ARM64.globalRegMaybe
arch
| arch `elem` [ArchPPC, ArchPPC_64] ->
| arch `elem` [ArchPPC, ArchPPC_64 ELF_V1, ArchPPC_64 ELF_V2] ->
case platformOS platform of
OSDarwin -> PPC_Darwin.globalRegMaybe
_ -> PPC.globalRegMaybe
......@@ -108,7 +108,7 @@ freeReg platform
ArchARM {} -> ARM.freeReg
ArchARM64 -> ARM64.freeReg
arch
| arch `elem` [ArchPPC, ArchPPC_64] ->
| arch `elem` [ArchPPC, ArchPPC_64 ELF_V1, ArchPPC_64 ELF_V2] ->
case platformOS platform of
OSDarwin -> PPC_Darwin.freeReg
_ -> PPC.freeReg
......
......@@ -166,18 +166,18 @@ nativeCodeGen dflags this_mod modLoc h us cmms
=> NcgImpl statics instr jumpDest -> IO UniqSupply
nCG' ncgImpl = nativeCodeGen' dflags this_mod modLoc ncgImpl h us cmms
in case platformArch platform of
ArchX86 -> nCG' (x86NcgImpl dflags)
ArchX86_64 -> nCG' (x86_64NcgImpl dflags)
ArchPPC -> nCG' (ppcNcgImpl dflags)
ArchSPARC -> nCG' (sparcNcgImpl dflags)
ArchARM {} -> panic "nativeCodeGen: No NCG for ARM"
ArchARM64 -> panic "nativeCodeGen: No NCG for ARM64"
ArchPPC_64 -> panic "nativeCodeGen: No NCG for PPC 64"
ArchAlpha -> panic "nativeCodeGen: No NCG for Alpha"
ArchMipseb -> panic "nativeCodeGen: No NCG for mipseb"
ArchMipsel -> panic "nativeCodeGen: No NCG for mipsel"
ArchUnknown -> panic "nativeCodeGen: No NCG for unknown arch"
ArchJavaScript -> panic "nativeCodeGen: No NCG for JavaScript"
ArchX86 -> nCG' (x86NcgImpl dflags)
ArchX86_64 -> nCG' (x86_64NcgImpl dflags)
ArchPPC -> nCG' (ppcNcgImpl dflags)
ArchSPARC -> nCG' (sparcNcgImpl dflags)
ArchARM {} -> panic "nativeCodeGen: No NCG for ARM"
ArchARM64 -> panic "nativeCodeGen: No NCG for ARM64"
ArchPPC_64 _ -> nCG' (ppcNcgImpl dflags)
ArchAlpha -> panic "nativeCodeGen: No NCG for Alpha"
ArchMipseb -> panic "nativeCodeGen: No NCG for mipseb"
ArchMipsel -> panic "nativeCodeGen: No NCG for mipsel"
ArchUnknown -> panic "nativeCodeGen: No NCG for unknown arch"
ArchJavaScript-> panic "nativeCodeGen: No NCG for JavaScript"
x86NcgImpl :: DynFlags -> NcgImpl (Alignment, CmmStatics) X86.Instr.Instr X86.Instr.JumpDest
x86NcgImpl dflags
......
......@@ -158,7 +158,14 @@ cmmMakePicReference dflags lbl
-- everything gets relocated at runtime
| OSMinGW32 <- platformOS $ targetPlatform dflags
= CmmLit $ CmmLabel lbl
-- both ABI versions default to medium code model
| ArchPPC_64 _ <- platformArch $ targetPlatform dflags
= CmmMachOp (MO_Add W32) -- code model medium
[ CmmReg (CmmGlobal PicBaseReg)
, CmmLit $ picRelative
(platformArch $ targetPlatform dflags)
(platformOS $ targetPlatform dflags)
lbl ]
| (gopt Opt_PIC dflags || not (gopt Opt_Static dflags)) && absoluteLabel lbl
= CmmMachOp (MO_Add (wordWidth dflags))
......@@ -293,13 +300,17 @@ howToAccessLabel dflags arch OSDarwin this_mod _ lbl
-- from position independent code. It is also required from the main program
-- when dynamic libraries containing Haskell code are used.
howToAccessLabel _ ArchPPC_64 os _ kind _
howToAccessLabel _ (ArchPPC_64 _) os _ kind _
| osElfTarget os
= if kind == DataReference
-- ELF PPC64 (powerpc64-linux), AIX, MacOS 9, BeOS/PPC
then AccessViaSymbolPtr
-- actually, .label instead of label
else AccessDirectly
= case kind of
-- ELF PPC64 (powerpc64-linux), AIX, MacOS 9, BeOS/PPC
DataReference -> AccessViaSymbolPtr
-- RTLD does not generate stubs for function descriptors
-- in tail calls. Create a symbol pointer and generate
-- the code to load the function descriptor at the call site.
JumpReference -> AccessViaSymbolPtr
-- regular calls are handled by the runtime linker
_ -> AccessDirectly
howToAccessLabel dflags _ os _ _ _
-- no PIC -> the dynamic linker does everything for us;
......@@ -430,9 +441,14 @@ needImportedSymbols dflags arch os
, arch == ArchPPC
= gopt Opt_PIC dflags || not (gopt Opt_Static dflags)
-- PowerPC 64 Linux: always
| osElfTarget os
, arch == ArchPPC_64 ELF_V1 || arch == ArchPPC_64 ELF_V2
= True
-- i386 (and others?): -dynamic but not -fPIC
| osElfTarget os
, arch /= ArchPPC_64
, arch /= ArchPPC_64 ELF_V1 && arch /= ArchPPC_64 ELF_V2
= not (gopt Opt_Static dflags) && not (gopt Opt_PIC dflags)
| otherwise
......@@ -467,16 +483,30 @@ pprGotDeclaration dflags ArchX86 OSDarwin
pprGotDeclaration _ _ OSDarwin
= empty
-- PPC 64 ELF v1needs a Table Of Contents (TOC) on Linux
pprGotDeclaration _ (ArchPPC_64 ELF_V1) OSLinux
= ptext (sLit ".section \".toc\",\"aw\"")
-- In ELF v2 we also need to tell the assembler that we want ABI
-- version 2. This would normally be done at the top of the file
-- right after a file directive, but I could not figure out how
-- to do that.
pprGotDeclaration _ (ArchPPC_64 ELF_V2) OSLinux
= vcat [ ptext (sLit ".abiversion 2"),
ptext (sLit ".section \".toc\",\"aw\"")
]
pprGotDeclaration _ (ArchPPC_64 _) _
= panic "pprGotDeclaration: ArchPPC_64 only Linux supported"
-- Emit GOT declaration
-- Output whatever needs to be output once per .s file.
pprGotDeclaration dflags arch os
| osElfTarget os
, arch /= ArchPPC_64
, arch /= ArchPPC_64 ELF_V1 && arch /= ArchPPC_64 ELF_V2
, not (gopt Opt_PIC dflags)
= empty
| osElfTarget os
, arch /= ArchPPC_64
, arch /= ArchPPC_64 ELF_V1 && arch /= ArchPPC_64 ELF_V2
= vcat [
-- See Note [.LCTOC1 in PPC PIC code]
ptext (sLit ".section \".got2\",\"aw\""),
......@@ -635,9 +665,16 @@ pprImportedSymbol _ (Platform { platformOS = OSDarwin }) _
-- the NCG will keep track of all DynamicLinkerLabels it uses
-- and output each of them using pprImportedSymbol.
pprImportedSymbol _ platform@(Platform { platformArch = ArchPPC_64 }) _
pprImportedSymbol _ platform@(Platform { platformArch = ArchPPC_64 _ })
importedLbl
| osElfTarget (platformOS platform)
= empty
= case dynamicLinkerLabelInfo importedLbl of
Just (SymbolPtr, lbl)
-> vcat [
ptext (sLit ".section \".toc\", \"aw\""),
ptext (sLit ".LC_") <> pprCLabel platform lbl <> char ':',
ptext (sLit "\t.quad") <+> pprCLabel platform lbl ]
_ -> empty
pprImportedSymbol dflags platform importedLbl
| osElfTarget (platformOS platform)
......@@ -735,6 +772,28 @@ initializePicBase_ppc ArchPPC OSDarwin picReg
where BasicBlock bID insns = entry
b' = BasicBlock bID (PPC.FETCHPC picReg : insns)
-------------------------------------------------------------------------
-- Load TOC into register 2
-- PowerPC 64-bit ELF ABI 2.0 requires the address of the callee
-- in register 12.
-- We pass the label to FETCHTOC and create a .localentry too.
-- TODO: Explain this better and refer to ABI spec!
{-
We would like to do approximately this, but spill slot allocation
might be added before the first BasicBlock. That violates the ABI.
For now we will emit the prologue code in the pretty printer,
which is also what we do for ELF v1.
initializePicBase_ppc (ArchPPC_64 ELF_V2) OSLinux picReg
(CmmProc info lab live (ListGraph (entry:blocks)) : statics)
= do
bID <-getUniqueM
return (CmmProc info lab live (ListGraph (b':entry:blocks))
: statics)
where BasicBlock entryID _ = entry
b' = BasicBlock bID [PPC.FETCHTOC picReg lab,
PPC.BCC PPC.ALWAYS entryID]
-}
initializePicBase_ppc _ _ _ _
= panic "initializePicBase_ppc: not needed"
......
......@@ -78,14 +78,24 @@ cmmTopCodeGen
cmmTopCodeGen (CmmProc info lab live graph) = do
let blocks = toBlockListEntryFirst graph
(nat_blocks,statics) <- mapAndUnzipM basicBlockCodeGen blocks
picBaseMb <- getPicBaseMaybeNat
dflags <- getDynFlags
let proc = CmmProc info lab live (ListGraph $ concat nat_blocks)
tops = proc : concat statics
os = platformOS $ targetPlatform dflags
case picBaseMb of
Just picBase -> initializePicBase_ppc ArchPPC os picBase tops
Nothing -> return tops
arch = platformArch $ targetPlatform dflags
case arch of
ArchPPC -> do
picBaseMb <- getPicBaseMaybeNat
case picBaseMb of
Just picBase -> initializePicBase_ppc arch os picBase tops
Nothing -> return tops
ArchPPC_64 ELF_V1 -> return tops
-- generating function descriptor is handled in
-- pretty printer
ArchPPC_64 ELF_V2 -> return tops
-- generating function prologue is handled in
-- pretty printer
_ -> panic "PPC.cmmTopCodeGen: unknown arch"
cmmTopCodeGen (CmmData sec dat) = do
return [CmmData sec dat] -- no translation, we just use CmmStatic
......@@ -198,26 +208,6 @@ getRegisterReg platform (CmmGlobal mid)
-- ones which map to a real machine register on this
-- platform. Hence ...
{-
Now, given a tree (the argument to an CmmLoad) that references memory,
produce a suitable addressing mode.
A Rule of the Game (tm) for Amodes: use of the addr bit must
immediately follow use of the code part, since the code part puts
values in registers which the addr then refers to. So you can't put
anything in between, lest it overwrite some of those registers. If
you need to do some other computation between the code part and use of
the addr bit, first store the effective address from the amode in a
temporary, then do the other computation, and then use the temporary:
code
LEA amode, tmp
... other computation ...
... (tmp) ...
-}
-- | Convert a BlockId to some CmmStatic data
jumpTableEntry :: DynFlags -> Maybe BlockId -> CmmStatic
jumpTableEntry dflags Nothing = CmmStaticLit (CmmInt 0 (wordWidth dflags))
......@@ -265,7 +255,7 @@ data ChildCode64 -- a.k.a "Register64"
-- Reg may be modified
-- | The dual to getAnyReg: compute an expression into a register, but
-- | Compute an expression into a register, but
-- we don't mind which one it is.
getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
getSomeReg expr = do
......@@ -279,7 +269,7 @@ getSomeReg expr = do
getI64Amodes :: CmmExpr -> NatM (AddrMode, AddrMode, InstrBlock)
getI64Amodes addrTree = do
Amode hi_addr addr_code <- getAmode addrTree
Amode hi_addr addr_code <- getAmode D addrTree
case addrOffset hi_addr 4 of
Just lo_addr -> return (hi_addr, lo_addr, addr_code)
Nothing -> do (hi_ptr, code) <- getSomeReg addrTree
......@@ -390,10 +380,12 @@ getRegister e = do dflags <- getDynFlags
getRegister' :: DynFlags -> CmmExpr -> NatM Register
getRegister' _ (CmmReg (CmmGlobal PicBaseReg))
= do
reg <- getPicBaseNat archWordSize
return (Fixed archWordSize reg nilOL)
getRegister' dflags (CmmReg (CmmGlobal PicBaseReg))
| target32Bit (targetPlatform dflags) = do
reg <- getPicBaseNat $ archWordSize (target32Bit (targetPlatform dflags))
return (Fixed (archWordSize (target32Bit (targetPlatform dflags)))
reg nilOL)
| otherwise = return (Fixed II64 toc nilOL)
getRegister' dflags (CmmReg reg)
= return (Fixed (cmmTypeSize (cmmRegType dflags reg))
......@@ -428,30 +420,54 @@ getRegister' dflags (CmmMachOp (MO_SS_Conv W64 W32) [x])
return $ Fixed II32 rlo code
getRegister' dflags (CmmLoad mem pk)
| not (isWord64 pk)
= do
| not (isWord64 pk) = do
let platform = targetPlatform dflags
Amode addr addr_code <- getAmode mem
Amode addr addr_code <- getAmode D mem
let code dst = ASSERT((targetClassOfReg platform dst == RcDouble) == isFloatType pk)
addr_code `snocOL` LD size dst addr
return (Any size code)
| not (target32Bit (targetPlatform dflags)) = do
Amode addr addr_code <- getAmode DS mem
let code dst = addr_code `snocOL` LD II64 dst addr
return (Any II64 code)
where size = cmmTypeSize pk
-- catch simple cases of zero- or sign-extended load
getRegister' _ (CmmMachOp (MO_UU_Conv W8 W32) [CmmLoad mem _]) = do
Amode addr addr_code <- getAmode mem
Amode addr addr_code <- getAmode D mem
return (Any II32 (\dst -> addr_code `snocOL` LD II8 dst addr))
getRegister' _ (CmmMachOp (MO_UU_Conv W8 W64) [CmmLoad mem _]) = do
Amode addr addr_code <- getAmode D mem
return (Any II64 (\dst -> addr_code `snocOL` LD II8 dst addr))
-- Note: there is no Load Byte Arithmetic instruction, so no signed case here
getRegister' _ (CmmMachOp (MO_UU_Conv W16 W32) [CmmLoad mem _]) = do
Amode addr addr_code <- getAmode mem
Amode addr addr_code <- getAmode D mem
return (Any II32 (\dst -> addr_code `snocOL` LD II16 dst addr))
getRegister' _ (CmmMachOp (MO_SS_Conv W16 W32) [CmmLoad mem _]) = do
Amode addr addr_code <- getAmode mem
Amode addr addr_code <- getAmode D mem
return (Any II32 (\dst -> addr_code `snocOL` LA II16 dst addr))
getRegister' _ (CmmMachOp (MO_UU_Conv W16 W64) [CmmLoad mem _]) = do
Amode addr addr_code <- getAmode D mem
return (Any II64 (\dst -> addr_code `snocOL` LD II16 dst addr))
getRegister' _ (CmmMachOp (MO_SS_Conv W16 W64) [CmmLoad mem _]) = do
Amode addr addr_code <- getAmode D mem
return (Any II64 (\dst -> addr_code `snocOL` LA II16 dst addr))
getRegister' _ (CmmMachOp (MO_UU_Conv W32 W64) [CmmLoad mem _]) = do
Amode addr addr_code <- getAmode D mem
return (Any II64 (\dst -> addr_code `snocOL` LD II32 dst addr))
getRegister' _ (CmmMachOp (MO_SS_Conv W32 W64) [CmmLoad mem _]) = do
Amode addr addr_code <- getAmode D mem
return (Any II64 (\dst -> addr_code `snocOL` LA II32 dst addr))
getRegister' dflags (CmmMachOp mop [x]) -- unary MachOps
= case mop of
MO_Not rep -> triv_ucode_int rep NOT
......@@ -469,7 +485,16 @@ getRegister' dflags (CmmMachOp mop [x]) -- unary MachOps
| from == to -> conversionNop (intSize to) x
-- narrowing is a nop: we treat the high bits as undefined
MO_SS_Conv W32 to -> conversionNop (intSize to) x
MO_SS_Conv W64 to
| arch32 -> panic "PPC.CodeGen.getRegister no 64 bit int register"
| otherwise -> conversionNop (intSize to) x
MO_SS_Conv W32 to
| arch32 -> conversionNop (intSize to) x
| otherwise -> case to of
W64 -> triv_ucode_int to (EXTS II32)
W16 -> conversionNop II16 x
W8 -> conversionNop II8 x
_ -> panic "PPC.CodeGen.getRegister: no match"
MO_SS_Conv W16 W8 -> conversionNop II8 x
MO_SS_Conv W8 to -> triv_ucode_int to (EXTS II8)
MO_SS_Conv W16 to -> triv_ucode_int to (EXTS II16)
......@@ -477,7 +502,17 @@ getRegister' dflags (CmmMachOp mop [x]) -- unary MachOps
MO_UU_Conv from to
| from == to -> conversionNop (intSize to) x
-- narrowing is a nop: we treat the high bits as undefined
MO_UU_Conv W32 to -> conversionNop (intSize to) x
MO_UU_Conv W64 to
| arch32 -> panic "PPC.CodeGen.getRegister no 64 bit target"
| otherwise -> conversionNop (intSize to) x
MO_UU_Conv W32 to
| arch32 -> conversionNop (intSize to) x
| otherwise ->
case to of
W64 -> trivialCode to False AND x (CmmLit (CmmInt 4294967295 W64))
W16 -> conversionNop II16 x
W8 -> conversionNop II8 x
_ -> panic "PPC.CodeGen.getRegister: no match"
MO_UU_Conv W16 W8 -> conversionNop II8 x
MO_UU_Conv W8 to -> trivialCode to False AND x (CmmLit (CmmInt 255 W32))
MO_UU_Conv W16 to -> trivialCode to False AND x (CmmLit (CmmInt 65535 W32))
......@@ -490,8 +525,9 @@ getRegister' dflags (CmmMachOp mop [x]) -- unary MachOps
conversionNop new_size expr
= do e_code <- getRegister' dflags expr
return (swizzleRegisterRep e_code new_size)
arch32 = target32Bit $ targetPlatform dflags
getRegister' _ (CmmMachOp mop [x, y]) -- dyadic PrimOps
getRegister' dflags (CmmMachOp mop [x, y]) -- dyadic PrimOps
= case mop of
MO_F_Eq _ -> condFltReg EQQ x y
MO_F_Ne _ -> condFltReg NE x y
......@@ -500,18 +536,28 @@ getRegister' _ (CmmMachOp mop [x, y]) -- dyadic PrimOps
MO_F_Lt _ -> condFltReg LTT x y
MO_F_Le _ -> condFltReg LE x y
MO_Eq rep -> condIntReg EQQ (extendUExpr rep x) (extendUExpr rep y)
MO_Ne rep -> condIntReg NE (extendUExpr rep x) (extendUExpr rep y)
MO_S_Gt rep -> condIntReg GTT (extendSExpr rep x) (extendSExpr rep y)
MO_S_Ge rep -> condIntReg GE (extendSExpr rep x) (extendSExpr rep y)
MO_S_Lt rep -> condIntReg LTT (extendSExpr rep x) (extendSExpr rep y)
MO_S_Le rep -> condIntReg LE (extendSExpr rep x) (extendSExpr rep y)
MO_U_Gt rep -> condIntReg GU (extendUExpr rep x) (extendUExpr rep y)
MO_U_Ge rep -> condIntReg GEU (extendUExpr rep x) (extendUExpr rep y)
MO_U_Lt rep -> condIntReg LU (extendUExpr rep x) (extendUExpr rep y)
MO_U_Le rep -> condIntReg LEU (extendUExpr rep x) (extendUExpr rep y)
MO_Eq rep -> condIntReg EQQ (extendUExpr dflags rep x)
(extendUExpr dflags rep y)
MO_Ne rep -> condIntReg NE (extendUExpr dflags rep x)
(extendUExpr dflags rep y)
MO_S_Gt rep -> condIntReg GTT (extendSExpr dflags rep x)
(extendSExpr dflags rep y)
MO_S_Ge rep -> condIntReg GE (extendSExpr dflags rep x)
(extendSExpr dflags rep y)
MO_S_Lt rep -> condIntReg LTT (extendSExpr dflags rep x)
(extendSExpr dflags rep y)
MO_S_Le rep -> condIntReg LE (extendSExpr dflags rep x)
(extendSExpr dflags rep y)
MO_U_Gt rep -> condIntReg GU (extendUExpr dflags rep x)
(extendUExpr dflags rep y)
MO_U_Ge rep -> condIntReg GEU (extendUExpr dflags rep x)
(extendUExpr dflags rep y)
MO_U_Lt rep -> condIntReg LU (extendUExpr dflags rep x)
(extendUExpr dflags rep y)
MO_U_Le rep -> condIntReg LEU (extendUExpr dflags rep x)
(extendUExpr dflags rep y)
MO_F_Add w -> triv_float w FADD
MO_F_Sub w -> triv_float w FSUB
......@@ -542,32 +588,53 @@ getRegister' _ (CmmMachOp mop [x, y]) -- dyadic PrimOps
-> trivialCode rep True ADD x (CmmLit $ CmmInt (-imm) immrep)
_ -> trivialCodeNoImm' (intSize rep) SUBF y x
MO_Mul rep -> trivialCode rep True MULLW x y
MO_Mul rep
| arch32 -> trivialCode rep True MULLW x y
| otherwise -> trivialCode rep True MULLD x y
MO_S_MulMayOflo W32 -> trivialCodeNoImm' II32 MULLW_MayOflo x y
MO_S_MulMayOflo W64 -> trivialCodeNoImm' II64 MULLD_MayOflo x y
MO_S_MulMayOflo _ -> panic "S_MulMayOflo (rep /= II32): not implemented"
MO_S_MulMayOflo _ -> panic "S_MulMayOflo: (II8/16) not implemented"
MO_U_MulMayOflo _ -> panic "U_MulMayOflo: not implemented"
MO_S_Quot rep -> trivialCodeNoImm' (intSize rep) DIVW (extendSExpr rep x) (extendSExpr rep y)
MO_U_Quot rep -> trivialCodeNoImm' (intSize rep) DIVWU (extendUExpr rep x) (extendUExpr rep y)
MO_S_Rem rep -> remainderCode rep DIVW (extendSExpr rep x) (extendSExpr rep y)
MO_U_Rem rep -> remainderCode rep DIVWU (extendUExpr rep x) (extendUExpr rep y)
MO_S_Quot rep
| arch32 -> trivialCodeNoImm' (intSize rep) DIVW
(extendSExpr dflags rep x) (extendSExpr dflags rep y)
| otherwise -> trivialCodeNoImm' (intSize rep) DIVD
(extendSExpr dflags rep x) (extendSExpr dflags rep y)
MO_U_Quot rep
| arch32 -> trivialCodeNoImm' (intSize rep) DIVWU
(extendUExpr dflags rep x) (extendUExpr dflags rep y)
| otherwise -> trivialCodeNoImm' (intSize rep) DIVDU
(extendUExpr dflags rep x) (extendUExpr dflags rep y)
MO_S_Rem rep
| arch32 -> remainderCode rep DIVW (extendSExpr dflags rep x)
(extendSExpr dflags rep y)
| otherwise -> remainderCode rep DIVD (extendSExpr dflags rep x)
(extendSExpr dflags rep y)
MO_U_Rem rep
| arch32 -> remainderCode rep DIVWU (extendSExpr dflags rep x)
(extendSExpr dflags rep y)
| otherwise -> remainderCode rep DIVDU (extendSExpr dflags rep x)
(extendSExpr dflags rep y)
MO_And rep -> trivialCode rep False AND x y
MO_Or rep -> trivialCode rep False OR x y
MO_Xor rep -> trivialCode rep False XOR x y
MO_Shl rep -> trivialCode rep False SLW x y
MO_S_Shr rep -> trivialCode rep False SRAW (extendSExpr rep x) y
MO_U_Shr rep -> trivialCode rep False SRW (extendUExpr rep x) y
MO_Shl rep -> shiftCode rep SL x y
MO_S_Shr rep -> shiftCode rep SRA (extendSExpr dflags rep x) y
MO_U_Shr rep -> shiftCode rep SR (extendUExpr dflags rep x) y
_ -> panic "PPC.CodeGen.getRegister: no match"
where
triv_float :: Width -> (Size -> Reg -> Reg -> Reg -> Instr) -> NatM Register
triv_float width instr = trivialCodeNoImm (floatSize width) instr x y
arch32 = target32Bit $ targetPlatform dflags
getRegister' _ (CmmLit (CmmInt i rep))
| Just imm <- makeImmediate rep True i
= let
......@@ -579,7 +646,7 @@ getRegister' _ (CmmLit (CmmFloat f frep)) = do
lbl <- getNewLabelNat
dflags <- getDynFlags
dynRef <- cmmMakeDynamicReference dflags DataReference lbl
Amode addr addr_code <- getAmode dynRef
Amode addr addr_code <- getAmode D dynRef
let size = floatSize frep
code dst =
LDATA ReadOnlyData (Statics lbl
......@@ -588,6 +655,7 @@ getRegister' _ (CmmLit (CmmFloat f frep)) = do
return (Any size code)
getRegister' dflags (CmmLit lit)
| target32Bit (targetPlatform dflags)
= let rep = cmmLitType dflags lit
imm = litToImm lit
code dst = toOL [
......@@ -595,18 +663,46 @@ getRegister' dflags (CmmLit lit)
ADD dst dst (RIImm (LO imm))
]
in return (Any (cmmTypeSize rep) code)
| otherwise
= do lbl <- getNewLabelNat
dflags <- getDynFlags
dynRef <- cmmMakeDynamicReference dflags DataReference lbl
Amode addr addr_code <- getAmode D dynRef
let rep = cmmLitType dflags lit
size = cmmTypeSize rep
code dst =
LDATA ReadOnlyData (Statics lbl
[CmmStaticLit lit])
`consOL` (addr_code `snocOL` LD size dst addr)
return (Any size code)
getRegister' _ other = pprPanic "getRegister(ppc)" (pprExpr other)
-- extend?Rep: wrap integer expression of type rep
-- in a conversion to II32
extendSExpr :: Width -> CmmExpr -> CmmExpr
extendSExpr W32 x = x
extendSExpr rep x = CmmMachOp (MO_SS_Conv rep W32) [x]
extendUExpr :: Width -> CmmExpr -> CmmExpr
extendUExpr W32 x = x
extendUExpr rep x = CmmMachOp (MO_UU_Conv rep W32) [x]
-- in a conversion to II32 or II64 resp.
extendSExpr :: DynFlags -> Width -> CmmExpr -> CmmExpr
extendSExpr dflags W32 x
| target32Bit (targetPlatform dflags) = x
extendSExpr dflags W64 x
| not (target32Bit (targetPlatform dflags)) = x