Commit 83e09d3c authored by Peter Trommler's avatar Peter Trommler 🥁 Committed by Marge Bot

PPC NCG: Use liveness information in CmmCall

We make liveness information for global registers
available on `JMP` and `BCTR`, which were the last instructions
missing. With complete liveness information we do not need to
reserve global registers in `freeReg` anymore. Moreover we
assign R9 and R10 to callee saves registers.

Cleanup by removing `Reg_Su`, which was unused, from `freeReg`
and removing unused register definitions.

The calculation of the number of floating point registers is too
conservative. Just follow X86 and specify the constants directly.

Overall on PowerPC this results in 0.3 % smaller code size in nofib
while runtime is slightly better in some tests.
parent 97032ed9
Pipeline #3555 passed with stages
in 463 minutes and 3 seconds
......@@ -180,10 +180,16 @@ stmtToInstrs stmt = do
return (b1 `appOL` b2)
CmmSwitch arg ids -> do dflags <- getDynFlags
genSwitch dflags arg ids
CmmCall { cml_target = arg } -> genJump arg
CmmCall { cml_target = arg
, cml_args_regs = gregs } -> do
dflags <- getDynFlags
genJump arg (jumpRegs dflags gregs)
_ ->
panic "stmtToInstrs: statement should have been cps'd away"
jumpRegs :: DynFlags -> [GlobalReg] -> [Reg]
jumpRegs dflags gregs = [ RegReal r | Just r <- map (globalRegMaybe platform) gregs ]
where platform = targetPlatform dflags
--------------------------------------------------------------------------------
-- | 'InstrBlock's are the insn sequences generated by the insn selectors.
......@@ -1042,19 +1048,19 @@ assignReg_FltCode = assignReg_IntCode
genJump :: CmmExpr{-the branch target-} -> NatM InstrBlock
genJump :: CmmExpr{-the branch target-} -> [Reg] -> NatM InstrBlock
genJump (CmmLit (CmmLabel lbl))
= return (unitOL $ JMP lbl)
genJump (CmmLit (CmmLabel lbl)) regs
= return (unitOL $ JMP lbl regs)
genJump tree
genJump tree gregs
= do
dflags <- getDynFlags
genJump' tree (platformToGCP (targetPlatform dflags))
genJump' tree (platformToGCP (targetPlatform dflags)) gregs
genJump' :: CmmExpr -> GenCCallPlatform -> NatM InstrBlock
genJump' :: CmmExpr -> GenCCallPlatform -> [Reg] -> NatM InstrBlock
genJump' tree (GCP64ELF 1)
genJump' tree (GCP64ELF 1) regs
= do
(target,code) <- getSomeReg tree
return (code
......@@ -1062,20 +1068,20 @@ genJump' tree (GCP64ELF 1)
`snocOL` LD II64 toc (AddrRegImm target (ImmInt 8))
`snocOL` MTCTR r11
`snocOL` LD II64 r11 (AddrRegImm target (ImmInt 16))
`snocOL` BCTR [] Nothing)
`snocOL` BCTR [] Nothing regs)
genJump' tree (GCP64ELF 2)
genJump' tree (GCP64ELF 2) regs
= do
(target,code) <- getSomeReg tree
return (code
`snocOL` MR r12 target
`snocOL` MTCTR r12
`snocOL` BCTR [] Nothing)
`snocOL` BCTR [] Nothing regs)
genJump' tree _
genJump' tree _ regs
= do
(target,code) <- getSomeReg tree
return (code `snocOL` MTCTR target `snocOL` BCTR [] Nothing)
return (code `snocOL` MTCTR target `snocOL` BCTR [] Nothing regs)
-- -----------------------------------------------------------------------------
-- Unconditional branches
......@@ -2044,7 +2050,7 @@ genSwitch dflags expr targets
SL fmt tmp reg (RIImm (ImmInt sha)),
LD fmt tmp (AddrRegReg tableReg tmp),
MTCTR tmp,
BCTR ids (Just lbl)
BCTR ids (Just lbl) []
]
return code
......@@ -2062,7 +2068,7 @@ genSwitch dflags expr targets
LD fmt tmp (AddrRegReg tableReg tmp),
ADD tmp tmp (RIReg tableReg),
MTCTR tmp,
BCTR ids (Just lbl)
BCTR ids (Just lbl) []
]
return code
| otherwise
......@@ -2077,14 +2083,14 @@ genSwitch dflags expr targets
ADDIS tmp tmp (HA (ImmCLbl lbl)),
LD fmt tmp (AddrRegImm tmp (LO (ImmCLbl lbl))),
MTCTR tmp,
BCTR ids (Just lbl)
BCTR ids (Just lbl) []
]
return code
where (offset, ids) = switchTargetsToTable targets
generateJumpTableForInstr :: DynFlags -> Instr
-> Maybe (NatCmmDecl CmmStatics Instr)
generateJumpTableForInstr dflags (BCTR ids (Just lbl)) =
generateJumpTableForInstr dflags (BCTR ids (Just lbl) _) =
let jumpTable
| (positionIndependent dflags)
|| (not $ target32Bit $ targetPlatform dflags)
......
......@@ -151,12 +151,12 @@ allocMoreStack platform slots (CmmProc info lbl live (ListGraph code)) = do
-- "labeled-goto" we use JMP, and for "computed-goto" we
-- use MTCTR followed by BCTR. See 'PPC.CodeGen.genJump'.
= case insn of
JMP _ -> dealloc ++ (insn : r)
BCTR [] Nothing -> dealloc ++ (insn : r)
BCTR ids label -> BCTR (map (fmap retarget) ids) label : r
BCCFAR cond b p -> BCCFAR cond (retarget b) p : r
BCC cond b p -> BCC cond (retarget b) p : r
_ -> insn : r
JMP _ _ -> dealloc ++ (insn : r)
BCTR [] Nothing _ -> dealloc ++ (insn : r)
BCTR ids label rs -> BCTR (map (fmap retarget) ids) label rs : r
BCCFAR cond b p -> BCCFAR cond (retarget b) p : r
BCC cond b p -> BCC cond (retarget b) p : r
_ -> insn : r
-- BL and BCTRL are call-like instructions rather than
-- jumps, and are used only for C calls.
......@@ -223,10 +223,13 @@ data Instr
-- Just True: branch likely taken
-- Just False: branch likely not taken
-- Nothing: no hint
| JMP CLabel -- same as branch,
| JMP CLabel [Reg] -- same as branch,
-- but with CLabel instead of block ID
-- and live global registers
| MTCTR Reg
| BCTR [Maybe BlockId] (Maybe CLabel) -- with list of local destinations, and jump table location if necessary
| BCTR [Maybe BlockId] (Maybe CLabel) [Reg]
-- with list of local destinations, and
-- jump table location if necessary
| BL CLabel [Reg] -- with list of argument regs
| BCTRL [Reg]
......@@ -324,8 +327,9 @@ ppc_regUsageOfInstr platform instr
CMPL _ reg ri -> usage (reg : regRI ri,[])
BCC _ _ _ -> noUsage
BCCFAR _ _ _ -> noUsage
JMP _ regs -> usage (regs, [])
MTCTR reg -> usage ([reg],[])
BCTR _ _ -> noUsage
BCTR _ _ regs -> usage (regs, [])
BL _ params -> usage (params, callClobberedRegs platform)
BCTRL params -> usage (params, callClobberedRegs platform)
......@@ -416,8 +420,9 @@ ppc_patchRegsOfInstr instr env
CMPL fmt reg ri -> CMPL fmt (env reg) (fixRI ri)
BCC cond lbl p -> BCC cond lbl p
BCCFAR cond lbl p -> BCCFAR cond lbl p
JMP l regs -> JMP l regs -- global regs will not be remapped
MTCTR reg -> MTCTR (env reg)
BCTR targets lbl -> BCTR targets lbl
BCTR targets lbl rs -> BCTR targets lbl rs
BL imm argRegs -> BL imm argRegs -- argument regs
BCTRL argRegs -> BCTRL argRegs -- cannot be remapped
ADD reg1 reg2 ri -> ADD (env reg1) (env reg2) (fixRI ri)
......@@ -506,10 +511,10 @@ ppc_isJumpishInstr instr
ppc_jumpDestsOfInstr :: Instr -> [BlockId]
ppc_jumpDestsOfInstr insn
= case insn of
BCC _ id _ -> [id]
BCCFAR _ id _ -> [id]
BCTR targets _ -> [id | Just id <- targets]
_ -> []
BCC _ id _ -> [id]
BCCFAR _ id _ -> [id]
BCTR targets _ _ -> [id | Just id <- targets]
_ -> []
-- | Change the destination of this jump instruction.
......@@ -520,7 +525,7 @@ ppc_patchJumpInstr insn patchF
= case insn of
BCC cc id p -> BCC cc (patchF id) p
BCCFAR cc id p -> BCCFAR cc (patchF id) p
BCTR ids lbl -> BCTR (map (fmap patchF) ids) lbl
BCTR ids lbl rs -> BCTR (map (fmap patchF) ids) lbl rs
_ -> insn
......
......@@ -548,7 +548,7 @@ pprInstr (BCCFAR cond blockid prediction) = vcat [
Just True -> char '-'
Just False -> char '+'
pprInstr (JMP lbl)
pprInstr (JMP lbl _)
-- We never jump to ForeignLabels; if we ever do, c.f. handling for "BL"
| isForeignLabel lbl = panic "PPC.Ppr.pprInstr: JMP to ForeignLabel"
| otherwise =
......@@ -565,7 +565,7 @@ pprInstr (MTCTR reg) = hcat [
char '\t',
pprReg reg
]
pprInstr (BCTR _ _) = hcat [
pprInstr (BCTR _ _ _) = hcat [
char '\t',
text "bctr"
]
......
......@@ -37,9 +37,9 @@ module PPC.Regs (
fits16Bits,
makeImmediate,
fReg,
r0, sp, toc, r3, r4, r11, r12, r27, r28, r30,
r0, sp, toc, r3, r4, r11, r12, r30,
tmpReg,
f1, f20, f21,
f1,
allocatableRegs
......@@ -306,7 +306,7 @@ point registers.
fReg :: Int -> RegNo
fReg x = (32 + x)
r0, sp, toc, r3, r4, r11, r12, r27, r28, r30, f1, f20, f21 :: Reg
r0, sp, toc, r3, r4, r11, r12, r30, f1 :: Reg
r0 = regSingle 0
sp = regSingle 1
toc = regSingle 2
......@@ -314,12 +314,8 @@ r3 = regSingle 3
r4 = regSingle 4
r11 = regSingle 11
r12 = regSingle 12
r27 = regSingle 27
r28 = regSingle 28
r30 = regSingle 30
f1 = regSingle $ fReg 1
f20 = regSingle $ fReg 20
f21 = regSingle $ fReg 21
-- allocatableRegs is allMachRegNos with the fixed-use regs removed.
-- i.e., these are the regs for which we are prepared to allow the
......@@ -335,4 +331,4 @@ tmpReg platform =
case platformArch platform of
ArchPPC -> regSingle 13
ArchPPC_64 _ -> regSingle 30
_ -> panic "PPC.Regs.tmpReg: unknowm arch"
_ -> panic "PPC.Regs.tmpReg: unknown arch"
......@@ -904,78 +904,9 @@ freeReg 30 = False
# if defined(REG_Base)
freeReg REG_Base = False
# endif
# if defined(REG_R1)
freeReg REG_R1 = False
# endif
# if defined(REG_R2)
freeReg REG_R2 = False
# endif
# if defined(REG_R3)
freeReg REG_R3 = False
# endif
# if defined(REG_R4)
freeReg REG_R4 = False
# endif
# if defined(REG_R5)
freeReg REG_R5 = False
# endif
# if defined(REG_R6)
freeReg REG_R6 = False
# endif
# if defined(REG_R7)
freeReg REG_R7 = False
# endif
# if defined(REG_R8)
freeReg REG_R8 = False
# endif
# if defined(REG_R9)
freeReg REG_R9 = False
# endif
# if defined(REG_R10)
freeReg REG_R10 = False
# endif
# if defined(REG_F1)
freeReg REG_F1 = False
# endif
# if defined(REG_F2)
freeReg REG_F2 = False
# endif
# if defined(REG_F3)
freeReg REG_F3 = False
# endif
# if defined(REG_F4)
freeReg REG_F4 = False
# endif
# if defined(REG_F5)
freeReg REG_F5 = False
# endif
# if defined(REG_F6)
freeReg REG_F6 = False
# endif
# if defined(REG_D1)
freeReg REG_D1 = False
# endif
# if defined(REG_D2)
freeReg REG_D2 = False
# endif
# if defined(REG_D3)
freeReg REG_D3 = False
# endif
# if defined(REG_D4)
freeReg REG_D4 = False
# endif
# if defined(REG_D5)
freeReg REG_D5 = False
# endif
# if defined(REG_D6)
freeReg REG_D6 = False
# endif
# if defined(REG_Sp)
freeReg REG_Sp = False
# endif
# if defined(REG_Su)
freeReg REG_Su = False
# endif
# if defined(REG_SpLim)
freeReg REG_SpLim = False
# endif
......@@ -1119,9 +1050,6 @@ freeReg REG_D6_2 = False
# if defined(REG_Sp)
freeReg REG_Sp = False
# endif
# if defined(REG_Su)
freeReg REG_Su = False
# endif
# if defined(REG_SpLim)
freeReg REG_SpLim = False
# endif
......
......@@ -312,6 +312,8 @@ the stack. See Note [Overlapping global registers] for implications.
#define REG_R6 r19
#define REG_R7 r20
#define REG_R8 r21
#define REG_R9 r22
#define REG_R10 r23
#define REG_F1 fr14
#define REG_F2 fr15
......@@ -327,13 +329,14 @@ the stack. See Note [Overlapping global registers] for implications.
#define REG_D5 fr24
#define REG_D6 fr25
#define REG_Sp r22
#define REG_SpLim r24
#define REG_Hp r25
#define REG_Sp r24
#define REG_SpLim r25
#define REG_Hp r26
#define REG_Base r27
#define MAX_REAL_FLOAT_REG 6
#define MAX_REAL_DOUBLE_REG 6
/* -----------------------------------------------------------------------------
The Sun SPARC register mapping
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment