Commit 22bbcd1d authored by mrchebas@gmail.com's avatar mrchebas@gmail.com
Browse files

optimisation: shortcut branches when possible (x86/x86_64 only for now)

This is only turned on with -O, and probably won't make much
difference at the moment, but it will be important for semi-tagging.
parent 7f1bc015
......@@ -98,7 +98,7 @@ module CLabel (
mkHpcModuleOffsetLabel,
infoLblToEntryLbl, entryLblToInfoLbl,
needsCDecl, isAsmTemp, externallyVisibleCLabel,
needsCDecl, isAsmTemp, maybeAsmTemp, externallyVisibleCLabel,
CLabelType(..), labelType, labelDynamic,
pprCLabel
......@@ -497,6 +497,10 @@ isAsmTemp :: CLabel -> Bool -- is a local temporary for native code generati
isAsmTemp (AsmTempLabel _) = True
isAsmTemp _ = False
maybeAsmTemp :: CLabel -> Maybe Unique
maybeAsmTemp (AsmTempLabel uq) = Just uq
maybeAsmTemp _ = Nothing
-- -----------------------------------------------------------------------------
-- Is a CLabel visible outside this object file or not?
......
......@@ -133,6 +133,42 @@ data CmmStmt
| CmmJump CmmExpr [LocalReg] -- Jump to another function, with these
-- parameters.
{-
Discussion
~~~~~~~~~~
One possible problem with the above type is that the only way to do a
non-local conditional jump is to encode it as a branch to a block that
contains a single jump. This leads to inefficient code in the back end.
One possible way to fix this would be:
data CmmStat =
...
| CmmJump CmmBranchDest
| CmmCondJump CmmExpr CmmBranchDest
...
data CmmBranchDest
= Local BlockId
| NonLocal CmmExpr [LocalReg]
In favour:
+ one fewer constructors in CmmStmt
+ allows both cond branch and switch to jump to non-local destinations
Against:
- not strictly necessary: can already encode as branch+jump
- not always possible to implement any better in the back end
- could do the optimisation in the back end (but then plat-specific?)
- C-- doesn't have it
- back-end optimisation might be more general (jump shortcutting)
So we'll stick with the way it is, and add the optimisation to the NCG.
-}
-----------------------------------------------------------------------------
-- CmmCallTarget
--
......
......@@ -17,7 +17,7 @@ import MachRegs
import MachCodeGen
import PprMach
import RegisterAlloc
import RegAllocInfo ( jumpDests )
import RegAllocInfo
import NCGMonad
import PositionIndependentCode
......@@ -25,7 +25,7 @@ import Cmm
import CmmOpt ( cmmMiniInline, cmmMachOpFold )
import PprCmm ( pprStmt, pprCmms )
import MachOp
import CLabel ( CLabel, mkSplitMarkerLabel, mkAsmTempLabel )
import CLabel
#if powerpc_TARGET_ARCH
import CLabel ( mkRtsCodeLabel )
#endif
......@@ -37,8 +37,9 @@ import FastTypes
import List ( groupBy, sortBy )
import CLabel ( pprCLabel )
import ErrUtils ( dumpIfSet_dyn )
import DynFlags ( DynFlags, DynFlag(..), dopt )
import DynFlags
import StaticFlags ( opt_Static, opt_PIC )
import Util
import Config ( cProjectVersion )
import Digraph
......@@ -212,8 +213,10 @@ cmmNativeGen dflags cmm
genMachCode cmm `thenUs` \ (pre_regalloc, lastMinuteImports) ->
{-# SCC "regAlloc" #-}
mapUs regAlloc pre_regalloc `thenUs` \ with_regs ->
{-# SCC "shortcutBranches" #-}
shortcutBranches dflags with_regs `bind` \ shorted ->
{-# SCC "sequenceBlocks" #-}
map sequenceTop with_regs `bind` \ sequenced ->
map sequenceTop shorted `bind` \ sequenced ->
{-# SCC "x86fp_kludge" #-}
map x86fp_kludge sequenced `bind` \ final_mach_code ->
{-# SCC "vcat" #-}
......@@ -329,6 +332,48 @@ makeFarBranches blocks
makeFarBranches = id
#endif
-- -----------------------------------------------------------------------------
-- Shortcut branches
shortcutBranches :: DynFlags -> [NatCmmTop] -> [NatCmmTop]
shortcutBranches dflags tops
| optLevel dflags < 1 = tops -- only with -O or higher
| otherwise = map (apply_mapping mapping) tops'
where
(tops', mappings) = mapAndUnzip build_mapping tops
mapping = foldr plusUFM emptyUFM mappings
build_mapping top@(CmmData _ _) = (top, emptyUFM)
build_mapping (CmmProc info lbl params []) $
= (CmmProc info lbl params [], emptyUFM)
build_mapping (CmmProc info lbl params (head:blocks))
= (CmmProc info lbl params (head:others), mapping)
-- drop the shorted blocks, but don't ever drop the first one,
-- because it is pointed to by a global label.
where
-- find all the blocks that just consist of a jump that can be
-- shorted.
(shortcut_blocks, others) = partitionWith split blocks
split (BasicBlock id [insn]) | Just dest <- canShortcut insn
= Left (id,dest)
split other = Right other
-- build a mapping from BlockId to JumpDest for shorting branches
mapping = foldl add emptyUFM shortcut_blocks
add ufm (id,dest) = addToUFM ufm id dest
apply_mapping ufm (CmmData sec statics)
= CmmData sec (map (shortcutStatic (lookupUFM ufm)) statics)
-- we need to get the jump tables, so apply the mapping to the entries
-- of a CmmData too.
apply_mapping ufm (CmmProc info lbl params blocks)
= CmmProc info lbl params (map short_bb blocks)
where
short_bb (BasicBlock id insns) = BasicBlock id $! map short_insn insns
short_insn i = shortcutJump (lookupUFM ufm) i
-- shortcutJump should apply the mapping repeatedly,
-- just in case we can short multiple branches.
-- -----------------------------------------------------------------------------
-- Instruction selection
......
......@@ -520,6 +520,7 @@ bit or 64 bit precision.
-- Jumping around.
| JMP Operand
| JXX Cond BlockId -- includes unconditional branches
| JXX_GBL Cond Imm -- non-local version of JXX
| JMP_TBL Operand [BlockId] -- table jump
| CALL (Either Imm Reg) [Reg]
......
......@@ -1291,6 +1291,8 @@ pprInstr (JXX cond (BlockId id))
= pprCondInstr SLIT("j") cond (pprCLabel_asm lab)
where lab = mkAsmTempLabel id
pprInstr (JXX_GBL cond imm) = pprCondInstr SLIT("j") cond (pprImm imm)
pprInstr (JMP (OpImm imm)) = (<>) (ptext SLIT("\tjmp ")) (pprImm imm)
pprInstr (JMP op) = (<>) (ptext SLIT("\tjmp *")) (pprOperand wordRep op)
pprInstr (JMP_TBL op ids) = pprInstr (JMP op)
......
......@@ -17,6 +17,8 @@ module RegAllocInfo (
patchJump,
isRegRegMove,
JumpDest, canShortcut, shortcutJump, shortcutStatic,
maxSpillSlots,
mkSpillInstr,
mkLoadInstr,
......@@ -26,7 +28,8 @@ module RegAllocInfo (
#include "HsVersions.h"
import Cmm ( BlockId )
import Cmm
import CLabel
import MachOp ( MachRep(..), wordRep )
import MachInstrs
import MachRegs
......@@ -172,6 +175,7 @@ regUsage instr = case instr of
CMP sz src dst -> mkRUR (use_R src ++ use_R dst)
SETCC cond op -> mkRU [] (def_W op)
JXX cond lbl -> mkRU [] []
JXX_GBL cond lbl -> mkRU [] []
JMP op -> mkRUR (use_R op)
JMP_TBL op ids -> mkRUR (use_R op)
CALL (Left imm) params -> mkRU params callClobberedRegs
......@@ -417,6 +421,45 @@ patchJump insn old new
#endif
_other -> insn
data JumpDest = DestBlockId BlockId | DestImm Imm
canShortcut :: Instr -> Maybe JumpDest
#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
canShortcut (JXX ALWAYS id) = Just (DestBlockId id)
canShortcut (JMP (OpImm imm)) = Just (DestImm imm)
#endif
canShortcut _ = Nothing
shortcutJump :: (BlockId -> Maybe JumpDest) -> Instr -> Instr
#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
shortcutJump fn insn@(JXX cc id) =
case fn id of
Nothing -> insn
Just (DestBlockId id') -> shortcutJump fn (JXX cc id')
Just (DestImm imm) -> shortcutJump fn (JXX_GBL cc imm)
#endif
shortcutJump fn other = other
-- Here because it knows about JumpDest
shortcutStatic :: (BlockId -> Maybe JumpDest) -> CmmStatic -> CmmStatic
shortcutStatic fn (CmmStaticLit (CmmLabel lab))
| Just uq <- maybeAsmTemp lab
= CmmStaticLit (CmmLabel (shortBlockId fn (BlockId uq)))
shortcutStatic fn (CmmStaticLit (CmmLabelDiffOff lbl1 lbl2 off))
| Just uq <- maybeAsmTemp lbl1
= CmmStaticLit (CmmLabelDiffOff (shortBlockId fn (BlockId uq)) lbl2 off)
-- slightly dodgy, we're ignoring the second label, but this
-- works with the way we use CmmLabelDiffOff for jump tables now.
shortcutStatic fn other_static
= other_static
shortBlockId fn blockid@(BlockId uq) =
case fn blockid of
Nothing -> mkAsmTempLabel uq
Just (DestBlockId blockid') -> shortBlockId fn blockid'
Just (DestImm (ImmCLbl lbl)) -> lbl
_other -> panic "shortBlockId"
-- -----------------------------------------------------------------------------
-- 'patchRegs' function
......@@ -562,6 +605,7 @@ patchRegs instr env = case instr of
COMMENT _ -> instr
DELTA _ -> instr
JXX _ _ -> instr
JXX_GBL _ _ -> instr
CLTD _ -> instr
_other -> panic "patchRegs: unrecognised instr"
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment