Commit adcb5fb4 authored by Kavon Farvardin's avatar Kavon Farvardin Committed by Ben Gamari

Multiple fixes / improvements for LLVM backend

- Fix for #13904 -- stop "trashing" callee-saved registers, since it is
  not actually doing anything useful.

- Fix for #14251 -- fixes the calling convention for functions passing
  raw SSE-register values by adding padding as needed to get the values
  in the right registers. This problem cropped up when some args were
  unused an dropped from the live list.

- Fixed a typo in 'readnone' attribute

- Added 'lower-expect' pass to level 0 LLVM optimization passes to
  improve block layout in LLVM for stack checks, etc.

Test Plan: `make test WAYS=optllvm` and `make test WAYS=llvm`

Reviewers: bgamari, simonmar, angerman

Reviewed By: angerman

Subscribers: rwbarton, carter

GHC Trac Issues: #13904, #14251

Differential Revision: https://phabricator.haskell.org/D5190
parent 07083fc4
......@@ -564,7 +564,7 @@ instance Outputable LlvmFuncAttr where
ppr OptSize = text "optsize"
ppr NoReturn = text "noreturn"
ppr NoUnwind = text "nounwind"
ppr ReadNone = text "readnon"
ppr ReadNone = text "readnone"
ppr ReadOnly = text "readonly"
ppr Ssp = text "ssp"
ppr SspReq = text "ssqreq"
......
......@@ -26,7 +26,7 @@ module LlvmCodeGen.Base (
cmmToLlvmType, widthToLlvmFloat, widthToLlvmInt, llvmFunTy,
llvmFunSig, llvmFunArgs, llvmStdFunAttrs, llvmFunAlign, llvmInfAlign,
llvmPtrBits, tysToParams, llvmFunSection,
llvmPtrBits, tysToParams, llvmFunSection, padLiveArgs, isSSE,
strCLabel_llvm, strDisplayName_llvm, strProcedureName_llvm,
getGlobalPtr, generateExternDecls,
......@@ -58,6 +58,8 @@ import ErrUtils
import qualified Stream
import Control.Monad (ap)
import Data.List (sort)
import Data.Maybe (mapMaybe)
-- ----------------------------------------------------------------------------
-- * Some Data Types
......@@ -147,16 +149,58 @@ llvmFunSection dflags lbl
-- | A Function's arguments
llvmFunArgs :: DynFlags -> LiveGlobalRegs -> [LlvmVar]
llvmFunArgs dflags live =
map (lmGlobalRegArg dflags) (filter isPassed (activeStgRegs platform))
map (lmGlobalRegArg dflags) (filter isPassed allRegs)
where platform = targetPlatform dflags
isLive r = not (isSSE r) || r `elem` alwaysLive || r `elem` live
allRegs = activeStgRegs platform
paddedLive = map (\(_,r) -> r) $ padLiveArgs live
isLive r = r `elem` alwaysLive || r `elem` paddedLive
isPassed r = not (isSSE r) || isLive r
isSSE (FloatReg _) = True
isSSE (DoubleReg _) = True
isSSE (XmmReg _) = True
isSSE (YmmReg _) = True
isSSE (ZmmReg _) = True
isSSE _ = False
isSSE :: GlobalReg -> Bool
isSSE (FloatReg _) = True
isSSE (DoubleReg _) = True
isSSE (XmmReg _) = True
isSSE (YmmReg _) = True
isSSE (ZmmReg _) = True
isSSE _ = False
sseRegNum :: GlobalReg -> Maybe Int
sseRegNum (FloatReg i) = Just i
sseRegNum (DoubleReg i) = Just i
sseRegNum (XmmReg i) = Just i
sseRegNum (YmmReg i) = Just i
sseRegNum (ZmmReg i) = Just i
sseRegNum _ = Nothing
-- the bool indicates whether the global reg was added as padding.
-- the returned list is not sorted in any particular order,
-- but does indicate the set of live registers needed, with SSE padding.
padLiveArgs :: LiveGlobalRegs -> [(Bool, GlobalReg)]
padLiveArgs live = allRegs
where
sseRegNums = sort $ mapMaybe sseRegNum live
(_, padding) = foldl assignSlots (1, []) $ sseRegNums
allRegs = padding ++ map (\r -> (False, r)) live
assignSlots (i, acc) regNum
| i == regNum = -- don't need padding here
(i+1, acc)
| i < regNum = let -- add padding for slots i .. regNum-1
numNeeded = regNum-i
acc' = genPad i numNeeded ++ acc
in
(regNum+1, acc')
| otherwise = error "padLiveArgs -- i > regNum ??"
genPad start n =
take n $ flip map (iterate (+1) start) (\i ->
(True, FloatReg i))
-- NOTE: Picking float should be fine for the following reasons:
-- (1) Float aliases with all the other SSE register types on
-- the given platform.
-- (2) The argument is not live anyways.
-- | Llvm standard fun attributes
llvmStdFunAttrs :: [LlvmFuncAttr]
......
......@@ -14,7 +14,7 @@ import LlvmCodeGen.Base
import LlvmCodeGen.Regs
import BlockId
import CodeGen.Platform ( activeStgRegs, callerSaves )
import CodeGen.Platform ( activeStgRegs )
import CLabel
import Cmm
import PprCmm
......@@ -211,7 +211,6 @@ genCall t@(PrimTarget (MO_Prefetch_Data localityInt)) [] args
fptr <- liftExprData $ getFunPtr funTy t
argVars' <- castVarsW Signed $ zip argVars argTy
doTrashStmts
let argSuffix = [mkIntLit i32 0, mkIntLit i32 localityInt, mkIntLit i32 1]
statement $ Expr $ Call StdCall fptr (argVars' ++ argSuffix) []
| otherwise = panic $ "prefetch locality level integer must be between 0 and 3, given: " ++ (show localityInt)
......@@ -294,7 +293,6 @@ genCall t@(PrimTarget op) [] args
fptr <- getFunPtrW funTy t
argVars' <- castVarsW Signed $ zip argVars argTy
doTrashStmts
let alignVal = mkIntLit i32 align
arguments = argVars' ++ (alignVal:isVolVal)
statement $ Expr $ Call StdCall fptr arguments []
......@@ -449,7 +447,6 @@ genCall target res args = runStmtsDecls $ do
| never_returns = statement $ Unreachable
| otherwise = return ()
doTrashStmts
-- make the actual call
case retTy of
......@@ -1786,12 +1783,9 @@ genLit _ CmmHighStackMark
funPrologue :: LiveGlobalRegs -> [CmmBlock] -> LlvmM StmtData
funPrologue live cmmBlocks = do
trash <- getTrashRegs
let getAssignedRegs :: CmmNode O O -> [CmmReg]
getAssignedRegs (CmmAssign reg _) = [reg]
-- Calls will trash all registers. Unfortunately, this needs them to
-- be stack-allocated in the first place.
getAssignedRegs (CmmUnsafeForeignCall _ rs _) = map CmmGlobal trash ++ map CmmLocal rs
getAssignedRegs (CmmUnsafeForeignCall _ rs _) = map CmmLocal rs
getAssignedRegs _ = []
getRegsBlock (_, body, _) = concatMap getAssignedRegs $ blockToList body
assignedRegs = nub $ concatMap (getRegsBlock . blockSplit) cmmBlocks
......@@ -1821,14 +1815,9 @@ funPrologue live cmmBlocks = do
funEpilogue :: LiveGlobalRegs -> LlvmM ([LlvmVar], LlvmStatements)
funEpilogue live = do
-- Have information and liveness optimisation is enabled?
let liveRegs = alwaysLive ++ live
isSSE (FloatReg _) = True
isSSE (DoubleReg _) = True
isSSE (XmmReg _) = True
isSSE (YmmReg _) = True
isSSE (ZmmReg _) = True
isSSE _ = False
-- the bool indicates whether the register is padding.
let alwaysNeeded = map (\r -> (False, r)) alwaysLive
livePadded = alwaysNeeded ++ padLiveArgs live
-- Set to value or "undef" depending on whether the register is
-- actually live
......@@ -1840,39 +1829,17 @@ funEpilogue live = do
let ty = (pLower . getVarType $ lmGlobalRegVar dflags r)
return (Just $ LMLitVar $ LMUndefLit ty, nilOL)
platform <- getDynFlag targetPlatform
loads <- flip mapM (activeStgRegs platform) $ \r -> case () of
_ | r `elem` liveRegs -> loadExpr r
| not (isSSE r) -> loadUndef r
let allRegs = activeStgRegs platform
loads <- flip mapM allRegs $ \r -> case () of
_ | (False, r) `elem` livePadded
-> loadExpr r -- if r is not padding, load it
| not (isSSE r) || (True, r) `elem` livePadded
-> loadUndef r
| otherwise -> return (Nothing, nilOL)
let (vars, stmts) = unzip loads
return (catMaybes vars, concatOL stmts)
-- | A series of statements to trash all the STG registers.
--
-- In LLVM we pass the STG registers around everywhere in function calls.
-- So this means LLVM considers them live across the entire function, when
-- in reality they usually aren't. For Caller save registers across C calls
-- the saving and restoring of them is done by the Cmm code generator,
-- using Cmm local vars. So to stop LLVM saving them as well (and saving
-- all of them since it thinks they're always live, we trash them just
-- before the call by assigning the 'undef' value to them. The ones we
-- need are restored from the Cmm local var and the ones we don't need
-- are fine to be trashed.
getTrashStmts :: LlvmM LlvmStatements
getTrashStmts = do
regs <- getTrashRegs
stmts <- flip mapM regs $ \ r -> do
reg <- getCmmReg (CmmGlobal r)
let ty = (pLower . getVarType) reg
return $ Store (LMLitVar $ LMUndefLit ty) reg
return $ toOL stmts
getTrashRegs :: LlvmM [GlobalReg]
getTrashRegs = do plat <- getLlvmPlatform
return $ filter (callerSaves plat) (activeStgRegs plat)
-- | Get a function pointer to the CLabel specified.
--
-- This is for Haskell functions, function type is assumed, so doesn't work
......@@ -1994,11 +1961,6 @@ getCmmRegW = lift . getCmmReg
genLoadW :: Atomic -> CmmExpr -> CmmType -> WriterT LlvmAccum LlvmM LlvmVar
genLoadW atomic e ty = liftExprData $ genLoad atomic e ty
doTrashStmts :: WriterT LlvmAccum LlvmM ()
doTrashStmts = do
stmts <- lift getTrashStmts
tell $ LlvmAccum stmts mempty
-- | Return element of single-element list; 'panic' if list is not a single-element list
singletonPanic :: String -> [a] -> a
singletonPanic _ [x] = x
......
[
(0, "-mem2reg -globalopt"),
(0, "-mem2reg -globalopt -lower-expect"),
(1, "-O1 -globalopt"),
(2, "-O2")
]
......@@ -172,5 +172,4 @@ test('T13825-unit',
test('T14619', normal, compile_and_run, [''])
test('T14754', normal, compile_and_run, [''])
test('T14346', only_ways(['threaded1','threaded2']), compile_and_run, ['-O -threaded'])
test('T14251', [expect_broken_for(14251, ['optllvm'])],
compile_and_run, [''])
test('T14251', normal, compile_and_run, [''])
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment