Commit e251a51a authored by rrnewton's avatar rrnewton
Browse files

Merge branch 'master' into atomics

parents 6fd60b23 ea87014a
......@@ -25,7 +25,7 @@ AC_DEFUN([GHC_SELECT_FILE_EXTENSIONS],
x86_64-apple-darwin)
$3='.dylib'
;;
arm-apple-darwin10)
arm-apple-darwin10|i386-apple-darwin11)
$2='.a'
$3='.dylib'
;;
......@@ -103,7 +103,7 @@ AC_DEFUN([FPTOOLS_SET_PLATFORM_VARS],
echo "Can't work out target platform"
exit 1
fi
TargetArch=`echo "$target" | sed 's/-.*//'`
TargetVendor=`echo "$target" | sed -e 's/.*-\(.*\)-.*/\1/'`
TargetOS=`echo "$target" | sed 's/.*-//'`
......@@ -464,6 +464,7 @@ AC_DEFUN([FP_SETTINGS],
SettingsPerlCommand="$PerlCmd"
SettingsDllWrapCommand="/bin/false"
SettingsWindresCommand="/bin/false"
SettingsLibtoolCommand="libtool"
SettingsTouchCommand='touch'
if test -z "$LlcCmd"
then
......@@ -490,6 +491,7 @@ AC_DEFUN([FP_SETTINGS],
AC_SUBST(SettingsPerlCommand)
AC_SUBST(SettingsDllWrapCommand)
AC_SUBST(SettingsWindresCommand)
AC_SUBST(SettingsLibtoolCommand)
AC_SUBST(SettingsTouchCommand)
AC_SUBST(SettingsLlcCommand)
AC_SUBST(SettingsOptCommand)
......@@ -1602,7 +1604,7 @@ then
# optimistiaclly assume that it actually works properly.
AC_DEFINE([USE_TIMER_CREATE], 1, [Define to 1 if we can use timer_create(CLOCK_PROCESS_CPUTIME_ID,...)])
else
AC_CACHE_CHECK([for a working timer_create(CLOCK_REALTIME)],
AC_CACHE_CHECK([for a working timer_create(CLOCK_REALTIME)],
[fptools_cv_timer_create_works],
[AC_TRY_RUN([
#include <stdio.h>
......@@ -1722,7 +1724,7 @@ out:
[fptools_cv_timer_create_works=no])
])
case $fptools_cv_timer_create_works in
yes) AC_DEFINE([USE_TIMER_CREATE], 1,
yes) AC_DEFINE([USE_TIMER_CREATE], 1,
[Define to 1 if we can use timer_create(CLOCK_PROCESS_CPUTIME_ID,...)]);;
esac
fi
......@@ -1918,7 +1920,7 @@ AC_DEFUN([GHC_CONVERT_VENDOR],[
# converts os from gnu to ghc naming, and assigns the result to $target_var
AC_DEFUN([GHC_CONVERT_OS],[
case "$1-$2" in
darwin10-arm)
darwin10-arm|darwin11-i386)
$3="ios"
;;
*)
......
......@@ -381,17 +381,16 @@ data OverlapFlag
-- its ambiguous which to choose)
| OverlapOk { isSafeOverlap :: Bool }
-- | Like OverlapOk, but also ignore this instance
-- if it doesn't match the constraint you are
-- trying to resolve, but could match if the type variables
-- in the constraint were instantiated
-- | Silently ignore this instance if you find any other that matches the
-- constraing you are trying to resolve, including when checking if there are
-- instances that do not match, but unify.
--
-- Example: constraint (Foo [b])
-- instances (Foo [Int]) Incoherent
-- (Foo [a])
-- Without the Incoherent flag, we'd complain that
-- instantiating 'b' would change which instance
-- was chosen
-- was chosen. See also note [Incoherent instances]
| Incoherent { isSafeOverlap :: Bool }
deriving (Eq, Data, Typeable)
......
......@@ -18,7 +18,7 @@ module CmmExpr
, plusRegSet, minusRegSet, timesRegSet, sizeRegSet, nullRegSet
, regSetToList
, regUsedIn
, Area(..)
, module CmmMachOp
, module CmmType
......@@ -119,7 +119,11 @@ data CmmLit
-- Invariant: must be a continuation BlockId
-- See Note [Continuation BlockId] in CmmNode.
| CmmHighStackMark -- stands for the max stack space used during a procedure
| CmmHighStackMark -- A late-bound constant that stands for the max
-- #bytes of stack space used during a procedure.
-- During the stack-layout pass, CmmHighStackMark
-- is replaced by a CmmInt for the actual number
-- of bytes used
deriving Eq
cmmExprType :: DynFlags -> CmmExpr -> CmmType
......@@ -336,7 +340,7 @@ data GlobalReg
| LongReg -- long int registers (64-bit, really)
{-# UNPACK #-} !Int -- its number
| XmmReg -- 128-bit SIMD vector register
| XmmReg -- 128-bit SIMD vector register
{-# UNPACK #-} !Int -- its number
-- STG registers
......
......@@ -79,7 +79,7 @@ Things to do:
into separate C procedures.
Short term:
compute and attach liveness into to LastCall
compute and attach liveness into LastCall
right at end, split, cvt to old rep
[must split before cvt, because old rep is not expressive enough]
......
......@@ -257,13 +257,10 @@ cgDataCon data_con
-- Stuff to support splitting
---------------------------------------------------------------
-- If we're splitting the object, we need to externalise all the
-- top-level names (and then make sure we only use the externalised
-- one in any C label we use which refers to this name).
maybeExternaliseId :: DynFlags -> Id -> FCode Id
maybeExternaliseId dflags id
| gopt Opt_SplitObjs dflags, -- Externalise the name for -split-objs
| gopt Opt_SplitObjs dflags, -- See Note [Externalise when splitting]
-- in StgCmmMonad
isInternalName name = do { mod <- getModuleName
; returnFC (setIdName id (externalise mod)) }
| otherwise = returnFC id
......
......@@ -30,6 +30,7 @@ import StgCmmForeign (emitPrimCall)
import MkGraph
import CoreSyn ( AltCon(..) )
import SMRep
import BlockId
import Cmm
import CmmInfo
import CmmUtils
......@@ -476,7 +477,17 @@ closureCodeBody top_lvl bndr cl_info cc args arity body fv_details
; let node_points = nodeMustPointToIt dflags lf_info
node' = if node_points then Just node else Nothing
; when node_points (ldvEnterClosure cl_info)
-- Emit new label that might potentially be a header
-- of a self-recursive tail call. See Note
-- [Self-recursive tail calls] in StgCmmExpr
; u <- newUnique
; let loop_header_id = mkBlockId u
; emitLabel loop_header_id
-- Extend reader monad with information that
-- self-recursive tail calls can be optimized into local
-- jumps
; withSelfLoop (bndr, loop_header_id, arg_regs) $ do
{
-- Main payload
; entryHeapCheck cl_info node' arity arg_regs $ do
{ -- ticky after heap check to avoid double counting
......@@ -490,7 +501,8 @@ closureCodeBody top_lvl bndr cl_info cc args arity body fv_details
-- heap check, to reduce live vars over check
; when node_points $ load_fvs node lf_info fv_bindings
; void $ cgExpr body
}}
}}}
}
-- A function closure pointer may be tagged, so we
......
......@@ -27,10 +27,9 @@ module StgCmmClosure (
lfDynTag,
maybeIsLFCon, isLFThunk, isLFReEntrant, lfUpdatable,
nodeMustPointToIt,
CallMethod(..), getCallMethod,
isKnownFun, funTag, tagForArity,
-- * Used by other modules
CgLoc(..), SelfLoopInfo, CallMethod(..),
nodeMustPointToIt, isKnownFun, funTag, tagForArity, getCallMethod,
-- * ClosureInfo
ClosureInfo,
......@@ -69,11 +68,14 @@ module StgCmmClosure (
import StgSyn
import SMRep
import Cmm
import PprCmmExpr()
import BlockId
import CLabel
import Id
import IdInfo
import DataCon
import FastString
import Name
import Type
import TypeRep
......@@ -84,6 +86,37 @@ import Outputable
import DynFlags
import Util
-----------------------------------------------------------------------------
-- Data types and synonyms
-----------------------------------------------------------------------------
-- These data types are mostly used by other modules, especially StgCmmMonad,
-- but we define them here because some functions in this module need to
-- have access to them as well
data CgLoc
= CmmLoc CmmExpr -- A stable CmmExpr; that is, one not mentioning
-- Hp, so that it remains valid across calls
| LneLoc BlockId [LocalReg] -- A join point
-- A join point (= let-no-escape) should only
-- be tail-called, and in a saturated way.
-- To tail-call it, assign to these locals,
-- and branch to the block id
instance Outputable CgLoc where
ppr (CmmLoc e) = ptext (sLit "cmm") <+> ppr e
ppr (LneLoc b rs) = ptext (sLit "lne") <+> ppr b <+> ppr rs
type SelfLoopInfo = (Id, BlockId, [LocalReg])
-- used by ticky profiling
isKnownFun :: LambdaFormInfo -> Bool
isKnownFun (LFReEntrant _ _ _ _) = True
isKnownFun LFLetNoEscape = True
isKnownFun _ = False
-----------------------------------------------------------------------------
-- Representations
-----------------------------------------------------------------------------
......@@ -122,23 +155,23 @@ isGcPtrRep _ = False
-- tail call or return that identifier.
data LambdaFormInfo
= LFReEntrant -- Reentrant closure (a function)
TopLevelFlag -- True if top level
!RepArity -- Arity. Invariant: always > 0
!Bool -- True <=> no fvs
= LFReEntrant -- Reentrant closure (a function)
TopLevelFlag -- True if top level
!RepArity -- Arity. Invariant: always > 0
!Bool -- True <=> no fvs
ArgDescr -- Argument descriptor (should really be in ClosureInfo)
| LFThunk -- Thunk (zero arity)
| LFThunk -- Thunk (zero arity)
TopLevelFlag
!Bool -- True <=> no free vars
!Bool -- True <=> updatable (i.e., *not* single-entry)
!Bool -- True <=> no free vars
!Bool -- True <=> updatable (i.e., *not* single-entry)
StandardFormInfo
!Bool -- True <=> *might* be a function type
!Bool -- True <=> *might* be a function type
| LFCon -- A saturated constructor application
DataCon -- The constructor
| LFCon -- A saturated constructor application
DataCon -- The constructor
| LFUnknown -- Used for function arguments and imported things.
| LFUnknown -- Used for function arguments and imported things.
-- We know nothing about this closure.
-- Treat like updatable "LFThunk"...
-- Imported things which we *do* know something about use
......@@ -149,10 +182,10 @@ data LambdaFormInfo
-- because then we know the entry code will do
-- For a function, the entry code is the fast entry point
| LFUnLifted -- A value of unboxed type;
| LFUnLifted -- A value of unboxed type;
-- always a value, needs evaluation
| LFLetNoEscape -- See LetNoEscape module for precise description
| LFLetNoEscape -- See LetNoEscape module for precise description
| LFBlackHole -- Used for the closures allocated to hold the result
-- of a CAF. We want the target of the update frame to
......@@ -175,7 +208,7 @@ data StandardFormInfo
-- case x of
-- con a1,..,an -> ak
-- and the constructor is from a single-constr type.
WordOff -- 0-origin offset of ak within the "goods" of
WordOff -- 0-origin offset of ak within the "goods" of
-- constructor (Recall that the a1,...,an may be laid
-- out in the heap in a non-obvious order.)
......@@ -205,9 +238,9 @@ mkLFLetNoEscape :: LambdaFormInfo
mkLFLetNoEscape = LFLetNoEscape
-------------
mkLFReEntrant :: TopLevelFlag -- True of top level
-> [Id] -- Free vars
-> [Id] -- Args
mkLFReEntrant :: TopLevelFlag -- True of top level
-> [Id] -- Free vars
-> [Id] -- Args
-> ArgDescr -- Argument descriptor
-> LambdaFormInfo
......@@ -256,7 +289,7 @@ mkLFImported :: Id -> LambdaFormInfo
mkLFImported id
| Just con <- isDataConWorkId_maybe id
, isNullaryRepDataCon con
= LFCon con -- An imported nullary constructor
= LFCon con -- An imported nullary constructor
-- We assume that the constructor is evaluated so that
-- the id really does point directly to the constructor
......@@ -465,49 +498,65 @@ When black-holing, single-entry closures could also be entered via node
(rather than directly) to catch double-entry. -}
data CallMethod
= EnterIt -- No args, not a function
= EnterIt -- No args, not a function
| JumpToIt -- A join point
| JumpToIt BlockId [LocalReg] -- A join point or a header of a local loop
| ReturnIt -- It's a value (function, unboxed value,
| ReturnIt -- It's a value (function, unboxed value,
-- or constructor), so just return it.
| SlowCall -- Unknown fun, or known fun with
-- too few args.
| DirectEntry -- Jump directly, with args in regs
CLabel -- The code label
RepArity -- Its arity
CLabel -- The code label
RepArity -- Its arity
getCallMethod :: DynFlags
-> Name -- Function being applied
-> CafInfo -- Can it refer to CAF's?
-> LambdaFormInfo -- Its info
-> RepArity -- Number of available arguments
-> Id -- Function Id used to chech if it can refer to
-- CAF's and whether the function is tail-calling
-- itself
-> LambdaFormInfo -- Its info
-> RepArity -- Number of available arguments
-> CgLoc -- Passed in from cgIdApp so that we can
-- handle let-no-escape bindings and self-recursive
-- tail calls using the same data constructor,
-- JumpToIt. This saves us one case branch in
-- cgIdApp
-> Maybe SelfLoopInfo -- can we perform a self-recursive tail call?
-> CallMethod
getCallMethod dflags _name _ lf_info _n_args
getCallMethod _ _ id _ n_args _cg_loc (Just (self_loop_id, block_id, args))
| id == self_loop_id, n_args == length args
-- If these patterns match then we know that:
-- * function is performing a self-recursive call in a tail position
-- * number of parameters of the function matches functions arity.
-- See Note [Self-recursive tail calls] in StgCmmExpr for more details
= JumpToIt block_id args
getCallMethod dflags _name _ lf_info _n_args _cg_loc _self_loop_info
| nodeMustPointToIt dflags lf_info && gopt Opt_Parallel dflags
= -- If we're parallel, then we must always enter via node.
= -- If we're parallel, then we must always enter via node.
-- The reason is that the closure may have been
-- fetched since we allocated it.
EnterIt
getCallMethod dflags name caf (LFReEntrant _ arity _ _) n_args
getCallMethod dflags name id (LFReEntrant _ arity _ _) n_args _cg_loc _self_loop_info
| n_args == 0 = ASSERT( arity /= 0 )
ReturnIt -- No args at all
| n_args < arity = SlowCall -- Not enough args
| otherwise = DirectEntry (enterIdLabel dflags name caf) arity
| otherwise = DirectEntry (enterIdLabel dflags name (idCafInfo id)) arity
getCallMethod _ _name _ LFUnLifted n_args
getCallMethod _ _name _ LFUnLifted n_args _cg_loc _self_loop_info
= ASSERT( n_args == 0 ) ReturnIt
getCallMethod _ _name _ (LFCon _) n_args
getCallMethod _ _name _ (LFCon _) n_args _cg_loc _self_loop_info
= ASSERT( n_args == 0 ) ReturnIt
getCallMethod dflags name caf (LFThunk _ _ updatable std_form_info is_fun) n_args
| is_fun -- it *might* be a function, so we must "call" it (which is always safe)
= SlowCall -- We cannot just enter it [in eval/apply, the entry code
getCallMethod dflags name id (LFThunk _ _ updatable std_form_info is_fun) n_args _cg_loc _self_loop_info
| is_fun -- it *might* be a function, so we must "call" it (which is always safe)
= SlowCall -- We cannot just enter it [in eval/apply, the entry code
-- is the fast-entry code]
-- Since is_fun is False, we are *definitely* looking at a data value
......@@ -527,27 +576,24 @@ getCallMethod dflags name caf (LFThunk _ _ updatable std_form_info is_fun) n_arg
| otherwise -- Jump direct to code for single-entry thunks
= ASSERT( n_args == 0 )
DirectEntry (thunkEntryLabel dflags name caf std_form_info updatable) 0
DirectEntry (thunkEntryLabel dflags name (idCafInfo id) std_form_info updatable) 0
getCallMethod _ _name _ (LFUnknown True) _n_args
getCallMethod _ _name _ (LFUnknown True) _n_arg _cg_locs _self_loop_info
= SlowCall -- might be a function
getCallMethod _ name _ (LFUnknown False) n_args
getCallMethod _ name _ (LFUnknown False) n_args _cg_loc _self_loop_info
= ASSERT2( n_args == 0, ppr name <+> ppr n_args )
EnterIt -- Not a function
getCallMethod _ _name _ LFBlackHole _n_args
= SlowCall -- Presumably the black hole has by now
getCallMethod _ _name _ LFBlackHole _n_args _cg_loc _self_loop_info
= SlowCall -- Presumably the black hole has by now
-- been updated, but we don't know with
-- what, so we slow call it
getCallMethod _ _name _ LFLetNoEscape _n_args
= JumpToIt
getCallMethod _ _name _ LFLetNoEscape _n_args (LneLoc blk_id lne_regs) _self_loop_info
= JumpToIt blk_id lne_regs
isKnownFun :: LambdaFormInfo -> Bool
isKnownFun (LFReEntrant _ _ _ _) = True
isKnownFun LFLetNoEscape = True
isKnownFun _ = False
getCallMethod _ _ _ _ _ _ _ = panic "Unknown call method"
-----------------------------------------------------------------------------
-- staticClosureRequired
......@@ -680,7 +726,6 @@ mkCmmInfo ClosureInfo {..}
, cit_prof = closureProf
, cit_srt = NoC_SRT }
--------------------------------------
-- Building ClosureInfos
--------------------------------------
......
......@@ -160,7 +160,7 @@ cgLetNoEscapeClosure bndr cc_slot _unused_cc args body
return ( lneIdInfo dflags bndr args
, code )
where
code = forkProc $ do {
code = forkLneBody $ do {
; withNewTickyCounterLNE (idName bndr) args $ do
; restoreCurrentCostCentre cc_slot
; arg_regs <- bindArgsToRegs args
......@@ -632,14 +632,20 @@ cgConApp con stg_args
cgIdApp :: Id -> [StgArg] -> FCode ReturnKind
cgIdApp fun_id [] | isVoidId fun_id = emitReturn []
cgIdApp fun_id args = do
dflags <- getDynFlags
fun_info <- getCgIdInfo fun_id
let fun_arg = StgVarArg fun_id
fun_name = idName fun_id
fun = idInfoToAmode fun_info
lf_info = cg_lf fun_info
dflags <- getDynFlags
fun_info <- getCgIdInfo fun_id
self_loop_info <- getSelfLoop
let cg_fun_id = cg_id fun_info
-- NB: use (cg_id fun_info) instead of fun_id, because
-- the former may be externalised for -split-objs.
-- See Note [Externalise when splitting] in StgCmmMonad
fun_arg = StgVarArg cg_fun_id
fun_name = idName cg_fun_id
fun = idInfoToAmode fun_info
lf_info = cg_lf fun_info
node_points dflags = nodeMustPointToIt dflags lf_info
case (getCallMethod dflags fun_name (idCafInfo fun_id) lf_info (length args)) of
case (getCallMethod dflags fun_name cg_fun_id lf_info (length args) (cg_loc fun_info) self_loop_info) of
-- A value in WHNF, so we can just return it.
ReturnIt -> emitReturn [fun] -- ToDo: does ReturnIt guarantee tagged?
......@@ -659,14 +665,87 @@ cgIdApp fun_id args = do
then directCall NativeNodeCall lbl arity (fun_arg:args)
else directCall NativeDirectCall lbl arity args }
-- Let-no-escape call
JumpToIt -> let (LneLoc blk_id lne_regs) = cg_loc fun_info
in do
{ adjustHpBackwards -- always do this before a tail-call
; cmm_args <- getNonVoidArgAmodes args
; emitMultiAssign lne_regs cmm_args
; emit (mkBranch blk_id)
; return AssignedDirectly }
-- Let-no-escape call or self-recursive tail-call
JumpToIt blk_id lne_regs -> do
{ adjustHpBackwards -- always do this before a tail-call
; cmm_args <- getNonVoidArgAmodes args
; emitMultiAssign lne_regs cmm_args
; emit (mkBranch blk_id)
; return AssignedDirectly }
-- Note [Self-recursive tail calls]
-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
--
-- Self-recursive tail calls can be optimized into a local jump in the same
-- way as let-no-escape bindings (see Note [What is a non-escaping let] in
-- stgSyn/CoreToStg.lhs). Consider this:
--
-- foo.info:
-- a = R1 // calling convention
-- b = R2
-- goto L1
-- L1: ...
-- ...
-- ...
-- L2: R1 = x
-- R2 = y
-- call foo(R1,R2)
--
-- Instead of putting x and y into registers (or other locations required by the
-- calling convention) and performing a call we can put them into local
-- variables a and b and perform jump to L1:
--
-- foo.info:
-- a = R1
-- b = R2
-- goto L1
-- L1: ...
-- ...
-- ...
-- L2: a = x
-- b = y
-- goto L1
--
-- This can be done only when function is calling itself in a tail position
-- and only if the call passes number of parameters equal to function's arity.
-- Note that this cannot be performed if a function calls itself with a
-- continuation.
--
-- This in fact implements optimization known as "loopification". It was
-- described in "Low-level code optimizations in the Glasgow Haskell Compiler"
-- by Krzysztof Woś, though we use different approach. Krzysztof performed his
-- optimization at the Cmm level, whereas we perform ours during code generation
-- (Stg-to-Cmm pass) essentially making sure that optimized Cmm code is
-- generated in the first place.
--
-- Implementation is spread across a couple of places in the code:
--
-- * FCode monad stores additional information in its reader environment
-- (cgd_self_loop field). This information tells us which function can
-- tail call itself in an optimized way (it is the function currently
-- being compiled), what is the label of a loop header (L1 in example above)
-- and information about local registers in which we should arguments
-- before making a call (this would be a and b in example above).
--
-- * Whenever we are compiling a function, we set that information to reflect
-- the fact that function currently being compiled can be jumped to, instead
-- of called. We also have to emit a label to which we will be jumping. Both
-- things are done in closureCodyBody in StgCmmBind.
--
-- * When we began compilation of another closure we remove the additional
-- information from the environment. This is done by forkClosureBody
-- in StgCmmMonad. Other functions that duplicate the environment -
-- forkLneBody, forkAlts, codeOnly - duplicate that information. In other
-- words, we only need to clean the environment of the self-loop information
-- when compiling right hand side of a closure (binding).
--
-- * When compiling a call (cgIdApp) we use getCallMethod to decide what kind
-- of call will be generated. getCallMethod decides to generate a self
-- recursive tail call when (a) environment stores information about
-- possible self tail-call; (b) that tail call is to a function currently
-- being compiled; (c) number of passed arguments is equal to function's
-- arity.
emitEnter :: CmmExpr -> FCode ReturnKind
emitEnter fun = do
......
......@@ -26,7 +26,7 @@ module StgCmmMonad (
mkCmmIfThenElse, mkCmmIfThen, mkCmmIfGoto,
mkCall, mkCmmCall,
forkClosureBody, forkAlts, forkProc, codeOnly,
forkClosureBody, forkLneBody, forkAlts, codeOnly,
ConTagZ,
......@@ -44,10 +44,10 @@ module StgCmmMonad (
getModuleName,
-- ideally we wouldn't export these, but some other modules access internal state
getState, setState, getInfoDown, getDynFlags, getThisPackage,
getState, setState, getSelfLoop, withSelfLoop, getInfoDown, getDynFlags, getThisPackage,
-- more localised access to monad state
CgIdInfo(..), CgLoc(..),
CgIdInfo(..),
getBinds, setBinds,
-- out of general friendliness, we also export ...
......@@ -60,6 +60,7 @@ import Cmm
import StgCmmClosure
import DynFlags
import Hoopl
import Maybes
import MkGraph
import BlockId
import CLabel
......@@ -100,11 +101,10 @@ infixr 9 `thenFC`
-- - A reader monad, for CgInfoDownwards, containing
-- - DynFlags,
-- - the current Module
-- - the static top-level environmnet
-- - the update-frame offset
-- - the ticky counter label
-- - the Sequel (the continuation to return to)
-- - the self-recursive tail call information
--------------------------------------------------------
......@@ -169,51 +169,48 @@ fixC fcode = FCode (
data CgInfoDownwards -- information only passed *downwards* by the monad
= MkCgInfoDown {