Ben Gamari · Marge Bot · Andreas Klebinger · Marge Bot · Ryan Scott · Marge Bot
--- a/.ghcid
+++ b/.ghcid
--command utils/ghc-in-ghci/run.sh
+--command ./hadrian/ghci.sh
 --reload compiler
 --reload ghc
 --reload includes
--restart utils/ghc-in-ghci/run.sh
+--restart hadrian/
--restart utils/ghc-in-ghci/load-main.ghci
--restart utils/ghc-in-ghci/settings.ghci
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -4,6 +4,10 @@ variables:
  # Commit of ghc/ci-images repository from which to pull Docker images
  DOCKER_REV: cefaee3c742af193e0f7783f87edb0d35374515c
+  # Sequential version number capturing the versions of all tools fetched by
+  # .gitlab/win32-init.sh.
+  WINDOWS_TOOLCHAIN_VERSION: 1
 before_script:
  - python3 .gitlab/fix-submodules.py
  - git submodule sync --recursive
@@ -48,7 +52,7 @@ ghc-linters:
  stage: lint
  image: "registry.gitlab.haskell.org/ghc/ci-images/linters:$DOCKER_REV"
  script:
-    - git fetch origin $CI_MERGE_REQUEST_TARGET_BRANCH_NAME
+    - git fetch "$CI_MERGE_REQUEST_PROJECT_URL" $CI_MERGE_REQUEST_TARGET_BRANCH_NAME
    - base="$(git merge-base FETCH_HEAD $CI_COMMIT_SHA)"
    - "echo Merge base $base"
    #    - validate-commit-msg .git $(git rev-list $base..$CI_COMMIT_SHA)
@@ -525,7 +529,7 @@ validate-x86_64-windows-hadrian:
  variables:
    MSYSTEM: MINGW64
  cache:
-    key: x86_64-windows-hadrian
+    key: "x86_64-windows-hadrian-$WINDOWS_TOOLCHAIN_VERSION"
 nightly-i386-windows-hadrian:
  extends: .build-windows-hadrian
@@ -535,7 +539,7 @@ nightly-i386-windows-hadrian:
    variables:
      - $NIGHTLY
  cache:
-    key: i386-windows-hadrian
+    key: "i386-windows-hadrian-$WINDOWS_TOOLCHAIN_VERSION"
 .build-windows-make:
  extends: .build-windows
@@ -571,7 +575,7 @@ validate-x86_64-windows:
    MSYSTEM: MINGW64
    CONFIGURE_ARGS: "--target=x86_64-unknown-mingw32"
  cache:
-    key: x86_64-windows
+    key: "x86_64-windows-$WINDOWS_TOOLCHAIN_VERSION"
 # Normal Windows validate builds are profiled; that won't do for releases.
 release-x86_64-windows:
@@ -592,7 +596,7 @@ release-i386-windows:
    BUILD_FLAVOUR: "perf"
    CONFIGURE_ARGS: "--target=i386-unknown-mingw32"
  cache:
-    key: i386-windows
+    key: "i386-windows-$WINDOWS_TOOLCHAIN_VERSION"
 nightly-i386-windows:
  extends: .build-windows-make
@@ -603,7 +607,7 @@ nightly-i386-windows:
    MSYSTEM: MINGW32
    CONFIGURE_ARGS: "--target=i386-unknown-mingw32"
  cache:
-    key: i386-windows
+    key: "i386-windows-$WINDOWS_TOOLCHAIN_VERSION"
 ############################################################
 # Cleanup
@@ -764,4 +768,3 @@ pages:
  artifacts:
    paths:
      - public
--- a/.gitlab/win32-init.sh
+++ b/.gitlab/win32-init.sh
@@ -27,7 +27,8 @@ if [ ! -e $toolchain/bin/ghc ]; then
 fi
 if [ ! -e $toolchain/bin/cabal ]; then
-    curl https://www.haskell.org/cabal/release/cabal-install-2.2.0.0/cabal-install-2.2.0.0-i386-unknown-mingw32.zip > /tmp/cabal.zip
+    url="https://downloads.haskell.org/~cabal/cabal-install-latest/cabal-install-2.4.1.0-x86_64-unknown-mingw32.zip"
+    curl $url > /tmp/cabal.zip
    unzip /tmp/cabal.zip
    mv cabal.exe $toolchain/bin
 fi

--- a/compiler/basicTypes/BasicTypes.hs
+++ b/compiler/basicTypes/BasicTypes.hs
@@ -26,7 +26,7 @@ module BasicTypes(
        Arity, RepArity, JoinArity,
-        Alignment,
+        Alignment, mkAlignment, alignmentOf, alignmentBytes,
        PromotionFlag(..), isPromoted,
        FunctionOrData(..),
@@ -116,6 +116,7 @@ import Outputable
 import SrcLoc ( Located,unLoc )
 import Data.Data hiding (Fixity, Prefix, Infix)
 import Data.Function (on)
+import Data.Bits
 {-
 ************************************************************************
@@ -196,8 +197,39 @@ fIRST_TAG =  1
 ************************************************************************
 -}
-type Alignment = Int -- align to next N-byte boundary (N must be a power of 2).
+-- | A power-of-two alignment
+newtype Alignment = Alignment { alignmentBytes :: Int } deriving (Eq, Ord)
+-- Builds an alignment, throws on non power of 2 input. This is not
+-- ideal, but convenient for internal use and better then silently
+-- passing incorrect data.
+mkAlignment :: Int -> Alignment
+mkAlignment n
+  | n == 1 = Alignment 1
+  | n == 2 = Alignment 2
+  | n == 4 = Alignment 4
+  | n == 8 = Alignment 8
+  | n == 16 = Alignment 16
+  | n == 32 = Alignment 32
+  | n == 64 = Alignment 64
+  | n == 128 = Alignment 128
+  | n == 256 = Alignment 256
+  | n == 512 = Alignment 512
+  | otherwise = panic "mkAlignment: received either a non power of 2 argument or > 512"
+-- Calculates an alignment of a number. x is aligned at N bytes means
+-- the remainder from x / N is zero. Currently, interested in N <= 8,
+-- but can be expanded to N <= 16 or N <= 32 if used within SSE or AVX
+-- context.
+alignmentOf :: Int -> Alignment
+alignmentOf x = case x .&. 7 of
+  0 -> Alignment 8
+  4 -> Alignment 4
+  2 -> Alignment 2
+  _ -> Alignment 1
+instance Outputable Alignment where
+  ppr (Alignment m) = ppr m
 {-
 ************************************************************************
 *                                                                      *

--- a/compiler/basicTypes/Demand.hs
+++ b/compiler/basicTypes/Demand.hs
@@ -22,7 +22,7 @@ module Demand (
        DmdType(..), dmdTypeDepth, lubDmdType, bothDmdType,
        nopDmdType, botDmdType, mkDmdType,
-        addDemand, removeDmdTyArgs,
+        addDemand, ensureArgs,
        BothDmdArg, mkBothDmdArg, toBothDmdArg,
        DmdEnv, emptyDmdEnv,
@@ -34,7 +34,7 @@ module Demand (
        vanillaCprProdRes, cprSumRes,
        appIsBottom, isBottomingSig, pprIfaceStrictSig,
        trimCPRInfo, returnsCPR_maybe,
-        StrictSig(..), mkStrictSig, mkClosedStrictSig,
+        StrictSig(..), mkStrictSigForArity, mkClosedStrictSig,
        nopSig, botSig, cprProdSig,
        isTopSig, hasDemandEnvSig,
        splitStrictSig, strictSigDmdEnv,
@@ -47,10 +47,10 @@ module Demand (
        deferAfterIO,
        postProcessUnsat, postProcessDmdType,
-        splitProdDmd_maybe, peelCallDmd, peelManyCalls, mkCallDmd,
+        splitProdDmd_maybe, peelCallDmd, peelManyCalls, mkCallDmd, mkCallDmds,
        mkWorkerDemand, dmdTransformSig, dmdTransformDataConSig,
        dmdTransformDictSelSig, argOneShots, argsOneShots, saturatedByOneShots,
-        trimToType, TypeShape(..),
+        TypeShape(..), peelTsFuns, trimToType,
        useCount, isUsedOnce, reuseEnv,
        killUsageDemand, killUsageSig, zapUsageDemand, zapUsageEnvSig,
@@ -675,10 +675,15 @@ mkProdDmd dx
  = JD { sd = mkSProd $ map getStrDmd dx
       , ud = mkUProd $ map getUseDmd dx }
+-- | Wraps the 'CleanDemand' with a one-shot call demand: @d@ -> @C1(d)@.
 mkCallDmd :: CleanDemand -> CleanDemand
 mkCallDmd (JD {sd = d, ud = u})
  = JD { sd = mkSCall d, ud = mkUCall One u }
+-- | @mkCallDmds n d@ returns @C1(C1...(C1 d))@ where there are @n@ @C1@'s.
+mkCallDmds :: Arity -> CleanDemand -> CleanDemand
+mkCallDmds arity cd = iterate mkCallDmd cd !! arity
 -- See Note [Demand on the worker] in WorkWrap
 mkWorkerDemand :: Int -> Demand
 mkWorkerDemand n = JD { sd = Lazy, ud = Use One (go n) }
@@ -804,6 +809,13 @@ instance Outputable TypeShape where
  ppr (TsFun ts)   = text "TsFun" <> parens (ppr ts)
  ppr (TsProd tss) = parens (hsep $ punctuate comma $ map ppr tss)
+-- | @peelTsFuns n ts@ tries to peel off @n@ 'TsFun' constructors from @ts@ and
+-- returns 'Just' the wrapped 'TypeShape' on success, and 'Nothing' otherwise.
+peelTsFuns :: Arity -> TypeShape -> Maybe TypeShape
+peelTsFuns 0 ts         = Just ts
+peelTsFuns n (TsFun ts) = peelTsFuns (n-1) ts
+peelTsFuns _ _          = Nothing
 trimToType :: Demand -> TypeShape -> Demand
 -- See Note [Trimming a demand to a type]
 trimToType (JD { sd = ms, ud = mu }) ts
@@ -1207,12 +1219,8 @@ mkDmdType fv ds res = DmdType fv ds res
 dmdTypeDepth :: DmdType -> Arity
 dmdTypeDepth (DmdType _ ds _) = length ds
-- Remove any demand on arguments. This is used in dmdAnalRhs on the body
+-- | This makes sure we can use the demand type with n arguments.
-removeDmdTyArgs :: DmdType -> DmdType
+-- It extends the argument list with the correct resTypeArgDmd.
-removeDmdTyArgs = ensureArgs 0
-- This makes sure we can use the demand type with n arguments,
-- It extends the argument list with the correct resTypeArgDmd
 -- It also adjusts the DmdResult: Divergence survives additional arguments,
 -- CPR information does not (and definite converge also would not).
 ensureArgs :: Arity -> DmdType -> DmdType
@@ -1567,8 +1575,56 @@ and <L,U(U,U)> on the second, then returning a constructor.
 If this same function is applied to one arg, all we can say is that it
 uses x with <L,U>, and its arg with demand <L,U>.
+Note [Understanding DmdType and StrictSig]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Demand types are sound approximations of an expression's semantics relative to
+the incoming demand we put the expression under. Consider the following
+expression:
+    \x y -> x `seq` (y, 2*x)
+Here is a table with demand types resulting from different incoming demands we
+put that expression under. Note the monotonicity; a stronger incoming demand
+yields a more precise demand type:
+    incoming demand                  |  demand type
+    ----------------------------------------------------
+    <S           ,HU              >  |  <L,U><L,U>{}
+    <C(C(S     )),C1(C1(U       ))>  |  <S,U><L,U>{}
+    <C(C(S(S,L))),C1(C1(U(1*U,A)))>  |  <S,1*HU><S,1*U>{}
+Note that in the first example, the depth of the demand type was *higher* than
+the arity of the incoming call demand due to the anonymous lambda.
+The converse is also possible and happens when we unleash demand signatures.
+In @f x y@, the incoming call demand on f has arity 2. But if all we have is a
+demand signature with depth 1 for @f@ (which we can safely unleash, see below),
+the demand type of @f@ under a call demand of arity 2 has a *lower* depth of 1.
+So: Demand types are elicited by putting an expression under an incoming (call)
+demand, the arity of which can be lower or higher than the depth of the
+resulting demand type.
+In contrast, a demand signature summarises a function's semantics *without*
+immediately specifying the incoming demand it was produced under. Despite StrSig
+being a newtype wrapper around DmdType, it actually encodes two things:
+  * The threshold (i.e., minimum arity) to unleash the signature
+  * A demand type that is sound to unleash when the minimum arity requirement is
+    met.
+Here comes the subtle part: The threshold is encoded in the wrapped demand
+type's depth! So in mkStrictSigForArity we make sure to trim the list of
+argument demands to the given threshold arity. Call sites will make sure that
+this corresponds to the arity of the call demand that elicited the wrapped
+demand type. See also Note [What are demand signatures?] in DmdAnal.
+Besides trimming argument demands, mkStrictSigForArity will also trim CPR
+information if necessary.
 -}
+-- | The depth of the wrapped 'DmdType' encodes the arity at which it is safe
+-- to unleash. Better construct this through 'mkStrictSigForArity'.
+-- See Note [Understanding DmdType and StrictSig]
 newtype StrictSig = StrictSig DmdType
                  deriving( Eq )
@@ -1580,34 +1636,43 @@ pprIfaceStrictSig :: StrictSig -> SDoc
 pprIfaceStrictSig (StrictSig (DmdType _ dmds res))
  = hcat (map ppr dmds) <> ppr res
-mkStrictSig :: DmdType -> StrictSig
+-- | Turns a 'DmdType' computed for the particular 'Arity' into a 'StrictSig'
-mkStrictSig dmd_ty = StrictSig dmd_ty
+-- unleashable at that arity. See Note [Understanding DmdType and StrictSig]
+mkStrictSigForArity :: Arity -> DmdType -> StrictSig
+mkStrictSigForArity arity dmd_ty = StrictSig (ensureArgs arity dmd_ty)
 mkClosedStrictSig :: [Demand] -> DmdResult -> StrictSig
-mkClosedStrictSig ds res = mkStrictSig (DmdType emptyDmdEnv ds res)
+mkClosedStrictSig ds res = mkStrictSigForArity (length ds) (DmdType emptyDmdEnv ds res)
 splitStrictSig :: StrictSig -> ([Demand], DmdResult)
 splitStrictSig (StrictSig (DmdType _ dmds res)) = (dmds, res)
 increaseStrictSigArity :: Int -> StrictSig -> StrictSig
-- Add extra arguments to a strictness signature
+-- ^ Add extra arguments to a strictness signature.
+-- In contrast to 'etaExpandStrictSig', this /prepends/ additional argument
+-- demands and leaves CPR info intact.
 increaseStrictSigArity arity_increase sig@(StrictSig dmd_ty@(DmdType env dmds res))
  | isTopDmdType dmd_ty = sig
-  | arity_increase <= 0 = sig
+  | arity_increase == 0 = sig
+  | arity_increase < 0  = WARN( True, text "increaseStrictSigArity:"
+                                  <+> text "negative arity increase"
+                                  <+> ppr arity_increase )
+                          nopSig
  | otherwise           = StrictSig (DmdType env dmds' res)
  where
    dmds' = replicate arity_increase topDmd ++ dmds
 etaExpandStrictSig :: Arity -> StrictSig -> StrictSig
-- We are expanding (\x y. e) to (\x y z. e z)
+-- ^ We are expanding (\x y. e) to (\x y z. e z).
-- Add exta demands to the /end/ of the arg demands if necessary
+-- In contrast to 'increaseStrictSigArity', this /appends/ extra arg demands if
-etaExpandStrictSig arity sig@(StrictSig dmd_ty@(DmdType env dmds res))
+-- necessary, potentially destroying the signature's CPR property.
-  | isTopDmdType dmd_ty = sig
+etaExpandStrictSig arity (StrictSig dmd_ty)
-  | arity_increase <= 0 = sig
+  | arity < dmdTypeDepth dmd_ty
-  | otherwise           = StrictSig (DmdType env dmds' res)
+  -- an arity decrease must zap the whole signature, because it was possibly
-  where
+  -- computed for a higher incoming call demand.
-    arity_increase = arity - length dmds
+  = nopSig
-    dmds' = dmds ++ replicate arity_increase topDmd
+  | otherwise
+  = StrictSig $ ensureArgs arity dmd_ty
 isTopSig :: StrictSig -> Bool
 isTopSig (StrictSig ty) = isTopDmdType ty

--- a/compiler/basicTypes/Id.hs
+++ b/compiler/basicTypes/Id.hs
@@ -668,6 +668,7 @@ isBottomingId v
  | isId v    = isBottomingSig (idStrictness v)
  | otherwise = False
+-- | Accesses the 'Id''s 'strictnessInfo'.
 idStrictness :: Id -> StrictSig
 idStrictness id = strictnessInfo (idInfo id)

--- a/compiler/basicTypes/IdInfo.hs
+++ b/compiler/basicTypes/IdInfo.hs
@@ -237,22 +237,34 @@ pprIdDetails other     = brackets (pp other)
 -- too big.
 data IdInfo
  = IdInfo {
-        arityInfo       :: !ArityInfo,          -- ^ 'Id' arity
+        arityInfo       :: !ArityInfo,
-        ruleInfo        :: RuleInfo,            -- ^ Specialisations of the 'Id's function which exist
+        -- ^ 'Id' arity, as computed by 'CoreArity'. Specifies how many
-                                                -- See Note [Specialisations and RULES in IdInfo]
+        -- arguments this 'Id' has to be applied to before it doesn any
-        unfoldingInfo   :: Unfolding,           -- ^ The 'Id's unfolding
+        -- meaningful work.
-        cafInfo         :: CafInfo,             -- ^ 'Id' CAF info
+        ruleInfo        :: RuleInfo,
-        oneShotInfo     :: OneShotInfo,         -- ^ Info about a lambda-bound variable, if the 'Id' is one
+        -- ^ Specialisations of the 'Id's function which exist.
-        inlinePragInfo  :: InlinePragma,        -- ^ Any inline pragma atached to the 'Id'
+        -- See Note [Specialisations and RULES in IdInfo]
-        occInfo         :: OccInfo,             -- ^ How the 'Id' occurs in the program
+        unfoldingInfo   :: Unfolding,
+        -- ^ The 'Id's unfolding
-        strictnessInfo  :: StrictSig,      --  ^ A strictness signature
+        cafInfo         :: CafInfo,
+        -- ^ 'Id' CAF info
-        demandInfo      :: Demand,       -- ^ ID demand information
+        oneShotInfo     :: OneShotInfo,
-        callArityInfo   :: !ArityInfo,   -- ^ How this is called.
+        -- ^ Info about a lambda-bound variable, if the 'Id' is one
-                                         -- n <=> all calls have at least n arguments
+        inlinePragInfo  :: InlinePragma,
+        -- ^ Any inline pragma atached to the 'Id'
-        levityInfo      :: LevityInfo    -- ^ when applied, will this Id ever have a levity-polymorphic type?
+        occInfo         :: OccInfo,
+        -- ^ How the 'Id' occurs in the program
+        strictnessInfo  :: StrictSig,
+        -- ^ A strictness signature. Digests how a function uses its arguments
+        -- if applied to at least 'arityInfo' arguments.
+        demandInfo      :: Demand,
+        -- ^ ID demand information
+        callArityInfo   :: !ArityInfo,
+        -- ^ How this is called. This is the number of arguments to which a
+        -- binding can be eta-expanded without losing any sharing.
+        -- n <=> all calls have at least n arguments
+        levityInfo      :: LevityInfo
+        -- ^ when applied, will this Id ever have a levity-polymorphic type?
    }
 -- Setters

--- a/compiler/basicTypes/Var.hs
+++ b/compiler/basicTypes/Var.hs
@@ -700,6 +700,8 @@ setIdNotExported id = ASSERT( isLocalId id )
 ************************************************************************
 -}
+-- | Is this a type-level (i.e., computationally irrelevant, thus erasable)
+-- variable? Satisfies @isTyVar = not . isId@.
 isTyVar :: Var -> Bool        -- True of both TyVar and TcTyVar
 isTyVar (TyVar {})   = True
 isTyVar (TcTyVar {}) = True
@@ -712,17 +714,21 @@ isTcTyVar _            = False
 isTyCoVar :: Var -> Bool
 isTyCoVar v = isTyVar v || isCoVar v
+-- | Is this a value-level (i.e., computationally relevant) 'Id'entifier?
+-- Satisfies @isId = not . isTyVar@.
 isId :: Var -> Bool
 isId (Id {}) = True
 isId _       = False
+-- | Is this a coercion variable?
+-- Satisfies @'isId' v ==> 'isCoVar' v == not ('isNonCoVarId' v)@.
 isCoVar :: Var -> Bool
-- A coercion variable
 isCoVar (Id { id_details = details }) = isCoVarDetails details
 isCoVar _                             = False
+-- | Is this a term variable ('Id') that is /not/ a coercion variable?
+-- Satisfies @'isId' v ==> 'isCoVar' v == not ('isNonCoVarId' v)@.
 isNonCoVarId :: Var -> Bool
-- A term variable (Id) that is /not/ a coercion variable
 isNonCoVarId (Id { id_details = details }) = not (isCoVarDetails details)
 isNonCoVarId _                             = False

--- a/compiler/cmm/CmmCallConv.hs
+++ b/compiler/cmm/CmmCallConv.hs
@@ -81,7 +81,6 @@ assignArgumentsPos dflags off conv arg_ty reps = (stk_off, assignments)
                            | passFloatInXmm          -> k (RegisterParam (DoubleReg s), (vs, fs, ds, ls, ss))
                        (W64, (vs, fs, d:ds, ls, ss))
                            | not passFloatInXmm      -> k (RegisterParam d, (vs, fs, ds, ls, ss))
-                        (W80, _) -> panic "F80 unsupported register type"
                        _ -> (assts, (r:rs))
              int = case (w, regs) of
                      (W128, _) -> panic "W128 unsupported register type"
@@ -100,6 +99,7 @@ assignArgumentsPos dflags off conv arg_ty reps = (stk_off, assignments)
 passFloatArgsInXmm :: DynFlags -> Bool
 passFloatArgsInXmm dflags = case platformArch (targetPlatform dflags) of
                              ArchX86_64 -> True
+                              ArchX86    -> False
                              _          -> False
 -- We used to spill vector registers to the stack since the LLVM backend didn't

--- a/compiler/cmm/CmmExpr.hs
+++ b/compiler/cmm/CmmExpr.hs
@@ -474,6 +474,9 @@ instance Eq GlobalReg where
   FloatReg i == FloatReg j = i==j
   DoubleReg i == DoubleReg j = i==j
   LongReg i == LongReg j = i==j
+   -- NOTE: XMM, YMM, ZMM registers actually are the same registers
+   -- at least with respect to store at YMM i and then read from XMM i
+   -- and similarly for ZMM etc.
   XmmReg i == XmmReg j = i==j
   YmmReg i == YmmReg j = i==j
   ZmmReg i == ZmmReg j = i==j
@@ -584,6 +587,9 @@ globalRegType dflags (VanillaReg _ VNonGcPtr) = bWord dflags
 globalRegType _      (FloatReg _)      = cmmFloat W32
 globalRegType _      (DoubleReg _)     = cmmFloat W64
 globalRegType _      (LongReg _)       = cmmBits W64
+-- TODO: improve the internal model of SIMD/vectorized registers
+-- the right design SHOULd improve handling of float and double code too.
+-- see remarks in "NOTE [SIMD Design for the future]"" in StgCmmPrim
 globalRegType _      (XmmReg _)        = cmmVec 4 (cmmBits W32)
 globalRegType _      (YmmReg _)        = cmmVec 8 (cmmBits W32)
 globalRegType _      (ZmmReg _)        = cmmVec 16 (cmmBits W32)

--- a/compiler/cmm/CmmType.hs
+++ b/compiler/cmm/CmmType.hs
@@ -166,9 +166,6 @@ isFloat64 _other                 = False
 -----------------------------------------------------------------------------
 data Width   = W8 | W16 | W32 | W64
-             | W80      -- Extended double-precision float,
-                        -- used in x86 native codegen only.
-                        -- (we use Ord, so it'd better be in this order)
             | W128
             | W256
             | W512
@@ -185,7 +182,7 @@ mrStr W64  = sLit("W64")
 mrStr W128 = sLit("W128")
 mrStr W256 = sLit("W256")
 mrStr W512 = sLit("W512")
-mrStr W80  = sLit("W80")
 -------- Common Widths  ------------
@@ -222,7 +219,7 @@ widthInBits W64  = 64
 widthInBits W128 = 128
 widthInBits W256 = 256
 widthInBits W512 = 512
-widthInBits W80  = 80
 widthInBytes :: Width -> Int
 widthInBytes W8   = 1
@@ -232,7 +229,7 @@ widthInBytes W64  = 8
 widthInBytes W128 = 16
 widthInBytes W256 = 32
 widthInBytes W512 = 64
-widthInBytes W80  = 10
 widthFromBytes :: Int -> Width
 widthFromBytes 1  = W8
@@ -242,7 +239,7 @@ widthFromBytes 8  = W64
 widthFromBytes 16 = W128
 widthFromBytes 32 = W256
 widthFromBytes 64 = W512
-widthFromBytes 10 = W80
 widthFromBytes n  = pprPanic "no width for given number of bytes" (ppr n)
 -- log_2 of the width in bytes, useful for generating shifts.
@@ -254,7 +251,7 @@ widthInLog W64  = 3
 widthInLog W128 = 4
 widthInLog W256 = 5
 widthInLog W512 = 6
-widthInLog W80  = panic "widthInLog: F80"
 -- widening / narrowing

--- a/compiler/codeGen/StgCmmPrim.hs
+++ b/compiler/codeGen/StgCmmPrim.hs
@@ -1727,8 +1727,38 @@ vecElemProjectCast dflags WordVec  W32 =  Just (mo_u_32ToWord dflags)
 vecElemProjectCast _      WordVec  W64 =  Nothing
 vecElemProjectCast _      _        _   =  Nothing
+-- NOTE [SIMD Design for the future]
 -- Check to make sure that we can generate code for the specified vector type
 -- given the current set of dynamic flags.
+-- Currently these checks are specific to x86 and x86_64 architecture.
+-- This should be fixed!
+-- In particular,
+-- 1) Add better support for other architectures! (this may require a redesign)
+-- 2) Decouple design choices from LLVM's pseudo SIMD model!
+--   The high level LLVM naive rep makes per CPU family SIMD generation is own
+--   optimization problem, and hides important differences in eg ARM vs x86_64 simd
+-- 3) Depending on the architecture, the SIMD registers may also support general
+--    computations on Float/Double/Word/Int scalars, but currently on
+--    for example x86_64, we always put Word/Int (or sized) in GPR
+--    (general purpose) registers. Would relaxing that allow for
+--    useful optimization opportunities?
+--      Phrased differently, it is worth experimenting with supporting
+--    different register mapping strategies than we currently have, especially if
+--    someday we want SIMD to be a first class denizen in GHC along with scalar
+--    values!
+--      The current design with respect to register mapping of scalars could
+--    very well be the best,but exploring the  design space and doing careful
+--    measurments is the only only way to validate that.
+--      In some next generation CPU ISAs, notably RISC V, the SIMD extension
+--    includes  support for a sort of run time CPU dependent vectorization parameter,
+--    where a loop may act upon a single scalar each iteration OR some 2,4,8 ...
+--    element chunk! Time will tell if that direction sees wide adoption,
+--    but it is from that context that unifying our handling of simd and scalars
+--    may benefit. It is not likely to benefit current architectures, though
+--    it may very well be a design perspective that helps guide improving the NCG.
 checkVecCompatibility :: DynFlags -> PrimOpVecCat -> Length -> Width -> FCode ()
 checkVecCompatibility dflags vcat l w = do
    when (hscTarget dflags /= HscLlvm) $ do
@@ -2073,10 +2103,17 @@ doCopyAddrToByteArrayOp src_p dst dst_off bytes = do
 -- character.
 doSetByteArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
                 -> FCode ()
-doSetByteArrayOp ba off len c
+doSetByteArrayOp ba off len c = do
-    = do dflags <- getDynFlags
+    dflags <- getDynFlags
-         p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags ba (arrWordsHdrSize dflags)) off
-         emitMemsetCall p c len 1
+    let byteArrayAlignment = wordAlignment dflags -- known since BA is allocated on heap
+        offsetAlignment = case off of
+            CmmLit (CmmInt intOff _) -> alignmentOf (fromInteger intOff)
+            _ -> mkAlignment 1
+        align = min byteArrayAlignment offsetAlignment
+    p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags ba (arrWordsHdrSize dflags)) off
+    emitMemsetCall p c len align
 -- ----------------------------------------------------------------------------
 -- Allocating arrays
@@ -2105,17 +2142,11 @@ doNewArrayOp res_r rep info payload n init = do
    -- Initialise all elements of the array
    p <- assignTemp $ cmmOffsetB dflags (CmmReg arr) (hdrSize dflags rep)
-    for <- newBlockId
+    let initialization =
-    emitLabel for
+            [ mkStore (cmmOffsetW dflags (CmmReg (CmmLocal p)) off) init
-    let loopBody =
+            | off <- [0.. n - 1]
-            [ mkStore (CmmReg (CmmLocal p)) init
+            ]
-            , mkAssign (CmmLocal p) (cmmOffsetW dflags (CmmReg (CmmLocal p)) 1)
+    emit (catAGraphs initialization)
-            , mkBranch for ]
-    emit =<< mkCmmIfThen
-        (cmmULtWord dflags (CmmReg (CmmLocal p))
-         (cmmOffsetW dflags (CmmReg arr)
-          (hdrSizeW dflags rep + n)))
-        (catAGraphs loopBody)
    emit $ mkAssign (CmmLocal res_r) (CmmReg arr)
@@ -2353,7 +2384,7 @@ emitSetCards dst_start dst_cards_start n = do
    emitMemsetCall (cmmAddWord dflags dst_cards_start start_card)
        (mkIntExpr dflags 1)
        (cmmAddWord dflags (cmmSubWord dflags end_card start_card) (mkIntExpr dflags 1))
-        1 -- no alignment (1 byte)
+        (mkAlignment 1) -- no alignment (1 byte)
 -- Convert an element index to a card index
 cardCmm :: DynFlags -> CmmExpr -> CmmExpr
@@ -2479,11 +2510,11 @@ emitMemmoveCall dst src n align = do
 -- | Emit a call to @memset@.  The second argument must fit inside an
 -- unsigned char.
-emitMemsetCall :: CmmExpr -> CmmExpr -> CmmExpr -> Int -> FCode ()
+emitMemsetCall :: CmmExpr -> CmmExpr -> CmmExpr -> Alignment -> FCode ()
 emitMemsetCall dst c n align = do
    emitPrimCall
        [ {- no results -} ]
-        (MO_Memset align)
+        (MO_Memset (alignmentBytes align))
        [ dst, c, n ]
 emitMemcmpCall :: LocalReg -> CmmExpr -> CmmExpr -> CmmExpr -> Int -> FCode ()

--- a/compiler/coreSyn/CoreArity.hs
+++ b/compiler/coreSyn/CoreArity.hs
@@ -158,7 +158,7 @@ exprBotStrictness_maybe e
 {-
 Note [exprArity invariant]
 ~~~~~~~~~~~~~~~~~~~~~~~~~~
-exprArity has the following invariant:
+exprArity has the following invariants:
  (1) If typeArity (exprType e) = n,
      then manifestArity (etaExpand e n) = n

--- a/compiler/coreSyn/CoreLint.hs
+++ b/compiler/coreSyn/CoreLint.hs
@@ -570,15 +570,9 @@ lintSingleBinding top_lvl_flag rec_flag (binder,rhs)
              (addWarnL (text "INLINE binder is (non-rule) loop breaker:" <+> ppr binder))
              -- Only non-rule loop breakers inhibit inlining
-      -- Check whether arity and demand type are consistent (only if demand analysis
+       -- We used to check that the dmdTypeDepth of a demand signature never
-      -- already happened)
+       -- exceeds idArity, but that is an unnecessary complication, see
-      --
+       -- Note [idArity varies independently of dmdTypeDepth] in DmdAnal
-      -- Note (Apr 2014): this is actually ok.  See Note [Demand analysis for trivial right-hand sides]
-      --                  in DmdAnal.  After eta-expansion in CorePrep the rhs is no longer trivial.
-      --       ; let dmdTy = idStrictness binder
-      --       ; checkL (case dmdTy of
-      --                  StrictSig dmd_ty -> idArity binder >= dmdTypeDepth dmd_ty || exprIsTrivial rhs)
-      --           (mkArityMsg binder)
       -- Check that the binder's arity is within the bounds imposed by
       -- the type and the strictness signature. See Note [exprArity invariant]
@@ -2562,20 +2556,6 @@ mkKindErrMsg tyvar arg_ty
          hang (text "Arg type:")
                 4 (ppr arg_ty <+> dcolon <+> ppr (typeKind arg_ty))]
-{- Not needed now
-mkArityMsg :: Id -> MsgDoc
-mkArityMsg binder
-  = vcat [hsep [text "Demand type has",
-                ppr (dmdTypeDepth dmd_ty),
-                text "arguments, rhs has",
-                ppr (idArity binder),
-                text "arguments,",
-                ppr binder],
-              hsep [text "Binder's strictness signature:", ppr dmd_ty]
-         ]
-           where (StrictSig dmd_ty) = idStrictness binder
-}
 mkCastErr :: CoreExpr -> Coercion -> Type -> Type -> MsgDoc
 mkCastErr expr = mk_cast_err "expression" "type" (ppr expr)

--- a/compiler/coreSyn/CoreUnfold.hs
+++ b/compiler/coreSyn/CoreUnfold.hs
@@ -1149,15 +1149,15 @@ certainlyWillInline dflags fn_info
        -- INLINABLE functions come via this path
        --    See Note [certainlyWillInline: INLINABLE]
    do_cunf expr (UnfIfGoodArgs { ug_size = size, ug_args = args })
-      | not (null args)  -- See Note [certainlyWillInline: be careful of thunks]
+      | arityInfo fn_info > 0  -- See Note [certainlyWillInline: be careful of thunks]
      , not (isBottomingSig (strictnessInfo fn_info))
              -- Do not unconditionally inline a bottoming functions even if
              -- it seems smallish. We've carefully lifted it out to top level,
              -- so we don't want to re-inline it.
-      , let arity = length args
+      , let unf_arity = length args
-      , size - (10 * (arity + 1)) <= ufUseThreshold dflags
+      , size - (10 * (unf_arity + 1)) <= ufUseThreshold dflags
      = Just (fn_unf { uf_src      = InlineStable
-                     , uf_guidance = UnfWhen { ug_arity     = arity
+                     , uf_guidance = UnfWhen { ug_arity     = unf_arity
                                             , ug_unsat_ok  = unSaturatedOk
                                             , ug_boring_ok = inlineBoringOk expr } })
             -- Note the "unsaturatedOk". A function like  f = \ab. a
@@ -1175,6 +1175,17 @@ found that the WorkWrap phase thought that
       y = case x of F# v -> F# (v +# v)
 was certainlyWillInline, so the addition got duplicated.
+Note that we check arityInfo instead of the arity of the unfolding to detect
+this case. This is so that we don't accidentally fail to inline small partial
+applications, like `f = g 42` (where `g` recurses into `f`) where g has arity 2
+(say). Here there is no risk of work duplication, and the RHS is tiny, so
+certainlyWillInline should return True. But `unf_arity` is zero! However f's
+arity, gotten from `arityInfo fn_info`, is 1.
+Failing to say that `f` will inline forces W/W to generate a potentially huge
+worker for f that will immediately cancel with `g`'s wrapper anyway, causing
+unnecessary churn in the Simplifier while arriving at the same result.
 Note [certainlyWillInline: INLINABLE]
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 certainlyWillInline /must/ return Nothing for a large INLINABLE thing,

--- a/compiler/deSugar/Check.hs
+++ b/compiler/deSugar/Check.hs
@@ -4,9 +4,13 @@ Author: George Karachalias <george.karachalias@cs.kuleuven.be>
 Pattern Matching Coverage Checking.
 -}
-{-# LANGUAGE CPP, GADTs, DataKinds, KindSignatures #-}
+{-# LANGUAGE CPP            #-}
-{-# LANGUAGE TupleSections #-}
+{-# LANGUAGE GADTs          #-}
-{-# LANGUAGE ViewPatterns  #-}
+{-# LANGUAGE DataKinds      #-}
+{-# LANGUAGE KindSignatures #-}
+{-# LANGUAGE TupleSections  #-}
+{-# LANGUAGE ViewPatterns   #-}
+{-# LANGUAGE MultiWayIf     #-}
 module Check (
        -- Checking and printing
@@ -55,7 +59,7 @@ import qualified GHC.LanguageExtensions as LangExt
 import Data.List     (find)
 import Data.Maybe    (catMaybes, isJust, fromMaybe)
-import Control.Monad (forM, when, forM_, zipWithM)
+import Control.Monad (forM, when, forM_, zipWithM, filterM)
 import Coercion
 import TcEvidence
 import TcSimplify    (tcNormalise)
@@ -289,6 +293,14 @@ data PmResult =
    , pmresultUncovered    :: UncoveredCandidates
    , pmresultInaccessible :: [Located [LPat GhcTc]] }
+instance Outputable PmResult where
+  ppr pmr = hang (text "PmResult") 2 $ vcat
+    [ text "pmresultProvenance" <+> ppr (pmresultProvenance pmr)
+    , text "pmresultRedundant" <+> ppr (pmresultRedundant pmr)
+    , text "pmresultUncovered" <+> ppr (pmresultUncovered pmr)
+    , text "pmresultInaccessible" <+> ppr (pmresultInaccessible pmr)
+    ]
 -- | Either a list of patterns that are not covered, or their type, in case we
 -- have no patterns at hand. Not having patterns at hand can arise when
 -- handling EmptyCase expressions, in two cases:
@@ -303,6 +315,10 @@ data PmResult =
 data UncoveredCandidates = UncoveredPatterns Uncovered
                         | TypeOfUncovered Type
+instance Outputable UncoveredCandidates where
+  ppr (UncoveredPatterns uc) = text "UnPat" <+> ppr uc
+  ppr (TypeOfUncovered ty)   = text "UnTy" <+> ppr ty
 -- | The empty pattern check result
 emptyPmResult :: PmResult
 emptyPmResult = PmResult FromBuiltin [] (UncoveredPatterns []) []
@@ -987,7 +1003,7 @@ translatePat fam_insts pat = case pat of
    | otherwise -> do
        ps      <- translatePat fam_insts p
        (xp,xe) <- mkPmId2Forms ty
-        let g = mkGuard ps (mkHsWrap wrapper (unLoc xe))
+        g <- mkGuard ps (mkHsWrap wrapper (unLoc xe))
        return [xp,g]
  -- (n + k)  ===>   x (True <- x >= k) (n <- x-k)
@@ -997,10 +1013,11 @@ translatePat fam_insts pat = case pat of
  ViewPat arg_ty lexpr lpat -> do
    ps <- translatePat fam_insts (unLoc lpat)
    -- See Note [Guards and Approximation]
-    case all cantFailPattern ps of
+    res <- allM cantFailPattern ps
+    case res of
      True  -> do
        (xp,xe) <- mkPmId2Forms arg_ty
-        let g = mkGuard ps (HsApp noExt lexpr xe)
+        g <- mkGuard ps (HsApp noExt lexpr xe)
        return [xp,g]
      False -> mkCanFailPmPat arg_ty
@@ -1255,41 +1272,38 @@ translateMatch _ _ = panic "translateMatch"
 translateGuards :: FamInstEnvs -> [GuardStmt GhcTc] -> DsM PatVec
 translateGuards fam_insts guards = do
  all_guards <- concat <$> mapM (translateGuard fam_insts) guards
-  return (replace_unhandled all_guards)
+  let
-  -- It should have been (return all_guards) but it is too expressive.
+    shouldKeep :: Pattern -> DsM Bool
+    shouldKeep p
+      | PmVar {} <- p = pure True
+      | PmCon {} <- p = (&&)
+                          <$> singleMatchConstructor (pm_con_con p) (pm_con_arg_tys p)
+                          <*> allM shouldKeep (pm_con_args p)
+    shouldKeep (PmGrd pv e)
+      | isNotPmExprOther e = pure True  -- expensive but we want it
+      | otherwise          = allM shouldKeep pv
+    shouldKeep _other_pat  = pure False -- let the rest..
+  all_handled <- allM shouldKeep all_guards
+  -- It should have been @pure all_guards@ but it is too expressive.
  -- Since the term oracle does not handle all constraints we generate,
  -- we (hackily) replace all constraints the oracle cannot handle with a
-  -- single one (we need to know if there is a possibility of falure).
+  -- single one (we need to know if there is a possibility of failure).
  -- See Note [Guards and Approximation] for all guard-related approximations
  -- we implement.
-  where
+  if all_handled
-    replace_unhandled :: PatVec -> PatVec
+    then pure all_guards
-    replace_unhandled gv
+    else do
-      | any_unhandled gv = fake_pat : [ p | p <- gv, shouldKeep p ]
+      kept <- filterM shouldKeep all_guards
-      | otherwise        = gv
+      pure (fake_pat : kept)
-    any_unhandled :: PatVec -> Bool
-    any_unhandled gv = any (not . shouldKeep) gv
-    shouldKeep :: Pattern -> Bool
-    shouldKeep p
-      | PmVar {} <- p      = True
-      | PmCon {} <- p      = singleConstructor (pm_con_con p)
-                             && all shouldKeep (pm_con_args p)
-    shouldKeep (PmGrd pv e)
-      | all shouldKeep pv  = True
-      | isNotPmExprOther e = True  -- expensive but we want it
-    shouldKeep _other_pat  = False -- let the rest..
 -- | Check whether a pattern can fail to match
-cantFailPattern :: Pattern -> Bool
+cantFailPattern :: Pattern -> DsM Bool
-cantFailPattern p
+cantFailPattern PmVar {}      = pure True
-  | PmVar {} <- p = True
+cantFailPattern PmCon { pm_con_con = c, pm_con_arg_tys = tys, pm_con_args = ps}
-  | PmCon {} <- p = singleConstructor (pm_con_con p)
+  = (&&) <$> singleMatchConstructor c tys <*> allM cantFailPattern ps
-                    && all cantFailPattern (pm_con_args p)
+cantFailPattern (PmGrd pv _e) = allM cantFailPattern pv
-cantFailPattern (PmGrd pv _e)
+cantFailPattern _             = pure False
-                  = all cantFailPattern pv
-cantFailPattern _ = False
 -- | Translate a guard statement to Pattern
 translateGuard :: FamInstEnvs -> GuardStmt GhcTc -> DsM PatVec
@@ -1312,7 +1326,8 @@ translateLet _binds = return []
 translateBind :: FamInstEnvs -> LPat GhcTc -> LHsExpr GhcTc -> DsM PatVec
 translateBind fam_insts (dL->L _ p) e = do
  ps <- translatePat fam_insts p
-  return [mkGuard ps (unLoc e)]
+  g <- mkGuard ps (unLoc e)
+  return [g]
 -- | Translate a boolean guard
 translateBoolGuard :: LHsExpr GhcTc -> DsM PatVec
@@ -1321,7 +1336,7 @@ translateBoolGuard e
    -- The formal thing to do would be to generate (True <- True)
    -- but it is trivial to solve so instead we give back an empty
    -- PatVec for efficiency
-  | otherwise = return [mkGuard [truePattern] (unLoc e)]
+  | otherwise = (:[]) <$> mkGuard [truePattern] (unLoc e)
 {- Note [Guards and Approximation]
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -1658,13 +1673,14 @@ mkOneConFull x con = do
 -- * More smart constructors and fresh variable generation
 -- | Create a guard pattern
-mkGuard :: PatVec -> HsExpr GhcTc -> Pattern
+mkGuard :: PatVec -> HsExpr GhcTc -> DsM Pattern
-mkGuard pv e
+mkGuard pv e = do
-  | all cantFailPattern pv = PmGrd pv expr
+  res <- allM cantFailPattern pv
-  | PmExprOther {} <- expr = fake_pat
+  let expr = hsExprToPmExpr e
-  | otherwise              = PmGrd pv expr
+  tracePmD "mkGuard" (vcat [ppr pv, ppr e, ppr res, ppr expr])
-  where
+  if | res                    -> pure (PmGrd pv expr)
-    expr = hsExprToPmExpr e
+     | PmExprOther {} <- expr -> pure fake_pat
+     | otherwise              -> pure (PmGrd pv expr)
 -- | Create a term equality of the form: `(False ~ (x ~ lit))`
 mkNegEq :: Id -> PmLit -> ComplexEq
@@ -1738,14 +1754,37 @@ coercePmPat (PmCon { pm_con_con = con, pm_con_arg_tys = arg_tys
           , pm_con_args = coercePatVec args }]
 coercePmPat (PmGrd {}) = [] -- drop the guards
-- | Check whether a data constructor is the only way to construct
+-- | Check whether a 'ConLike' has the /single match/ property, i.e. whether
-- a data type.
+-- it is the only possible match in the given context. See also
-singleConstructor :: ConLike -> Bool
+-- 'allCompleteMatches' and Note [Single match constructors].
-singleConstructor (RealDataCon dc) =
+singleMatchConstructor :: ConLike -> [Type] -> DsM Bool
-  case tyConDataCons (dataConTyCon dc) of
+singleMatchConstructor cl tys =
-    [_] -> True
+  any (isSingleton . snd) <$> allCompleteMatches cl tys
-    _   -> False
-singleConstructor _ = False
+{-
+Note [Single match constructors]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+When translating pattern guards for consumption by the checker, we desugar
+every pattern guard that might fail ('cantFailPattern') to 'fake_pat'
+(True <- _). Which patterns can't fail? Exactly those that only match on
+'singleMatchConstructor's.
+Here are a few examples:
+  * @f a | (a, b) <- foo a = 42@: Product constructors are generally
+    single match. This extends to single constructors of GADTs like 'Refl'.
+  * If @f | Id <- id () = 42@, where @pattern Id = ()@ and 'Id' is part of a
+    singleton `COMPLETE` set, then 'Id' has the single match property.
+In effect, we can just enumerate 'allCompleteMatches' and check if the conlike
+occurs as a singleton set.
+There's the chance that 'Id' is part of multiple `COMPLETE` sets. That's
+irrelevant; If the user specified a singleton set, it is single-match.
+Note that this doesn't really take into account incoming type constraints;
+It might be obvious from type context that a particular GADT constructor has
+the single-match property. We currently don't (can't) check this in the
+translation step. See #15753 for why this yields surprising results.
+-}
 -- | For a given conlike, finds all the sets of patterns which could
 -- be relevant to that conlike by consulting the result type.

--- a/compiler/llvmGen/LlvmCodeGen/Base.hs
+++ b/compiler/llvmGen/LlvmCodeGen/Base.hs
@@ -97,7 +97,6 @@ cmmToLlvmType ty | isVecType ty   = LMVector (vecLength ty) (cmmToLlvmType (vecE
 widthToLlvmFloat :: Width -> LlvmType
 widthToLlvmFloat W32  = LMFloat
 widthToLlvmFloat W64  = LMDouble
-widthToLlvmFloat W80  = LMFloat80
 widthToLlvmFloat W128 = LMFloat128
 widthToLlvmFloat w    = panic $ "widthToLlvmFloat: Bad float size: " ++ show w

--- a/compiler/main/DriverPhases.hs
+++ b/compiler/main/DriverPhases.hs
@@ -369,4 +369,3 @@ isHaskellSigFilename     f = isHaskellSigSuffix     (drop 1 $ takeExtension f)
 isObjectFilename, isDynLibFilename :: Platform -> FilePath -> Bool
 isObjectFilename platform f = isObjectSuffix platform (drop 1 $ takeExtension f)
 isDynLibFilename platform f = isDynLibSuffix platform (drop 1 $ takeExtension f)
--- a/compiler/main/DriverPipeline.hs
+++ b/compiler/main/DriverPipeline.hs
@@ -1218,17 +1218,8 @@ runPhase (RealPhase cc_phase) input_fn dflags
        ghcVersionH <- liftIO $ getGhcVersionPathName dflags
-        let gcc_lang_opt | cc_phase `eqPhase` Ccxx    = "c++"
+        liftIO $ SysTools.runCc (phaseForeignLanguage cc_phase) dflags (
-                         | cc_phase `eqPhase` Cobjc   = "objective-c"
+                        [ SysTools.FileOption "" input_fn
-                         | cc_phase `eqPhase` Cobjcxx = "objective-c++"
-                         | otherwise                  = "c"
-        liftIO $ SysTools.runCc dflags (
-                -- force the C compiler to interpret this file as C when
-                -- compiling .hc files, by adding the -x c option.
-                -- Also useful for plain .c files, just in case GHC saw a
-                -- -x c option.
-                        [ SysTools.Option "-x", SysTools.Option gcc_lang_opt
-                        , SysTools.FileOption "" input_fn
                        , SysTools.Option "-o"
                        , SysTools.FileOption "" output_fn
                        ]
@@ -1917,7 +1908,7 @@ doCpp dflags raw input_fn output_fn = do
    let verbFlags = getVerbFlags dflags
    let cpp_prog args | raw       = SysTools.runCpp dflags args
-                      | otherwise = SysTools.runCc dflags (SysTools.Option "-E" : args)
+                      | otherwise = SysTools.runCc Nothing dflags (SysTools.Option "-E" : args)
    let target_defs =
          [ "-D" ++ HOST_OS     ++ "_BUILD_OS",

--- a/compiler/main/DynFlags.hs
+++ b/compiler/main/DynFlags.hs
@@ -58,7 +58,7 @@ module DynFlags (
        fFlags, fLangFlags, xFlags,
        wWarningFlags,
        dynFlagDependencies,
-        tablesNextToCode, mkTablesNextToCode,
+        tablesNextToCode,
        makeDynFlagsConsistent,
        shouldUseColor,
        shouldUseHexWordLiterals,
@@ -92,7 +92,8 @@ module DynFlags (
        extraGccViaCFlags, systemPackageConfig,
        pgm_L, pgm_P, pgm_F, pgm_c, pgm_a, pgm_l, pgm_dll, pgm_T,
        pgm_windres, pgm_libtool, pgm_ar, pgm_ranlib, pgm_lo, pgm_lc,
-        pgm_lcc, pgm_i, opt_L, opt_P, opt_F, opt_c, opt_a, opt_l, opt_i,
+        pgm_lcc, pgm_i,
+        opt_L, opt_P, opt_F, opt_c, opt_cxx, opt_a, opt_l, opt_i,
        opt_P_signature,
        opt_windres, opt_lo, opt_lc, opt_lcc,
@@ -146,6 +147,7 @@ module DynFlags (
 #include "GHCConstantsHaskellExports.hs"
        bLOCK_SIZE_W,
        wORD_SIZE_IN_BITS,
+        wordAlignment,
        tAG_MASK,
        mAX_PTR_TAG,
        tARGET_MIN_INT, tARGET_MAX_INT, tARGET_MAX_WORD,
@@ -204,7 +206,7 @@ import Maybes
 import MonadUtils
 import qualified Pretty
 import SrcLoc
-import BasicTypes       ( IntWithInf, treatZeroAsInf )
+import BasicTypes       ( Alignment, alignmentOf, IntWithInf, treatZeroAsInf )
 import FastString
 import Fingerprint
 import Outputable
@@ -1340,6 +1342,7 @@ data Settings = Settings {
                                         -- See Note [Repeated -optP hashing]
  sOpt_F                 :: [String],
  sOpt_c                 :: [String],
+  sOpt_cxx               :: [String],
  sOpt_a                 :: [String],
  sOpt_l                 :: [String],
  sOpt_windres           :: [String],
@@ -1423,6 +1426,8 @@ opt_F dflags = sOpt_F (settings dflags)
 opt_c                 :: DynFlags -> [String]
 opt_c dflags = concatMap (wayOptc (targetPlatform dflags)) (ways dflags)
            ++ sOpt_c (settings dflags)
+opt_cxx               :: DynFlags -> [String]
+opt_cxx dflags = sOpt_cxx (settings dflags)
 opt_a                 :: DynFlags -> [String]
 opt_a dflags = sOpt_a (settings dflags)
 opt_l                 :: DynFlags -> [String]
@@ -2520,7 +2525,7 @@ setObjectDir, setHiDir, setHieDir, setStubDir, setDumpDir, setOutputDir,
         setDynObjectSuf, setDynHiSuf,
         setDylibInstallName,
         setObjectSuf, setHiSuf, setHieSuf, setHcSuf, parseDynLibLoaderMode,
-         setPgmP, addOptl, addOptc, addOptP,
+         setPgmP, addOptl, addOptc, addOptcxx, addOptP,
         addCmdlineFramework, addHaddockOpts, addGhciScript,
         setInteractivePrint
   :: String -> DynFlags -> DynFlags
@@ -2636,6 +2641,7 @@ setDumpPrefixForce f d = d { dumpPrefixForce = f}
 setPgmP   f = let (pgm:args) = words f in alterSettings (\s -> s { sPgm_P   = (pgm, map Option args)})
 addOptl   f = alterSettings (\s -> s { sOpt_l   = f : sOpt_l s})
 addOptc   f = alterSettings (\s -> s { sOpt_c   = f : sOpt_c s})
+addOptcxx f = alterSettings (\s -> s { sOpt_cxx = f : sOpt_cxx s})
 addOptP   f = alterSettings (\s -> s { sOpt_P   = f : sOpt_P s
                                     , sOpt_P_fingerprint = fingerprintStrings (f : sOpt_P s)
                                     })
@@ -3038,6 +3044,8 @@ dynamic_flags_deps = [
      (hasArg (\f -> alterSettings (\s -> s { sOpt_F   = f : sOpt_F s})))
  , make_ord_flag defFlag "optc"
      (hasArg addOptc)
+  , make_ord_flag defFlag "optcxx"
+      (hasArg addOptcxx)
  , make_ord_flag defFlag "opta"
      (hasArg (\f -> alterSettings (\s -> s { sOpt_a   = f : sOpt_a s})))
  , make_ord_flag defFlag "optl"
@@ -5654,6 +5662,9 @@ bLOCK_SIZE_W dflags = bLOCK_SIZE dflags `quot` wORD_SIZE dflags
 wORD_SIZE_IN_BITS :: DynFlags -> Int
 wORD_SIZE_IN_BITS dflags = wORD_SIZE dflags * 8
+wordAlignment :: DynFlags -> Alignment
+wordAlignment dflags = alignmentOf (wORD_SIZE dflags)
 tAG_MASK :: DynFlags -> Int
 tAG_MASK dflags = (1 `shiftL` tAG_BITS dflags) - 1
@@ -5822,20 +5833,24 @@ data SseVersion = SSE1
 isSseEnabled :: DynFlags -> Bool
 isSseEnabled dflags = case platformArch (targetPlatform dflags) of
    ArchX86_64 -> True
-    ArchX86    -> sseVersion dflags >= Just SSE1
+    ArchX86    -> True
    _          -> False
 isSse2Enabled :: DynFlags -> Bool
 isSse2Enabled dflags = case platformArch (targetPlatform dflags) of
-    ArchX86_64 -> -- SSE2 is fixed on for x86_64.  It would be
+  -- We Assume  SSE1 and SSE2 operations are available on both
-                  -- possible to make it optional, but we'd need to
+  -- x86 and x86_64. Historically we didn't default to SSE2 and
-                  -- fix at least the foreign call code where the
+  -- SSE1 on x86, which results in defacto nondeterminism for how
-                  -- calling convention specifies the use of xmm regs,
+  -- rounding behaves in the associated x87 floating point instructions
-                  -- and possibly other places.
+  -- because variations in the spill/fpu stack placement of arguments for
-                  True
+  -- operations would change the precision and final result of what
-    ArchX86    -> sseVersion dflags >= Just SSE2
+  -- would otherwise be the same expressions with respect to single or
+  -- double precision IEEE floating point computations.
+    ArchX86_64 -> True
+    ArchX86    -> True
    _          -> False
 isSse4_2Enabled :: DynFlags -> Bool
 isSse4_2Enabled dflags = sseVersion dflags >= Just SSE42