diff --git a/compiler/GHC/StgToCmm/Bind.hs b/compiler/GHC/StgToCmm/Bind.hs
index 9e192a0ac8a86631fd25b6dcb540fcb9e07127e4..325686aaa8a1d20c38d2ffb1a0b6db39804b8a3b 100644
--- a/compiler/GHC/StgToCmm/Bind.hs
+++ b/compiler/GHC/StgToCmm/Bind.hs
@@ -52,6 +52,8 @@ import DynFlags
 
 import Control.Monad
 
+import TyCon (PrimRep (..))
+
 ------------------------------------------------------------------------
 --              Top-level bindings
 ------------------------------------------------------------------------
@@ -716,9 +718,9 @@ link_caf node = do
   ; let newCAF_lbl = mkForeignLabel (fsLit "newCAF") Nothing
                                     ForeignLabelInExternalPackage IsFunction
   ; bh <- newTemp (bWord dflags)
-  ; emitRtsCallGen [(bh,AddrHint)] newCAF_lbl
-      [ (baseExpr,  AddrHint),
-        (CmmReg (CmmLocal node), AddrHint) ]
+  ; emitRtsCallGen [(bh, AddrRep, AddrHint)] newCAF_lbl
+      [ (baseExpr, AddrRep, AddrHint),
+        (CmmReg (CmmLocal node), AddrRep, AddrHint) ]
       False
 
   -- see Note [atomic CAF entry] in rts/sm/Storage.c
diff --git a/compiler/GHC/StgToCmm/Expr.hs b/compiler/GHC/StgToCmm/Expr.hs
index 07113a4e82481b59bd9cae2daa23c7b96048767e..c2d466d7d83a0f7b45fbf5629d0364b5c5a183f0 100644
--- a/compiler/GHC/StgToCmm/Expr.hs
+++ b/compiler/GHC/StgToCmm/Expr.hs
@@ -571,7 +571,7 @@ isSimpleScrut _                _           = return False
 
 isSimpleOp :: StgOp -> [StgArg] -> FCode Bool
 -- True iff the op cannot block or allocate
-isSimpleOp (StgFCallOp (CCall (CCallSpec _ _ safe)) _) _ = return $! not (playSafe safe)
+isSimpleOp (StgFCallOp (CCall (CCallSpec _ _ safe _ _)) _) _ = return $! not (playSafe safe)
 -- dataToTag# evalautes its argument, see Note [dataToTag#] in primops.txt.pp
 isSimpleOp (StgPrimOp DataToTagOp) _ = return False
 isSimpleOp (StgPrimOp op) stg_args                  = do
diff --git a/compiler/GHC/StgToCmm/Foreign.hs b/compiler/GHC/StgToCmm/Foreign.hs
index 37eb7c5021f4da22d7826996cd5ef27c3bdb5003..9c61b22c63f1faa2d383903e5197e9a4a55f718c 100644
--- a/compiler/GHC/StgToCmm/Foreign.hs
+++ b/compiler/GHC/StgToCmm/Foreign.hs
@@ -65,7 +65,7 @@ cgForeignCall :: ForeignCall            -- the op
               -> Type                   -- result type
               -> FCode ReturnKind
 
-cgForeignCall (CCall (CCallSpec target cconv safety)) typ stg_args res_ty
+cgForeignCall (CCall (CCallSpec target cconv safety ret_rep arg_reps)) typ stg_args res_ty
   = do  { dflags <- getDynFlags
         ; let -- in the stdcall calling convention, the symbol needs @size appended
               -- to it, where size is the total number of bytes of arguments.  We
@@ -97,7 +97,7 @@ cgForeignCall (CCall (CCallSpec target cconv safety)) typ stg_args res_ty
                    DynamicTarget    ->  case cmm_args of
                                            (fn,_):rest -> (unzip rest, fn)
                                            [] -> panic "cgForeignCall []"
-              fc = ForeignConvention cconv arg_hints res_hints CmmMayReturn
+              fc = ForeignConvention cconv arg_hints res_hints CmmMayReturn ret_rep arg_reps
               call_target = ForeignTarget cmm_target fc
 
         -- we want to emit code for the call, and then emitReturn.
@@ -188,17 +188,22 @@ continuation, resulting in just one proc point instead of two. Yay!
 -}
 
 
-emitCCall :: [(CmmFormal,ForeignHint)]
+emitCCall :: [(CmmFormal, PrimRep, ForeignHint)]
           -> CmmExpr
-          -> [(CmmActual,ForeignHint)]
+          -> [(CmmActual, PrimRep, ForeignHint)]
           -> FCode ()
 emitCCall hinted_results fn hinted_args
   = void $ emitForeignCall PlayRisky results target args
   where
-    (args, arg_hints) = unzip hinted_args
-    (results, result_hints) = unzip hinted_results
+    (args, arg_reps, arg_hints) = unzip3 hinted_args
+    (results, result_reps, result_hints) = unzip3 hinted_results
+    -- extract result, we can only deal with 0 or 1 result types.
+    res_rep = case result_reps of
+      []  -> VoidRep
+      [r] -> r
+      _   -> error "can not deal with multiple return values in emitCCall"
     target = ForeignTarget fn fc
-    fc = ForeignConvention CCallConv arg_hints result_hints CmmMayReturn
+    fc = ForeignConvention CCallConv arg_hints result_hints CmmMayReturn res_rep arg_reps
 
 
 emitPrimCall :: [CmmFormal] -> CallishMachOp -> [CmmActual] -> FCode ()
@@ -653,4 +658,3 @@ typeToStgFArgType typ
   -- a type in a foreign function signature with a representationally
   -- equivalent newtype.
   tycon = tyConAppTyCon (unwrapType typ)
-
diff --git a/compiler/GHC/StgToCmm/Prim.hs b/compiler/GHC/StgToCmm/Prim.hs
index 09eb8bae47254f2167bffa574508b1f657ed0fa1..f6689ff9791b793453849223e6873448868ddc62 100644
--- a/compiler/GHC/StgToCmm/Prim.hs
+++ b/compiler/GHC/StgToCmm/Prim.hs
@@ -290,15 +290,16 @@ emitPrimOp dflags = \case
       -> opAllDone $ \ [res] -> emitCloneSmallArray mkSMAP_DIRTY_infoLabel res src src_off (fromInteger n)
     _ -> PrimopCmmEmit_External
 
--- First we handle various awkward cases specially.
-
+  -- First we handle various awkward cases specially.
+  -- Note: StgInt newSpark (StgRegTable *reg, StgClosure *p)
+  --       StgInt is Int_64 on 64bit platforms, Int_32 on others
   ParOp -> \[arg] -> opAllDone $ \[res] -> do
     -- for now, just implement this in a C function
     -- later, we might want to inline it.
     emitCCall
-        [(res,NoHint)]
+        [(res, Int64Rep, SignedHint)]
         (CmmLit (CmmLabel (mkForeignLabel (fsLit "newSpark") Nothing ForeignLabelInExternalPackage IsFunction)))
-        [(baseExpr, AddrHint), (arg,AddrHint)]
+        [(baseExpr, AddrRep, AddrHint), (arg, AddrRep, AddrHint)]
 
   SparkOp -> \[arg] -> opAllDone $ \[res] -> do
     -- returns the value of arg in res.  We're going to therefore
@@ -307,9 +308,9 @@ emitPrimOp dflags = \case
     tmp <- assignTemp arg
     tmp2 <- newTemp (bWord dflags)
     emitCCall
-        [(tmp2,NoHint)]
+        [(tmp2, Int64Rep, SignedHint)]
         (CmmLit (CmmLabel (mkForeignLabel (fsLit "newSpark") Nothing ForeignLabelInExternalPackage IsFunction)))
-        [(baseExpr, AddrHint), ((CmmReg (CmmLocal tmp)), AddrHint)]
+        [(baseExpr, AddrRep, AddrHint), ((CmmReg (CmmLocal tmp)), AddrRep, AddrHint)]
     emitAssign (CmmLocal res) (CmmReg (CmmLocal tmp))
 
   GetCCSOfOp -> \[arg] -> opAllDone $ \[res] -> do
@@ -342,7 +343,7 @@ emitPrimOp dflags = \case
     emitCCall
             [{-no results-}]
             (CmmLit (CmmLabel mkDirty_MUT_VAR_Label))
-            [(baseExpr, AddrHint), (mutv, AddrHint), (CmmReg old_val, AddrHint)]
+            [(baseExpr, AddrRep, AddrHint), (mutv, AddrRep, AddrHint), (CmmReg old_val, AddrRep, AddrHint)]
 
 --  #define sizzeofByteArrayzh(r,a) \
 --     r = ((StgArrBytes *)(a))->bytes
diff --git a/compiler/GHC/StgToCmm/Prof.hs b/compiler/GHC/StgToCmm/Prof.hs
index 984c371360fcb80e0106b50ab5d2fb99a47517a0..c1cb023d897d2352ee018beda17f071803b0f224 100644
--- a/compiler/GHC/StgToCmm/Prof.hs
+++ b/compiler/GHC/StgToCmm/Prof.hs
@@ -44,6 +44,8 @@ import Outputable
 import Control.Monad
 import Data.Char (ord)
 
+import TyCon (PrimRep (..))
+
 -----------------------------------------------------------------------------
 --
 -- Cost-centre-stack Profiling
@@ -178,8 +180,8 @@ enterCostCentreFun ccs closure =
     if isCurrentCCS ccs
        then do dflags <- getDynFlags
                emitRtsCall rtsUnitId (fsLit "enterFunCCS")
-                   [(baseExpr, AddrHint),
-                    (costCentreFrom dflags closure, AddrHint)] False
+                   [(baseExpr, AddrRep, AddrHint),
+                    (costCentreFrom dflags closure, AddrRep, AddrHint)] False
        else return () -- top-level function, nothing to do
 
 ifProfiling :: FCode () -> FCode ()
@@ -278,10 +280,10 @@ emitSetCCC cc tick push
 
 pushCostCentre :: LocalReg -> CmmExpr -> CostCentre -> FCode ()
 pushCostCentre result ccs cc
-  = emitRtsCallWithResult result AddrHint
+  = emitRtsCallWithResult result AddrRep AddrHint
         rtsUnitId
-        (fsLit "pushCostCentre") [(ccs,AddrHint),
-                                (CmmLit (mkCCostCentre cc), AddrHint)]
+        (fsLit "pushCostCentre") [(ccs, AddrRep, AddrHint),
+                                (CmmLit (mkCCostCentre cc), AddrRep, AddrHint)]
         False
 
 bumpSccCount :: DynFlags -> CmmExpr -> CmmAGraph
diff --git a/compiler/GHC/StgToCmm/Utils.hs b/compiler/GHC/StgToCmm/Utils.hs
index 3b145b5441d5863c17a8078d421965536fe9e699..d7b95bc9aa1d58a3d1732569eef5600bb2930e64 100644
--- a/compiler/GHC/StgToCmm/Utils.hs
+++ b/compiler/GHC/StgToCmm/Utils.hs
@@ -179,19 +179,19 @@ tagToClosure dflags tycon tag
 --
 -------------------------------------------------------------------------
 
-emitRtsCall :: UnitId -> FastString -> [(CmmExpr,ForeignHint)] -> Bool -> FCode ()
+emitRtsCall :: UnitId -> FastString -> [(CmmExpr, PrimRep, ForeignHint)] -> Bool -> FCode ()
 emitRtsCall pkg fun args safe = emitRtsCallGen [] (mkCmmCodeLabel pkg fun) args safe
 
-emitRtsCallWithResult :: LocalReg -> ForeignHint -> UnitId -> FastString
-        -> [(CmmExpr,ForeignHint)] -> Bool -> FCode ()
-emitRtsCallWithResult res hint pkg fun args safe
-   = emitRtsCallGen [(res,hint)] (mkCmmCodeLabel pkg fun) args safe
+emitRtsCallWithResult :: LocalReg -> PrimRep -> ForeignHint -> UnitId -> FastString
+        -> [(CmmExpr, PrimRep, ForeignHint)] -> Bool -> FCode ()
+emitRtsCallWithResult res rep hint pkg fun args safe
+   = emitRtsCallGen [(res, rep, hint)] (mkCmmCodeLabel pkg fun) args safe
 
 -- Make a call to an RTS C procedure
 emitRtsCallGen
-   :: [(LocalReg,ForeignHint)]
+   :: [(LocalReg, PrimRep, ForeignHint)]
    -> CLabel
-   -> [(CmmExpr,ForeignHint)]
+   -> [(CmmExpr, PrimRep, ForeignHint)]
    -> Bool -- True <=> CmmSafe call
    -> FCode ()
 emitRtsCallGen res lbl args safe
@@ -206,10 +206,14 @@ emitRtsCallGen res lbl args safe
       if safe then
         emit =<< mkCmmCall fun_expr res' args' updfr_off
       else do
-        let conv = ForeignConvention CCallConv arg_hints res_hints CmmMayReturn
+        let conv = ForeignConvention CCallConv arg_hints res_hints CmmMayReturn res_rep arg_reps
         emit $ mkUnsafeCall (ForeignTarget fun_expr conv) res' args'
-    (args', arg_hints) = unzip args
-    (res',  res_hints) = unzip res
+    (args', arg_reps, arg_hints) = unzip3 args
+    (res',  res_reps, res_hints) = unzip3 res
+    res_rep = case res_reps of
+      []  -> VoidRep
+      [r] -> r
+      _   -> error "can not deal with multiple return values"
     fun_expr = mkLblExpr lbl
 
 
@@ -608,8 +612,8 @@ emitUpdRemSetPush ptr = do
     emitRtsCall
       rtsUnitId
       (fsLit "updateRemembSetPushClosure_")
-      [(CmmReg (CmmGlobal BaseReg), AddrHint),
-       (ptr, AddrHint)]
+      [(CmmReg (CmmGlobal BaseReg), AddrRep, AddrHint),
+       (ptr, AddrRep, AddrHint)]
       False
 
 emitUpdRemSetPushThunk :: CmmExpr -- ^ the thunk
@@ -618,6 +622,6 @@ emitUpdRemSetPushThunk ptr = do
     emitRtsCall
       rtsUnitId
       (fsLit "updateRemembSetPushThunk_")
-      [(CmmReg (CmmGlobal BaseReg), AddrHint),
-       (ptr, AddrHint)]
+      [(CmmReg (CmmGlobal BaseReg), AddrRep, AddrHint),
+       (ptr, AddrRep, AddrHint)]
       False
diff --git a/compiler/cmm/CmmLayoutStack.hs b/compiler/cmm/CmmLayoutStack.hs
index e26f2878c0b1362be3308d1fc1b3394a923697cd..bd91e8171c6d5ad8227057aedf1f785382fb0cba 100644
--- a/compiler/cmm/CmmLayoutStack.hs
+++ b/compiler/cmm/CmmLayoutStack.hs
@@ -37,6 +37,7 @@ import Control.Monad.Fix
 import Data.Array as Array
 import Data.Bits
 import Data.List (nub)
+import TyCon (PrimRep (..))
 
 {- Note [Stack Layout]
 
@@ -1185,18 +1186,20 @@ lowerSafeForeignCall dflags block
 foreignLbl :: FastString -> CmmExpr
 foreignLbl name = CmmLit (CmmLabel (mkForeignLabel name Nothing ForeignLabelInExternalPackage IsFunction))
 
+-- void * suspendThread (StgRegTable *, bool interruptible);
 callSuspendThread :: DynFlags -> LocalReg -> Bool -> CmmNode O O
 callSuspendThread dflags id intrbl =
   CmmUnsafeForeignCall
        (ForeignTarget (foreignLbl (fsLit "suspendThread"))
-        (ForeignConvention CCallConv [AddrHint, NoHint] [AddrHint] CmmMayReturn))
+        (ForeignConvention CCallConv [AddrHint, NoHint] [AddrHint] CmmMayReturn AddrRep [AddrRep, Word32Rep]))
        [id] [baseExpr, mkIntExpr dflags (fromEnum intrbl)]
 
+-- StgRegTable * resumeThread  (void *);
 callResumeThread :: LocalReg -> LocalReg -> CmmNode O O
 callResumeThread new_base id =
   CmmUnsafeForeignCall
        (ForeignTarget (foreignLbl (fsLit "resumeThread"))
-            (ForeignConvention CCallConv [AddrHint] [AddrHint] CmmMayReturn))
+            (ForeignConvention CCallConv [AddrHint] [AddrHint] CmmMayReturn AddrRep [AddrRep]))
        [new_base] [CmmReg (CmmLocal id)]
 
 -- -----------------------------------------------------------------------------
diff --git a/compiler/cmm/CmmMachOp.hs b/compiler/cmm/CmmMachOp.hs
index becf5fab84740f3c3f1731b8ffd7cf2122711913..d4a8bff431998acb2c458cb46f43cb684302131d 100644
--- a/compiler/cmm/CmmMachOp.hs
+++ b/compiler/cmm/CmmMachOp.hs
@@ -17,7 +17,7 @@ module CmmMachOp
     , mo_32To8, mo_32To16, mo_WordTo8, mo_WordTo16, mo_WordTo32, mo_WordTo64
 
     -- CallishMachOp
-    , CallishMachOp(..), callishMachOpHints
+    , CallishMachOp(..), callishMachOpHints, callishMachOpReps
     , pprCallishMachOp
     , machOpMemcpyishAlign
 
@@ -32,6 +32,8 @@ import CmmType
 import Outputable
 import DynFlags
 
+import TyCon (PrimRep (..))
+
 -----------------------------------------------------------------------------
 --              MachOp
 -----------------------------------------------------------------------------
@@ -649,13 +651,93 @@ pprCallishMachOp mo = text (show mo)
 
 callishMachOpHints :: CallishMachOp -> ([ForeignHint], [ForeignHint])
 callishMachOpHints op = case op of
-  MO_Memcpy _  -> ([], [AddrHint,AddrHint,NoHint])
-  MO_Memset _  -> ([], [AddrHint,NoHint,NoHint])
-  MO_Memmove _ -> ([], [AddrHint,AddrHint,NoHint])
+  --  void * memcpy(void *restrict dst, const void *restrict src, size_t n);
+  MO_Memcpy _  -> ([], [AddrHint, AddrHint, NoHint])
+  -- void * memset(void *b, int c, size_t len);
+  MO_Memset _  -> ([], [AddrHint, SignedHint, NoHint])
+  -- void * memmove(void *dst, const void *src, size_t len);
+  MO_Memmove _ -> ([], [AddrHint, AddrHint, NoHint])
+  --  int memcmp(const void *s1, const void *s2, size_t n);
   MO_Memcmp _  -> ([], [AddrHint, AddrHint, NoHint])
   _            -> ([],[])
   -- empty lists indicate NoHint
 
+callishMachOpReps :: CallishMachOp -> (PrimRep, [PrimRep])
+callishMachOpReps op = case op of
+  MO_Memcpy _  -> (AddrRep, [AddrRep, AddrRep, WordRep])
+  MO_Memset _  -> (AddrRep, [AddrRep, IntRep,  WordRep])
+  MO_Memmove _ -> (AddrRep, [AddrRep, AddrRep, WordRep])
+  MO_Memcmp _  -> (IntRep, [AddrRep, AddrRep, WordRep])
+
+  MO_F64_Pwr   -> (DoubleRep, [DoubleRep, DoubleRep])
+
+  MO_F64_Sin   -> (DoubleRep, [DoubleRep])
+  MO_F64_Cos   -> (DoubleRep, [DoubleRep])
+  MO_F64_Tan   -> (DoubleRep, [DoubleRep])
+
+  MO_F64_Sinh  -> (DoubleRep, [DoubleRep])
+  MO_F64_Cosh  -> (DoubleRep, [DoubleRep])
+  MO_F64_Tanh  -> (DoubleRep, [DoubleRep])
+
+  MO_F64_Asin  -> (DoubleRep, [DoubleRep])
+  MO_F64_Acos  -> (DoubleRep, [DoubleRep])
+  MO_F64_Atan  -> (DoubleRep, [DoubleRep])
+
+  MO_F64_Asinh -> (DoubleRep, [DoubleRep])
+  MO_F64_Acosh -> (DoubleRep, [DoubleRep])
+  MO_F64_Atanh -> (DoubleRep, [DoubleRep])
+
+  MO_F64_Log   -> (DoubleRep, [DoubleRep])
+  MO_F64_Log1P -> (DoubleRep, [DoubleRep])
+  MO_F64_Exp   -> (DoubleRep, [DoubleRep])
+  MO_F64_ExpM1 -> (DoubleRep, [DoubleRep])
+
+  MO_F64_Fabs  -> (DoubleRep, [DoubleRep])
+  MO_F64_Sqrt  -> (DoubleRep, [DoubleRep])
+
+  MO_F32_Pwr   -> (FloatRep, [FloatRep, FloatRep])
+
+  MO_F32_Sin   -> (FloatRep, [FloatRep])
+  MO_F32_Cos   -> (FloatRep, [FloatRep])
+  MO_F32_Tan   -> (FloatRep, [FloatRep])
+
+  MO_F32_Sinh  -> (FloatRep, [FloatRep])
+  MO_F32_Cosh  -> (FloatRep, [FloatRep])
+  MO_F32_Tanh  -> (FloatRep, [FloatRep])
+
+  MO_F32_Asin  -> (FloatRep, [FloatRep])
+  MO_F32_Acos  -> (FloatRep, [FloatRep])
+  MO_F32_Atan  -> (FloatRep, [FloatRep])
+
+  MO_F32_Asinh -> (FloatRep, [FloatRep])
+  MO_F32_Acosh -> (FloatRep, [FloatRep])
+  MO_F32_Atanh -> (FloatRep, [FloatRep])
+
+  MO_F32_Log   -> (FloatRep, [FloatRep])
+  MO_F32_Log1P -> (FloatRep, [FloatRep])
+  MO_F32_Exp   -> (FloatRep, [FloatRep])
+  MO_F32_ExpM1 -> (FloatRep, [FloatRep])
+
+  MO_F32_Fabs  -> (FloatRep, [FloatRep])
+  MO_F32_Sqrt  -> (FloatRep, [FloatRep])
+
+  MO_PopCnt W8 -> (Word8Rep, [Word8Rep])
+  MO_PopCnt W16 -> (Word16Rep, [Word16Rep])
+  MO_PopCnt W32 -> (Word32Rep, [Word32Rep])
+  MO_PopCnt W64 -> (Word64Rep, [Word64Rep])
+
+  MO_BSwap W8 -> (Word8Rep, [Word8Rep])
+  MO_BSwap W16 -> (Word16Rep, [Word16Rep])
+  MO_BSwap W32 -> (Word32Rep, [Word32Rep])
+  MO_BSwap W64 -> (Word64Rep, [Word64Rep])
+
+  MO_BRev W8 -> (Word8Rep, [Word8Rep])
+  MO_BRev W16 -> (Word16Rep, [Word16Rep])
+  MO_BRev W32 -> (Word32Rep, [Word32Rep])
+  MO_BRev W64 -> (Word64Rep, [Word64Rep])
+
+  _            -> (VoidRep, [])
+
 -- | The alignment of a 'memcpy'-ish operation.
 machOpMemcpyishAlign :: CallishMachOp -> Maybe Int
 machOpMemcpyishAlign op = case op of
diff --git a/compiler/cmm/CmmNode.hs b/compiler/cmm/CmmNode.hs
index 9d6fa7f29b21733aa81a448945cb05c4c734804d..9b8b2e3fcb847d80835a5ab1d4eb0c956a6f4222 100644
--- a/compiler/cmm/CmmNode.hs
+++ b/compiler/cmm/CmmNode.hs
@@ -9,13 +9,12 @@
 {-# LANGUAGE UndecidableInstances #-}
 {-# LANGUAGE ScopedTypeVariables #-}
 
-
 -- CmmNode type for representation using Hoopl graphs.
 
 module CmmNode (
      CmmNode(..), CmmFormal, CmmActual, CmmTickish,
      UpdFrameOffset, Convention(..),
-     ForeignConvention(..), ForeignTarget(..), foreignTargetHints,
+     ForeignConvention(..), ForeignTarget(..), foreignTargetHints, foreignTargetReps,
      CmmReturnInfo(..),
      mapExp, mapExpDeep, wrapRecExp, foldExp, foldExpDeep, wrapRecExpf,
      mapExpM, mapExpDeepM, wrapRecExpM, mapSuccessors, mapCollectSuccessors,
@@ -46,6 +45,7 @@ import Data.List (tails,sortBy)
 import Unique (nonDetCmpUnique)
 import Util
 
+import TyCon (PrimRep)
 
 ------------------------
 -- CmmNode
@@ -281,12 +281,36 @@ data Convention
        -- (TODO: I don't think we need this --SDM)
   deriving( Eq )
 
+
+--------------------------------------------------
+-- Note [ForeignConvention PrimRep Carry]
+--
+-- With the advert of aarch64-darwin, a new AAPCS was brought into mainstream.
+-- This AAPCS requires us to pack arguments in excess of registers by their
+-- size on the stack as well as extends values as necessary.
+--
+-- GHC's internal represetnation of values ends up being either Words or Ints,
+-- both of which are assumed to be Word size[1].  Thus in the CodeGen there is no
+-- way to recover the origial size of arguments.
+--
+-- In GHC 9.2 this has been rectified in !4390 (commit 3e3555cc); however for
+-- GHCs before 9.2 to support aarch64-darwin, we need a more lightweight solution.
+-- Thus we inject the PrimRep signature during the desugar phase into the
+-- ForeignConvention and carry it through to the CodeGen where we can inspect
+-- it and produce the correct ABI calls.
+--
+-- See https://developer.apple.com/documentation/xcode/writing_arm64_code_for_apple_platforms
+--
+-- [1]: Int8 = I8# Int#, Word8 = W8# Word#
+
 data ForeignConvention
   = ForeignConvention
         CCallConv               -- Which foreign-call convention
         [ForeignHint]           -- Extra info about the args
         [ForeignHint]           -- Extra info about the result
         CmmReturnInfo
+        PrimRep                 -- return prim rep
+        [PrimRep]               -- argument prim reps
   deriving Eq
 
 data CmmReturnInfo
@@ -302,7 +326,11 @@ data ForeignTarget        -- The target of a foreign call
         CallishMachOp            -- Which one
   deriving Eq
 
-foreignTargetHints :: ForeignTarget -> ([ForeignHint], [ForeignHint])
+foreignTargetReps :: HasCallStack => ForeignTarget -> (PrimRep, [PrimRep])
+foreignTargetReps (ForeignTarget _ (ForeignConvention _ _ _ _ rr ras)) = (rr, ras)
+foreignTargetReps (PrimTarget op) = callishMachOpReps op
+
+foreignTargetHints :: HasCallStack => ForeignTarget -> ([ForeignHint], [ForeignHint])
 foreignTargetHints target
   = ( res_hints ++ repeat NoHint
     , arg_hints ++ repeat NoHint )
@@ -310,7 +338,7 @@ foreignTargetHints target
     (res_hints, arg_hints) =
        case target of
           PrimTarget op -> callishMachOpHints op
-          ForeignTarget _ (ForeignConvention _ arg_hints res_hints _) ->
+          ForeignTarget _ (ForeignConvention _ arg_hints res_hints _ _ _) ->
              (res_hints, arg_hints)
 
 --------------------------------------------------
@@ -376,7 +404,7 @@ instance DefinerOfRegs GlobalReg (CmmNode e x) where
           activeRegs = activeStgRegs platform
           activeCallerSavesRegs = filter (callerSaves platform) activeRegs
 
-          foreignTargetRegs (ForeignTarget _ (ForeignConvention _ _ _ CmmNeverReturns)) = []
+          foreignTargetRegs (ForeignTarget _ (ForeignConvention _ _ _ CmmNeverReturns _ _)) = []
           foreignTargetRegs _ = activeCallerSavesRegs
 
 -- Note [Safe foreign calls clobber STG registers]
diff --git a/compiler/cmm/CmmParse.y b/compiler/cmm/CmmParse.y
index e7527f8e50d9cb59c370cea4f5f15f8133f75265..4d2166d92055d8bdaa9cb57d6da382f074fb6362 100644
--- a/compiler/cmm/CmmParse.y
+++ b/compiler/cmm/CmmParse.y
@@ -262,6 +262,8 @@ import Data.Maybe
 import qualified Data.Map as M
 import qualified Data.ByteString.Char8 as BS8
 
+import TyCon (PrimRep(..))
+
 #include "HsVersions.h"
 }
 
@@ -1203,7 +1205,27 @@ foreignCall conv_string results_code expr_code args_code safety ret
                   expr' = adjCallTarget dflags conv expr args
                   (arg_exprs, arg_hints) = unzip args
                   (res_regs,  res_hints) = unzip results
-                  fc = ForeignConvention conv arg_hints res_hints ret
+                  res_cmm_tys = zip (map localRegType res_regs) res_hints
+                  arg_cmm_tys = zip (map (cmmExprType dflags) arg_exprs) arg_hints
+                  res_rep :: (CmmType, ForeignHint) -> PrimRep
+                  res_rep (_, AddrHint)                       = AddrRep
+                  res_rep (t, _)          | isGcPtrType t     = Word64Rep
+                  res_rep (t, SignedHint) | t `cmmEqType` b8  = Int8Rep
+                  res_rep (t, SignedHint) | t `cmmEqType` b16 = Int16Rep
+                  res_rep (t, SignedHint) | t `cmmEqType` b32 = Int32Rep
+                  res_rep (t, SignedHint) | t `cmmEqType` b64 = Int64Rep
+                  res_rep (t, NoHint)     | t `cmmEqType` b8  = Word8Rep
+                  res_rep (t, NoHint)     | t `cmmEqType` b16 = Word16Rep
+                  res_rep (t, NoHint)     | t `cmmEqType` b32 = Word32Rep
+                  res_rep (t, NoHint)     | t `cmmEqType` b64 = Word64Rep
+                  res_rep (t, _)          | t `cmmEqType` f32 = FloatRep
+                  res_rep (t, _)          | t `cmmEqType` f64 = DoubleRep
+
+                  ret_rep = case (map res_rep res_cmm_tys) of
+                    [] -> VoidRep
+                    [r] -> r
+                    x -> (panic $ show x)
+                  fc = ForeignConvention conv arg_hints res_hints ret ret_rep (map res_rep arg_cmm_tys)
                   target = ForeignTarget expr' fc
           _ <- code $ emitForeignCall safety res_regs target arg_exprs
           return ()
diff --git a/compiler/cmm/CmmUtils.hs b/compiler/cmm/CmmUtils.hs
index 1a28f94a0c211446207cb52b2e248a54aea4a539..1b662121272f68f99912370a1f27d0358dac4666 100644
--- a/compiler/cmm/CmmUtils.hs
+++ b/compiler/cmm/CmmUtils.hs
@@ -11,7 +11,7 @@
 
 module CmmUtils(
         -- CmmType
-        primRepCmmType, slotCmmType, slotForeignHint,
+        primRepCmmType, slotCmmType,
         typeCmmType, typeForeignHint, primRepForeignHint,
 
         -- CmmLit
@@ -156,13 +156,6 @@ primRepForeignHint FloatRep     = NoHint
 primRepForeignHint DoubleRep    = NoHint
 primRepForeignHint (VecRep {})  = NoHint
 
-slotForeignHint :: SlotTy -> ForeignHint
-slotForeignHint PtrSlot       = AddrHint
-slotForeignHint WordSlot      = NoHint
-slotForeignHint Word64Slot    = NoHint
-slotForeignHint FloatSlot     = NoHint
-slotForeignHint DoubleSlot    = NoHint
-
 typeForeignHint :: UnaryType -> ForeignHint
 typeForeignHint = primRepForeignHint . typePrimRep1
 
diff --git a/compiler/cmm/PprC.hs b/compiler/cmm/PprC.hs
index 3ccf944c8e590f8873619d8d2b738a1f6e39f5d9..15560e2a5de2e6209364a997cc8e6c2921419a81 100644
--- a/compiler/cmm/PprC.hs
+++ b/compiler/cmm/PprC.hs
@@ -235,7 +235,7 @@ pprStmt stmt =
         hresults = zip results res_hints
         hargs    = zip args arg_hints
 
-        ForeignConvention cconv _ _ ret = conv
+        ForeignConvention cconv _ _ ret _ _ = conv
 
         cast_fn = parens (cCast (pprCFunType (char '*') cconv hresults hargs) fn)
 
diff --git a/compiler/cmm/PprCmm.hs b/compiler/cmm/PprCmm.hs
index 397a6660229f5de22a0b6aed1a1a62d8b113dc8b..8662a32728e711fec5c543258c521c2a3d60f8d7 100644
--- a/compiler/cmm/PprCmm.hs
+++ b/compiler/cmm/PprCmm.hs
@@ -155,7 +155,7 @@ pprConvention  Slow                 = text "<slow-convention>"
 pprConvention  GC                   = text "<gc-convention>"
 
 pprForeignConvention :: ForeignConvention -> SDoc
-pprForeignConvention (ForeignConvention c args res ret) =
+pprForeignConvention (ForeignConvention c args res ret _ _) =
           doubleQuotes (ppr c) <+> text "arg hints: " <+> ppr args <+> text " result hints: " <+> ppr res <+> ppr ret
 
 pprReturnInfo :: CmmReturnInfo -> SDoc
diff --git a/compiler/deSugar/DsCCall.hs b/compiler/deSugar/DsCCall.hs
index 3df8ee11e002bc96bad610a27f3b17e39f2ecd72..f306dcaf1a4643a27aa6fce1d70198d8ddfef79e 100644
--- a/compiler/deSugar/DsCCall.hs
+++ b/compiler/deSugar/DsCCall.hs
@@ -47,6 +47,7 @@ import Util
 
 import Data.Maybe
 
+import RepType (mkCCallSpec)
 {-
 Desugaring of @ccall@s consists of adding some state manipulation,
 unboxing any boxed primitive arguments and boxing the result if
@@ -97,8 +98,14 @@ dsCCall lbl args may_gc result_ty
        uniq <- newUnique
        dflags <- getDynFlags
        let
+           arg_tys = map exprType args
+
+           raw_res_ty = case tcSplitIOType_maybe result_ty of
+             Just (_ioTyCon, res_ty) -> res_ty
+             Nothing                 -> result_ty
+
            target = StaticTarget NoSourceText lbl Nothing True
-           the_fcall    = CCall (CCallSpec target CCallConv may_gc)
+           the_fcall    = CCall (mkCCallSpec target CCallConv may_gc raw_res_ty arg_tys)
            the_prim_app = mkFCall dflags uniq the_fcall unboxed_args ccall_result_ty
        return (foldr ($) (res_wrapper the_prim_app) arg_wrappers)
 
diff --git a/compiler/deSugar/DsForeign.hs b/compiler/deSugar/DsForeign.hs
index 6a6a2eece83fe15a3e05378b190955eadb3627c3..b8e6947d2be2b67d306ce6c15a5f8d3661354c21 100644
--- a/compiler/deSugar/DsForeign.hs
+++ b/compiler/deSugar/DsForeign.hs
@@ -173,9 +173,13 @@ dsCImport id co (CLabel cid) cconv _ _ = do
     return ([(id, rhs')], empty, empty)
 
 dsCImport id co (CFunction target) cconv@PrimCallConv safety _
-  = dsPrimCall id co (CCall (CCallSpec target cconv safety))
+  = dsPrimCall id co (CCall (mkCCallSpec target cconv safety
+                                         (panic "Missing Return PrimRep")
+                                         (panic "Missing Argument PrimReps")))
 dsCImport id co (CFunction target) cconv safety mHeader
-  = dsFCall id co (CCall (CCallSpec target cconv safety)) mHeader
+  = dsFCall id co (CCall (mkCCallSpec target cconv safety
+                                      (panic "Missing Return PrimRep")
+                                      (panic "Missing Argument PrimReps"))) mHeader
 dsCImport id co CWrapper cconv _ _
   = dsFExportDynamic id co cconv
 
@@ -203,7 +207,7 @@ fun_type_arg_stdcall_info _ _other_conv _
 
 dsFCall :: Id -> Coercion -> ForeignCall -> Maybe Header
         -> DsM ([(Id, Expr TyVar)], SDoc, SDoc)
-dsFCall fn_id co fcall mDeclHeader = do
+dsFCall fn_id co (CCall (CCallSpec target cconv safety _ _)) mDeclHeader = do
     let
         ty                   = pFst $ coercionKind co
         (tv_bndrs, rho)      = tcSplitForAllVarBndrs ty
@@ -221,16 +225,19 @@ dsFCall fn_id co fcall mDeclHeader = do
     work_uniq  <- newUnique
 
     dflags <- getDynFlags
-    (fcall', cDoc) <-
-              case fcall of
+
+    let
+      fcall = CCall (mkCCallSpec target cconv safety io_res_ty arg_tys)
+
+    (fcall', cDoc) <- case fcall of
               CCall (CCallSpec (StaticTarget _ cName mUnitId isFun)
-                               CApiConv safety) ->
+                               CApiConv safety _ _) ->
                do wrapperName <- mkWrapperName "ghc_wrapper" (unpackFS cName)
-                  let fcall' = CCall (CCallSpec
+                  let fcall' = CCall (mkCCallSpec
                                       (StaticTarget NoSourceText
                                                     wrapperName mUnitId
                                                     True)
-                                      CApiConv safety)
+                                      CApiConv safety io_res_ty arg_tys)
                       c = includes
                        $$ fun_proto <+> braces (cRet <> semi)
                       includes = vcat [ text "#include \"" <> ftext h
@@ -304,7 +311,7 @@ for calling convention they are really prim ops.
 
 dsPrimCall :: Id -> Coercion -> ForeignCall
            -> DsM ([(Id, Expr TyVar)], SDoc, SDoc)
-dsPrimCall fn_id co fcall = do
+dsPrimCall fn_id co (CCall (CCallSpec target cconv safety _ _)) = do
     let
         ty                   = pFst $ coercionKind co
         (tvs, fun_ty)        = tcSplitForAllTys ty
@@ -315,6 +322,7 @@ dsPrimCall fn_id co fcall = do
     ccall_uniq <- newUnique
     dflags <- getDynFlags
     let
+        fcall = CCall (mkCCallSpec target cconv safety io_res_ty arg_tys)
         call_app = mkFCall dflags ccall_uniq fcall (map Var args) io_res_ty
         rhs      = mkLams tvs (mkLams args call_app)
         rhs'     = Cast rhs co
@@ -820,6 +828,12 @@ primTyDescChar dflags ty
  = case typePrimRep1 (getPrimTyOf ty) of
      IntRep      -> signed_word
      WordRep     -> unsigned_word
+     Int8Rep     -> 'B'
+     Word8Rep    -> 'b'
+     Int16Rep    -> 'S'
+     Word16Rep   -> 's'
+     Int32Rep    -> 'W'
+     Word32Rep   -> 'w'
      Int64Rep    -> 'L'
      Word64Rep   -> 'l'
      AddrRep     -> 'p'
diff --git a/compiler/ghci/ByteCodeGen.hs b/compiler/ghci/ByteCodeGen.hs
index fb60c21f9d587b77510a7f6311191c9d9750ba43..2b761a7186a8216ab5e9c9cffa614c90806222ed 100644
--- a/compiler/ghci/ByteCodeGen.hs
+++ b/compiler/ghci/ByteCodeGen.hs
@@ -1151,7 +1151,7 @@ generateCCall
     -> Id                      -- of target, for type info
     -> [AnnExpr' Id DVarSet]   -- args (atoms)
     -> BcM BCInstrList
-generateCCall d0 s p (CCallSpec target cconv safety) fn args_r_to_l
+generateCCall d0 s p (CCallSpec target cconv safety _rep_ret _rep_args) fn args_r_to_l
  = do
      dflags <- getDynFlags
 
@@ -1359,6 +1359,12 @@ primRepToFFIType dflags r
      VoidRep     -> FFIVoid
      IntRep      -> signed_word
      WordRep     -> unsigned_word
+     Int8Rep     -> FFISInt8
+     Word8Rep    -> FFIUInt8
+     Int16Rep    -> FFISInt16
+     Word16Rep   -> FFIUInt16
+     Int32Rep    -> FFISInt32
+     Word32Rep   -> FFIUInt32
      Int64Rep    -> FFISInt64
      Word64Rep   -> FFIUInt64
      AddrRep     -> FFIPointer
@@ -1834,7 +1840,7 @@ multiValException = throwGhcException (ProgramError
 
 -- | Indicate if the calling convention is supported
 isSupportedCConv :: CCallSpec -> Bool
-isSupportedCConv (CCallSpec _ cconv _) = case cconv of
+isSupportedCConv (CCallSpec _ cconv _ _ _) = case cconv of
    CCallConv            -> True     -- we explicitly pattern match on every
    StdCallConv          -> True     -- convention to ensure that a warning
    PrimCallConv         -> False    -- is triggered when a new one is added
diff --git a/compiler/llvmGen/LlvmCodeGen/CodeGen.hs b/compiler/llvmGen/LlvmCodeGen/CodeGen.hs
index 8b3ef24e1b5ad7c164fc1bb69ddfbffc3150c2b1..2c4a26669600821d9dbc9a505d30fce5f2e315db 100644
--- a/compiler/llvmGen/LlvmCodeGen/CodeGen.hs
+++ b/compiler/llvmGen/LlvmCodeGen/CodeGen.hs
@@ -43,6 +43,8 @@ import qualified Data.Semigroup as Semigroup
 import Data.List ( nub )
 import Data.Maybe ( catMaybes )
 
+import TyCon (PrimRep(..))
+
 type Atomic = Bool
 type LlvmStatements = OrdList LlvmStatement
 
@@ -400,23 +402,39 @@ genCall t@(PrimTarget (MO_SubWordC w)) [dstV, dstO] [lhs, rhs] =
 -- Handle all other foreign calls and prim ops.
 genCall target res args = runStmtsDecls $ do
     dflags <- getDynFlags
+    platform <- lift $ getLlvmPlatform
+
 
-    -- parameter types
-    let arg_type (_, AddrHint) = i8Ptr
-        -- cast pointers to i8*. Llvm equivalent of void*
-        arg_type (expr, _) = cmmToLlvmType $ cmmExprType dflags expr
+    let primRepToLlvmTy VoidRep     = (Unsigned, LMVoid)
+        primRepToLlvmTy IntRep      = (Signed, widthToLlvmInt (cIntWidth dflags))
+        primRepToLlvmTy Int8Rep     = (Signed, i8)
+        primRepToLlvmTy Int16Rep    = (Signed, i16)
+        primRepToLlvmTy Int32Rep    = (Signed, i32)
+        primRepToLlvmTy Int64Rep    = (Signed, i64)
+        primRepToLlvmTy WordRep     = (Unsigned, widthToLlvmInt (wordWidth dflags))
+        primRepToLlvmTy Word8Rep    = (Unsigned, i8)
+        primRepToLlvmTy Word16Rep   = (Unsigned, i16)
+        primRepToLlvmTy Word32Rep   = (Unsigned, i32)
+        primRepToLlvmTy Word64Rep   = (Unsigned, i64)
+        primRepToLlvmTy FloatRep    = (Signed, LMFloat)
+        primRepToLlvmTy DoubleRep   = (Signed, LMDouble)
+        -- pointers
+        primRepToLlvmTy AddrRep     = (Unsigned, i8Ptr)
+        primRepToLlvmTy LiftedRep   = (Unsigned, i8Ptr)
+        primRepToLlvmTy UnliftedRep = (Unsigned, i8Ptr)
+        primRepToLlvmTy _ = panic "LlvmCodeGen.CodeGen.genCall: Invalid primRep; cannot convert to llvm type"
 
     -- ret type
-    let ret_type [] = LMVoid
-        ret_type [(_, AddrHint)] = i8Ptr
-        ret_type [(reg, _)]      = cmmToLlvmType $ localRegType reg
-        ret_type t = panic $ "genCall: Too many return values! Can only handle"
-                        ++ " 0 or 1, given " ++ show (length t) ++ "."
+    let -- similarly to arg_type_cmm, we may need to widen/narrow the result.
+        -- most likely widen, as cmm regs will be 64bit wide.
+        ret_type_cmm []              = LMVoid
+        ret_type_cmm [(_, AddrHint)] = i8Ptr
+        ret_type_cmm [(reg, _)]      = cmmToLlvmType $ localRegType reg
+        ret_type_cmm _               = panic "LlvmCodeGen.CodeGen.genCall: invalid return type; we only support single return values"
 
     -- extract Cmm call convention, and translate to LLVM call convention
-    platform <- lift $ getLlvmPlatform
     let lmconv = case target of
-            ForeignTarget _ (ForeignConvention conv _ _ _) ->
+            ForeignTarget _ (ForeignConvention conv _ _ _ _ _) ->
               case conv of
                  StdCallConv  -> case platformArch platform of
                                  ArchX86    -> CC_X86_Stdcc
@@ -442,22 +460,25 @@ genCall target res args = runStmtsDecls $ do
                 | otherwise     = llvmStdFunAttrs
 
         never_returns = case target of
-             ForeignTarget _ (ForeignConvention _ _ _ CmmNeverReturns) -> True
+             ForeignTarget _ (ForeignConvention _ _ _ CmmNeverReturns _ _) -> True
              _ -> False
 
     -- fun type
-    let (res_hints, arg_hints) = foreignTargetHints target
-    let args_hints = zip args arg_hints
+    let (res_hints, _arg_hints) = foreignTargetHints target
+    let (ret_rep, args_rep) = foreignTargetReps target
+
     let ress_hints = zip res  res_hints
     let ccTy  = StdCall -- tail calls should be done through CmmJump
-    let retTy = ret_type ress_hints
-    let argTy = tysToParams $ map arg_type args_hints
+
+    let retTyCmm = ret_type_cmm ress_hints
+
+    let argTy = tysToParams $ map (snd . primRepToLlvmTy) args_rep
+    let retTy = snd $ primRepToLlvmTy ret_rep
     let funTy = \name -> LMFunction $ LlvmFunctionDecl name ExternallyVisible
                              lmconv retTy FixedArgs argTy (llvmFunAlign dflags)
 
 
-    argVars <- arg_varsW args_hints ([], nilOL, [])
-    fptr    <- getFunPtrW funTy target
+    let args_with_reps = zip args (map primRepToLlvmTy args_rep)
 
     let doReturn | ccTy == TailCall  = statement $ Return Nothing
                  | never_returns     = statement $ Unreachable
@@ -466,19 +487,23 @@ genCall target res args = runStmtsDecls $ do
     doTrashStmts
 
     -- make the actual call
+    argVars <- arg_varsW2 args_with_reps ([], nilOL, [])
+    fptr    <- getFunPtrW funTy target
     case retTy of
         LMVoid -> do
             statement $ Expr $ Call ccTy fptr argVars fnAttrs
 
         _ -> do
-            v1 <- doExprW retTy $ Call ccTy fptr argVars fnAttrs
+            let (signage, retTy) = primRepToLlvmTy ret_rep
+            v0 <- doExprW retTy $ Call ccTy fptr argVars fnAttrs
+            v1 <- castVarW signage v0 retTyCmm
             -- get the return register
             let ret_reg [reg] = reg
                 ret_reg t = panic $ "genCall: Bad number of registers! Can only handle"
                                 ++ " 1, given " ++ show (length t) ++ "."
             let creg = ret_reg res
             vreg <- getCmmRegW (CmmLocal creg)
-            if retTy == pLower (getVarType vreg)
+            if retTyCmm == pLower (getVarType vreg)
                 then do
                     statement $ Store v1 vreg
                     doReturn
@@ -580,8 +605,7 @@ genCallSimpleCast w t@(PrimTarget op) [dst] args = do
 
     dstV                        <- getCmmReg (CmmLocal dst)
 
-    let (_, arg_hints) = foreignTargetHints t
-    let args_hints = zip args arg_hints
+    let args_hints = zip args (snd (foreignTargetHints t))
     (argsV, stmts2, top2)       <- arg_vars args_hints ([], nilOL, [])
     (argsV', stmts4)            <- castVars Signed $ zip argsV [width]
     (retV, s1)                  <- doExpr width $ Call StdCall fptr argsV' []
@@ -668,6 +692,14 @@ arg_varsW xs ys = do
     tell $ LlvmAccum stmts decls
     return vars
 
+arg_varsW2 :: [(CmmActual, (Signage, LlvmType))]
+           -> ([LlvmVar], LlvmStatements, [LlvmCmmDecl])
+           -> WriterT LlvmAccum LlvmM [LlvmVar]
+arg_varsW2 xs ys = do
+  (vars, stmts, decls) <- lift $ arg_vars2 xs ys
+  tell $ LlvmAccum stmts decls
+  return vars
+
 -- | Conversion of call arguments.
 arg_vars :: [(CmmActual, ForeignHint)]
          -> ([LlvmVar], LlvmStatements, [LlvmCmmDecl])
@@ -678,14 +710,11 @@ arg_vars [] (vars, stmts, tops)
 
 arg_vars ((e, AddrHint):rest) (vars, stmts, tops)
   = do (v1, stmts', top') <- exprToVar e
-       dflags <- getDynFlags
        let op = case getVarType v1 of
                ty | isPointer ty -> LM_Bitcast
                ty | isInt ty     -> LM_Inttoptr
 
-               a  -> panic $ "genCall: Can't cast llvmType to i8*! ("
-                           ++ showSDoc dflags (ppr a) ++ ")"
-
+               a -> pprPanic "genCall: Can't cast llvmType to i8*!" (ppr a)
        (v2, s1) <- doExpr i8Ptr $ Cast op v1 i8Ptr
        arg_vars rest (vars ++ [v2], stmts `appOL` stmts' `snocOL` s1,
                                tops ++ top')
@@ -695,6 +724,27 @@ arg_vars ((e, _):rest) (vars, stmts, tops)
        arg_vars rest (vars ++ [v1], stmts `appOL` stmts', tops ++ top')
 
 
+arg_vars2 :: [(CmmActual, (Signage, LlvmType))]
+          -> ([LlvmVar], LlvmStatements, [LlvmCmmDecl])
+          -> LlvmM ([LlvmVar], LlvmStatements, [LlvmCmmDecl])
+arg_vars2 [] x = return x
+arg_vars2 ((e, (_s, ty0)):rest) (vars, stmts, tops)
+  | ty0 == i8Ptr
+  = do (v1, stmts', top') <- exprToVar e
+       let op = case getVarType v1 of
+             ty | isPointer ty -> LM_Bitcast
+             ty | isInt ty     -> LM_Inttoptr
+
+             a -> pprPanic "genCall: Can't cast llvmType to i8*!" (ppr a)
+       (v2, s1) <- doExpr i8Ptr $ Cast op v1 i8Ptr
+       arg_vars2 rest (vars ++ [v2], stmts `appOL` stmts' `snocOL` s1,
+                      tops ++ top')
+
+arg_vars2 ((e, (s, ty)):rest) (vars, stmts, tops)
+  = do (v1, stmts', top') <- exprToVar e
+       (v2, s1) <- castVar s v1 ty
+       arg_vars2 rest (vars ++ [v2], stmts `appOL` stmts' `snocOL` s1, tops ++ top')
+
 -- | Cast a collection of LLVM variables to specific types.
 castVarsW :: Signage
           -> [(LlvmVar, LlvmType)]
@@ -739,6 +789,12 @@ castVar signage v t | getVarType v == t
             Signed      -> LM_Sext
             Unsigned    -> LM_Zext
 
+castVarW :: Signage -> LlvmVar -> LlvmType -> WriterT LlvmAccum LlvmM LlvmVar
+castVarW signage var ty = do
+  (var, stmt) <- lift $ castVar signage var ty
+  statement $ stmt
+  return var
+
 
 cmmPrimOpRetValSignage :: CallishMachOp -> Signage
 cmmPrimOpRetValSignage mop = case mop of
diff --git a/compiler/nativeGen/X86/CodeGen.hs b/compiler/nativeGen/X86/CodeGen.hs
index 702cd98e77f7235e20257dba1512061a78ccded1..f02f04d68f42edd58930ccb02a67f909de2a2ed6 100644
--- a/compiler/nativeGen/X86/CodeGen.hs
+++ b/compiler/nativeGen/X86/CodeGen.hs
@@ -2440,7 +2440,12 @@ genCCall' dflags is32Bit (PrimTarget (MO_PopCnt width)) dest_regs@[dst]
                           CallReference lbl
             let target = ForeignTarget targetExpr (ForeignConvention CCallConv
                                                            [NoHint] [NoHint]
-                                                           CmmMayReturn)
+                                                           CmmMayReturn
+                                                           -- this is only safe, because
+                                                           -- genCCall32' and genCCall64'
+                                                           -- never inspect these arguments.
+                                                           (panic "Missing Return PrimRep")
+                                                           (panic "Missing Argument PrimReps"))
             genCCall' dflags is32Bit target dest_regs args bid
   where
     format = intFormat width
@@ -2473,7 +2478,12 @@ genCCall' dflags is32Bit (PrimTarget (MO_Pdep width)) dest_regs@[dst]
                           CallReference lbl
             let target = ForeignTarget targetExpr (ForeignConvention CCallConv
                                                            [NoHint] [NoHint]
-                                                           CmmMayReturn)
+                                                           CmmMayReturn
+                                                           -- this is only safe, because
+                                                           -- genCCall32' and genCCall64'
+                                                           -- never inspect these arguments.
+                                                           (panic "Missing Return PrimRep")
+                                                           (panic "Missing Argument PrimReps"))
             genCCall' dflags is32Bit target dest_regs args bid
   where
     format = intFormat width
@@ -2506,7 +2516,12 @@ genCCall' dflags is32Bit (PrimTarget (MO_Pext width)) dest_regs@[dst]
                           CallReference lbl
             let target = ForeignTarget targetExpr (ForeignConvention CCallConv
                                                            [NoHint] [NoHint]
-                                                           CmmMayReturn)
+                                                           CmmMayReturn
+                                                           -- this is only safe, because
+                                                           -- genCCall32' and genCCall64'
+                                                           -- never inspect these arguments.
+                                                           (panic "Missing Return PrimRep")
+                                                           (panic "Missing Argument PrimReps"))
             genCCall' dflags is32Bit target dest_regs args bid
   where
     format = intFormat width
@@ -2518,7 +2533,12 @@ genCCall' dflags is32Bit (PrimTarget (MO_Clz width)) dest_regs@[dst] args@[src]
     targetExpr <- cmmMakeDynamicReference dflags CallReference lbl
     let target = ForeignTarget targetExpr (ForeignConvention CCallConv
                                            [NoHint] [NoHint]
-                                           CmmMayReturn)
+                                           CmmMayReturn
+                                           -- this is only safe, because
+                                           -- genCCall32' and genCCall64'
+                                           -- never inspect these arguments.
+                                           (panic "Missing Return PrimRep")
+                                           (panic "Missing Argument PrimReps"))
     genCCall' dflags is32Bit target dest_regs args bid
 
   | otherwise = do
@@ -2561,7 +2581,12 @@ genCCall' dflags is32Bit (PrimTarget (MO_UF_Conv width)) dest_regs args bid = do
                   CallReference lbl
     let target = ForeignTarget targetExpr (ForeignConvention CCallConv
                                            [NoHint] [NoHint]
-                                           CmmMayReturn)
+                                           CmmMayReturn
+                                           -- this is only safe, because
+                                           -- genCCall32' and genCCall64'
+                                           -- never inspect these arguments.
+                                           (panic "Missing Return PrimRep")
+                                           (panic "Missing Argument PrimReps"))
     genCCall' dflags is32Bit target dest_regs args bid
   where
     lbl = mkCmmCodeLabel primUnitId (fsLit (word2FloatLabel width))
@@ -2900,7 +2925,7 @@ genCCall32' dflags target dest_regs args = do
               -- We have to pop any stack padding we added
               -- even if we are doing stdcall, though (#5052)
             pop_size
-               | ForeignConvention StdCallConv _ _ _ <- cconv = arg_pad_size
+               | ForeignConvention StdCallConv _ _ _ _ _ <- cconv = arg_pad_size
                | otherwise = tot_arg_size
 
             call = callinsns `appOL`
@@ -3274,7 +3299,12 @@ outOfLineCmmOp bid mop res args
       dflags <- getDynFlags
       targetExpr <- cmmMakeDynamicReference dflags CallReference lbl
       let target = ForeignTarget targetExpr
-                           (ForeignConvention CCallConv [] [] CmmMayReturn)
+                           (ForeignConvention CCallConv [] [] CmmMayReturn
+                                              -- this is only safe, because
+                                              -- genCCall32' and genCCall64'
+                                              -- never inspect these arguments.
+                                              (panic "Missing Return PrimRep")
+                                              (panic "Missing Argument PrimReps"))
 
       -- We know foreign calls results in no new basic blocks, so we can ignore
       -- the returned block id.
diff --git a/compiler/prelude/ForeignCall.hs b/compiler/prelude/ForeignCall.hs
index c143b1ed1e9e917db73ea1f7a86db640680e2adb..d73befa3c44c1a868aec18511dbfa1f65dc13760 100644
--- a/compiler/prelude/ForeignCall.hs
+++ b/compiler/prelude/ForeignCall.hs
@@ -28,6 +28,7 @@ import BasicTypes ( SourceText, pprWithSourceText )
 
 import Data.Char
 import Data.Data
+import {-# SOURCE #-} TyCon (PrimRep)
 
 {-
 ************************************************************************
@@ -41,7 +42,7 @@ newtype ForeignCall = CCall CCallSpec
   deriving Eq
 
 isSafeForeignCall :: ForeignCall -> Bool
-isSafeForeignCall (CCall (CCallSpec _ _ safe)) = playSafe safe
+isSafeForeignCall (CCall (CCallSpec _ _ safe _ _)) = playSafe safe
 
 -- We may need more clues to distinguish foreign calls
 -- but this simple printer will do for now
@@ -100,6 +101,8 @@ data CCallSpec
   =  CCallSpec  CCallTarget     -- What to call
                 CCallConv       -- Calling convention to use.
                 Safety
+                PrimRep         -- result
+                [PrimRep]       -- args
   deriving( Eq )
 
 -- The call target:
@@ -197,7 +200,7 @@ instance Outputable CExportSpec where
   ppr (CExportStatic _ str _) = pprCLabelString str
 
 instance Outputable CCallSpec where
-  ppr (CCallSpec fun cconv safety)
+  ppr (CCallSpec fun cconv safety _ret_ty _arg_tys)
     = hcat [ whenPprDebug callconv, ppr_fun fun ]
     where
       callconv = text "{-" <> ppr cconv <> text "-}"
@@ -283,15 +286,19 @@ instance Binary CExportSpec where
           return (CExportStatic ss aa ab)
 
 instance Binary CCallSpec where
-    put_ bh (CCallSpec aa ab ac) = do
+    put_ bh (CCallSpec aa ab ac ad ae) = do
             put_ bh aa
             put_ bh ab
             put_ bh ac
+            put_ bh ad
+            put_ bh ae
     get bh = do
           aa <- get bh
           ab <- get bh
           ac <- get bh
-          return (CCallSpec aa ab ac)
+          ad <- get bh
+          ae <- get bh
+          return (CCallSpec aa ab ac ad ae)
 
 instance Binary CCallTarget where
     put_ bh (StaticTarget ss aa ab ac) = do
diff --git a/compiler/prelude/TysWiredIn.hs-boot b/compiler/prelude/TysWiredIn.hs-boot
index 023682fe5b7cc3a81832d8ee066e82520fa46baf..f695a1d7d9a48dc08462be87de1177d7b3be7144 100644
--- a/compiler/prelude/TysWiredIn.hs-boot
+++ b/compiler/prelude/TysWiredIn.hs-boot
@@ -5,6 +5,7 @@ import {-# SOURCE #-} TyCoRep    (Type, Kind)
 
 import BasicTypes (Arity, TupleSort)
 import Name (Name)
+import Unique (Unique)
 
 listTyCon :: TyCon
 typeNatKind, typeSymbolKind :: Type
@@ -43,3 +44,5 @@ unboxedTupleKind :: [Type] -> Type
 mkPromotedListTy :: Type -> [Type] -> Type
 
 tupleTyConName :: TupleSort -> Arity -> Name
+
+unitTyConKey :: Unique
diff --git a/compiler/simplStg/RepType.hs b/compiler/simplStg/RepType.hs
index 75fde79d87adc86cd9d893b3868ffb8b65e0d969..d849d3e67effb2e99e7afd9962ef50aa99b1eb7e 100644
--- a/compiler/simplStg/RepType.hs
+++ b/compiler/simplStg/RepType.hs
@@ -18,7 +18,9 @@ module RepType
 
     -- * Unboxed sum representation type
     ubxSumRepType, layoutUbxSum, typeSlotTy, SlotTy (..),
-    slotPrimRep, primRepSlot
+    slotPrimRep, primRepSlot,
+
+    mkCCallSpec
   ) where
 
 #include "HsVersions.h"
@@ -35,11 +37,55 @@ import TyCoRep
 import Type
 import Util
 import TysPrim
-import {-# SOURCE #-} TysWiredIn ( anyTypeOfKind )
+import {-# SOURCE #-} TysWiredIn ( anyTypeOfKind, unitTyConKey )
+import {-# SOURCE #-} TcType (tcSplitIOType_maybe)
 
 import Data.List (sort)
 import qualified Data.IntSet as IS
 
+import ForeignCall (CCallSpec(..), CCallTarget(..), CCallConv(..), Safety(..), CCallTarget(..))
+
+mkCCallSpec :: CCallTarget -> CCallConv -> Safety -> Type -> [Type] -> CCallSpec
+mkCCallSpec t c s r as = CCallSpec t c s (myTypePrimRep1 r') (map myTypePrimRep1 as')
+        where r'= case tcSplitIOType_maybe r of
+                Just (_ioTyCon, res_ty) -> res_ty
+                Nothing                 -> r
+
+              -- for dynamic targets, we want to drop the first
+              -- represetnation, as that is the stable pointer to
+              -- the fucntion we are invocing, which is irrelevant
+              -- for the argument repsenstation.
+              as' = case t of
+                DynamicTarget -> tail as
+                _             -> as
+
+              typeTyCon :: Type -> TyCon
+              typeTyCon ty
+                | Just (tc, _) <- tcSplitTyConApp_maybe (unwrapType ty)
+                = tc
+                | otherwise
+                = pprPanic "DsForeign.typeTyCon" (ppr ty)
+
+              myTypePrimRep1 :: Type -> PrimRep
+              myTypePrimRep1 t = case typePrimRep1 t of
+                LiftedRep -> case getUnique (typeTyCon t) of
+                  key | key == int8TyConKey   -> Int8Rep
+                      | key == int16TyConKey  -> Int16Rep
+                      | key == int32TyConKey  -> Int32Rep
+                      | key == int64TyConKey  -> Int64Rep
+                      | key == word8TyConKey  -> Word8Rep
+                      | key == word16TyConKey -> Word16Rep
+                      | key == word32TyConKey -> Word32Rep
+                      | key == word64TyConKey -> Word64Rep
+                      | key == intTyConKey    -> IntRep
+                      | key == wordTyConKey   -> WordRep
+                      | key == floatTyConKey  -> FloatRep
+                      | key == doubleTyConKey -> DoubleRep
+                      | key == unitTyConKey   -> VoidRep
+                  _                           -> LiftedRep
+                other     -> other
+
+
 {- **********************************************************************
 *                                                                       *
                 Representation types
diff --git a/compiler/stgSyn/CoreToStg.hs b/compiler/stgSyn/CoreToStg.hs
index 634b74be5bc6017c1ab149c3ba980ac5bf173ddb..4e75ad04a273c8704e9392b4bb973de2abb476f0 100644
--- a/compiler/stgSyn/CoreToStg.hs
+++ b/compiler/stgSyn/CoreToStg.hs
@@ -561,7 +561,7 @@ coreToStgApp f args ticks = do
 
                 -- A call to some primitive Cmm function.
                 FCallId (CCall (CCallSpec (StaticTarget _ lbl (Just pkgId) True)
-                                          PrimCallConv _))
+                                          PrimCallConv _ _ _))
                                  -> ASSERT( saturated )
                                     StgOpApp (StgPrimCallOp (PrimCall lbl pkgId)) args' res_ty
 
diff --git a/compiler/typecheck/TcType.hs-boot b/compiler/typecheck/TcType.hs-boot
index 2bc14735f15364f487d28d9f47865242cce0ce93..2e2bcb58b812c0a13007203cb30063604887d1e6 100644
--- a/compiler/typecheck/TcType.hs-boot
+++ b/compiler/typecheck/TcType.hs-boot
@@ -1,8 +1,12 @@
 module TcType where
 import Outputable( SDoc )
+import {-# SOURCE #-} TyCoRep( Type )
+import {-# SOURCE #-} TyCon (TyCon)
+import Data.Maybe (Maybe)
 
 data MetaDetails
 
 data TcTyVarDetails
 pprTcTyVarDetails :: TcTyVarDetails -> SDoc
 vanillaSkolemTv :: TcTyVarDetails
+tcSplitIOType_maybe :: Type -> Maybe (TyCon, Type)
diff --git a/compiler/types/TyCon.hs b/compiler/types/TyCon.hs
index 551dcb58179db7bffe7e05c29b577aacb8b2723e..ddf1e6bba4d115b2a7d4a25c182ce6a0c6b915c1 100644
--- a/compiler/types/TyCon.hs
+++ b/compiler/types/TyCon.hs
@@ -6,7 +6,7 @@
 The @TyCon@ datatype
 -}
 
-{-# LANGUAGE CPP, FlexibleInstances #-}
+{-# LANGUAGE CPP, FlexibleInstances, LambdaCase #-}
 
 module TyCon(
         -- * Main TyCon data types
@@ -1437,7 +1437,45 @@ data PrimRep
   | FloatRep
   | DoubleRep
   | VecRep Int PrimElemRep  -- ^ A vector
-  deriving( Show )
+  deriving( Eq, Show )
+
+instance Binary PrimRep where
+  put_ bh VoidRep     = putByte bh 0
+  put_ bh LiftedRep   = putByte bh 1
+  put_ bh UnliftedRep = putByte bh 2
+  put_ bh Int8Rep     = putByte bh 3
+  put_ bh Int16Rep    = putByte bh 4
+  put_ bh Int32Rep    = putByte bh 5
+  put_ bh Int64Rep    = putByte bh 6
+  put_ bh IntRep      = putByte bh 7
+  put_ bh Word8Rep    = putByte bh 8
+  put_ bh Word16Rep   = putByte bh 9
+  put_ bh Word32Rep   = putByte bh 10
+  put_ bh Word64Rep   = putByte bh 11
+  put_ bh WordRep     = putByte bh 12
+  put_ bh AddrRep     = putByte bh 13
+  put_ bh FloatRep    = putByte bh 14
+  put_ bh DoubleRep   = putByte bh 15
+  put_ bh (VecRep n el) = putByte bh 16 >> put_ bh n >> put_ bh el
+  get bh = getByte bh >>= \case
+    0 -> pure VoidRep
+    1 -> pure LiftedRep
+    2 -> pure UnliftedRep
+    3 -> pure Int8Rep
+    4 -> pure Int16Rep
+    5 -> pure Int32Rep
+    6 -> pure Int64Rep
+    7 -> pure IntRep
+    8 -> pure Word8Rep
+    9 -> pure Word16Rep
+    10 -> pure Word32Rep
+    11 -> pure Word64Rep
+    12 -> pure WordRep
+    13 -> pure AddrRep
+    14 -> pure FloatRep
+    15 -> pure DoubleRep
+    16 -> VecRep <$> get bh <*> get bh
+    _  -> panic "Decoding PrimRep, invalid byte."
 
 data PrimElemRep
   = Int8ElemRep
@@ -1452,6 +1490,30 @@ data PrimElemRep
   | DoubleElemRep
    deriving( Eq, Show )
 
+instance Binary PrimElemRep where
+  put_ bh Int8ElemRep   = putByte bh 0
+  put_ bh Int16ElemRep  = putByte bh 1
+  put_ bh Int32ElemRep  = putByte bh 2
+  put_ bh Int64ElemRep  = putByte bh 3
+  put_ bh Word8ElemRep  = putByte bh 4
+  put_ bh Word16ElemRep = putByte bh 5
+  put_ bh Word32ElemRep = putByte bh 6
+  put_ bh Word64ElemRep = putByte bh 7
+  put_ bh FloatElemRep  = putByte bh 8
+  put_ bh DoubleElemRep = putByte bh 9
+  get bh = getByte bh >>= \case
+    0 -> pure Int8ElemRep
+    1 -> pure Int16ElemRep
+    2 -> pure Int32ElemRep
+    3 -> pure Int64ElemRep
+    4 -> pure Word8ElemRep
+    5 -> pure Word16ElemRep
+    6 -> pure Word32ElemRep
+    7 -> pure Word64ElemRep
+    8 -> pure FloatElemRep
+    9 -> pure DoubleElemRep
+    _  -> panic "Decoding PrimElemRep, invalid byte."
+
 instance Outputable PrimRep where
   ppr r = text (show r)
 
diff --git a/compiler/types/TyCon.hs-boot b/compiler/types/TyCon.hs-boot
index 4db8d0f1c16a17371278f4950530959c0198cb71..df4c400ff6ce6c4acf5323c86d4cbb552c0ec8fd 100644
--- a/compiler/types/TyCon.hs-boot
+++ b/compiler/types/TyCon.hs-boot
@@ -1,8 +1,13 @@
 module TyCon where
 
 import GhcPrelude
+import Binary
 
 data TyCon
+data PrimRep
+
+instance Eq PrimRep
+instance Binary PrimRep
 
 isTupleTyCon        :: TyCon -> Bool
 isUnboxedTupleTyCon :: TyCon -> Bool
diff --git a/compiler/types/Type.hs-boot b/compiler/types/Type.hs-boot
index 16c6bfe07b771f2c93ebf01bde3d400b3763d772..b135e41d247d8b1a6cf8009cae77f62fc58bfbd1 100644
--- a/compiler/types/Type.hs-boot
+++ b/compiler/types/Type.hs-boot
@@ -3,7 +3,7 @@
 module Type where
 
 import GhcPrelude
-import TyCon
+import {-# SOURCE #-} TyCon
 import {-# SOURCE #-} TyCoRep( Type, Coercion )
 import Util