Commit 2c959a18 authored by Michal Terepeta's avatar Michal Terepeta Committed by Ben Gamari

Add Int8# and Word8#

This is the first step of implementing:
https://github.com/ghc-proposals/ghc-proposals/pull/74

The main highlights/changes:

    primops.txt.pp gets two new sections for two new primitive types for
    signed and unsigned 8-bit integers (Int8# and Word8 respectively) along
    with basic arithmetic and comparison operations. PrimRep/RuntimeRep get
    two new constructors for them. All of the primops translate into the
    existing MachOPs.

    For CmmCalls the codegen will now zero-extend the values at call
    site (so that they can be moved to the right register) and then truncate
    them back their original width.

    x86 native codegen needed some updates, since it wasn't able to deal
    with the new widths, but all the changes are quite localized. LLVM
    backend seems to just work.

This is the second attempt at merging this, after the first attempt in
D4475 had to be backed out due to regressions on i386.

Bumps binary submodule.
Signed-off-by: Michal Terepeta's avatarMichal Terepeta <michal.terepeta@gmail.com>

Test Plan: ./validate (on both x86-{32,64})

Reviewers: bgamari, hvr, goldfire, simonmar

Subscribers: rwbarton, carter

Differential Revision: https://phabricator.haskell.org/D5258
parent 6bb8aaa3
......@@ -6,7 +6,7 @@
module CmmExpr
( CmmExpr(..), cmmExprType, cmmExprWidth, maybeInvertCmmExpr
, CmmReg(..), cmmRegType
, CmmReg(..), cmmRegType, cmmRegWidth
, CmmLit(..), cmmLitType
, LocalReg(..), localRegType
, GlobalReg(..), isArgReg, globalRegType
......@@ -273,6 +273,9 @@ cmmRegType :: DynFlags -> CmmReg -> CmmType
cmmRegType _ (CmmLocal reg) = localRegType reg
cmmRegType dflags (CmmGlobal reg) = globalRegType dflags reg
cmmRegWidth :: DynFlags -> CmmReg -> Width
cmmRegWidth dflags = typeWidth . cmmRegType dflags
localRegType :: LocalReg -> CmmType
localRegType (LocalReg _ rep) = rep
......
......@@ -107,6 +107,14 @@ data MachOp
| MO_FS_Conv Width Width -- Float -> Signed int
| MO_SS_Conv Width Width -- Signed int -> Signed int
| MO_UU_Conv Width Width -- unsigned int -> unsigned int
| MO_XX_Conv Width Width -- int -> int; puts no requirements on the
-- contents of upper bits when extending;
-- narrowing is simply truncation; the only
-- expectation is that we can recover the
-- original value by applying the opposite
-- MO_XX_Conv, e.g.,
-- MO_XX_CONV W64 W8 (MO_XX_CONV W8 W64 x)
-- is equivalent to just x.
| MO_FF_Conv Width Width -- Float -> Float
-- Vector element insertion and extraction operations
......@@ -392,6 +400,7 @@ machOpResultType dflags mop tys =
MO_SS_Conv _ to -> cmmBits to
MO_UU_Conv _ to -> cmmBits to
MO_XX_Conv _ to -> cmmBits to
MO_FS_Conv _ to -> cmmBits to
MO_SF_Conv _ to -> cmmFloat to
MO_FF_Conv _ to -> cmmFloat to
......@@ -483,6 +492,7 @@ machOpArgReps dflags op =
MO_SS_Conv from _ -> [from]
MO_UU_Conv from _ -> [from]
MO_XX_Conv from _ -> [from]
MO_SF_Conv from _ -> [from]
MO_FS_Conv from _ -> [from]
MO_FF_Conv from _ -> [from]
......
......@@ -97,6 +97,8 @@ primRepCmmType dflags LiftedRep = gcWord dflags
primRepCmmType dflags UnliftedRep = gcWord dflags
primRepCmmType dflags IntRep = bWord dflags
primRepCmmType dflags WordRep = bWord dflags
primRepCmmType _ Int8Rep = b8
primRepCmmType _ Word8Rep = b8
primRepCmmType _ Int64Rep = b64
primRepCmmType _ Word64Rep = b64
primRepCmmType dflags AddrRep = bWord dflags
......@@ -131,8 +133,10 @@ primRepForeignHint VoidRep = panic "primRepForeignHint:VoidRep"
primRepForeignHint LiftedRep = AddrHint
primRepForeignHint UnliftedRep = AddrHint
primRepForeignHint IntRep = SignedHint
primRepForeignHint WordRep = NoHint
primRepForeignHint Int8Rep = SignedHint
primRepForeignHint Int64Rep = SignedHint
primRepForeignHint WordRep = NoHint
primRepForeignHint Word8Rep = NoHint
primRepForeignHint Word64Rep = NoHint
primRepForeignHint AddrRep = AddrHint -- NB! AddrHint, but NonPtrArg
primRepForeignHint FloatRep = NoHint
......
......@@ -38,6 +38,7 @@ import OrdList
import SMRep (ByteOff)
import UniqSupply
import Util
import Panic
-----------------------------------------------------------------------------
......@@ -309,18 +310,33 @@ copyIn :: DynFlags -> Convention -> Area
copyIn dflags conv area formals extra_stk
= (stk_size, [r | (_, RegisterParam r) <- args], map ci (stk_args ++ args))
where
ci (reg, RegisterParam r) =
CmmAssign (CmmLocal reg) (CmmReg (CmmGlobal r))
ci (reg, StackParam off) =
CmmAssign (CmmLocal reg) (CmmLoad (CmmStackSlot area off) ty)
where ty = localRegType reg
-- See Note [Width of parameters]
ci (reg, RegisterParam r@(VanillaReg {})) =
let local = CmmLocal reg
global = CmmReg (CmmGlobal r)
width = cmmRegWidth dflags local
expr
| width == wordWidth dflags = global
| width < wordWidth dflags =
CmmMachOp (MO_XX_Conv (wordWidth dflags) width) [global]
| otherwise = panic "Parameter width greater than word width"
init_offset = widthInBytes (wordWidth dflags) -- infotable
in CmmAssign local expr
(stk_off, stk_args) = assignStack dflags init_offset localRegType extra_stk
-- Non VanillaRegs
ci (reg, RegisterParam r) =
CmmAssign (CmmLocal reg) (CmmReg (CmmGlobal r))
(stk_size, args) = assignArgumentsPos dflags stk_off conv
localRegType formals
ci (reg, StackParam off) =
CmmAssign (CmmLocal reg) (CmmLoad (CmmStackSlot area off) ty)
where ty = localRegType reg
init_offset = widthInBytes (wordWidth dflags) -- infotable
(stk_off, stk_args) = assignStack dflags init_offset localRegType extra_stk
(stk_size, args) = assignArgumentsPos dflags stk_off conv
localRegType formals
-- Factoring out the common parts of the copyout functions yielded something
-- more complicated:
......@@ -346,8 +362,21 @@ copyOutOflow dflags conv transfer area actuals updfr_off extra_stack_stuff
where
(regs, graph) = foldr co ([], mkNop) (setRA ++ args ++ stack_params)
co (v, RegisterParam r) (rs, ms)
= (r:rs, mkAssign (CmmGlobal r) v <*> ms)
-- See Note [Width of parameters]
co (v, RegisterParam r@(VanillaReg {})) (rs, ms) =
let width = cmmExprWidth dflags v
value
| width == wordWidth dflags = v
| width < wordWidth dflags =
CmmMachOp (MO_XX_Conv width (wordWidth dflags)) [v]
| otherwise = panic "Parameter width greater than word width"
in (r:rs, mkAssign (CmmGlobal r) value <*> ms)
-- Non VanillaRegs
co (v, RegisterParam r) (rs, ms) =
(r:rs, mkAssign (CmmGlobal r) v <*> ms)
co (v, StackParam off) (rs, ms)
= (rs, mkStore (CmmStackSlot area off) v <*> ms)
......@@ -374,6 +403,28 @@ copyOutOflow dflags conv transfer area actuals updfr_off extra_stack_stuff
(cmmExprType dflags) actuals
-- Note [Width of parameters]
--
-- Consider passing a small (< word width) primitive like Int8# to a function
-- through a register. It's actually non-trivial to do this without
-- extending/narrowing:
-- * Global registers are considered to have native word width (i.e., 64-bits on
-- x86-64), so CmmLint would complain if we assigne an 8-bit parameter to a
-- global register.
-- * Same problem exists with LLVM IR.
-- * Lowering gets harder since on x86-32 not every register exposes its lower
-- 8 bits (e.g., for %eax we can use %al, but there isn't a corresponding
-- 8-bit register for %edi). So we would either need to extend/narrow anyway,
-- or complicate the calling convention.
-- So instead, we always extend every parameter smaller than native word width
-- in copyOutOflow and then truncate it back to the expected width in copyIn.
-- Note that we do this in cmm using MO_XX_Conv to avoid requiring
-- zero-/sign-extending - it's up to a backend to handle this in a most
-- efficient way (e.g., a simple register move)
--
-- There was some discussion about this on this PR:
-- https://github.com/ghc-proposals/ghc-proposals/pull/74
mkCallEntry :: DynFlags -> Convention -> [CmmFormal] -> [CmmFormal]
-> (Int, [GlobalReg], CmmAGraph)
......
......@@ -646,6 +646,9 @@ pprMachOp_for_C mop = case mop of
MO_SS_Conv from to | from == to -> empty
MO_SS_Conv _from to -> parens (machRep_S_CType to)
MO_XX_Conv from to | from == to -> empty
MO_XX_Conv _from to -> parens (machRep_U_CType to)
MO_FF_Conv from to | from == to -> empty
MO_FF_Conv _from to -> parens (machRep_F_CType to)
......
......@@ -70,6 +70,8 @@ toArgRep LiftedRep = P
toArgRep UnliftedRep = P
toArgRep IntRep = N
toArgRep WordRep = N
toArgRep Int8Rep = N -- Gets widened to native word width for calls
toArgRep Word8Rep = N -- Gets widened to native word width for calls
toArgRep AddrRep = N
toArgRep Int64Rep = L
toArgRep Word64Rep = L
......
......@@ -875,19 +875,29 @@ type GenericOp = [CmmFormal] -> [CmmActual] -> FCode ()
callishPrimOpSupported :: DynFlags -> PrimOp -> Either CallishMachOp GenericOp
callishPrimOpSupported dflags op
= case op of
IntQuotRemOp | ncg && (x86ish
|| ppc) -> Left (MO_S_QuotRem (wordWidth dflags))
| otherwise -> Right (genericIntQuotRemOp dflags)
IntQuotRemOp | ncg && (x86ish || ppc) ->
Left (MO_S_QuotRem (wordWidth dflags))
| otherwise ->
Right (genericIntQuotRemOp (wordWidth dflags))
WordQuotRemOp | ncg && (x86ish
|| ppc) -> Left (MO_U_QuotRem (wordWidth dflags))
| otherwise -> Right (genericWordQuotRemOp dflags)
Int8QuotRemOp | (ncg && x86ish)
|| llvm -> Left (MO_S_QuotRem W8)
| otherwise -> Right (genericIntQuotRemOp W8)
WordQuotRemOp | ncg && (x86ish || ppc) ->
Left (MO_U_QuotRem (wordWidth dflags))
| otherwise ->
Right (genericWordQuotRemOp (wordWidth dflags))
WordQuotRem2Op | (ncg && (x86ish
|| ppc))
|| llvm -> Left (MO_U_QuotRem2 (wordWidth dflags))
| otherwise -> Right (genericWordQuotRem2Op dflags)
Word8QuotRemOp | (ncg && x86ish)
|| llvm -> Left (MO_U_QuotRem W8)
| otherwise -> Right (genericWordQuotRemOp W8)
WordAdd2Op | (ncg && (x86ish
|| ppc))
|| llvm -> Left (MO_Add2 (wordWidth dflags))
......@@ -943,20 +953,20 @@ callishPrimOpSupported dflags op
ArchPPC_64 _ -> True
_ -> False
genericIntQuotRemOp :: DynFlags -> GenericOp
genericIntQuotRemOp dflags [res_q, res_r] [arg_x, arg_y]
genericIntQuotRemOp :: Width -> GenericOp
genericIntQuotRemOp width [res_q, res_r] [arg_x, arg_y]
= emit $ mkAssign (CmmLocal res_q)
(CmmMachOp (MO_S_Quot (wordWidth dflags)) [arg_x, arg_y]) <*>
(CmmMachOp (MO_S_Quot width) [arg_x, arg_y]) <*>
mkAssign (CmmLocal res_r)
(CmmMachOp (MO_S_Rem (wordWidth dflags)) [arg_x, arg_y])
(CmmMachOp (MO_S_Rem width) [arg_x, arg_y])
genericIntQuotRemOp _ _ _ = panic "genericIntQuotRemOp"
genericWordQuotRemOp :: DynFlags -> GenericOp
genericWordQuotRemOp dflags [res_q, res_r] [arg_x, arg_y]
genericWordQuotRemOp :: Width -> GenericOp
genericWordQuotRemOp width [res_q, res_r] [arg_x, arg_y]
= emit $ mkAssign (CmmLocal res_q)
(CmmMachOp (MO_U_Quot (wordWidth dflags)) [arg_x, arg_y]) <*>
(CmmMachOp (MO_U_Quot width) [arg_x, arg_y]) <*>
mkAssign (CmmLocal res_r)
(CmmMachOp (MO_U_Rem (wordWidth dflags)) [arg_x, arg_y])
(CmmMachOp (MO_U_Rem width) [arg_x, arg_y])
genericWordQuotRemOp _ _ _ = panic "genericWordQuotRemOp"
genericWordQuotRem2Op :: DynFlags -> GenericOp
......@@ -1310,6 +1320,42 @@ translateOp dflags AddrLeOp = Just (mo_wordULe dflags)
translateOp dflags AddrGtOp = Just (mo_wordUGt dflags)
translateOp dflags AddrLtOp = Just (mo_wordULt dflags)
-- Int8# signed ops
translateOp dflags Int8Extend = Just (MO_SS_Conv W8 (wordWidth dflags))
translateOp dflags Int8Narrow = Just (MO_SS_Conv (wordWidth dflags) W8)
translateOp _ Int8NegOp = Just (MO_S_Neg W8)
translateOp _ Int8AddOp = Just (MO_Add W8)
translateOp _ Int8SubOp = Just (MO_Sub W8)
translateOp _ Int8MulOp = Just (MO_Mul W8)
translateOp _ Int8QuotOp = Just (MO_S_Quot W8)
translateOp _ Int8RemOp = Just (MO_S_Rem W8)
translateOp _ Int8EqOp = Just (MO_Eq W8)
translateOp _ Int8GeOp = Just (MO_S_Ge W8)
translateOp _ Int8GtOp = Just (MO_S_Gt W8)
translateOp _ Int8LeOp = Just (MO_S_Le W8)
translateOp _ Int8LtOp = Just (MO_S_Lt W8)
translateOp _ Int8NeOp = Just (MO_Ne W8)
-- Word8# unsigned ops
translateOp dflags Word8Extend = Just (MO_UU_Conv W8 (wordWidth dflags))
translateOp dflags Word8Narrow = Just (MO_UU_Conv (wordWidth dflags) W8)
translateOp _ Word8NotOp = Just (MO_Not W8)
translateOp _ Word8AddOp = Just (MO_Add W8)
translateOp _ Word8SubOp = Just (MO_Sub W8)
translateOp _ Word8MulOp = Just (MO_Mul W8)
translateOp _ Word8QuotOp = Just (MO_U_Quot W8)
translateOp _ Word8RemOp = Just (MO_U_Rem W8)
translateOp _ Word8EqOp = Just (MO_Eq W8)
translateOp _ Word8GeOp = Just (MO_U_Ge W8)
translateOp _ Word8GtOp = Just (MO_U_Gt W8)
translateOp _ Word8LeOp = Just (MO_U_Le W8)
translateOp _ Word8LtOp = Just (MO_U_Lt W8)
translateOp _ Word8NeOp = Just (MO_Ne W8)
-- Char# ops
translateOp dflags CharEqOp = Just (MO_Eq (wordWidth dflags))
......
......@@ -805,7 +805,7 @@ mkConAppCode orig_d _ p con args_r_to_l =
do_pushery !d (arg : args) = do
(push, arg_bytes) <- case arg of
(Padding l _) -> pushPadding l
(Padding l _) -> return $! pushPadding l
(FieldOff a _) -> pushConstrAtom d p (fromNonVoid a)
more_push_code <- do_pushery (d + arg_bytes) args
return (push `appOL` more_push_code)
......@@ -1570,11 +1570,16 @@ pushConstrAtom d p (AnnVar v)
pushConstrAtom d p expr = pushAtom d p expr
pushPadding :: Int -> BcM (BCInstrList, ByteOff)
pushPadding 1 = return (unitOL (PUSH_PAD8), 1)
pushPadding 2 = return (unitOL (PUSH_PAD16), 2)
pushPadding 4 = return (unitOL (PUSH_PAD32), 4)
pushPadding x = panic $ "pushPadding x=" ++ show x
pushPadding :: Int -> (BCInstrList, ByteOff)
pushPadding !n = go n (nilOL, 0)
where
go n acc@(!instrs, !off) = case n of
0 -> acc
1 -> (instrs `mappend` unitOL PUSH_PAD8, off + 1)
2 -> (instrs `mappend` unitOL PUSH_PAD16, off + 2)
3 -> go 1 (go 2 acc)
4 -> (instrs `mappend` unitOL PUSH_PAD32, off + 4)
_ -> go (n - 4) (go 4 acc)
-- -----------------------------------------------------------------------------
-- Given a bunch of alts code and their discrs, do the donkey work
......
......@@ -1193,6 +1193,9 @@ genMachOp _ op [x] = case op of
MO_UU_Conv from to
-> sameConv from (widthToLlvmInt to) LM_Trunc LM_Zext
MO_XX_Conv from to
-> sameConv from (widthToLlvmInt to) LM_Bitcast LM_Bitcast
MO_FF_Conv from to
-> sameConv from (widthToLlvmFloat to) LM_Fptrunc LM_Fpext
......@@ -1454,6 +1457,7 @@ genMachOp_slow opt op [x, y] = case op of
MO_FS_Conv _ _ -> panicOp
MO_SS_Conv _ _ -> panicOp
MO_UU_Conv _ _ -> panicOp
MO_XX_Conv _ _ -> panicOp
MO_FF_Conv _ _ -> panicOp
MO_V_Insert {} -> panicOp
......
......@@ -644,20 +644,27 @@ getRegister' dflags is32Bit (CmmMachOp mop [x]) = do -- unary MachOps
-- Nop conversions
MO_UU_Conv W32 W8 -> toI8Reg W32 x
MO_SS_Conv W32 W8 -> toI8Reg W32 x
MO_XX_Conv W32 W8 -> toI8Reg W32 x
MO_UU_Conv W16 W8 -> toI8Reg W16 x
MO_SS_Conv W16 W8 -> toI8Reg W16 x
MO_XX_Conv W16 W8 -> toI8Reg W16 x
MO_UU_Conv W32 W16 -> toI16Reg W32 x
MO_SS_Conv W32 W16 -> toI16Reg W32 x
MO_XX_Conv W32 W16 -> toI16Reg W32 x
MO_UU_Conv W64 W32 | not is32Bit -> conversionNop II64 x
MO_SS_Conv W64 W32 | not is32Bit -> conversionNop II64 x
MO_XX_Conv W64 W32 | not is32Bit -> conversionNop II64 x
MO_UU_Conv W64 W16 | not is32Bit -> toI16Reg W64 x
MO_SS_Conv W64 W16 | not is32Bit -> toI16Reg W64 x
MO_XX_Conv W64 W16 | not is32Bit -> toI16Reg W64 x
MO_UU_Conv W64 W8 | not is32Bit -> toI8Reg W64 x
MO_SS_Conv W64 W8 | not is32Bit -> toI8Reg W64 x
MO_XX_Conv W64 W8 | not is32Bit -> toI8Reg W64 x
MO_UU_Conv rep1 rep2 | rep1 == rep2 -> conversionNop (intFormat rep1) x
MO_SS_Conv rep1 rep2 | rep1 == rep2 -> conversionNop (intFormat rep1) x
MO_XX_Conv rep1 rep2 | rep1 == rep2 -> conversionNop (intFormat rep1) x
-- widenings
MO_UU_Conv W8 W32 -> integerExtend W8 W32 MOVZxL x
......@@ -668,16 +675,32 @@ getRegister' dflags is32Bit (CmmMachOp mop [x]) = do -- unary MachOps
MO_SS_Conv W16 W32 -> integerExtend W16 W32 MOVSxL x
MO_SS_Conv W8 W16 -> integerExtend W8 W16 MOVSxL x
-- We don't care about the upper bits for MO_XX_Conv, so MOV is enough. However, on 32-bit we
-- have 8-bit registers only for a few registers (as opposed to x86-64 where every register
-- has 8-bit version). So for 32-bit code, we'll just zero-extend.
MO_XX_Conv W8 W32
| is32Bit -> integerExtend W8 W32 MOVZxL x
| otherwise -> integerExtend W8 W32 MOV x
MO_XX_Conv W8 W16
| is32Bit -> integerExtend W8 W16 MOVZxL x
| otherwise -> integerExtend W8 W16 MOV x
MO_XX_Conv W16 W32 -> integerExtend W16 W32 MOV x
MO_UU_Conv W8 W64 | not is32Bit -> integerExtend W8 W64 MOVZxL x
MO_UU_Conv W16 W64 | not is32Bit -> integerExtend W16 W64 MOVZxL x
MO_UU_Conv W32 W64 | not is32Bit -> integerExtend W32 W64 MOVZxL x
MO_SS_Conv W8 W64 | not is32Bit -> integerExtend W8 W64 MOVSxL x
MO_SS_Conv W16 W64 | not is32Bit -> integerExtend W16 W64 MOVSxL x
MO_SS_Conv W32 W64 | not is32Bit -> integerExtend W32 W64 MOVSxL x
-- for 32-to-64 bit zero extension, amd64 uses an ordinary movl.
-- However, we don't want the register allocator to throw it
-- away as an unnecessary reg-to-reg move, so we keep it in
-- the form of a movzl and print it as a movl later.
-- For 32-to-64 bit zero extension, amd64 uses an ordinary movl.
-- However, we don't want the register allocator to throw it
-- away as an unnecessary reg-to-reg move, so we keep it in
-- the form of a movzl and print it as a movl later.
-- This doesn't apply to MO_XX_Conv since in this case we don't care about
-- the upper bits. So we can just use MOV.
MO_XX_Conv W8 W64 | not is32Bit -> integerExtend W8 W64 MOV x
MO_XX_Conv W16 W64 | not is32Bit -> integerExtend W16 W64 MOV x
MO_XX_Conv W32 W64 | not is32Bit -> integerExtend W32 W64 MOV x
MO_FF_Conv W32 W64
| sse2 -> coerceFP2FP W64 x
......@@ -787,6 +810,7 @@ getRegister' _ is32Bit (CmmMachOp mop [x, y]) = do -- dyadic MachOps
MO_S_MulMayOflo rep -> imulMayOflo rep x y
MO_Mul W8 -> imulW8 x y
MO_Mul rep -> triv_op rep IMUL
MO_And rep -> triv_op rep AND
MO_Or rep -> triv_op rep OR
......@@ -822,6 +846,21 @@ getRegister' _ is32Bit (CmmMachOp mop [x, y]) = do -- dyadic MachOps
triv_op width instr = trivialCode width op (Just op) x y
where op = instr (intFormat width)
-- Special case for IMUL for bytes, since the result of IMULB will be in
-- %ax, the split to %dx/%edx/%rdx and %ax/%eax/%rax happens only for wider
-- values.
imulW8 :: CmmExpr -> CmmExpr -> NatM Register
imulW8 arg_a arg_b = do
(a_reg, a_code) <- getNonClobberedReg arg_a
b_code <- getAnyReg arg_b
let code = a_code `appOL` b_code eax `appOL`
toOL [ IMUL2 format (OpReg a_reg) ]
format = intFormat W8
return (Fixed format eax code)
imulMayOflo :: Width -> CmmExpr -> CmmExpr -> NatM Register
imulMayOflo rep a b = do
(a_reg, a_code) <- getNonClobberedReg a
......@@ -916,6 +955,18 @@ getRegister' _ is32Bit (CmmMachOp mop [x, y]) = do -- dyadic MachOps
return (Any format code)
----------------------
-- See Note [DIV/IDIV for bytes]
div_code W8 signed quotient x y = do
let widen | signed = MO_SS_Conv W8 W16
| otherwise = MO_UU_Conv W8 W16
div_code
W16
signed
quotient
(CmmMachOp widen [x])
(CmmMachOp widen [y])
div_code width signed quotient x y = do
(y_op, y_code) <- getRegOrMem y -- cannot be clobbered
x_code <- getAnyReg x
......@@ -2277,6 +2328,18 @@ genCCall _ is32Bit target dest_regs args = do
= divOp platform signed width results (Just arg_x_high) arg_x_low arg_y
divOp2 _ _ _ _ _
= panic "genCCall: Wrong number of arguments for divOp2"
-- See Note [DIV/IDIV for bytes]
divOp platform signed W8 [res_q, res_r] m_arg_x_high arg_x_low arg_y =
let widen | signed = MO_SS_Conv W8 W16
| otherwise = MO_UU_Conv W8 W16
arg_x_low_16 = CmmMachOp widen [arg_x_low]
arg_y_16 = CmmMachOp widen [arg_y]
m_arg_x_high_16 = (\p -> CmmMachOp widen [p]) <$> m_arg_x_high
in divOp
platform signed W16 [res_q, res_r]
m_arg_x_high_16 arg_x_low_16 arg_y_16
divOp platform signed width [res_q, res_r]
m_arg_x_high arg_x_low arg_y
= do let format = intFormat width
......@@ -2318,6 +2381,22 @@ genCCall _ is32Bit target dest_regs args = do
addSubIntC _ _ _ _ _ _ _ _
= panic "genCCall: Wrong number of arguments/results for addSubIntC"
-- Note [DIV/IDIV for bytes]
--
-- IDIV reminder:
-- Size Dividend Divisor Quotient Remainder
-- byte %ax r/m8 %al %ah
-- word %dx:%ax r/m16 %ax %dx
-- dword %edx:%eax r/m32 %eax %edx
-- qword %rdx:%rax r/m64 %rax %rdx
--
-- We do a special case for the byte division because the current
-- codegen doesn't deal well with accessing %ah register (also,
-- accessing %ah in 64-bit mode is complicated because it cannot be an
-- operand of many instructions). So we just widen operands to 16 bits
-- and get the results from %al, %dl. This is not optimal, but a few
-- register moves are probably not a huge deal when doing division.
genCCall32' :: DynFlags
-> ForeignTarget -- function to call
-> [CmmFormal] -- where to put the result
......@@ -2330,7 +2409,7 @@ genCCall32' dflags target dest_regs args = do
-- Align stack to 16n for calls, assuming a starting stack
-- alignment of 16n - word_size on procedure entry. Which we
-- maintiain. See Note [rts/StgCRun.c : Stack Alignment on X86]
sizes = map (arg_size . cmmExprType dflags) (reverse args)
sizes = map (arg_size_bytes . cmmExprType dflags) (reverse args)
raw_arg_size = sum sizes + wORD_SIZE dflags
arg_pad_size = (roundTo 16 $ raw_arg_size) - raw_arg_size
tot_arg_size = raw_arg_size + arg_pad_size - wORD_SIZE dflags
......@@ -2421,8 +2500,9 @@ genCCall32' dflags target dest_regs args = do
assign_code dest_regs)
where
arg_size :: CmmType -> Int -- Width in bytes
arg_size ty = widthInBytes (typeWidth ty)
-- If the size is smaller than the word, we widen things (see maybePromoteCArg)
arg_size_bytes :: CmmType -> Int
arg_size_bytes ty = max (widthInBytes (typeWidth ty)) (widthInBytes (wordWidth dflags))
roundTo a x | x `mod` a == 0 = x
| otherwise = x + a - (x `mod` a)
......@@ -2461,6 +2541,10 @@ genCCall32' dflags target dest_regs args = do
)
| otherwise = do
-- Arguments can be smaller than 32-bit, but we still use @PUSH
-- II32@ - the usual calling conventions expect integers to be
-- 4-byte aligned.
ASSERT((typeWidth arg_ty) <= W32) return ()
(operand, code) <- getOperand arg
delta <- getDeltaNat
setDeltaNat (delta-size)
......@@ -2470,7 +2554,7 @@ genCCall32' dflags target dest_regs args = do
where
arg_ty = cmmExprType dflags arg
size = arg_size arg_ty -- Byte size
size = arg_size_bytes arg_ty -- Byte size
genCCall64' :: DynFlags
-> ForeignTarget -- function to call
......@@ -2700,7 +2784,10 @@ genCCall64' dflags target dest_regs args = do
push_args rest code'
| otherwise = do
ASSERT(width == W64) return ()
-- Arguments can be smaller than 64-bit, but we still use @PUSH
-- II64@ - the usual calling conventions expect integers to be
-- 8-byte aligned.
ASSERT(width <= W64) return ()
(arg_op, arg_code) <- getOperand arg
delta <- getDeltaNat
setDeltaNat (delta-arg_size)
......
......@@ -383,7 +383,13 @@ x86_regUsageOfInstr platform instr
SUB _ src dst -> usageRM src dst
SBB _ src dst -> usageRM src dst
IMUL _ src dst -> usageRM src dst
IMUL2 _ src -> mkRU (eax:use_R src []) [eax,edx]
-- Result of IMULB will be in just in %ax
IMUL2 II8 src -> mkRU (eax:use_R src []) [eax]
-- Result of IMUL for wider values, will be split between %dx/%edx/%rdx and
-- %ax/%eax/%rax.
IMUL2 _ src -> mkRU (eax:use_R src []) [eax,edx]
MUL _ src dst -> usageRM src dst
MUL2 _ src -> mkRU (eax:use_R src []) [eax,edx]
DIV _ op -> mkRU (eax:edx:use_R op []) [eax,edx]
......
......@@ -328,7 +328,7 @@ pprReg f r
(case i of {
0 -> sLit "%al"; 1 -> sLit "%bl";
2 -> sLit "%cl"; 3 -> sLit "%dl";
_ -> sLit "very naughty I386 byte register"
_ -> sLit $ "very naughty I386 byte register: " ++ show i
})
ppr32_reg_word i = ptext
......@@ -365,7 +365,7 @@ pprReg f r
10 -> sLit "%r10b"; 11 -> sLit "%r11b";
12 -> sLit "%r12b"; 13 -> sLit "%r13b";
14 -> sLit "%r14b"; 15 -> sLit "%r15b";
_ -> sLit "very naughty x86_64 byte register"
_ -> sLit $ "very naughty x86_64 byte register: " ++ show i
})
ppr64_reg_word i = ptext
......@@ -790,8 +790,11 @@ pprInstr (POP format op) = pprFormatOp (sLit "pop") format op
-- pprInstr POPA = text "\tpopal"
pprInstr NOP = text "\tnop"
pprInstr (CLTD II8) = text "\tcbtw"
pprInstr (CLTD II16) = text "\tcwtd"
pprInstr (CLTD II32) = text "\tcltd"
pprInstr (CLTD II64) = text "\tcqto"
pprInstr (CLTD x) = panic $ "pprInstr: " ++ show x
pprInstr (SETCC cond op) = pprCondInstr (sLit "set") cond (pprOperand II8 op)
......@@ -1077,9 +1080,6 @@ pprInstr (XADD format src dst) = pprFormatOpOp (sLit "xadd") format src dst
pprInstr (CMPXCHG format src dst)
= pprFormatOpOp (sLit "cmpxchg") format src dst
pprInstr _
= panic "X86.Ppr.pprInstr: no match"
pprTrigOp :: String -> Bool -> CLabel -> CLabel
-> Reg -> Reg -> Format -> SDoc
......
......@@ -1682,7 +1682,7 @@ addrPrimTyConKey, arrayPrimTyConKey, arrayArrayPrimTyConKey, boolTyConKey,
byteArrayPrimTyConKey, charPrimTyConKey, charTyConKey, doublePrimTyConKey,
doubleTyConKey, floatPrimTyConKey, floatTyConKey, funTyConKey,
intPrimTyConKey, intTyConKey, int8TyConKey, int16TyConKey,
int32PrimTyConKey, int32TyConKey, int64PrimTyConKey, int64TyConKey,
int8PrimTyConKey, int32PrimTyConKey, int32TyConKey, int64PrimTyConKey, int64TyConKey,
integerTyConKey, naturalTyConKey,
listTyConKey, foreignObjPrimTyConKey, maybeTyConKey,
weakPrimTyConKey, mutableArrayPrimTyConKey, mutableArrayArrayPrimTyConKey,
......@@ -1703,37 +1703,39 @@ floatTyConKey = mkPreludeTyConUnique 12
funTyConKey = mkPreludeTyConUnique 13
intPrimTyConKey = mkPreludeTyConUnique 14
intTyConKey = mkPreludeTyConUnique 15
int8TyConKey = mkPreludeTyConUnique 16
int16TyConKey = mkPreludeTyConUnique 17
int32PrimTyConKey = mkPreludeTyConUnique 18
int32TyConKey = mkPreludeTyConUnique 19
int64PrimTyConKey = mkPreludeTyConUnique 20
int64TyConKey = mkPreludeTyConUnique 21
integerTyConKey = mkPreludeTyConUnique 22
naturalTyConKey = mkPreludeTyConUnique 23
listTyConKey = mkPreludeTyConUnique 24
foreignObjPrimTyConKey = mkPreludeTyConUnique 25
maybeTyConKey = mkPreludeTyConUnique 26
weakPrimTyConKey = mkPreludeTyConUnique 27
mutableArrayPrimTyConKey = mkPreludeTyConUnique 28
mutableByteArrayPrimTyConKey = mkPreludeTyConUnique 29
orderingTyConKey = mkPreludeTyConUnique 30
mVarPrimTyConKey = mkPreludeTyConUnique 31
ratioTyConKey = mkPreludeTyConUnique 32
rationalTyConKey = mkPreludeTyConUnique 33
realWorldTyConKey = mkPreludeTyConUnique 34
stablePtrPrimTyConKey = mkPreludeTyConUnique 35
stablePtrTyConKey = mkPreludeTyConUnique 36
eqTyConKey = mkPreludeTyConUnique 38
heqTyConKey = mkPreludeTyConUnique 39
arrayArrayPrimTyConKey = mkPreludeTyConUnique 40
mutableArrayArrayPrimTyConKey = mkPreludeTyConUnique 41
int8PrimTyConKey = mkPreludeTyConUnique 16
int8TyConKey = mkPreludeTyConUnique 17
int16TyConKey = mkPreludeTyConUnique 18
int32PrimTyConKey = mkPreludeTyConUnique 19
int32TyConKey = mkPreludeTyConUnique 20
int64PrimTyConKey = mkPreludeTyConUnique 21
int64TyConKey = mkPreludeTyConUnique 22
integerTyConKey = mkPreludeTyConUnique 23
naturalTyConKey = mkPreludeTyConUnique 24
listTyConKey = mkPreludeTyConUnique 25
foreignObjPrimTyConKey = mkPreludeTyConUnique 26
maybeTyConKey = mkPreludeTyConUnique 27
weakPrimTyConKey = mkPreludeTyConUnique 28
mutableArrayPrimTyConKey = mkPreludeTyConUnique 29
mutableByteArrayPrimTyConKey = mkPreludeTyConUnique 30
orderingTyConKey = mkPreludeTyConUnique 31
mVarPrimTyConKey = mkPreludeTyConUnique 32
ratioTyConKey = mkPreludeTyConUnique 33
rationalTyConKey = mkPreludeTyConUnique 34
realWorldTyConKey = mkPreludeTyConUnique 35
stablePtrPrimTyConKey = mkPreludeTyConUnique 36
stablePtrTyConKey = mkPreludeTyConUnique 37
eqTyConKey = mkPreludeTyConUnique 39
heqTyConKey = mkPreludeTyConUnique 40
arrayArrayPrimTyConKey = mkPreludeTyConUnique 41
mutableArrayArrayPrimTyConKey = mkPreludeTyConUnique 42