diff --git a/compiler/GHC/Cmm/MachOp.hs b/compiler/GHC/Cmm/MachOp.hs index 508359e40ab72966d55fe8f671936bf095e15034..09e5dc8d97b2e491636dca2191274bcfca0b9677 100644 --- a/compiler/GHC/Cmm/MachOp.hs +++ b/compiler/GHC/Cmm/MachOp.hs @@ -140,8 +140,8 @@ data MachOp -- Conversions. Some of these will be NOPs. -- Floating-point conversions use the signed variant. - | MO_SF_Conv Width Width -- Signed int -> Float - | MO_FS_Conv Width Width -- Float -> Signed int + | MO_SF_Round Width Width -- Signed int -> Float + | MO_FS_Truncate Width Width -- Float -> Signed int | MO_SS_Conv Width Width -- Signed int -> Signed int | MO_UU_Conv Width Width -- unsigned int -> unsigned int | MO_XX_Conv Width Width -- int -> int; puts no requirements on the @@ -152,7 +152,10 @@ data MachOp -- MO_XX_Conv, e.g., -- MO_XX_CONV W64 W8 (MO_XX_CONV W8 W64 x) -- is equivalent to just x. - | MO_FF_Conv Width Width -- Float -> Float + | MO_FF_Conv Width Width -- Float -> Float + + | MO_WF_Bitcast Width -- Word32/Word64 -> Float/Double + | MO_FW_Bitcast Width -- Float/Double -> Word32/Word64 -- Vector element insertion and extraction operations | MO_V_Insert Length Width -- Insert scalar into vector @@ -476,9 +479,11 @@ machOpResultType platform mop tys = MO_SS_Conv _ to -> cmmBits to MO_UU_Conv _ to -> cmmBits to MO_XX_Conv _ to -> cmmBits to - MO_FS_Conv _ to -> cmmBits to - MO_SF_Conv _ to -> cmmFloat to + MO_FS_Truncate _ to -> cmmBits to + MO_SF_Round _ to -> cmmFloat to MO_FF_Conv _ to -> cmmFloat to + MO_WF_Bitcast w -> cmmFloat w + MO_FW_Bitcast w -> cmmBits w MO_V_Insert l w -> cmmVec l (cmmBits w) MO_V_Extract _ w -> cmmBits w @@ -568,12 +573,14 @@ machOpArgReps platform op = MO_U_Shr r -> [r, wordWidth platform] MO_S_Shr r -> [r, wordWidth platform] - MO_SS_Conv from _ -> [from] - MO_UU_Conv from _ -> [from] - MO_XX_Conv from _ -> [from] - MO_SF_Conv from _ -> [from] - MO_FS_Conv from _ -> [from] - MO_FF_Conv from _ -> [from] + MO_SS_Conv from _ -> [from] + MO_UU_Conv from _ -> [from] + MO_XX_Conv from _ -> [from] + MO_SF_Round from _ -> [from] + MO_FS_Truncate from _ -> [from] + MO_FF_Conv from _ -> [from] + MO_WF_Bitcast w -> [w] + MO_FW_Bitcast w -> [w] MO_V_Insert l r -> [typeWidth (vec l (cmmBits r)),r, W32] MO_V_Extract l r -> [typeWidth (vec l (cmmBits r)), W32] diff --git a/compiler/GHC/Cmm/Opt.hs b/compiler/GHC/Cmm/Opt.hs index 2d0b54232d71d821c705ca3a5b892b07519fac2c..9012b7adaf41ff945c9705a2d45f22c188234b2c 100644 --- a/compiler/GHC/Cmm/Opt.hs +++ b/compiler/GHC/Cmm/Opt.hs @@ -89,7 +89,7 @@ cmmMachOpFoldM _ op [CmmLit (CmmInt x rep)] -- "from" type, in order to truncate to the correct size. -- The final narrow/widen to the destination type -- is implicit in the CmmLit. - MO_SF_Conv _from to -> CmmLit (CmmFloat (fromInteger x) to) + MO_SF_Round _frm to -> CmmLit (CmmFloat (fromInteger x) to) MO_SS_Conv from to -> CmmLit (CmmInt (narrowS from x) to) MO_UU_Conv from to -> CmmLit (CmmInt (narrowU from x) to) MO_XX_Conv from to -> CmmLit (CmmInt (narrowS from x) to) diff --git a/compiler/GHC/Cmm/Parser.y b/compiler/GHC/Cmm/Parser.y index 18c34896cd052ba0f18d158a6884f690e4c314b7..365ede9f35dfda0d75314deb73a81908b98e438f 100644 --- a/compiler/GHC/Cmm/Parser.y +++ b/compiler/GHC/Cmm/Parser.y @@ -1078,12 +1078,12 @@ machOps = listToUFM $ ( "f2f32", flip MO_FF_Conv W32 ), -- TODO; rounding mode ( "f2f64", flip MO_FF_Conv W64 ), -- TODO; rounding mode - ( "f2i8", flip MO_FS_Conv W8 ), - ( "f2i16", flip MO_FS_Conv W16 ), - ( "f2i32", flip MO_FS_Conv W32 ), - ( "f2i64", flip MO_FS_Conv W64 ), - ( "i2f32", flip MO_SF_Conv W32 ), - ( "i2f64", flip MO_SF_Conv W64 ) + ( "f2i8", flip MO_FS_Truncate W8 ), + ( "f2i16", flip MO_FS_Truncate W16 ), + ( "f2i32", flip MO_FS_Truncate W32 ), + ( "f2i64", flip MO_FS_Truncate W64 ), + ( "i2f32", flip MO_SF_Round W32 ), + ( "i2f64", flip MO_SF_Round W64 ) ] callishMachOps :: Platform -> UniqFM FastString ([CmmExpr] -> (CallishMachOp, [CmmExpr])) diff --git a/compiler/GHC/CmmToAsm/AArch64/CodeGen.hs b/compiler/GHC/CmmToAsm/AArch64/CodeGen.hs index f2810b38b9fb7dd3a88abbc5e04e0b02fc0970b3..fa68fedfede54fa73e40fbbccd96bb9d2bf6e9ab 100644 --- a/compiler/GHC/CmmToAsm/AArch64/CodeGen.hs +++ b/compiler/GHC/CmmToAsm/AArch64/CodeGen.hs @@ -755,8 +755,8 @@ getRegister' config plat expr MO_S_Neg w -> negate code w reg MO_F_Neg w -> return $ Any (floatFormat w) (\dst -> code `snocOL` NEG (OpReg w dst) (OpReg w reg)) - MO_SF_Conv from to -> return $ Any (floatFormat to) (\dst -> code `snocOL` SCVTF (OpReg to dst) (OpReg from reg)) -- (Signed ConVerT Float) - MO_FS_Conv from to -> return $ Any (intFormat to) (\dst -> code `snocOL` FCVTZS (OpReg to dst) (OpReg from reg)) -- (float convert (-> zero) signed) + MO_SF_Round from to -> return $ Any (floatFormat to) (\dst -> code `snocOL` SCVTF (OpReg to dst) (OpReg from reg)) -- (Signed ConVerT Float) + MO_FS_Truncate from to -> return $ Any (intFormat to) (\dst -> code `snocOL` FCVTZS (OpReg to dst) (OpReg from reg)) -- (float convert (-> zero) signed) -- TODO this is very hacky -- Note, UBFM and SBFM expect source and target register to be of the same size, so we'll use @max from to@ @@ -764,6 +764,8 @@ getRegister' config plat expr MO_UU_Conv from to -> return $ Any (intFormat to) (\dst -> code `snocOL` UBFM (OpReg (max from to) dst) (OpReg (max from to) reg) (OpImm (ImmInt 0)) (toImm (min from to))) MO_SS_Conv from to -> ss_conv from to reg code MO_FF_Conv from to -> return $ Any (floatFormat to) (\dst -> code `snocOL` FCVT (OpReg to dst) (OpReg from reg)) + MO_WF_Bitcast w -> return $ Any (floatFormat w) (\dst -> code `snocOL` FMOV (OpReg w dst) (OpReg w reg)) + MO_FW_Bitcast w -> return $ Any (intFormat w) (\dst -> code `snocOL` FMOV (OpReg w dst) (OpReg w reg)) -- Conversions MO_XX_Conv _from to -> swizzleRegisterRep (intFormat to) <$> getRegister e diff --git a/compiler/GHC/CmmToAsm/AArch64/Instr.hs b/compiler/GHC/CmmToAsm/AArch64/Instr.hs index 6d4dad4f774b3a0e90a7bfee14e29b06fdc35e55..8836ab7d9f7fa05123c65a33c246ca709e0927a9 100644 --- a/compiler/GHC/CmmToAsm/AArch64/Instr.hs +++ b/compiler/GHC/CmmToAsm/AArch64/Instr.hs @@ -132,6 +132,7 @@ regUsageOfInstr platform instr = case instr of DMBISH -> usage ([], []) -- 9. Floating Point Instructions -------------------------------------------- + FMOV dst src -> usage (regOp src, regOp dst) FCVT dst src -> usage (regOp src, regOp dst) SCVTF dst src -> usage (regOp src, regOp dst) FCVTZS dst src -> usage (regOp src, regOp dst) @@ -267,6 +268,7 @@ patchRegsOfInstr instr env = case instr of DMBISH -> DMBISH -- 9. Floating Point Instructions ------------------------------------------ + FMOV o1 o2 -> FMOV (patchOp o1) (patchOp o2) FCVT o1 o2 -> FCVT (patchOp o1) (patchOp o2) SCVTF o1 o2 -> SCVTF (patchOp o1) (patchOp o2) FCVTZS o1 o2 -> FCVTZS (patchOp o1) (patchOp o2) @@ -623,6 +625,8 @@ data Instr -- 8. Synchronization Instructions ----------------------------------------- | DMBISH -- 9. Floating Point Instructions + -- move to/from general purpose <-> floating, or floating to floating + | FMOV Operand Operand -- Float ConVerT | FCVT Operand Operand -- Signed ConVerT Float @@ -694,6 +698,7 @@ instrCon i = BL{} -> "BL" BCOND{} -> "BCOND" DMBISH{} -> "DMBISH" + FMOV{} -> "FMOV" FCVT{} -> "FCVT" SCVTF{} -> "SCVTF" FCVTZS{} -> "FCVTZS" diff --git a/compiler/GHC/CmmToAsm/AArch64/Ppr.hs b/compiler/GHC/CmmToAsm/AArch64/Ppr.hs index dd8dcf574d7a5572be15d2e737fa380df1316758..8c5d08167bfc663fcad575537a7257554d8d1780 100644 --- a/compiler/GHC/CmmToAsm/AArch64/Ppr.hs +++ b/compiler/GHC/CmmToAsm/AArch64/Ppr.hs @@ -524,6 +524,7 @@ pprInstr platform instr = case instr of DMBISH -> line $ text "\tdmb ish" -- 9. Floating Point Instructions -------------------------------------------- + FMOV o1 o2 -> op2 (text "\tfmov") o1 o2 FCVT o1 o2 -> op2 (text "\tfcvt") o1 o2 SCVTF o1 o2 -> op2 (text "\tscvtf") o1 o2 FCVTZS o1 o2 -> op2 (text "\tfcvtzs") o1 o2 diff --git a/compiler/GHC/CmmToAsm/PPC/CodeGen.hs b/compiler/GHC/CmmToAsm/PPC/CodeGen.hs index b831db1e1900b050a30e554e7dd67ac90911072c..c95f62d37e86ed86f242c01aa2c1759d8682ec1d 100644 --- a/compiler/GHC/CmmToAsm/PPC/CodeGen.hs +++ b/compiler/GHC/CmmToAsm/PPC/CodeGen.hs @@ -516,8 +516,8 @@ getRegister' config platform (CmmMachOp mop [x]) -- unary MachOps MO_FF_Conv W64 W32 -> trivialUCode FF32 FRSP x MO_FF_Conv W32 W64 -> conversionNop FF64 x - MO_FS_Conv from to -> coerceFP2Int from to x - MO_SF_Conv from to -> coerceInt2FP from to x + MO_FS_Truncate from to -> coerceFP2Int from to x + MO_SF_Round from to -> coerceInt2FP from to x MO_SS_Conv from to | from >= to -> conversionNop (intFormat to) x diff --git a/compiler/GHC/CmmToAsm/Wasm/FromCmm.hs b/compiler/GHC/CmmToAsm/Wasm/FromCmm.hs index 92e96ab83bf28224a3dd89c77ae1b633f769e8dc..778c6a916613714dc26210b832b9a37750d2673b 100644 --- a/compiler/GHC/CmmToAsm/Wasm/FromCmm.hs +++ b/compiler/GHC/CmmToAsm/Wasm/FromCmm.hs @@ -620,6 +620,49 @@ lower_MO_FF_Conv lbl W64 W32 [x] = do x_instr `WasmConcat` WasmF32DemoteF64 lower_MO_FF_Conv _ _ _ _ = panic "lower_MO_FF_Conv: unreachable" + +-- | Lower a 'MO_WF_Bitcast' operation. Note that this is not a conversion, +-- rather it reinterprets the data. +lower_MO_WF_Bitcast :: + CLabel -> + Width -> + [CmmActual] -> + WasmCodeGenM w (SomeWasmExpr w) +lower_MO_WF_Bitcast lbl W32 [x] = do + WasmExpr x_instr <- lower_CmmExpr_Typed lbl TagI32 x + pure $ + SomeWasmExpr TagF32 $ + WasmExpr $ + x_instr `WasmConcat` WasmReinterpret TagI32 TagF32 +lower_MO_WF_Bitcast lbl W64 [x] = do + WasmExpr x_instr <- lower_CmmExpr_Typed lbl TagI64 x + pure $ + SomeWasmExpr TagF64 $ + WasmExpr $ + x_instr `WasmConcat` WasmReinterpret TagI64 TagF64 +lower_MO_WF_Bitcast _ _ _ = panic "lower_MO_WF_Bitcast: unreachable" + +-- | Lower a 'MO_FW_Bitcast' operation. Note that this is not a conversion, +-- rather it reinterprets the data. +lower_MO_FW_Bitcast :: + CLabel -> + Width -> + [CmmActual] -> + WasmCodeGenM w (SomeWasmExpr w) +lower_MO_FW_Bitcast lbl W32 [x] = do + WasmExpr x_instr <- lower_CmmExpr_Typed lbl TagF32 x + pure $ + SomeWasmExpr TagI32 $ + WasmExpr $ + x_instr `WasmConcat` WasmReinterpret TagF32 TagI32 +lower_MO_FW_Bitcast lbl W64 [x] = do + WasmExpr x_instr <- lower_CmmExpr_Typed lbl TagF64 x + pure $ + SomeWasmExpr TagI64 $ + WasmExpr $ + x_instr `WasmConcat` WasmReinterpret TagF64 TagI64 +lower_MO_FW_Bitcast _ _ _ = panic "lower_MO_FW_Bitcast: unreachable" + -- | Lower a 'CmmMachOp'. lower_CmmMachOp :: CLabel -> @@ -799,14 +842,14 @@ lower_CmmMachOp lbl (MO_Not w0) [x] = lower_CmmMachOp lbl (MO_Shl w0) xs = lower_MO_Shl lbl w0 xs lower_CmmMachOp lbl (MO_U_Shr w0) xs = lower_MO_U_Shr lbl w0 xs lower_CmmMachOp lbl (MO_S_Shr w0) xs = lower_MO_S_Shr lbl w0 xs -lower_CmmMachOp lbl (MO_SF_Conv w0 w1) xs = +lower_CmmMachOp lbl (MO_SF_Round w0 w1) xs = lower_MO_Un_Conv (WasmConvert Signed) lbl (cmmBits w0) (cmmFloat w1) xs -lower_CmmMachOp lbl (MO_FS_Conv w0 w1) xs = +lower_CmmMachOp lbl (MO_FS_Truncate w0 w1) xs = lower_MO_Un_Conv (WasmTruncSat Signed) lbl @@ -817,6 +860,8 @@ lower_CmmMachOp lbl (MO_SS_Conv w0 w1) xs = lower_MO_SS_Conv lbl w0 w1 xs lower_CmmMachOp lbl (MO_UU_Conv w0 w1) xs = lower_MO_UU_Conv lbl w0 w1 xs lower_CmmMachOp lbl (MO_XX_Conv w0 w1) xs = lower_MO_UU_Conv lbl w0 w1 xs lower_CmmMachOp lbl (MO_FF_Conv w0 w1) xs = lower_MO_FF_Conv lbl w0 w1 xs +lower_CmmMachOp lbl (MO_FW_Bitcast w) xs = lower_MO_FW_Bitcast lbl w xs +lower_CmmMachOp lbl (MO_WF_Bitcast w) xs = lower_MO_WF_Bitcast lbl w xs lower_CmmMachOp _ mop _ = pprPanic "lower_CmmMachOp: unreachable" $ vcat [ text "offending MachOp:" <+> pprMachOp mop ] diff --git a/compiler/GHC/CmmToAsm/X86/CodeGen.hs b/compiler/GHC/CmmToAsm/X86/CodeGen.hs index 103a5f639ea0fcda426a4a19cbb1329f101f5659..1409e9b469418e2f3bc970c81a304fb111d99d00 100644 --- a/compiler/GHC/CmmToAsm/X86/CodeGen.hs +++ b/compiler/GHC/CmmToAsm/X86/CodeGen.hs @@ -848,10 +848,8 @@ getRegister' _ is32Bit (CmmReg reg) do let fmt = cmmTypeFormat (cmmRegType reg) - format = fmt - -- platform <- ncgPlatform <$> getConfig - return (Fixed format + return (Fixed fmt (getRegisterReg platform reg) nilOL) @@ -1004,6 +1002,11 @@ getRegister' platform is32Bit (CmmMachOp mop [x]) = -- unary MachOps MO_SS_Conv rep1 rep2 | rep1 == rep2 -> conversionNop (intFormat rep1) x MO_XX_Conv rep1 rep2 | rep1 == rep2 -> conversionNop (intFormat rep1) x + MO_FW_Bitcast W32 -> bitcast FF32 II32 x + MO_WF_Bitcast W32 -> bitcast II32 FF32 x + MO_FW_Bitcast W64 -> bitcast FF64 II64 x + MO_WF_Bitcast W64 -> bitcast II64 FF64 x + -- widenings MO_UU_Conv W8 W32 -> integerExtend W8 W32 MOVZxL x MO_UU_Conv W16 W32 -> integerExtend W16 W32 MOVZxL x @@ -1045,8 +1048,8 @@ getRegister' platform is32Bit (CmmMachOp mop [x]) = -- unary MachOps MO_FF_Conv W64 W32 -> coerceFP2FP W32 x - MO_FS_Conv from to -> coerceFP2Int from to x - MO_SF_Conv from to -> coerceInt2FP from to x + MO_FS_Truncate from to -> coerceFP2Int from to x + MO_SF_Round from to -> coerceInt2FP from to x MO_V_Insert {} -> needLlvm MO_V_Extract {} -> needLlvm @@ -1084,6 +1087,12 @@ getRegister' platform is32Bit (CmmMachOp mop [x]) = -- unary MachOps instr (intFormat from) (OpReg reg) (OpReg dst) return (Any (intFormat to) code) + bitcast :: Format -> Format -> CmmExpr -> NatM Register + bitcast fmt rfmt expr = + do (src, e_code) <- getSomeReg expr + let code = \dst -> e_code `snocOL` (MOVD fmt (OpReg src) (OpReg dst)) + return (Any rfmt code) + toI8Reg :: Width -> CmmExpr -> NatM Register toI8Reg new_rep expr = do codefn <- getAnyReg expr diff --git a/compiler/GHC/CmmToAsm/X86/Instr.hs b/compiler/GHC/CmmToAsm/X86/Instr.hs index 8714fe488bdc0ff4ce4f229bfd26d10e9e372924..6bcb56f825c5a8d174da67ce8116e715b54431c1 100644 --- a/compiler/GHC/CmmToAsm/X86/Instr.hs +++ b/compiler/GHC/CmmToAsm/X86/Instr.hs @@ -202,6 +202,11 @@ data Instr -- operand is interpreted to be a 32-bit sign-extended value. -- True 64-bit operands need to be moved with @MOVABS@, which we -- currently don't use. + | MOVD Format Operand Operand -- ^ MOVD/MOVQ SSE2 instructions + -- (bitcast between a general purpose + -- register and a float register). + -- Format is input format, output format is + -- calculated in Ppr.hs | CMOV Cond Format Operand Reg | MOVZxL Format Operand Operand -- ^ The format argument is the size of operand 1 (the number of bits we keep) @@ -368,6 +373,7 @@ regUsageOfInstr :: Platform -> Instr -> RegUsage regUsageOfInstr platform instr = case instr of MOV _ src dst -> usageRW src dst + MOVD _ src dst -> usageRW src dst CMOV _ _ src dst -> mkRU (use_R src [dst]) [dst] MOVZxL _ src dst -> usageRW src dst MOVSxL _ src dst -> usageRW src dst @@ -549,6 +555,7 @@ patchRegsOfInstr :: Instr -> (Reg -> Reg) -> Instr patchRegsOfInstr instr env = case instr of MOV fmt src dst -> patch2 (MOV fmt) src dst + MOVD fmt src dst -> patch2 (MOVD fmt) src dst CMOV cc fmt src dst -> CMOV cc fmt (patchOp src) (env dst) MOVZxL fmt src dst -> patch2 (MOVZxL fmt) src dst MOVSxL fmt src dst -> patch2 (MOVSxL fmt) src dst diff --git a/compiler/GHC/CmmToAsm/X86/Ppr.hs b/compiler/GHC/CmmToAsm/X86/Ppr.hs index 9d9df691a2718a7e538a30a378e1313099e430b6..333d2693f4443e5aaadfb800250db08d8bc726d1 100644 --- a/compiler/GHC/CmmToAsm/X86/Ppr.hs +++ b/compiler/GHC/CmmToAsm/X86/Ppr.hs @@ -591,6 +591,9 @@ pprInstr platform i = case i of CMOV cc format src dst -> pprCondOpReg (text "cmov") format cc src dst + MOVD format src dst + -> pprMovdOpOp (text "mov") format src dst + MOVZxL II32 src dst -> pprFormatOpOp (text "mov") II32 src dst -- 32-to-64 bit zero extension on x86_64 is accomplished by a simple @@ -980,6 +983,29 @@ pprInstr platform i = case i of pprOperand platform format op2 ] + pprMovdOpOp :: Line doc -> Format -> Operand -> Operand -> doc + pprMovdOpOp name format op1 op2 + = let instr = case format of + -- bitcasts to/from a general purpose register to a floating point + -- register require II32 or II64. + II32 -> text "d" + II64 -> text "q" + FF32 -> text "d" + FF64 -> text "q" + _ -> panic "X86.Ppr.pprMovdOpOp: improper format for movd/movq." + out_fmt = case format of + II32 -> FF32 + II64 -> FF64 + FF32 -> II32 + FF64 -> II64 + _ -> panic "X86.Ppr.pprMovdOpOp: improper format for movd/movq." + in line $ hcat [ + char '\t' <> name <> instr <> space, + pprOperand platform format op1, + comma, + pprOperand platform out_fmt op2 + ] + pprFormatOpRegReg :: Line doc -> Format -> Operand -> Reg -> Reg -> doc pprFormatOpRegReg name format op1 op2 op3 = line $ hcat [ diff --git a/compiler/GHC/CmmToC.hs b/compiler/GHC/CmmToC.hs index 7881350f4b7ef379305f9ff79b432fa79010a591..a8e060085447cebaac242d05a5a7cf2ec4611a56 100644 --- a/compiler/GHC/CmmToC.hs +++ b/compiler/GHC/CmmToC.hs @@ -784,6 +784,28 @@ pprMachOp_for_C platform mop = case mop of -- We won't know to generate (void*) casts here, but maybe from -- context elsewhere +-- bitcasts, in the C backend these are performed with __builtin_memcpy. +-- See rts/include/stg/Prim.h + + MO_FW_Bitcast W32 -> text "hs_bitcastfloat2word" + MO_FW_Bitcast W64 -> text "hs_bitcastdouble2word64" + + MO_WF_Bitcast W32 -> text "hs_bitcastword2float" + MO_WF_Bitcast W64 -> text "hs_bitcastword642double" + + MO_FW_Bitcast w -> pprTrace "offending mop:" + (text "MO_FW_Bitcast") + (panic $ "PprC.pprMachOp_for_C: MO_FW_Bitcast" + ++ " called with improper width!" + ++ show w) + + MO_WF_Bitcast w -> pprTrace "offending mop:" + (text "MO_WF_Bitcast") + (panic $ "PprC.pprMachOp_for_C: MO_WF_Bitcast" + ++ " called with improper width!" + ++ show w) + + -- noop casts MO_UU_Conv from to | from == to -> empty MO_UU_Conv _from to -> parens (machRep_U_CType platform to) @@ -797,8 +819,8 @@ pprMachOp_for_C platform mop = case mop of MO_FF_Conv from to | from == to -> empty MO_FF_Conv _from to -> parens (machRep_F_CType to) - MO_SF_Conv _from to -> parens (machRep_F_CType to) - MO_FS_Conv _from to -> parens (machRep_S_CType platform to) + MO_SF_Round _from to -> parens (machRep_F_CType to) + MO_FS_Truncate _from to -> parens (machRep_S_CType platform to) MO_RelaxedRead _ -> pprTrace "offending mop:" (text "MO_RelaxedRead") @@ -896,7 +918,7 @@ signedOp (MO_S_Gt _) = True signedOp (MO_S_Lt _) = True signedOp (MO_S_Shr _) = True signedOp (MO_SS_Conv _ _) = True -signedOp (MO_SF_Conv _ _) = True +signedOp (MO_SF_Round _ _) = True signedOp _ = False shiftOp :: MachOp -> Maybe Width @@ -1447,7 +1469,6 @@ floatToWord32 r = CmmInt (toInteger (castFloatToWord32 (fromRational r))) W32 doubleToWord64 :: Rational -> CmmLit doubleToWord64 r = CmmInt (toInteger (castDoubleToWord64 (fromRational r))) W64 - -- --------------------------------------------------------------------------- -- Utils diff --git a/compiler/GHC/CmmToLlvm/CodeGen.hs b/compiler/GHC/CmmToLlvm/CodeGen.hs index 2b84434f7edcd2fc22f005a8b5b03560ce2e1db9..44c8788c3feb3d3d2adbb564bf071f295b810a9c 100644 --- a/compiler/GHC/CmmToLlvm/CodeGen.hs +++ b/compiler/GHC/CmmToLlvm/CodeGen.hs @@ -1431,8 +1431,8 @@ genMachOp _ op [x] = case op of let all0 = LMLitVar $ LMFloatLit (-0) (widthToLlvmFloat w) in negate (widthToLlvmFloat w) all0 LM_MO_FSub - MO_SF_Conv _ w -> fiConv (widthToLlvmFloat w) LM_Sitofp - MO_FS_Conv _ w -> fiConv (widthToLlvmInt w) LM_Fptosi + MO_SF_Round _ w -> fiConv (widthToLlvmFloat w) LM_Sitofp + MO_FS_Truncate _ w -> fiConv (widthToLlvmInt w) LM_Fptosi MO_SS_Conv from to -> sameConv from (widthToLlvmInt to) LM_Trunc LM_Sext @@ -1446,6 +1446,9 @@ genMachOp _ op [x] = case op of MO_FF_Conv from to -> sameConv from (widthToLlvmFloat to) LM_Fptrunc LM_Fpext + MO_WF_Bitcast w -> fiConv (widthToLlvmFloat w) LM_Bitcast + MO_FW_Bitcast w -> fiConv (widthToLlvmInt w) LM_Bitcast + MO_VS_Neg len w -> let ty = widthToLlvmInt w vecty = LMVector len ty @@ -1704,13 +1707,16 @@ genMachOp_slow opt op [x, y] = case op of MO_S_Neg _ -> panicOp MO_F_Neg _ -> panicOp - MO_SF_Conv _ _ -> panicOp - MO_FS_Conv _ _ -> panicOp + MO_SF_Round _ _ -> panicOp + MO_FS_Truncate _ _ -> panicOp MO_SS_Conv _ _ -> panicOp MO_UU_Conv _ _ -> panicOp MO_XX_Conv _ _ -> panicOp MO_FF_Conv _ _ -> panicOp + MO_WF_Bitcast _to -> panicOp + MO_FW_Bitcast _to -> panicOp + MO_V_Insert {} -> panicOp MO_VS_Neg {} -> panicOp diff --git a/compiler/GHC/Platform.hs b/compiler/GHC/Platform.hs index 273082cdb6af8b6843f768630977dcddaab15dee..8514e591d1807c87c66b32bf8bbe4d56a46dcf32 100644 --- a/compiler/GHC/Platform.hs +++ b/compiler/GHC/Platform.hs @@ -17,6 +17,7 @@ module GHC.Platform , ByteOrder(..) , target32Bit , isARM + , isPPC , osElfTarget , osMachOTarget , osSubsectionsViaSymbols diff --git a/compiler/GHC/StgToCmm/Prim.hs b/compiler/GHC/StgToCmm/Prim.hs index c19377de807eab3038e7968ee2a8835153f3b6de..0b5c3ef9f9f96268c39aaae078b72a9a8f0f6993 100644 --- a/compiler/GHC/StgToCmm/Prim.hs +++ b/compiler/GHC/StgToCmm/Prim.hs @@ -1543,15 +1543,24 @@ emitPrimOp cfg primop = -- Conversions - IntToDoubleOp -> \args -> opTranslate args (MO_SF_Conv (wordWidth platform) W64) - DoubleToIntOp -> \args -> opTranslate args (MO_FS_Conv W64 (wordWidth platform)) + IntToDoubleOp -> \args -> opTranslate args (MO_SF_Round (wordWidth platform) W64) + DoubleToIntOp -> \args -> opTranslate args (MO_FS_Truncate W64 (wordWidth platform)) - IntToFloatOp -> \args -> opTranslate args (MO_SF_Conv (wordWidth platform) W32) - FloatToIntOp -> \args -> opTranslate args (MO_FS_Conv W32 (wordWidth platform)) + IntToFloatOp -> \args -> opTranslate args (MO_SF_Round (wordWidth platform) W32) + FloatToIntOp -> \args -> opTranslate args (MO_FS_Truncate W32 (wordWidth platform)) FloatToDoubleOp -> \args -> opTranslate args (MO_FF_Conv W32 W64) DoubleToFloatOp -> \args -> opTranslate args (MO_FF_Conv W64 W32) + CastFloatToWord32Op -> + \args -> translateBitcasts (MO_FW_Bitcast W32) args + CastWord32ToFloatOp -> + \args -> translateBitcasts (MO_WF_Bitcast W32) args + CastDoubleToWord64Op -> + \args -> translateBitcasts (MO_FW_Bitcast W64) args + CastWord64ToDoubleOp -> + \args -> translateBitcasts (MO_WF_Bitcast W64) args + IntQuotRemOp -> \args -> opCallishHandledLater args $ if allowQuotRem then Left (MO_S_QuotRem (wordWidth platform)) @@ -1746,10 +1755,6 @@ emitPrimOp cfg primop = TraceMarkerOp -> alwaysExternal SetThreadAllocationCounter -> alwaysExternal KeepAliveOp -> alwaysExternal - CastWord32ToFloatOp -> alwaysExternal - CastWord64ToDoubleOp -> alwaysExternal - CastDoubleToWord64Op -> alwaysExternal - CastFloatToWord32Op -> alwaysExternal where profile = stgToCmmProfile cfg @@ -1834,6 +1839,14 @@ emitPrimOp cfg primop = allowInt2Mul = stgToCmmAllowIntMul2Instr cfg allowWord2Mul = stgToCmmAllowWordMul2Instr cfg + -- a bit of a hack, for certain code generaters, e.g. PPC, and i386 we + -- continue to use the cmm versions of these functions instead of inline + -- assembly. Tracked in #24841. + ppc = isPPC $ platformArch platform + i386 = target32Bit platform + translateBitcasts mop args | ppc || i386 = alwaysExternal args + | otherwise = opTranslate args mop + allowFMA = stgToCmmAllowFMAInstr cfg fmaOp :: FMASign -> Width -> [CmmActual] -> PrimopCmmEmit diff --git a/libraries/ghc-platform/src/GHC/Platform/ArchOS.hs b/libraries/ghc-platform/src/GHC/Platform/ArchOS.hs index 3286337923e5d900def2dd047dea82136ae4e400..23a046c785a2560eff8dd6de782068d07576a874 100644 --- a/libraries/ghc-platform/src/GHC/Platform/ArchOS.hs +++ b/libraries/ghc-platform/src/GHC/Platform/ArchOS.hs @@ -10,7 +10,7 @@ module GHC.Platform.ArchOS , ArmISAExt(..) , ArmABI(..) , PPC_64ABI(..) - , isARM + , isARM, isPPC , stringEncodeArch -- * Operating systems @@ -188,6 +188,11 @@ isARM (ArchARM {}) = True isARM ArchAArch64 = True isARM _ = False +isPPC :: Arch -> Bool +isPPC (ArchPPC_64 _) = True +isPPC ArchPPC = True +isPPC _ = False + -- | This predicate tells us whether the OS support Mach-O shared libraries. osMachOTarget :: OS -> Bool osMachOTarget OSDarwin = True diff --git a/rts/include/stg/Prim.h b/rts/include/stg/Prim.h index c4595cf4fd4d7852d6eb4a9cd663634372316fc2..ea8a91c23af70349f3030c72d259201cffa7a3f8 100644 --- a/rts/include/stg/Prim.h +++ b/rts/include/stg/Prim.h @@ -135,3 +135,30 @@ StgWord hs_ctz8(StgWord x); StgWord hs_ctz16(StgWord x); StgWord hs_ctz32(StgWord x); StgWord hs_ctz64(StgWord64 x); + +/* bitcasts, instead of creating a new C file we static inline these here. We + * use __builtin_memcpy instead of memcpy from string.h to avoid function + * prototype conflicts that occur in the C backend with the inclusion of + * string.h*/ +static inline StgFloat hs_bitcastword2float(StgWord32 x) { + StgFloat dest; + __builtin_memcpy(&dest, &x, sizeof(StgFloat)); + return dest; +} + +static inline StgDouble hs_bitcastword642double(StgWord64 x) { + StgDouble dest; + __builtin_memcpy(&dest, &x, sizeof(StgDouble)); + return dest; +} + +static inline StgWord32 hs_bitcastfloat2word(StgFloat x) { + StgWord32 dest; + __builtin_memcpy(&dest, &x, sizeof(StgWord32)); + return dest; +} +static inline StgWord64 hs_bitcastdouble2word64(StgDouble x) { + StgWord64 dest; + __builtin_memcpy(&dest, &x, sizeof(StgWord64)); + return dest; +}