diff --git a/compiler/GHC/CmmToAsm/X86/CodeGen.hs b/compiler/GHC/CmmToAsm/X86/CodeGen.hs
index 9b0e0b741ff03a97a07e8738ae8b244bf9b8419c..1e28a573329b0c44dfaebd16bda22c903bc8c9a2 100644
--- a/compiler/GHC/CmmToAsm/X86/CodeGen.hs
+++ b/compiler/GHC/CmmToAsm/X86/CodeGen.hs
@@ -3448,32 +3448,9 @@ genFMA3Code w signs x y z = do
   (y_reg, y_code) <- getNonClobberedReg y
   (z_reg, z_code) <- getNonClobberedReg z
   x_code <- getAnyReg x
-  y_tmp <- getNewRegNat rep
-  z_tmp <- getNewRegNat rep
   let
      fma213 = FMA3 rep signs FMA213
      code dst
-         | dst == y_reg
-         , dst == z_reg
-         = y_code `appOL`
-           unitOL (MOV rep (OpReg y_reg) (OpReg y_tmp)) `appOL`
-           z_code `appOL`
-           unitOL (MOV rep (OpReg z_reg) (OpReg z_tmp)) `appOL`
-           x_code dst `snocOL`
-           fma213 (OpReg z_tmp) y_tmp dst
-        | dst == y_reg
-        = y_code `appOL`
-          unitOL (MOV rep (OpReg y_reg) (OpReg z_tmp)) `appOL`
-          z_code `appOL`
-          x_code dst `snocOL`
-          fma213 (OpReg z_reg) y_tmp dst
-        | dst == z_reg
-        = y_code `appOL`
-          z_code `appOL`
-          unitOL (MOV rep (OpReg z_reg) (OpReg z_tmp)) `appOL`
-          x_code dst `snocOL`
-          fma213 (OpReg z_tmp) y_reg dst
-        | otherwise
         = y_code `appOL`
           z_code `appOL`
           x_code dst `snocOL`
diff --git a/compiler/GHC/CmmToAsm/X86/Instr.hs b/compiler/GHC/CmmToAsm/X86/Instr.hs
index 98570b0a45b183b4dcd3756a73aa6097090d21d5..5e43836bcc0f4faeff0eb6fad3a5302dd1a7cb47 100644
--- a/compiler/GHC/CmmToAsm/X86/Instr.hs
+++ b/compiler/GHC/CmmToAsm/X86/Instr.hs
@@ -277,7 +277,8 @@ data Instr
 
         -- | FMA3 fused multiply-add operations.
         | FMA3         Format FMASign FMAPermutation Operand Reg Reg
-          -- src1 (r/m), src2 (r), dst (r)
+          -- src3 (r/m), src2 (r), dst/src1 (r)
+          -- The is exactly reversed from how intel lists the arguments.
 
         -- use ADD, SUB, and SQRT for arithmetic.  In both cases, operands
         -- are  Operand Reg.
@@ -358,6 +359,7 @@ data Operand
         | OpImm  Imm            -- immediate value
         | OpAddr AddrMode       -- memory reference
 
+-- NB: As of 2023 we only use the FMA213 permutation.
 data FMAPermutation = FMA132 | FMA213 | FMA231
 
 -- | Returns which registers are read and written as a (read, written)
@@ -447,7 +449,7 @@ regUsageOfInstr platform instr
     PDEP   _ src mask dst -> mkRU (use_R src $ use_R mask []) [dst]
     PEXT   _ src mask dst -> mkRU (use_R src $ use_R mask []) [dst]
 
-    FMA3 _ _ _ src1 src2 dst -> usageFMA src1 src2 dst
+    FMA3 _ _ _ src3 src2 dst -> usageFMA src3 src2 dst
 
     -- note: might be a better way to do this
     PREFETCH _  _ src -> mkRU (use_R src []) []
diff --git a/testsuite/tests/primops/should_compile/T24160_FMA.hs b/testsuite/tests/primops/should_compile/T24160_FMA.hs
new file mode 100644
index 0000000000000000000000000000000000000000..632e8df24222fe52c2a01aa49102168210d0b0b8
--- /dev/null
+++ b/testsuite/tests/primops/should_compile/T24160_FMA.hs
@@ -0,0 +1,8 @@
+{-# LANGUAGE MagicHash #-}
+{-# LANGUAGE UnboxedTuples #-}
+module TwoProdFMA where
+import GHC.Exts
+
+twoProductFloat# :: Float# -> Float# -> (# Float#, Float# #)
+twoProductFloat# x y = let !r = x `timesFloat#` y
+                       in (# r, fmsubFloat# x y r #)
diff --git a/testsuite/tests/primops/should_compile/all.T b/testsuite/tests/primops/should_compile/all.T
index 9ba0fe40e8a5fa0aad11996784500292ed149911..a696259050be2511f0125ccd93a3e14802d126c0 100644
--- a/testsuite/tests/primops/should_compile/all.T
+++ b/testsuite/tests/primops/should_compile/all.T
@@ -8,3 +8,8 @@ test('UnliftedStableName', normal, compile, [''])
 test('KeepAliveWrapper', normal, compile, ['-O'])
 test('T22152', normal, compile, [''])
 test('T22152b', normal, compile, [''])
+test('T24160_FMA'
+    , [ js_skip # JS backend doesn't have an FMA implementation ]
+      # , omit_ghci # fails during compilation phase, remove after !10563
+      ]
+    , compile, ['-O'])
\ No newline at end of file