From a36f9dc94823c75fb789710bc67b92e87a630440 Mon Sep 17 00:00:00 2001
From: Sven Tennie <sven.tennie@wire.com>
Date: Fri, 30 Jun 2023 08:11:04 +0000
Subject: [PATCH] Add test for %mulmayoflo primop

The test expects a perfect implementation with no false positives.
---
 compiler/GHC/Cmm/MachOp.hs                    | 22 ++++-
 .../tests/codeGen/should_run/MulMayOflo.hs    | 26 +++++
 .../codeGen/should_run/MulMayOflo_full.cmm    | 98 +++++++++++++++++++
 testsuite/tests/codeGen/should_run/all.T      | 11 +++
 4 files changed, 156 insertions(+), 1 deletion(-)
 create mode 100644 testsuite/tests/codeGen/should_run/MulMayOflo.hs
 create mode 100644 testsuite/tests/codeGen/should_run/MulMayOflo_full.cmm

diff --git a/compiler/GHC/Cmm/MachOp.hs b/compiler/GHC/Cmm/MachOp.hs
index 6293fcbae79c..630486f30f90 100644
--- a/compiler/GHC/Cmm/MachOp.hs
+++ b/compiler/GHC/Cmm/MachOp.hs
@@ -56,6 +56,25 @@ Note that there are variety of places in the native code generator where we
 assume that the code produced for a MachOp does not introduce new blocks.
 -}
 
+-- Note [MO_S_MulMayOflo significant width]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+--
+-- There are two interpretations in the code about what a multiplication
+-- overflow exactly means:
+--
+-- 1. The result does not fit into the specified width (of type Width.)
+-- 2. The result does not fit into a register.
+--
+-- (2) has some flaws: A following MO_Mul has a width, too. So MO_S_MulMayOflo
+-- may signal no overflow, while MO_Mul truncates the result. There are
+-- architectures with several register widths and it might be hard to decide
+-- what's an overflow and what not. Both attributes can easily lead to subtle
+-- bugs.
+--
+-- (1) has the benefit that its interpretation is completely independent of the
+-- architecture. So, the mid-term plan is to migrate to this
+-- interpretation/sematics.
+
 data MachOp
   -- Integer operations (insensitive to signed/unsigned)
   = MO_Add Width
@@ -65,7 +84,8 @@ data MachOp
   | MO_Mul Width                -- low word of multiply
 
   -- Signed multiply/divide
-  | MO_S_MulMayOflo Width       -- nonzero if signed multiply overflows
+  | MO_S_MulMayOflo Width       -- nonzero if signed multiply overflows. See
+                                -- Note [MO_S_MulMayOflo significant width]
   | MO_S_Quot Width             -- signed / (same semantics as IntQuotOp)
   | MO_S_Rem  Width             -- signed % (same semantics as IntRemOp)
   | MO_S_Neg  Width             -- unary -
diff --git a/testsuite/tests/codeGen/should_run/MulMayOflo.hs b/testsuite/tests/codeGen/should_run/MulMayOflo.hs
new file mode 100644
index 000000000000..c6d84857f711
--- /dev/null
+++ b/testsuite/tests/codeGen/should_run/MulMayOflo.hs
@@ -0,0 +1,26 @@
+{-# LANGUAGE ForeignFunctionInterface #-}
+{-# LANGUAGE GHCForeignImportPrim #-}
+{-# LANGUAGE MagicHash #-}
+{-# LANGUAGE UnliftedFFITypes #-}
+
+{-
+ N.B. the contract of '%mulmayoflo' is a bit weak: "Return non-zero if there is
+ any possibility that the signed multiply of a and b might overflow. Return zero
+ only if you are absolutely sure that it won't overflow. If in doubt, return
+ non-zero." (Stg.h)
+
+This test verifies the a stronger contract: It's expected that there are no
+false positives. This requirement is e.g. met by code generation backends which
+execute the multiplication to check for overflow.
+-}
+
+module Main where
+
+import GHC.Exts
+
+-- The argument and return types are unimportant: They're only used to force
+-- evaluation, but carry no information.
+foreign import prim "runCmmzh" runCmmzh# :: Word# -> Word#
+
+main :: IO ()
+main = print . show $ W# (runCmmzh# 42##)
diff --git a/testsuite/tests/codeGen/should_run/MulMayOflo_full.cmm b/testsuite/tests/codeGen/should_run/MulMayOflo_full.cmm
new file mode 100644
index 000000000000..e0b8ac184204
--- /dev/null
+++ b/testsuite/tests/codeGen/should_run/MulMayOflo_full.cmm
@@ -0,0 +1,98 @@
+// Suppress empty ASSERT() optimization
+#define USE_ASSERTS_ALL_WAYS 1
+
+#include "Cmm.h"
+#include "MachDeps.h"
+
+runCmmzh() {
+// BEWARE: Cmm isn't really type checked. I.e. you may construct
+// 256::I8, which is obviously wrong and let's to strange behaviour.
+
+  // --- I8
+  ASSERT(%mulmayoflo(64::I8, 2::I8) > 0::I8);
+  ASSERT(%mulmayoflo(2::I8, 64::I8) > 0::I8);
+  ASSERT(%mulmayoflo(-65::I8, 2::I8) > 0::I8);
+  ASSERT(%mulmayoflo(2::I8, -65::I8) > 0::I8);
+
+  // --- I16
+  ASSERT(%mulmayoflo(16384::I16, 2::I16) > 0::I16);
+  ASSERT(%mulmayoflo(2::I16, 16384::I16) > 0::I16);
+  ASSERT(%mulmayoflo(-16385::I16, 2::I16) > 0::I16);
+  ASSERT(%mulmayoflo(2::I16, -16385::I16) > 0::I16);
+
+
+  // -- I32
+  ASSERT(%mulmayoflo(2::I32, 1073741824::I32) > 0::I32);
+  ASSERT(%mulmayoflo(1073741824::I32, 2::I32) > 0::I32);
+  ASSERT(%mulmayoflo(2::I32, -1073741825::I32) > 0::I32);
+  ASSERT(%mulmayoflo(-1073741825::I32, 2::I32) > 0::I32);
+
+#if WORD_SIZE_IN_BITS >= 64
+  // -- I64
+  ASSERT(%mulmayoflo(4611686018427387904::I64, 2::I64) > 0::I64);
+  ASSERT(%mulmayoflo(2::I64, 4611686018427387904::I64) > 0::I64);
+  ASSERT(%mulmayoflo(-4611686018427387905::I64, 2::I64) > 0::I64);
+  ASSERT(%mulmayoflo(2::I64, -4611686018427387905::I64) > 0::I64);
+#endif
+
+  // --- I8
+  ASSERT(%mulmayoflo(1::I8, 1::I8) == 0::I8);
+  ASSERT(%mulmayoflo(0::I8, 0::I8) == 0::I8);
+  ASSERT(%mulmayoflo(-1::I8, 1::I8) == 0::I8);
+  ASSERT(%mulmayoflo(1::I8, -1::I8) == 0::I8);
+  ASSERT(%mulmayoflo(127::I8, 1::I8) == 0::I8);
+  ASSERT(%mulmayoflo(1::I8, 127::I8) == 0::I8);
+  ASSERT(%mulmayoflo(63::I8, 2::I8) == 0::I8);
+  ASSERT(%mulmayoflo(2::I8, 63::I8) == 0::I8);
+  ASSERT(%mulmayoflo(127::I8, -1::I8) == 0::I8);
+  ASSERT(%mulmayoflo(-1::I8, 127::I8) == 0::I8);
+  ASSERT(%mulmayoflo(-128::I8, 1::I8) == 0::I8);
+  ASSERT(%mulmayoflo(-64::I8, 2::I8) == 0::I8);
+  ASSERT(%mulmayoflo(2::I8, -64::I8) == 0::I8);
+
+  // --- I16
+  ASSERT(%mulmayoflo(1::I16, 1::I16) == 0::I16);
+  ASSERT(%mulmayoflo(0::I16, 0::I16) == 0::I16);
+  ASSERT(%mulmayoflo(-1::I16, 1::I16) == 0::I16);
+  ASSERT(%mulmayoflo(1::I16, -1::I16) == 0::I16);
+  ASSERT(%mulmayoflo(32767::I16, 1::I16) == 0::I16);
+  ASSERT(%mulmayoflo(1::I16,32767 ::I16) == 0::I16);
+  ASSERT(%mulmayoflo(16383::I16, 2::I16) == 0::I16);
+  ASSERT(%mulmayoflo(2::I16, 16383::I16) == 0::I16);
+  ASSERT(%mulmayoflo(-16384::I16, 2::I16) == 0::I16);
+  ASSERT(%mulmayoflo(2::I16, -16384::I16) == 0::I16);
+  ASSERT(%mulmayoflo(-32768::I16, 1::I16) == 0::I16);
+  ASSERT(%mulmayoflo(1::I16, -32768::I16) == 0::I16);
+
+  // -- I32
+  ASSERT(%mulmayoflo(1::I32, 1::I32) == 0::I32);
+  ASSERT(%mulmayoflo(0::I32, 0::I32) == 0::I32);
+  ASSERT(%mulmayoflo(-1::I32, 1::I32) == 0::I32);
+  ASSERT(%mulmayoflo(1::I32, -1::I32) == 0::I32);
+  ASSERT(%mulmayoflo(2147483647::I32, 1::I32) == 0::I32);
+  ASSERT(%mulmayoflo(1::I32, 2147483647::I32) == 0::I32);
+  ASSERT(%mulmayoflo(-2147483648::I32, 1::I32) == 0::I32);
+  ASSERT(%mulmayoflo(1::I32, -2147483648::I32) == 0::I32);
+  ASSERT(%mulmayoflo(2::I32, 1073741823::I32) == 0::I32);
+  ASSERT(%mulmayoflo(1073741823::I32, 2::I32) == 0::I32);
+  ASSERT(%mulmayoflo(2::I32, -1073741824::I32) == 0::I32);
+  ASSERT(%mulmayoflo(-1073741824::I32, 2::I32) == 0::I32);
+
+#if WORD_SIZE_IN_BITS >= 64
+  // -- I64
+  ASSERT(%mulmayoflo(1::I64, 1::I64) == 0::I64);
+  ASSERT(%mulmayoflo(0::I64, 0::I64) == 0::I64);
+  ASSERT(%mulmayoflo(-1::I64, 1::I64) == 0::I64);
+  ASSERT(%mulmayoflo(1::I64, -1::I64) == 0::I64);
+  ASSERT(%mulmayoflo(9223372036854775807::I64, 1::I64) == 0::I64);
+  ASSERT(%mulmayoflo(1::I64, 9223372036854775807::I64) == 0::I64);
+  ASSERT(%mulmayoflo(-9223372036854775808::I64, 1::I64) == 0::I64);
+  ASSERT(%mulmayoflo(1::I64, -9223372036854775808::I64) == 0::I64);
+  ASSERT(%mulmayoflo(4611686018427387903::I64, 2::I64) == 0::I64);
+  ASSERT(%mulmayoflo(2::I64, 4611686018427387903::I64) == 0::I64);
+  ASSERT(%mulmayoflo(-4611686018427387904::I64, 2::I64) == 0::I64);
+  ASSERT(%mulmayoflo(2::I64, -4611686018427387904::I64) == 0::I64);
+#endif
+
+  return(0);
+}
diff --git a/testsuite/tests/codeGen/should_run/all.T b/testsuite/tests/codeGen/should_run/all.T
index 7ea4b6ce8444..481dee16b3eb 100644
--- a/testsuite/tests/codeGen/should_run/all.T
+++ b/testsuite/tests/codeGen/should_run/all.T
@@ -226,3 +226,14 @@ test('T22296',[only_ways(llvm_ways)
 test('T22798', normal, compile_and_run, ['-fregs-graph'])
 test('CheckBoundsOK', normal, compile_and_run, ['-fcheck-prim-bounds'])
 test('OrigThunkInfo', normal, compile_and_run, ['-forig-thunk-info'])
+
+# TODO: Enable more architectures here. N.B. some code generation backends are
+# not implemeted correctly (according to
+# Note [MO_S_MulMayOflo significant width]) and may require fixing/adjustment.
+test('MulMayOflo_full',
+     [ extra_files(['MulMayOflo.hs']),
+       when(unregisterised(), skip),
+       unless(arch('x86_64') or arch('i386'), skip),
+        ignore_stdout],
+     multi_compile_and_run,
+     ['MulMayOflo', [('MulMayOflo_full.cmm', '')], ''])
-- 
GitLab