From 20956e5784fe43781d156dd7ab02f0bff4ab41fb Mon Sep 17 00:00:00 2001
From: Sylvain Henry <sylvain@haskus.fr>
Date: Wed, 20 Oct 2021 11:39:16 +0200
Subject: [PATCH] Remove target dependent CPP for Word64/Int64 (#11470)

Primops types were dependent on the target word-size at *compiler*
compilation time. It's an issue for multi-target as GHC may not have the
correct primops types for the target.

This patch fixes some primops types: if they take or return fixed 64-bit
values they now always use `Int64#/Word64#`, even on 64-bit
architectures (where they used `Int#/Word#` before). Users of these
primops may now need to convert from Int64#/Word64# to Int#/Word# (a
no-op at runtime).

This is a stripped down version of !3658 which goes the all way of
changing the underlying primitive types of Word64/Int64. This is left
for future work.

T12545 allocations increase ~4% on some CI platforms and decrease ~3% on
AArch64.

Metric Increase:
    T12545

Metric Decrease:
    T12545
---
 .gitignore                                    |   1 -
 compiler/GHC/Builtin/bytearray-ops.txt.pp     |  24 ++--
 compiler/GHC/Builtin/primops.txt.pp           | 116 ++++++------------
 compiler/ghc.cabal.in                         |   5 -
 configure.ac                                  |   1 -
 libraries/base/GHC/Conc/Sync.hs               |   6 +
 libraries/base/GHC/Int.hs                     |  10 +-
 libraries/base/GHC/Storable.hs                |  17 +++
 libraries/base/GHC/Word.hs                    |  22 +---
 libraries/ghc-bignum/src/GHC/Num/BigNat.hs    |  10 ++
 libraries/ghc-bignum/src/GHC/Num/Integer.hs   |  24 ++--
 testsuite/tests/codeGen/should_run/T9340.hs   |   8 ++
 .../tests/codeGen/should_run/cgrun072.hs      |   4 +
 .../tests/codeGen/should_run/cgrun075.hs      |   4 +
 .../tests/codeGen/should_run/cgrun076.hs      |   4 +
 .../concurrent/should_run/AtomicPrimops.hs    |  17 +++
 testsuite/tests/primops/should_run/T4442.hs   |  16 +++
 17 files changed, 154 insertions(+), 135 deletions(-)

diff --git a/.gitignore b/.gitignore
index 46fb24d4a995..cbf1429c23f0 100644
--- a/.gitignore
+++ b/.gitignore
@@ -106,7 +106,6 @@ _darcs/
 /compiler/Bytecodes.h
 /compiler/ClosureTypes.h
 /compiler/FunTypes.h
-/compiler/MachDeps.h
 /compiler/MachRegs.h
 /compiler/ghc-llvm-version.h
 /compiler/ghc.cabal
diff --git a/compiler/GHC/Builtin/bytearray-ops.txt.pp b/compiler/GHC/Builtin/bytearray-ops.txt.pp
index 6ed9028c6bd5..01652d721657 100644
--- a/compiler/GHC/Builtin/bytearray-ops.txt.pp
+++ b/compiler/GHC/Builtin/bytearray-ops.txt.pp
@@ -70,7 +70,7 @@ primop IndexByteArrayOp_Int32 "indexInt32Array#" GenPrimOp
    with can_fail = True
 
 primop IndexByteArrayOp_Int64 "indexInt64Array#" GenPrimOp
-   ByteArray# -> Int# -> INT64
+   ByteArray# -> Int# -> Int64#
    {Read a 64-bit signed integer; offset in 8-byte words.}
    with can_fail = True
 
@@ -90,7 +90,7 @@ primop IndexByteArrayOp_Word32 "indexWord32Array#" GenPrimOp
    with can_fail = True
 
 primop IndexByteArrayOp_Word64 "indexWord64Array#" GenPrimOp
-   ByteArray# -> Int# -> WORD64
+   ByteArray# -> Int# -> Word64#
    {Read a 64-bit unsigned integer; offset in 8-byte words.}
    with can_fail = True
 
@@ -150,7 +150,7 @@ primop IndexByteArrayOp_Word8AsInt32 "indexWord8ArrayAsInt32#" GenPrimOp
    with can_fail = True
 
 primop IndexByteArrayOp_Word8AsInt64 "indexWord8ArrayAsInt64#" GenPrimOp
-   ByteArray# -> Int# -> INT64
+   ByteArray# -> Int# -> Int64#
    {Read a 64-bit signed integer; offset in bytes.}
    with can_fail = True
 
@@ -165,7 +165,7 @@ primop IndexByteArrayOp_Word8AsWord32 "indexWord8ArrayAsWord32#" GenPrimOp
    with can_fail = True
 
 primop IndexByteArrayOp_Word8AsWord64 "indexWord8ArrayAsWord64#" GenPrimOp
-   ByteArray# -> Int# -> WORD64
+   ByteArray# -> Int# -> Word64#
    {Read a 64-bit unsigned integer; offset in bytes.}
    with can_fail = True
 
@@ -241,7 +241,7 @@ primop ReadByteArrayOp_Int32 "readInt32Array#" GenPrimOp
         can_fail = True
 
 primop ReadByteArrayOp_Int64 "readInt64Array#" GenPrimOp
-   MutableByteArray# s -> Int# -> State# s -> (# State# s, INT64 #)
+   MutableByteArray# s -> Int# -> State# s -> (# State# s, Int64# #)
    {Read a 64-bit signed integer; offset in 8-byte words.}
    with has_side_effects = True
         can_fail = True
@@ -265,7 +265,7 @@ primop ReadByteArrayOp_Word32 "readWord32Array#" GenPrimOp
         can_fail = True
 
 primop ReadByteArrayOp_Word64 "readWord64Array#" GenPrimOp
-   MutableByteArray# s -> Int# -> State# s -> (# State# s, WORD64 #)
+   MutableByteArray# s -> Int# -> State# s -> (# State# s, Word64# #)
    {Read a 64-bit unsigned integer; offset in 8-byte words.}
    with has_side_effects = True
         can_fail = True
@@ -336,7 +336,7 @@ primop ReadByteArrayOp_Word8AsInt32 "readWord8ArrayAsInt32#" GenPrimOp
         can_fail = True
 
 primop ReadByteArrayOp_Word8AsInt64 "readWord8ArrayAsInt64#" GenPrimOp
-   MutableByteArray# s -> Int# -> State# s -> (# State# s, INT64 #)
+   MutableByteArray# s -> Int# -> State# s -> (# State# s, Int64# #)
    {Read a 64-bit signed integer; offset in bytes.}
    with has_side_effects = True
         can_fail = True
@@ -354,7 +354,7 @@ primop ReadByteArrayOp_Word8AsWord32 "readWord8ArrayAsWord32#" GenPrimOp
         can_fail = True
 
 primop ReadByteArrayOp_Word8AsWord64 "readWord8ArrayAsWord64#" GenPrimOp
-   MutableByteArray# s -> Int# -> State# s -> (# State# s, WORD64 #)
+   MutableByteArray# s -> Int# -> State# s -> (# State# s, Word64# #)
    {Read a 64-bit unsigned integer; offset in bytes.}
    with has_side_effects = True
         can_fail = True
@@ -431,7 +431,7 @@ primop WriteByteArrayOp_Int32 "writeInt32Array#" GenPrimOp
         can_fail = True
 
 primop WriteByteArrayOp_Int64 "writeInt64Array#" GenPrimOp
-   MutableByteArray# s -> Int# -> INT64 -> State# s -> State# s
+   MutableByteArray# s -> Int# -> Int64# -> State# s -> State# s
    {Write a 64-bit signed integer; offset in 8-byte words.}
    with has_side_effects = True
         can_fail = True
@@ -455,7 +455,7 @@ primop WriteByteArrayOp_Word32 "writeWord32Array#" GenPrimOp
         can_fail = True
 
 primop WriteByteArrayOp_Word64 "writeWord64Array#" GenPrimOp
-   MutableByteArray# s -> Int# -> WORD64 -> State# s -> State# s
+   MutableByteArray# s -> Int# -> Word64# -> State# s -> State# s
    {Write a 64-bit unsigned integer; offset in 8-byte words.}
    with has_side_effects = True
         can_fail = True
@@ -526,7 +526,7 @@ primop WriteByteArrayOp_Word8AsInt32 "writeWord8ArrayAsInt32#" GenPrimOp
         can_fail = True
 
 primop WriteByteArrayOp_Word8AsInt64 "writeWord8ArrayAsInt64#" GenPrimOp
-   MutableByteArray# s -> Int# -> INT64 -> State# s -> State# s
+   MutableByteArray# s -> Int# -> Int64# -> State# s -> State# s
    {Write a 64-bit signed integer; offset in bytes.}
    with has_side_effects = True
         can_fail = True
@@ -544,7 +544,7 @@ primop WriteByteArrayOp_Word8AsWord32 "writeWord8ArrayAsWord32#" GenPrimOp
         can_fail = True
 
 primop WriteByteArrayOp_Word8AsWord64 "writeWord8ArrayAsWord64#" GenPrimOp
-   MutableByteArray# s -> Int# -> WORD64 -> State# s -> State# s
+   MutableByteArray# s -> Int# -> Word64# -> State# s -> State# s
    {Write a 64-bit unsigned integer; offset in bytes.}
    with has_side_effects = True
         can_fail = True
diff --git a/compiler/GHC/Builtin/primops.txt.pp b/compiler/GHC/Builtin/primops.txt.pp
index 2aa1eefb4b4e..b2a45ad79f6d 100644
--- a/compiler/GHC/Builtin/primops.txt.pp
+++ b/compiler/GHC/Builtin/primops.txt.pp
@@ -132,9 +132,8 @@
 -- 3-tuple in the list of 3-tuples. That is, the vector attribute allows us to
 -- define a family of types or primops. Vector support also adds three new
 -- keywords: VECTOR, SCALAR, and VECTUPLE. These keywords are expanded to types
--- derived from the 3-tuple. For the 3-tuple <Int64,INT64,2>, VECTOR expands to
--- Int64X2#, SCALAR expands to INT64, and VECTUPLE expands to (# INT64, INT64
--- #).
+-- derived from the 3-tuple. For the 3-tuple <Int64#,Int64#,2>, VECTOR expands to
+-- Int64X2#, SCALAR expands to Int64#, and VECTUPLE expands to (# Int64#, Int64# #).
 
 defaults
    has_side_effects = False
@@ -222,8 +221,6 @@ defaults
 --      This means one shouldn't write a type involving both `a` and `o`,
 --      nor `b` and `p`, nor `o` and `v`, etc.
 
-#include "MachDeps.h"
-
 section "The word size story."
         {Haskell98 specifies that signed integers (type {\tt Int})
          must contain at least 30 bits. GHC always implements {\tt
@@ -231,12 +228,7 @@ section "The word size story."
          the {\tt MachDeps.h} constant {\tt WORD\_SIZE\_IN\_BITS}.
          This is normally set based on the {\tt config.h} parameter
          {\tt SIZEOF\_HSWORD}, i.e., 32 bits on 32-bit machines, 64
-         bits on 64-bit machines.  However, it can also be explicitly
-         set to a smaller number than 64, e.g., 62 bits, to allow the
-         possibility of using tag bits. Currently GHC itself has only
-         32-bit and 64-bit variants, but 61, 62, or 63-bit code can be
-         exported as an external core file for use in other back ends.
-         30 and 31-bit code is no longer supported.
+         bits on 64-bit machines.
 
          GHC also implements a primitive unsigned integer type {\tt
          Word\#} which always has the same number of bits as {\tt
@@ -245,33 +237,7 @@ section "The word size story."
          In addition, GHC supports families of explicit-sized integers
          and words at 8, 16, 32, and 64 bits, with the usual
          arithmetic operations, comparisons, and a range of
-         conversions.  The 8-bit and 16-bit sizes are always
-         represented as {\tt Int\#} and {\tt Word\#}, and the
-         operations implemented in terms of the primops on these
-         types, with suitable range restrictions on the results (using
-         the {\tt narrow$n$Int\#} and {\tt narrow$n$Word\#} families of
-         primops.  The 64-bit sizes are represented using {\tt Int\#}
-         and {\tt Word\#} when {\tt WORD\_SIZE\_IN\_BITS} $\geq$ 64;
-         otherwise, these are represented using distinct primitive
-         types {\tt Int64\#} and {\tt Word64\#}. These (when needed)
-         have a complete set of corresponding operations; however,
-         nearly all of these are implemented as external C functions
-         rather than as primops.  All of these details are hidden
-         under the {\tt PrelInt} and {\tt PrelWord} modules, which use
-         {\tt \#if}-defs to invoke the appropriate types and operators.
-
-         Word size also matters for the families of primops for
-         indexing/reading/writing fixed-size quantities at offsets
-         from an array base, address, or foreign pointer.  Here, a
-         slightly different approach is taken.  The names of these
-         primops are fixed, but their {\it types} vary according to
-         the value of {\tt WORD\_SIZE\_IN\_BITS}. For example, if word
-         size is at least 32 bits then an operator like
-         \texttt{indexInt32Array\#} has type {\tt ByteArray\# -> Int\#
-         -> Int\#}; otherwise it has type {\tt ByteArray\# -> Int\# ->
-         Int32\#}.  This approach confines the necessary {\tt
-         \#if}-defs to this file; no conditional compilation is needed
-         in the files that expose these primops.
+         conversions.
 
          Finally, there are strongly deprecated primops for coercing
          between {\tt Addr\#}, the primitive type of machine
@@ -280,16 +246,6 @@ section "The word size story."
          are completely bogus when tag bits are used in {\tt Int\#},
          so are not available in this case.  }
 
--- Define synonyms for indexing ops.
-
-#if WORD_SIZE_IN_BITS < 64
-#define INT64 Int64#
-#define WORD64 Word64#
-#else
-#define INT64 Int#
-#define WORD64 Word#
-#endif
-
 ------------------------------------------------------------------------
 section "Char#"
         {Operations on 31-bit characters.}
@@ -981,7 +937,7 @@ primop   PopCnt16Op   "popCnt16#"   GenPrimOp   Word# -> Word#
     {Count the number of set bits in the lower 16 bits of a word.}
 primop   PopCnt32Op   "popCnt32#"   GenPrimOp   Word# -> Word#
     {Count the number of set bits in the lower 32 bits of a word.}
-primop   PopCnt64Op   "popCnt64#"   GenPrimOp   WORD64 -> Word#
+primop   PopCnt64Op   "popCnt64#"   GenPrimOp   Word64# -> Word#
     {Count the number of set bits in a 64-bit word.}
 primop   PopCntOp   "popCnt#"   GenPrimOp   Word# -> Word#
     {Count the number of set bits in a word.}
@@ -992,7 +948,7 @@ primop   Pdep16Op   "pdep16#"   GenPrimOp   Word# -> Word# -> Word#
     {Deposit bits to lower 16 bits of a word at locations specified by a mask.}
 primop   Pdep32Op   "pdep32#"   GenPrimOp   Word# -> Word# -> Word#
     {Deposit bits to lower 32 bits of a word at locations specified by a mask.}
-primop   Pdep64Op   "pdep64#"   GenPrimOp   WORD64 -> WORD64 -> WORD64
+primop   Pdep64Op   "pdep64#"   GenPrimOp   Word64# -> Word64# -> Word64#
     {Deposit bits to a word at locations specified by a mask.}
 primop   PdepOp   "pdep#"   GenPrimOp   Word# -> Word# -> Word#
     {Deposit bits to a word at locations specified by a mask.}
@@ -1003,7 +959,7 @@ primop   Pext16Op   "pext16#"   GenPrimOp   Word# -> Word# -> Word#
     {Extract bits from lower 16 bits of a word at locations specified by a mask.}
 primop   Pext32Op   "pext32#"   GenPrimOp   Word# -> Word# -> Word#
     {Extract bits from lower 32 bits of a word at locations specified by a mask.}
-primop   Pext64Op   "pext64#"   GenPrimOp   WORD64 -> WORD64 -> WORD64
+primop   Pext64Op   "pext64#"   GenPrimOp   Word64# -> Word64# -> Word64#
     {Extract bits from a word at locations specified by a mask.}
 primop   PextOp   "pext#"   GenPrimOp   Word# -> Word# -> Word#
     {Extract bits from a word at locations specified by a mask.}
@@ -1014,7 +970,7 @@ primop   Clz16Op   "clz16#" GenPrimOp   Word# -> Word#
     {Count leading zeros in the lower 16 bits of a word.}
 primop   Clz32Op   "clz32#" GenPrimOp   Word# -> Word#
     {Count leading zeros in the lower 32 bits of a word.}
-primop   Clz64Op   "clz64#" GenPrimOp WORD64 -> Word#
+primop   Clz64Op   "clz64#" GenPrimOp Word64# -> Word#
     {Count leading zeros in a 64-bit word.}
 primop   ClzOp     "clz#"   GenPrimOp   Word# -> Word#
     {Count leading zeros in a word.}
@@ -1025,7 +981,7 @@ primop   Ctz16Op   "ctz16#" GenPrimOp   Word# -> Word#
     {Count trailing zeros in the lower 16 bits of a word.}
 primop   Ctz32Op   "ctz32#" GenPrimOp   Word# -> Word#
     {Count trailing zeros in the lower 32 bits of a word.}
-primop   Ctz64Op   "ctz64#" GenPrimOp WORD64 -> Word#
+primop   Ctz64Op   "ctz64#" GenPrimOp Word64# -> Word#
     {Count trailing zeros in a 64-bit word.}
 primop   CtzOp     "ctz#"   GenPrimOp   Word# -> Word#
     {Count trailing zeros in a word.}
@@ -1034,7 +990,7 @@ primop   BSwap16Op   "byteSwap16#"   GenPrimOp   Word# -> Word#
     {Swap bytes in the lower 16 bits of a word. The higher bytes are undefined. }
 primop   BSwap32Op   "byteSwap32#"   GenPrimOp   Word# -> Word#
     {Swap bytes in the lower 32 bits of a word. The higher bytes are undefined. }
-primop   BSwap64Op   "byteSwap64#"   GenPrimOp   WORD64 -> WORD64
+primop   BSwap64Op   "byteSwap64#"   GenPrimOp   Word64# -> Word64#
     {Swap bytes in a 64 bits of a word.}
 primop   BSwapOp     "byteSwap#"     GenPrimOp   Word# -> Word#
     {Swap bytes in a word.}
@@ -1045,7 +1001,7 @@ primop   BRev16Op   "bitReverse16#"   GenPrimOp   Word# -> Word#
     {Reverse the order of the bits in a 16-bit word.}
 primop   BRev32Op   "bitReverse32#"   GenPrimOp   Word# -> Word#
     {Reverse the order of the bits in a 32-bit word.}
-primop   BRev64Op   "bitReverse64#"   GenPrimOp   WORD64 -> WORD64
+primop   BRev64Op   "bitReverse64#"   GenPrimOp   Word64# -> Word64#
     {Reverse the order of the bits in a 64-bit word.}
 primop   BRevOp     "bitReverse#"     GenPrimOp   Word# -> Word#
     {Reverse the order of the bits in a word.}
@@ -1224,7 +1180,7 @@ primop   DoubleDecode_2IntOp   "decodeDouble_2Int#" GenPrimOp
    with out_of_line = True
 
 primop   DoubleDecode_Int64Op   "decodeDouble_Int64#" GenPrimOp
-   Double# -> (# INT64, Int# #)
+   Double# -> (# Int64#, Int# #)
    {Decode {\tt Double\#} into mantissa and base-2 exponent.}
    with out_of_line = True
 
@@ -1957,7 +1913,7 @@ primop CasByteArrayOp_Int32 "casInt32Array#" GenPrimOp
         can_fail = True
 
 primop CasByteArrayOp_Int64 "casInt64Array#" GenPrimOp
-   MutableByteArray# s -> Int# -> INT64 -> INT64 -> State# s -> (# State# s, INT64 #)
+   MutableByteArray# s -> Int# -> Int64# -> Int64# -> State# s -> (# State# s, Int64# #)
    {Given an array, an offset in 64 bit units, the expected old value, and
     the new value, perform an atomic compare and swap i.e. write the new
     value if the current value matches the provided old value. Returns
@@ -2203,7 +2159,7 @@ primop IndexOffAddrOp_Int32 "indexInt32OffAddr#" GenPrimOp
    with can_fail = True
 
 primop IndexOffAddrOp_Int64 "indexInt64OffAddr#" GenPrimOp
-   Addr# -> Int# -> INT64
+   Addr# -> Int# -> Int64#
    with can_fail = True
 
 primop IndexOffAddrOp_Word8 "indexWord8OffAddr#" GenPrimOp
@@ -2219,7 +2175,7 @@ primop IndexOffAddrOp_Word32 "indexWord32OffAddr#" GenPrimOp
    with can_fail = True
 
 primop IndexOffAddrOp_Word64 "indexWord64OffAddr#" GenPrimOp
-   Addr# -> Int# -> WORD64
+   Addr# -> Int# -> Word64#
    with can_fail = True
 
 primop ReadOffAddrOp_Char "readCharOffAddr#" GenPrimOp
@@ -2280,7 +2236,7 @@ primop ReadOffAddrOp_Int32 "readInt32OffAddr#" GenPrimOp
         can_fail         = True
 
 primop ReadOffAddrOp_Int64 "readInt64OffAddr#" GenPrimOp
-   Addr# -> Int# -> State# s -> (# State# s, INT64 #)
+   Addr# -> Int# -> State# s -> (# State# s, Int64# #)
    with has_side_effects = True
         can_fail         = True
 
@@ -2300,7 +2256,7 @@ primop ReadOffAddrOp_Word32 "readWord32OffAddr#" GenPrimOp
         can_fail         = True
 
 primop ReadOffAddrOp_Word64 "readWord64OffAddr#" GenPrimOp
-   Addr# -> Int# -> State# s -> (# State# s, WORD64 #)
+   Addr# -> Int# -> State# s -> (# State# s, Word64# #)
    with has_side_effects = True
         can_fail         = True
 
@@ -2360,7 +2316,7 @@ primop  WriteOffAddrOp_Int32 "writeInt32OffAddr#" GenPrimOp
         can_fail         = True
 
 primop  WriteOffAddrOp_Int64 "writeInt64OffAddr#" GenPrimOp
-   Addr# -> Int# -> INT64 -> State# s -> State# s
+   Addr# -> Int# -> Int64# -> State# s -> State# s
    with has_side_effects = True
         can_fail         = True
 
@@ -2380,7 +2336,7 @@ primop  WriteOffAddrOp_Word32 "writeWord32OffAddr#" GenPrimOp
         can_fail         = True
 
 primop  WriteOffAddrOp_Word64 "writeWord64OffAddr#" GenPrimOp
-   Addr# -> Int# -> WORD64 -> State# s -> State# s
+   Addr# -> Int# -> Word64# -> State# s -> State# s
    with has_side_effects = True
         can_fail         = True
 
@@ -2469,7 +2425,7 @@ primop  CasAddrOp_Word32 "atomicCasWord32Addr#" GenPrimOp
         can_fail         = True
 
 primop  CasAddrOp_Word64 "atomicCasWord64Addr#" GenPrimOp
-   Addr# -> WORD64 -> WORD64 -> State# s -> (# State# s, WORD64 #)
+   Addr# -> Word64# -> Word64# -> State# s -> (# State# s, Word64# #)
    { Compare and swap on a 64 bit-sized and aligned memory location.
 
      Use as: \s -> atomicCasWordAddr64# location expected desired s
@@ -3641,7 +3597,7 @@ primop  TraceMarkerOp "traceMarker#" GenPrimOp
    out_of_line      = True
 
 primop  SetThreadAllocationCounter "setThreadAllocationCounter#" GenPrimOp
-   INT64 -> State# RealWorld -> State# RealWorld
+   Int64# -> State# RealWorld -> State# RealWorld
    { Sets the allocation counter for the current thread to the given value. }
    with
    has_side_effects = True
@@ -3677,20 +3633,20 @@ section "SIMD Vectors"
 ------------------------------------------------------------------------
 
 #define ALL_VECTOR_TYPES \
-  [<Int8,Int8#,16>,<Int16,Int16#,8>,<Int32,Int32#,4>,<Int64,INT64,2> \
-  ,<Int8,Int8#,32>,<Int16,Int16#,16>,<Int32,Int32#,8>,<Int64,INT64,4> \
-  ,<Int8,Int8#,64>,<Int16,Int16#,32>,<Int32,Int32#,16>,<Int64,INT64,8> \
-  ,<Word8,Word#,16>,<Word16,Word#,8>,<Word32,Word32#,4>,<Word64,WORD64,2> \
-  ,<Word8,Word#,32>,<Word16,Word#,16>,<Word32,Word32#,8>,<Word64,WORD64,4> \
-  ,<Word8,Word#,64>,<Word16,Word#,32>,<Word32,Word32#,16>,<Word64,WORD64,8> \
+  [<Int8,Int8#,16>,<Int16,Int16#,8>,<Int32,Int32#,4>,<Int64,Int64#,2> \
+  ,<Int8,Int8#,32>,<Int16,Int16#,16>,<Int32,Int32#,8>,<Int64,Int64#,4> \
+  ,<Int8,Int8#,64>,<Int16,Int16#,32>,<Int32,Int32#,16>,<Int64,Int64#,8> \
+  ,<Word8,Word#,16>,<Word16,Word#,8>,<Word32,Word32#,4>,<Word64,Word64#,2> \
+  ,<Word8,Word#,32>,<Word16,Word#,16>,<Word32,Word32#,8>,<Word64,Word64#,4> \
+  ,<Word8,Word#,64>,<Word16,Word#,32>,<Word32,Word32#,16>,<Word64,Word64#,8> \
   ,<Float,Float#,4>,<Double,Double#,2> \
   ,<Float,Float#,8>,<Double,Double#,4> \
   ,<Float,Float#,16>,<Double,Double#,8>]
 
 #define SIGNED_VECTOR_TYPES \
-  [<Int8,Int8#,16>,<Int16,Int16#,8>,<Int32,Int32#,4>,<Int64,INT64,2> \
-  ,<Int8,Int8#,32>,<Int16,Int16#,16>,<Int32,Int32#,8>,<Int64,INT64,4> \
-  ,<Int8,Int8#,64>,<Int16,Int16#,32>,<Int32,Int32#,16>,<Int64,INT64,8> \
+  [<Int8,Int8#,16>,<Int16,Int16#,8>,<Int32,Int32#,4>,<Int64,Int64#,2> \
+  ,<Int8,Int8#,32>,<Int16,Int16#,16>,<Int32,Int32#,8>,<Int64,Int64#,4> \
+  ,<Int8,Int8#,64>,<Int16,Int16#,32>,<Int32,Int32#,16>,<Int64,Int64#,8> \
   ,<Float,Float#,4>,<Double,Double#,2> \
   ,<Float,Float#,8>,<Double,Double#,4> \
   ,<Float,Float#,16>,<Double,Double#,8>]
@@ -3701,12 +3657,12 @@ section "SIMD Vectors"
   ,<Float,Float#,16>,<Double,Double#,8>]
 
 #define INT_VECTOR_TYPES \
-  [<Int8,Int8#,16>,<Int16,Int16#,8>,<Int32,Int32#,4>,<Int64,INT64,2> \
-  ,<Int8,Int8#,32>,<Int16,Int16#,16>,<Int32,Int32#,8>,<Int64,INT64,4> \
-  ,<Int8,Int8#,64>,<Int16,Int16#,32>,<Int32,Int32#,16>,<Int64,INT64,8> \
-  ,<Word8,Word#,16>,<Word16,Word#,8>,<Word32,Word32#,4>,<Word64,WORD64,2> \
-  ,<Word8,Word#,32>,<Word16,Word#,16>,<Word32,Word32#,8>,<Word64,WORD64,4> \
-  ,<Word8,Word#,64>,<Word16,Word#,32>,<Word32,Word32#,16>,<Word64,WORD64,8>]
+  [<Int8,Int8#,16>,<Int16,Int16#,8>,<Int32,Int32#,4>,<Int64,Int64#,2> \
+  ,<Int8,Int8#,32>,<Int16,Int16#,16>,<Int32,Int32#,8>,<Int64,Int64#,4> \
+  ,<Int8,Int8#,64>,<Int16,Int16#,32>,<Int32,Int32#,16>,<Int64,Int64#,8> \
+  ,<Word8,Word#,16>,<Word16,Word#,8>,<Word32,Word32#,4>,<Word64,Word64#,2> \
+  ,<Word8,Word#,32>,<Word16,Word#,16>,<Word32,Word32#,8>,<Word64,Word64#,4> \
+  ,<Word8,Word#,64>,<Word16,Word#,32>,<Word32,Word32#,16>,<Word64,Word64#,8>]
 
 primtype VECTOR
    with llvm_only = True
diff --git a/compiler/ghc.cabal.in b/compiler/ghc.cabal.in
index 19ade29336a2..46b0752491b8 100644
--- a/compiler/ghc.cabal.in
+++ b/compiler/ghc.cabal.in
@@ -33,8 +33,6 @@ extra-source-files:
     ClosureTypes.h
     FunTypes.h
     MachRegs.h
-    -- Target sensative, should not be used.
-    MachDeps.h
 
 Flag internal-interpreter
     Description: Build with internal interpreter support.
@@ -75,9 +73,6 @@ Library
               Bytecodes.h
               ClosureTypes.h
               FunTypes.h
-              -- MachRegs.h -- hits #error, skip
-              -- target sensative, should not be used
-              MachDeps.h
               ghc-llvm-version.h
 
     Build-Depends: base       >= 4.11 && < 4.17,
diff --git a/configure.ac b/configure.ac
index bb0ab606ccdd..e8c49be7ac6c 100644
--- a/configure.ac
+++ b/configure.ac
@@ -749,7 +749,6 @@ AC_MSG_NOTICE([Creating links for headers shared by the RTS and compiler])
 ln -f rts/include/rts/Bytecodes.h compiler/
 ln -f rts/include/rts/storage/ClosureTypes.h compiler/
 ln -f rts/include/rts/storage/FunTypes.h compiler/
-ln -f rts/include/MachDeps.h compiler/
 ln -f rts/include/stg/MachRegs.h compiler/
 AC_MSG_NOTICE([done.])
 
diff --git a/libraries/base/GHC/Conc/Sync.hs b/libraries/base/GHC/Conc/Sync.hs
index d5fb4868df63..38ce56ccbf5a 100644
--- a/libraries/base/GHC/Conc/Sync.hs
+++ b/libraries/base/GHC/Conc/Sync.hs
@@ -92,6 +92,8 @@ module GHC.Conc.Sync
         , sharedCAF
         ) where
 
+#include "MachDeps.h"
+
 import Foreign
 import Foreign.C
 
@@ -192,7 +194,11 @@ instance Ord ThreadId where
 -- @since 4.8.0.0
 setAllocationCounter :: Int64 -> IO ()
 setAllocationCounter (I64# i) = IO $ \s ->
+#if WORD_SIZE_IN_BITS < 64
   case setThreadAllocationCounter# i s of s' -> (# s', () #)
+#else
+  case setThreadAllocationCounter# (intToInt64# i) s of s' -> (# s', () #)
+#endif
 
 -- | Return the current value of the allocation counter for the
 -- current thread.
diff --git a/libraries/base/GHC/Int.hs b/libraries/base/GHC/Int.hs
index df25e1cbe4ec..45bb3f70cade 100644
--- a/libraries/base/GHC/Int.hs
+++ b/libraries/base/GHC/Int.hs
@@ -1048,7 +1048,11 @@ instance Bits Int64 where
     bitSizeMaybe i             = Just (finiteBitSize i)
     bitSize i                  = finiteBitSize i
     isSigned _                 = True
-    popCount (I64# x#)         = I# (word2Int# (popCnt64# (int2Word# x#)))
+#if WORD_SIZE_IN_BITS < 64
+    popCount (I64# x#)         = I# (word2Int# (popCnt64# (int64ToWord64# x#)))
+#else
+    popCount (I64# x#)         = I# (word2Int# (popCnt# (int2Word# x#)))
+#endif
     bit                        = bitDefault
     testBit                    = testBitDefault
 
@@ -1098,8 +1102,8 @@ instance FiniteBits Int64 where
     countLeadingZeros  (I64# x#) = I# (word2Int# (clz64# (int64ToWord64# x#)))
     countTrailingZeros (I64# x#) = I# (word2Int# (ctz64# (int64ToWord64# x#)))
 #else
-    countLeadingZeros  (I64# x#) = I# (word2Int# (clz64# (int2Word# x#)))
-    countTrailingZeros (I64# x#) = I# (word2Int# (ctz64# (int2Word# x#)))
+    countLeadingZeros  (I64# x#) = I# (word2Int# (clz# (int2Word# x#)))
+    countTrailingZeros (I64# x#) = I# (word2Int# (ctz# (int2Word# x#)))
 #endif
 
 -- | @since 2.01
diff --git a/libraries/base/GHC/Storable.hs b/libraries/base/GHC/Storable.hs
index d9b9382211fd..548430eb59a6 100644
--- a/libraries/base/GHC/Storable.hs
+++ b/libraries/base/GHC/Storable.hs
@@ -1,3 +1,4 @@
+{-# LANGUAGE CPP #-}
 {-# LANGUAGE Trustworthy #-}
 {-# LANGUAGE NoImplicitPrelude, MagicHash, UnboxedTuples #-}
 {-# OPTIONS_HADDOCK not-home #-}
@@ -51,6 +52,8 @@ module GHC.Storable
         , writeWord64OffPtr
         ) where
 
+#include "MachDeps.h"
+
 import GHC.Stable ( StablePtr(..) )
 import GHC.Int
 import GHC.Word
@@ -102,10 +105,17 @@ readInt32OffPtr (Ptr a) (I# i)
   = IO $ \s -> case readInt32OffAddr# a i s     of (# s2, x #) -> (# s2, I32# x #)
 readWord32OffPtr (Ptr a) (I# i)
   = IO $ \s -> case readWord32OffAddr# a i s    of (# s2, x #) -> (# s2, W32# x #)
+#if WORD_SIZE_IN_BITS < 64
 readInt64OffPtr (Ptr a) (I# i)
   = IO $ \s -> case readInt64OffAddr# a i s     of (# s2, x #) -> (# s2, I64# x #)
 readWord64OffPtr (Ptr a) (I# i)
   = IO $ \s -> case readWord64OffAddr# a i s    of (# s2, x #) -> (# s2, W64# x #)
+#else
+readInt64OffPtr (Ptr a) (I# i)
+  = IO $ \s -> case readInt64OffAddr# a i s     of (# s2, x #) -> (# s2, I64# (int64ToInt# x) #)
+readWord64OffPtr (Ptr a) (I# i)
+  = IO $ \s -> case readWord64OffAddr# a i s    of (# s2, x #) -> (# s2, W64# (word64ToWord# x) #)
+#endif
 
 writeWideCharOffPtr  :: Ptr Char          -> Int -> Char        -> IO ()
 writeIntOffPtr       :: Ptr Int           -> Int -> Int         -> IO ()
@@ -152,7 +162,14 @@ writeInt32OffPtr (Ptr a) (I# i) (I32# x)
   = IO $ \s -> case writeInt32OffAddr# a i x s     of s2 -> (# s2, () #)
 writeWord32OffPtr (Ptr a) (I# i) (W32# x)
   = IO $ \s -> case writeWord32OffAddr# a i x s    of s2 -> (# s2, () #)
+#if WORD_SIZE_IN_BITS < 64
 writeInt64OffPtr (Ptr a) (I# i) (I64# x)
   = IO $ \s -> case writeInt64OffAddr# a i x s     of s2 -> (# s2, () #)
 writeWord64OffPtr (Ptr a) (I# i) (W64# x)
   = IO $ \s -> case writeWord64OffAddr# a i x s    of s2 -> (# s2, () #)
+#else
+writeInt64OffPtr (Ptr a) (I# i) (I64# x)
+  = IO $ \s -> case writeInt64OffAddr# a i (intToInt64# x) s    of s2 -> (# s2, () #)
+writeWord64OffPtr (Ptr a) (I# i) (W64# x)
+  = IO $ \s -> case writeWord64OffAddr# a i (wordToWord64# x) s of s2 -> (# s2, () #)
+#endif
diff --git a/libraries/base/GHC/Word.hs b/libraries/base/GHC/Word.hs
index be44bfd541d8..43b6e4b31162 100644
--- a/libraries/base/GHC/Word.hs
+++ b/libraries/base/GHC/Word.hs
@@ -888,20 +888,6 @@ instance Enum Word64 where
                         = I# (word2Int# x#)
         | otherwise     = fromEnumError "Word64" x
 
-#if WORD_SIZE_IN_BITS < 64
-    -- See Note [Stable Unfolding for list producers] in GHC.Enum
-    {-# INLINE enumFrom #-}
-    enumFrom            = integralEnumFrom
-    -- See Note [Stable Unfolding for list producers] in GHC.Enum
-    {-# INLINE enumFromThen #-}
-    enumFromThen        = integralEnumFromThen
-    -- See Note [Stable Unfolding for list producers] in GHC.Enum
-    {-# INLINE enumFromTo #-}
-    enumFromTo          = integralEnumFromTo
-    -- See Note [Stable Unfolding for list producers] in GHC.Enum
-    {-# INLINE enumFromThenTo #-}
-    enumFromThenTo      = integralEnumFromThenTo
-#else
     -- See Note [Stable Unfolding for list producers] in GHC.Enum
     {-# INLINABLE enumFrom #-}
     enumFrom w
@@ -931,7 +917,6 @@ word64ToWord (W64# w#) = (W# w#)
 
 wordToWord64 :: Word -> Word64
 wordToWord64 (W# w#) = (W64# w#)
-#endif
 
 
 -- | @since 2.01
@@ -992,7 +977,7 @@ instance Bits Word64 where
     bitSizeMaybe i            = Just (finiteBitSize i)
     bitSize i                 = finiteBitSize i
     isSigned _                = False
-    popCount (W64# x#)        = I# (word2Int# (popCnt64# x#))
+    popCount (W64# x#)        = I# (word2Int# (popCnt# x#))
     bit                       = bitDefault
     testBit                   = testBitDefault
 
@@ -1009,8 +994,13 @@ instance FiniteBits Word64 where
     {-# INLINE countLeadingZeros #-}
     {-# INLINE countTrailingZeros #-}
     finiteBitSize _ = 64
+#if WORD_SIZE_IN_BITS < 64
     countLeadingZeros  (W64# x#) = I# (word2Int# (clz64# x#))
     countTrailingZeros (W64# x#) = I# (word2Int# (ctz64# x#))
+#else
+    countLeadingZeros  (W64# x#) = I# (word2Int# (clz# x#))
+    countTrailingZeros (W64# x#) = I# (word2Int# (ctz# x#))
+#endif
 
 -- | @since 2.01
 instance Show Word64 where
diff --git a/libraries/ghc-bignum/src/GHC/Num/BigNat.hs b/libraries/ghc-bignum/src/GHC/Num/BigNat.hs
index fa8b84eccdd4..5fa81b7e5b2c 100644
--- a/libraries/ghc-bignum/src/GHC/Num/BigNat.hs
+++ b/libraries/ghc-bignum/src/GHC/Num/BigNat.hs
@@ -263,6 +263,16 @@ bigNatToWord64# b
          in uncheckedShiftL64# wh 32# `or64#` wl
       else wl
 
+#else
+
+-- | Convert a Word64# into a BigNat on 64-bit architectures
+bigNatFromWord64# :: Word64# -> BigNat#
+bigNatFromWord64# w64 = bigNatFromWord# (word64ToWord# w64)
+
+-- | Convert a BigNat into a Word64# on 64-bit architectures
+bigNatToWord64# :: BigNat# -> Word64#
+bigNatToWord64# b = wordToWord64# (bigNatToWord# b)
+
 #endif
 
 -- | Encode (# BigNat mantissa, Int# exponent #) into a Double#
diff --git a/libraries/ghc-bignum/src/GHC/Num/Integer.hs b/libraries/ghc-bignum/src/GHC/Num/Integer.hs
index e2a020619ed5..d85148fc05db 100644
--- a/libraries/ghc-bignum/src/GHC/Num/Integer.hs
+++ b/libraries/ghc-bignum/src/GHC/Num/Integer.hs
@@ -998,14 +998,12 @@ integerIsPowerOf2# (IS i)
 integerIsPowerOf2# (IN _) = (# (# #) | #)
 integerIsPowerOf2# (IP w) = bigNatIsPowerOf2# w
 
-#if WORD_SIZE_IN_BITS == 32
-
--- | Convert an Int64# into an Integer on 32-bit architectures
+-- | Convert an Int64# into an Integer
 integerFromInt64# :: Int64# -> Integer
 {-# NOINLINE integerFromInt64# #-}
 integerFromInt64# !i
-  | isTrue# ((i `leInt64#` intToInt64#  0x7FFFFFFF#)
-      &&# (i `geInt64#` intToInt64# -0x80000000#))
+  | isTrue# ((i `leInt64#` intToInt64#  INT_MAXBOUND#)
+      &&# (i `geInt64#` intToInt64# INT_MINBOUND#))
   = IS (int64ToInt# i)
 
   | isTrue# (i `geInt64#` intToInt64# 0#)
@@ -1014,37 +1012,29 @@ integerFromInt64# !i
   | True
   = IN (bigNatFromWord64# (int64ToWord64# (negateInt64# i)))
 
--- | Convert a Word64# into an Integer on 32-bit architectures
+-- | Convert a Word64# into an Integer
 integerFromWord64# :: Word64# -> Integer
 {-# NOINLINE integerFromWord64# #-}
 integerFromWord64# !w
-  | isTrue# (w `leWord64#` wordToWord64# 0x7FFFFFFF##)
+  | isTrue# (w `leWord64#` wordToWord64# INT_MAXBOUND##)
   = IS (int64ToInt# (word64ToInt64# w))
   | True
   = IP (bigNatFromWord64# w)
 
--- | Convert an Integer into an Int64# on 32-bit architectures
+-- | Convert an Integer into an Int64#
 integerToInt64# :: Integer -> Int64#
 {-# NOINLINE integerToInt64# #-}
 integerToInt64# (IS i) = intToInt64# i
 integerToInt64# (IP b) = word64ToInt64# (bigNatToWord64# b)
 integerToInt64# (IN b) = negateInt64# (word64ToInt64# (bigNatToWord64# b))
 
--- | Convert an Integer into a Word64# on 32-bit architectures
+-- | Convert an Integer into a Word64#
 integerToWord64# :: Integer -> Word64#
 {-# NOINLINE integerToWord64# #-}
 integerToWord64# (IS i) = int64ToWord64# (intToInt64# i)
 integerToWord64# (IP b) = bigNatToWord64# b
 integerToWord64# (IN b) = int64ToWord64# (negateInt64# (word64ToInt64# (bigNatToWord64# b)))
 
-#else
-
--- | Convert an Int64# into an Integer on 64-bit architectures
-integerFromInt64# :: Int# -> Integer
-integerFromInt64# !x = IS x
-
-#endif
-
 ----------------------------------------------------------------------------
 -- Conversions to/from floating point
 ----------------------------------------------------------------------------
diff --git a/testsuite/tests/codeGen/should_run/T9340.hs b/testsuite/tests/codeGen/should_run/T9340.hs
index 45f791ba7307..22f58241152c 100644
--- a/testsuite/tests/codeGen/should_run/T9340.hs
+++ b/testsuite/tests/codeGen/should_run/T9340.hs
@@ -1,4 +1,5 @@
 {-# LANGUAGE MagicHash #-}
+{-# LANGUAGE CPP #-}
 
 import Control.Monad
 import Data.Bits
@@ -6,6 +7,8 @@ import GHC.Exts
 import GHC.Word
 import Numeric (showHex)
 
+#include "MachDeps.h"
+
 -- Reference Implementation
 
 -- count trailing zeros
@@ -58,8 +61,13 @@ ctzIUT32 (W# x#) = W# (ctz32# x#)
 clzIUT32 (W# x#) = W# (clz32# x#)
 
 ctzIUT64, clzIUT64 :: Word64 -> Word
+#if WORD_SIZE_IN_BITS < 64
 ctzIUT64 (W64# x#) = W# (ctz64# x#)
 clzIUT64 (W64# x#) = W# (clz64# x#)
+#else
+ctzIUT64 (W64# x#) = W# (ctz64# (wordToWord64# x#))
+clzIUT64 (W64# x#) = W# (clz64# (wordToWord64# x#))
+#endif
 
 main :: IO ()
 main = do
diff --git a/testsuite/tests/codeGen/should_run/cgrun072.hs b/testsuite/tests/codeGen/should_run/cgrun072.hs
index b97ce56d014a..729564b6314f 100644
--- a/testsuite/tests/codeGen/should_run/cgrun072.hs
+++ b/testsuite/tests/codeGen/should_run/cgrun072.hs
@@ -37,7 +37,11 @@ bswap32 :: Word32 -> Word32
 bswap32 (W32# w#) = W32# (wordToWord32# (byteSwap32# (word32ToWord# w#)))
 
 bswap64 :: Word64 -> Word64
+#if WORD_SIZE_IN_BITS < 64
 bswap64 (W64# w#) = W64# (byteSwap64# w#)
+#else
+bswap64 (W64# w#) = W64# (word64ToWord# (byteSwap64# (wordToWord64# w#)))
+#endif
 
 slowBswap64 :: Word64 -> Word64
 slowBswap64 w =
diff --git a/testsuite/tests/codeGen/should_run/cgrun075.hs b/testsuite/tests/codeGen/should_run/cgrun075.hs
index 5babde12547c..1cac98b2dd51 100644
--- a/testsuite/tests/codeGen/should_run/cgrun075.hs
+++ b/testsuite/tests/codeGen/should_run/cgrun075.hs
@@ -36,7 +36,11 @@ instance Pdep Word32 where
   pdep (W32# src#) (W32# mask#) = W32# (wordToWord32# (pdep32# (word32ToWord# src#) (word32ToWord# mask#)))
 
 instance Pdep Word64 where
+#if WORD_SIZE_IN_BITS < 64
   pdep (W64# src#) (W64# mask#) = W64# (pdep64# src# mask#)
+#else
+  pdep (W64# src#) (W64# mask#) = W64# (word64ToWord# (pdep64# (wordToWord64# src#) (wordToWord64# mask#)))
+#endif
 
 class SlowPdep a where
   slowPdep :: a -> a -> a
diff --git a/testsuite/tests/codeGen/should_run/cgrun076.hs b/testsuite/tests/codeGen/should_run/cgrun076.hs
index 4779b5beb836..ce26e375d013 100644
--- a/testsuite/tests/codeGen/should_run/cgrun076.hs
+++ b/testsuite/tests/codeGen/should_run/cgrun076.hs
@@ -36,7 +36,11 @@ instance Pext Word32 where
   pext (W32# src#) (W32# mask#) = W32# (wordToWord32# (pext32# (word32ToWord# src#) (word32ToWord# mask#)))
 
 instance Pext Word64 where
+#if WORD_SIZE_IN_BITS < 64
   pext (W64# src#) (W64# mask#) = W64# (pext64# src# mask#)
+#else
+  pext (W64# src#) (W64# mask#) = W64# (word64ToWord# (pext64# (wordToWord64# src#) (wordToWord64# mask#)))
+#endif
 
 class SlowPext a where
   slowPext :: a -> a -> a
diff --git a/testsuite/tests/concurrent/should_run/AtomicPrimops.hs b/testsuite/tests/concurrent/should_run/AtomicPrimops.hs
index b8adb3c6211d..05ac80dcad40 100644
--- a/testsuite/tests/concurrent/should_run/AtomicPrimops.hs
+++ b/testsuite/tests/concurrent/should_run/AtomicPrimops.hs
@@ -1,3 +1,4 @@
+{-# LANGUAGE CPP #-}
 {-# LANGUAGE MagicHash #-}
 {-# LANGUAGE UnboxedTuples #-}
 
@@ -14,6 +15,8 @@ import GHC.Int
 import GHC.IO
 import GHC.Word
 
+#include "MachDeps.h"
+
 -- | Iterations per worker.
 iters :: Word
 iters = 1000000
@@ -550,7 +553,11 @@ readInt32Array (MBA mba#) (I# ix#) = IO $ \ s# ->
 readInt64Array :: MByteArray -> Int -> IO Int64
 readInt64Array (MBA mba#) (I# ix#) = IO $ \ s# ->
     case readInt64Array# mba# ix# s# of
+#if WORD_SIZE_IN_BITS < 64
         (# s2#, n# #) -> (# s2#, I64# n# #)
+#else
+        (# s2#, n# #) -> (# s2#, I64# (int64ToInt# n#) #)
+#endif
 
 atomicWriteIntArray :: MByteArray -> Int -> Int -> IO ()
 atomicWriteIntArray (MBA mba#) (I# ix#) (I# n#) = IO $ \ s# ->
@@ -584,8 +591,13 @@ casInt32Array (MBA mba#) (I# ix#) (I32# old#) (I32# new#) = IO $ \ s# ->
 
 casInt64Array :: MByteArray -> Int -> Int64 -> Int64 -> IO Int64
 casInt64Array (MBA mba#) (I# ix#) (I64# old#) (I64# new#) = IO $ \ s# ->
+#if WORD_SIZE_IN_BITS < 64
     case casInt64Array# mba# ix# old# new# s# of
         (# s2#, old2# #) -> (# s2#, I64# old2# #)
+#else
+    case casInt64Array# mba# ix# (intToInt64# old#) (intToInt64# new#) s# of
+        (# s2#, old2# #) -> (# s2#, I64# (int64ToInt# old2#) #)
+#endif
 
 ------------------------------------------------------------------------
 -- Wrappers around Addr#
@@ -651,5 +663,10 @@ atomicCasWord32Ptr (Ptr addr#) (W32# old#) (W32# new#) = IO $ \ s# ->
         (# s2#, old2# #) -> (# s2#, W32# old2# #)
 atomicCasWord64Ptr :: Ptr Word64 -> Word64 -> Word64 -> IO Word64
 atomicCasWord64Ptr (Ptr addr#) (W64# old#) (W64# new#) = IO $ \ s# ->
+#if WORD_SIZE_IN_BITS < 64
     case atomicCasWord64Addr# addr# old# new# s# of
         (# s2#, old2# #) -> (# s2#, W64# old2# #)
+#else
+    case atomicCasWord64Addr# addr# (wordToWord64# old#) (wordToWord64# new#) s# of
+        (# s2#, old2# #) -> (# s2#, W64# (word64ToWord# old2#) #)
+#endif
diff --git a/testsuite/tests/primops/should_run/T4442.hs b/testsuite/tests/primops/should_run/T4442.hs
index dfdf93cc4f84..ad0c882533a4 100644
--- a/testsuite/tests/primops/should_run/T4442.hs
+++ b/testsuite/tests/primops/should_run/T4442.hs
@@ -222,11 +222,19 @@ main = do
     (\arr i s -> case readWord8ArrayAsInt32# arr i s of (# s', a #) -> (# s', I32# a #))
     (\arr i (I32# a) s -> writeWord8ArrayAsInt32# arr i a s)
     12345678 4
+#if WORD_SIZE_IN_BITS < 64
   testInt64Array "Int64#"
     (\arr i -> I64# (indexWord8ArrayAsInt64# arr i))
     (\arr i s -> case readWord8ArrayAsInt64# arr i s of (# s', a #) -> (# s', I64# a #))
     (\arr i (I64# a) s -> writeWord8ArrayAsInt64# arr i a s)
     1234567890123 8
+#else
+  testInt64Array "Int64#"
+    (\arr i -> I64# (int64ToInt# (indexWord8ArrayAsInt64# arr i)))
+    (\arr i s -> case readWord8ArrayAsInt64# arr i s of (# s', a #) -> (# s', I64# (int64ToInt# a) #))
+    (\arr i (I64# a) s -> writeWord8ArrayAsInt64# arr i (intToInt64# a) s)
+    1234567890123 8
+#endif
   testIntArray "Int#"
     (\arr i -> I# (indexWord8ArrayAsInt# arr i))
     (\arr i s -> case readWord8ArrayAsInt# arr i s of (# s', a #) -> (# s', I# a #))
@@ -248,11 +256,19 @@ main = do
     (\arr i s -> case readWord8ArrayAsWord32# arr i s of (# s', a #) -> (# s', W32# a #))
     (\arr i (W32# a) s -> writeWord8ArrayAsWord32# arr i a s)
     12345678 4
+#if WORD_SIZE_IN_BITS < 64
   testWord64Array "Word64#"
     (\arr i -> W64# (indexWord8ArrayAsWord64# arr i))
     (\arr i s -> case readWord8ArrayAsWord64# arr i s of (# s', a #) -> (# s', W64# a #))
     (\arr i (W64# a) s -> writeWord8ArrayAsWord64# arr i a s)
     1234567890123 8
+#else
+  testWord64Array "Word64#"
+    (\arr i -> W64# (word64ToWord# (indexWord8ArrayAsWord64# arr i)))
+    (\arr i s -> case readWord8ArrayAsWord64# arr i s of (# s', a #) -> (# s', W64# (word64ToWord# a) #))
+    (\arr i (W64# a) s -> writeWord8ArrayAsWord64# arr i (wordToWord64# a) s)
+    1234567890123 8
+#endif
   testWordArray "Word#"
     (\arr i -> W# (indexWord8ArrayAsWord# arr i))
     (\arr i s -> case readWord8ArrayAsWord# arr i s of (# s', a #) -> (# s', W# a #))
-- 
GitLab