x86 NCG SIMD: Primitives like insertFloatX4# should be available without -msse4
Summary
`packFloatX4#`, `insertFloatX4#` and `broadcastFloatX4#` currently require the `-msse4` flag on the x86 NCG.
This means they are not available by default (`-msse2`).
The NCG should compile them to SSE2 instructions (and use SSE4.1 instructions when they are available).
Steps to reproduce
-- simd_insert.hs
{-# LANGUAGE MagicHash, UnboxedTuples #-}
import GHC.Exts
-- | Convert a 'FloatX4#' vector into a boxed 4-tuple of 'Float's.
--
-- The components appear in the same order in which 'unpackFloatX4#'
-- returns them.
unpackFloatX4 :: FloatX4# -> (Float, Float, Float, Float)
unpackFloatX4 vec =
  case unpackFloatX4# vec of
    (# x0, x1, x2, x3 #) -> (F# x0, F# x1, F# x2, F# x3)
-- | Convert a 'DoubleX2#' vector into a boxed pair of 'Double's.
--
-- The components appear in the same order in which 'unpackDoubleX2#'
-- returns them.
unpackDoubleX2 :: DoubleX2# -> (Double, Double)
unpackDoubleX2 vec =
  case unpackDoubleX2# vec of
    (# x0, x1 #) -> (D# x0, D# x1)
-- | Exercise 'packFloatX4#' and 'insertFloatX4#'.
--
-- Packs the vector @(0, 1, 2, 3)@ and prints it, then prints the result of
-- inserting @7.0@ at each index 0 through 3 of that same base vector.
-- The indices are written as literals, exactly as in the original
-- reproduction.
testFloatX4 :: IO ()
testFloatX4 = do
  let base = packFloatX4# (# 0.0#, 1.0#, 2.0#, 3.0# #)
  report base
  report (insertFloatX4# base 7.0# 0#)
  report (insertFloatX4# base 7.0# 1#)
  report (insertFloatX4# base 7.0# 2#)
  report (insertFloatX4# base 7.0# 3#)
  where
    -- Print a vector as a boxed tuple.
    report :: FloatX4# -> IO ()
    report vec = print (unpackFloatX4 vec)
-- | Exercise 'packDoubleX2#' and 'insertDoubleX2#'.
--
-- Packs the vector @(0, 1)@ and prints it, then prints the result of
-- inserting @7.0@ at index 0 and at index 1 of that same base vector.
testDoubleX2 :: IO ()
testDoubleX2 = do
  let base = packDoubleX2# (# 0.0##, 1.0## #)
  report base
  report (insertDoubleX2# base 7.0## 0#)
  report (insertDoubleX2# base 7.0## 1#)
  where
    -- Print a vector as a boxed pair.
    report :: DoubleX2# -> IO ()
    report vec = print (unpackDoubleX2 vec)
-- | Run the single-precision insert test followed by the double-precision one.
main :: IO ()
main = testFloatX4 >> testDoubleX2
-- simd_broadcast.hs
{-# LANGUAGE MagicHash, UnboxedTuples #-}
import GHC.Exts
-- | Box the four components of a 'FloatX4#' vector into a tuple of 'Float's,
-- keeping the order in which 'unpackFloatX4#' yields them.
unpackFloatX4 :: FloatX4# -> (Float, Float, Float, Float)
unpackFloatX4 vec =
  case unpackFloatX4# vec of
    (# c0, c1, c2, c3 #) -> (F# c0, F# c1, F# c2, F# c3)
-- | Box the two components of a 'DoubleX2#' vector into a pair of 'Double's,
-- keeping the order in which 'unpackDoubleX2#' yields them.
unpackDoubleX2 :: DoubleX2# -> (Double, Double)
unpackDoubleX2 vec =
  case unpackDoubleX2# vec of
    (# c0, c1 #) -> (D# c0, D# c1)
-- | Exercise 'broadcastFloatX4#'.
--
-- Adds a vector whose components are all @5.0@ to the packed vector
-- @(0, 1, 2, 3)@ with 'plusFloatX4#' and prints the sum as a tuple.
testFloatX4 :: IO ()
testFloatX4 = do
  let lanes = packFloatX4# (# 0.0#, 1.0#, 2.0#, 3.0# #)
  let fives = broadcastFloatX4# 5.0#
  print (unpackFloatX4 (plusFloatX4# lanes fives))
-- | Exercise 'broadcastDoubleX2#'.
--
-- Adds a vector whose components are both @5.0@ to the packed vector
-- @(0, 1)@ with 'plusDoubleX2#' and prints the sum as a pair.
testDoubleX2 :: IO ()
testDoubleX2 = do
  let lanes = packDoubleX2# (# 0.0##, 1.0## #)
  let fives = broadcastDoubleX2# 5.0##
  print (unpackDoubleX2 (plusDoubleX2# lanes fives))
-- | Run the single-precision broadcast test followed by the double-precision one.
main :: IO ()
main = testFloatX4 >> testDoubleX2
$ ghc -fforce-recomp simd_insert.hs
[1 of 2] Compiling Main ( simd_insert.hs, simd_insert.o )
<no location info>: error:
sorry! (unimplemented feature or known bug)
GHC version 9.13.20241102:
FloatX4# operations require either -msse4 or -fllvm
$ ghc -fforce-recomp simd_broadcast.hs
[1 of 2] Compiling Main ( simd_broadcast.hs, simd_broadcast.o )
<no location info>: error:
sorry! (unimplemented feature or known bug)
GHC version 9.13.20241102:
32-bit float broadcast requires -msse4 or -fllvm.
Expected behavior
Both programs should compile without `-msse4`.
In fact, the LLVM backend can already compile them without `-msse4`.
Environment
- GHC version used: 9.13.20241102 (573cad4b)
- Operating System: Linux
- System Architecture: x86_64