Miscompilation & Cmm Lint failure with SIMD vectors
The following program fails Cmm Lint:
{-# language MagicHash, UnboxedTuples, UnboxedSums #-}
module Main ( main ) where
import GHC.Exts
import GHC.Int
foo :: Int32X4# -> Integer
foo i32x4 =
case unpackInt32X4# i32x4 of
(# i1, i2, i3, i4 #) ->
let
s = sum $ map fromIntegral
[ I32# i1, I32# i2, I32# i3, I32# i4 ]
in s
main :: IO ()
main = do
print ( foo ( broadcastInt32X4# ( intToInt32# 1# ) ) )
Cmm lint error:
in basic block c1EB
in MachOp application:
%MO_UU_Conv_W64_W32(1 :: W32)
op is expecting: [W64]
arguments provide: [I32]
Program was:
{offset
c1Fg: // global
_s1Ew::P64 = R1;
if ((old + 0) - <highSp> < SpLim) (likely: False) goto c1Fh; else goto c1Fi;
c1Fi: // global
Hp = Hp + 48;
if (Hp > HpLim) (likely: False) goto c1Fk; else goto c1Fj;
c1Fk: // global
HpAlloc = 48;
goto c1Fh;
c1Fh: // global
R1 = _s1Ew::P64;
call (stg_gc_enter_1)(R1) args: 8, res: 0, upd: 8;
c1Fj: // global
(_c1EA::I64) = call "ccall" arg hints: [PtrHint,
PtrHint] result hints: [PtrHint] newCAF(BaseReg, _s1Ew::P64);
if (_c1EA::I64 == 0) goto c1EC; else goto c1EB;
c1EC: // global
call (I64[_s1Ew::P64])() args: 8, res: 0, upd: 8;
c1EB: // global
I64[(old + 24)] = stg_bh_upd_frame_info;
I64[(old + 16)] = _c1EA::I64;
_c1EE::Ix4V128 = <0 :: W32, 0 :: W32, 0 :: W32, 0 :: W32>;
_c1EF::Ix4V128 = %MO_V_Insert_4_W32(_c1EE::Ix4V128,
%MO_UU_Conv_W64_W32(1 :: W32), 0 :: W32);
_c1EG::Ix4V128 = %MO_V_Insert_4_W32(_c1EF::Ix4V128,
%MO_UU_Conv_W64_W32(1 :: W32), 1 :: W32);
_c1EH::Ix4V128 = %MO_V_Insert_4_W32(_c1EG::Ix4V128,
%MO_UU_Conv_W64_W32(1 :: W32), 2 :: W32);
_c1EI::Ix4V128 = %MO_V_Insert_4_W32(_c1EH::Ix4V128,
%MO_UU_Conv_W64_W32(1 :: W32), 3 :: W32);
_c1ED::Ix4V128 = _c1EI::Ix4V128;
_s1Ef::Ix4V128 = _c1ED::Ix4V128;
_s1Eh::I32 = %MO_SS_Conv_W32_W64(%MO_V_Extract_4_W32(_s1Ef::Ix4V128,
0 :: W32));
_s1Ei::I32 = %MO_SS_Conv_W32_W64(%MO_V_Extract_4_W32(_s1Ef::Ix4V128,
1 :: W32));
_s1Ej::I32 = %MO_SS_Conv_W32_W64(%MO_V_Extract_4_W32(_s1Ef::Ix4V128,
2 :: W32));
_s1Ek::I32 = %MO_SS_Conv_W32_W64(%MO_V_Extract_4_W32(_s1Ef::Ix4V128,
3 :: W32));
_s1Ek::I32 = _s1Ek::I32;
_s1Ej::I32 = _s1Ej::I32;
_s1Ei::I32 = _s1Ei::I32;
_s1Eh::I32 = _s1Eh::I32;
I64[Hp - 40] = sat_s1Ev_info;
I32[Hp - 24] = _s1Eh::I32;
I32[Hp - 20] = _s1Ei::I32;
I32[Hp - 16] = _s1Ej::I32;
I32[Hp - 12] = _s1Ek::I32;
_c1EL::P64 = Hp - 40;
I64[Hp - 8] = sat_s1El_info;
_c1F9::P64 = Hp - 8;
R3 = _c1EL::P64;
R2 = _c1F9::P64;
R1 = $_closure;
call stg_ap_pp_fast(R3, R2, R1) args: 24, res: 0, upd: 24;
}
It seems that the vector broadcast and unpack instructions end up emitting conversion instructions at incorrect types?
Turning off Cmm Lint, I get the following error from LLVM:
opt: /run/user/1000/ghc230199_0/ghc_1.ll:494:18: error: invalid cast opcode for cast from 'i32' to 'i32'
%ln1K6 = trunc i32 1 to i32
^
Edited by sheaf