Core transformations generate bad indexCharOffAddr# call
Consider the following functions, which only differ in a bang pattern on the local binding q
in the inner loop:
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE MagicHash #-}
module Bug where
import GHC.Prim
import GHC.Types
f :: Int -> String
f n_
= go n_ ""
where
go n cs | n < 62
= let !c = chooseChar62 n in c : cs
| otherwise
= go q (c : cs) where (q, r) = quotRem n 62
!c = chooseChar62 r
chooseChar62 :: Int -> Char
{-# INLINE chooseChar62 #-}
chooseChar62 (I# n) = C# (indexCharOffAddr# chars62 n)
chars62 = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"#
g :: Int -> String
g n_
= go n_ ""
where
go n cs | n < 62
= let !c = chooseChar62 n in c : cs
| otherwise
= go q (c : cs) where (!q, r) = quotRem n 62 -- !!! Note the bang on q
!c = chooseChar62 r
chooseChar62 :: Int -> Char
{-# INLINE chooseChar62 #-}
chooseChar62 (I# n) = C# (indexCharOffAddr# chars62 n)
chars62 = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"#
When building with -O -fPIC -dynamic -ddump-simpl
, this is the Core I see, with a HEAD checkout from earlier this week built by hadrian:
-- chararacter array, used by both
chars62_r30r :: Addr#
chars62_r30r
= "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"#
-- Used at the end of Bug.$wgo, in the -9223372036854775808# branch,
-- therefore only used by generated Core for f, but not g!
lvl_r30s :: Char
lvl_r30s
= case indexCharOffAddr# chars62_r30r -9223372036854775808# of v_B2
{ __DEFAULT ->
GHC.Types.C# v_B2
}
-- Core for f
Rec {
Bug.$wgo
:: Int# -> [Char] -> (# Char, [Char] #)
Bug.$wgo
= \ (ww_s2WU :: Int#) (w_s2WR :: [Char]) ->
case GHC.Real.even3 of { I# y_a2QI -> -- GHC.Real.even3 == -1
case y_a2QI of {
__DEFAULT ->
case quotRemInt# ww_s2WU 62# of { (# ipv_a2QN, ipv1_a2QO #) ->
case indexCharOffAddr# chars62_r30r ipv1_a2QO of wild2_X4
{ __DEFAULT ->
case <# ww_s2WU 62# of {
__DEFAULT ->
Bug.$wgo
ipv_a2QN (GHC.Types.: @ Char (GHC.Types.C# wild2_X4) w_s2WR);
1# ->
case indexCharOffAddr# chars62_r30r ww_s2WU of wild3_X1G
{ __DEFAULT ->
(# GHC.Types.C# wild3_X1G, w_s2WR #)
}
}
}
};
62# ->
case ww_s2WU of wild2_a2QQ {
__DEFAULT ->
case quotRemInt# wild2_a2QQ 62# of { (# ipv_a2QT, ipv1_a2QU #) ->
case indexCharOffAddr# chars62_r30r ipv1_a2QU of wild4_X4
{ __DEFAULT ->
case <# wild2_a2QQ 62# of {
__DEFAULT ->
Bug.$wgo
ipv_a2QT (GHC.Types.: @ Char (GHC.Types.C# wild4_X4) w_s2WR);
1# ->
case indexCharOffAddr# chars62_r30r wild2_a2QQ of wild5_X1G
{ __DEFAULT ->
(# GHC.Types.C# wild5_X1G, w_s2WR #)
}
}
}
};
-9223372036854775808# ->
case lvl_r30s of { C# v1_B2 -> (# GHC.Types.C# v1_B2, w_s2WR #) }
}
}
}
end Rec }
Bug.f_go :: Int -> [Char] -> [Char]
Bug.f_go
= \ (w_s2WQ :: Int) (w1_s2WR :: [Char]) ->
case w_s2WQ of { I# ww1_s2WU ->
case Bug.$wgo ww1_s2WU w1_s2WR of { (# ww3_s2Xa, ww4_s2Xb #) ->
GHC.Types.: @ Char ww3_s2Xa ww4_s2Xb
}
}
f :: Int -> String
f = \ (n__aXG :: Int) ->
case n__aXG of { I# ww1_s2WU ->
case Bug.$wgo ww1_s2WU (GHC.Types.[] @ Char) of
{ (# ww3_s2Xa, ww4_s2Xb #) ->
GHC.Types.: @ Char ww3_s2Xa ww4_s2Xb
}
}
-- Core for g
Rec {
Bug.$wgo1
:: Int# -> [Char] -> (# Char, [Char] #)
Bug.$wgo1
= \ (ww_s2X4 :: Int#) (w_s2X1 :: [Char]) ->
case GHC.Real.even3 of { I# y_a2QI ->
case y_a2QI of {
__DEFAULT ->
case quotRemInt# ww_s2X4 62# of { (# ipv_a2QN, ipv1_a2QO #) ->
case indexCharOffAddr# chars62_r30r ipv1_a2QO of wild2_X4
{ __DEFAULT ->
case <# ww_s2X4 62# of {
__DEFAULT ->
Bug.$wgo1
ipv_a2QN (GHC.Types.: @ Char (GHC.Types.C# wild2_X4) w_s2X1);
1# ->
case indexCharOffAddr# chars62_r30r ww_s2X4 of wild3_XY
{ __DEFAULT ->
(# GHC.Types.C# wild3_XY, w_s2X1 #)
}
}
}
};
62# ->
case ww_s2X4 of wild2_a2QQ {
__DEFAULT ->
case quotRemInt# wild2_a2QQ 62# of { (# ipv_a2QT, ipv1_a2QU #) ->
case indexCharOffAddr# chars62_r30r ipv1_a2QU of wild4_X4
{ __DEFAULT ->
case <# wild2_a2QQ 62# of {
__DEFAULT ->
Bug.$wgo1
ipv_a2QT (GHC.Types.: @ Char (GHC.Types.C# wild4_X4) w_s2X1);
1# ->
case indexCharOffAddr# chars62_r30r wild2_a2QQ of wild5_XY
{ __DEFAULT ->
(# GHC.Types.C# wild5_XY, w_s2X1 #)
}
}
}
};
-9223372036854775808# ->
case GHC.Real.overflowError of wild4_00 { }
}
}
}
end Rec }
Bug.g_go :: Int -> [Char] -> [Char]
Bug.g_go
= \ (w_s2X0 :: Int) (w1_s2X1 :: [Char]) ->
case w_s2X0 of { I# ww1_s2X4 ->
case Bug.$wgo1 ww1_s2X4 w1_s2X1 of { (# ww3_s2Xd, ww4_s2Xe #) ->
GHC.Types.: @ Char ww3_s2Xd ww4_s2Xe
}
}
g :: Int -> String
g = \ (n__a29X :: Int) ->
case n__a29X of { I# ww1_s2X4 ->
case Bug.$wgo1 ww1_s2X4 (GHC.Types.[] @ Char) of
{ (# ww3_s2Xd, ww4_s2Xe #) ->
GHC.Types.: @ Char ww3_s2Xd ww4_s2Xe
}
}
Of particular interest is:
-- chararacter array, used by both
chars62_r30r :: Addr#
chars62_r30r
= "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"#
lvl_r30s :: Char
lvl_r30s
= case indexCharOffAddr# chars62_r30r -9223372036854775808# of v_B2
{ __DEFAULT ->
GHC.Types.C# v_B2
}
which is only used in the Core for f
, not g
! We're trying to access index minBound :: Int
of that array of chars. While this is only used when we pass minBound
to our function, it is still wrong I think. Moreover, as hadrian issue 861 showed, this can lead to... linker errors! Which got fixed by changing the implementation of iToBase62
in Unique.hs from f
to g
:-)
Note that when I build the same commit with the make build system, then GHC generates Core close to g
's above for both functions.
I tried describing some of the transformations that occur in this comment on hadrian's issue tracker. The gist of it is that the lack of strictness in q
leads GHC to not spotting it early and when we inline quotRem
/quotRemInt
and start floating things in/out and distributing case ... of
branches around, we end up with a dedicated branch in the inner loop for minBound
which actually makes use of the result of indexAddrOffAddr#
, as you can see here:
-9223372036854775808# ->
-- lvl_r30s is our bad value
case lvl_r30s of { C# v1_B2 -> (# GHC.Types.C# v1_B2, w_s2WR #) }
whereas this is what this branch looks like for g
:
-9223372036854775808# ->
case GHC.Real.overflowError of wild4_00 { }
The overflowError
is still there and GHC therefore realised that there's no point in computing anything since we always raise an overflow error in that branch. This overflowError
just disappears in f
.
Trac metadata
Trac field | Value |
---|---|
Version | 8.5 |
Type | Bug |
TypeOfFailure | OtherFailure |
Priority | normal |
Resolution | Unresolved |
Component | Compiler |
Test case | |
Differential revisions | |
BlockedBy | |
Related | |
Blocking | |
CC | |
Operating system | |
Architecture |