Commit 1c0b5fdc authored by Herbert Valerio Riedel's avatar Herbert Valerio Riedel 🕺

Add -XBinaryLiterals language extension (re #9224)

Haskell2010 supports

- base-10 (prefix-less),
- base-8 (via `0[oO]`-prefix), and
- base-16 (via `0[xX]`-prefix) integer literals.

This commit adds syntax support for base-2 integer literals via the new `0[bB]`
prefix. The use of a `0b` prefix for indicating binary literals is known
from popular programming languages such as C++14, Perl, Python, Ruby, and Java.

This syntax extension is disabled by default and can be enabled via the
new `{-# LANGUAGE BinaryLiterals #-}` pragma and/or the new `-XBinaryLiterals`

This new extensions requires to upgrade the `ExtsBitmap` type from
`Word` to `Word64` as this adds a 33th flag which is not guaranteed to
fit into a `Word`.
Signed-off-by: Herbert Valerio Riedel's avatarHerbert Valerio Riedel <hvr@gnu.org>

Differential Revision: https://phabricator.haskell.org/D22
parent f4766c4b
......@@ -582,6 +582,7 @@ data ExtensionFlag
| Opt_TraditionalRecordSyntax
| Opt_LambdaCase
| Opt_MultiWayIf
| Opt_BinaryLiterals
| Opt_NegativeLiterals
| Opt_EmptyCase
| Opt_PatternSynonyms
......@@ -2875,6 +2876,7 @@ xFlags = [
( "UndecidableInstances", Opt_UndecidableInstances, nop ),
( "IncoherentInstances", Opt_IncoherentInstances, nop ),
( "PackageImports", Opt_PackageImports, nop ),
( "BinaryLiterals", Opt_BinaryLiterals, nop ),
( "NegativeLiterals", Opt_NegativeLiterals, nop ),
( "EmptyCase", Opt_EmptyCase, nop ),
( "PatternSynonyms", Opt_PatternSynonyms, nop )
......
......@@ -19,7 +19,7 @@ module Ctype
, is_digit -- Char# -> Bool
, is_alphanum -- Char# -> Bool
, is_decdigit, is_hexdigit, is_octdigit
, is_decdigit, is_hexdigit, is_octdigit, is_bindigit
, hexDigit, octDecDigit
) where
......@@ -87,6 +87,9 @@ is_hexdigit c
is_octdigit :: Char -> Bool
is_octdigit c = c >= '0' && c <= '7'
is_bindigit :: Char -> Bool
is_bindigit c = c == '0' || c == '1'
to_lower :: Char -> Char
to_lower c
| c >= 'A' && c <= 'Z' = chr (ord c - (ord 'A' - ord 'a'))
......
......@@ -117,6 +117,7 @@ $small = [$ascsmall $unismall \_]
$unigraphic = \x06 -- Trick Alex into handling Unicode. See alexGetChar.
$graphic = [$small $large $symbol $digit $special $unigraphic \:\"\']
$binit = 0-1
$octit = 0-7
$hexit = [$decdigit A-F a-f]
$symchar = [$symbol \:]
......@@ -134,6 +135,7 @@ $docsym = [\| \^ \* \$]
@consym = \: $symchar*
@decimal = $decdigit+
@binary = $binit+
@octal = $octit+
@hexadecimal = $hexit+
@exponent = [eE] [\-\+]? @decimal
......@@ -401,9 +403,12 @@ $tab+ { warn Opt_WarnTabs (text "Tab character") }
<0> {
-- Normal integral literals (:: Num a => a, from Integer)
@decimal { tok_num positive 0 0 decimal }
0[bB] @binary / { ifExtension binaryLiteralsEnabled } { tok_num positive 2 2 binary }
0[oO] @octal { tok_num positive 2 2 octal }
0[xX] @hexadecimal { tok_num positive 2 2 hexadecimal }
@negative @decimal / { ifExtension negativeLiteralsEnabled } { tok_num negative 1 1 decimal }
@negative 0[bB] @binary / { ifExtension negativeLiteralsEnabled `alexAndPred`
ifExtension binaryLiteralsEnabled } { tok_num negative 3 3 binary }
@negative 0[oO] @octal / { ifExtension negativeLiteralsEnabled } { tok_num negative 3 3 octal }
@negative 0[xX] @hexadecimal / { ifExtension negativeLiteralsEnabled } { tok_num negative 3 3 hexadecimal }
......@@ -417,13 +422,19 @@ $tab+ { warn Opt_WarnTabs (text "Tab character") }
-- It's simpler (and faster?) to give separate cases to the negatives,
-- especially considering octal/hexadecimal prefixes.
@decimal \# / { ifExtension magicHashEnabled } { tok_primint positive 0 1 decimal }
0[bB] @binary \# / { ifExtension magicHashEnabled `alexAndPred`
ifExtension binaryLiteralsEnabled } { tok_primint positive 2 3 binary }
0[oO] @octal \# / { ifExtension magicHashEnabled } { tok_primint positive 2 3 octal }
0[xX] @hexadecimal \# / { ifExtension magicHashEnabled } { tok_primint positive 2 3 hexadecimal }
@negative @decimal \# / { ifExtension magicHashEnabled } { tok_primint negative 1 2 decimal }
@negative 0[bB] @binary \# / { ifExtension magicHashEnabled `alexAndPred`
ifExtension binaryLiteralsEnabled } { tok_primint negative 3 4 binary }
@negative 0[oO] @octal \# / { ifExtension magicHashEnabled } { tok_primint negative 3 4 octal }
@negative 0[xX] @hexadecimal \# / { ifExtension magicHashEnabled } { tok_primint negative 3 4 hexadecimal }
@decimal \# \# / { ifExtension magicHashEnabled } { tok_primword 0 2 decimal }
0[bB] @binary \# \# / { ifExtension magicHashEnabled `alexAndPred`
ifExtension binaryLiteralsEnabled } { tok_primword 2 4 binary }
0[oO] @octal \# \# / { ifExtension magicHashEnabled } { tok_primword 2 4 octal }
0[xX] @hexadecimal \# \# / { ifExtension magicHashEnabled } { tok_primword 2 4 hexadecimal }
......@@ -1112,6 +1123,7 @@ positive = id
negative = negate
decimal, octal, hexadecimal :: (Integer, Char -> Int)
decimal = (10,octDecDigit)
binary = (2,octDecDigit)
octal = (8,octDecDigit)
hexadecimal = (16,hexDigit)
......@@ -1410,6 +1422,7 @@ lex_escape = do
'x' -> readNum is_hexdigit 16 hexDigit
'o' -> readNum is_octdigit 8 octDecDigit
'b' -> readNum is_bindigit 2 octDecDigit
x | is_decdigit x -> readNum2 is_decdigit 10 octDecDigit (octDecDigit x)
c1 -> do
......@@ -1855,8 +1868,8 @@ setAlrExpectingOCurly b = P $ \s -> POk (s {alr_expecting_ocurly = b}) ()
-- for reasons of efficiency, flags indicating language extensions (eg,
-- -fglasgow-exts or -XParallelArrays) are represented by a bitmap
-- stored in an unboxed Int
type ExtsBitmap = Word
-- stored in an unboxed Word64
type ExtsBitmap = Word64
xbit :: ExtBits -> ExtsBitmap
xbit = bit . fromEnum
......@@ -1897,6 +1910,7 @@ data ExtBits
| TypeLiteralsBit
| ExplicitNamespacesBit
| LambdaCaseBit
| BinaryLiteralsBit
| NegativeLiteralsBit
deriving Enum
......@@ -1952,6 +1966,8 @@ explicitNamespacesEnabled :: ExtsBitmap -> Bool
explicitNamespacesEnabled = xtest ExplicitNamespacesBit
lambdaCaseEnabled :: ExtsBitmap -> Bool
lambdaCaseEnabled = xtest LambdaCaseBit
binaryLiteralsEnabled :: ExtsBitmap -> Bool
binaryLiteralsEnabled = xtest BinaryLiteralsBit
negativeLiteralsEnabled :: ExtsBitmap -> Bool
negativeLiteralsEnabled = xtest NegativeLiteralsBit
patternSynonymsEnabled :: ExtsBitmap -> Bool
......@@ -2018,6 +2034,7 @@ mkPState flags buf loc =
.|. TypeLiteralsBit `setBitIf` xopt Opt_DataKinds flags
.|. ExplicitNamespacesBit `setBitIf` xopt Opt_ExplicitNamespaces flags
.|. LambdaCaseBit `setBitIf` xopt Opt_LambdaCase flags
.|. BinaryLiteralsBit `setBitIf` xopt Opt_BinaryLiterals flags
.|. NegativeLiteralsBit `setBitIf` xopt Opt_NegativeLiterals flags
.|. PatternSynonymsBit `setBitIf` xopt Opt_PatternSynonyms flags
--
......
......@@ -31,7 +31,7 @@
<itemizedlist>
<listitem>
<para>
TODO FIXME
Added support for <link linkend="binary-literals">binary integer literals</link>
</para>
</listitem>
</itemizedlist>
......
......@@ -762,6 +762,12 @@
<entry>dynamic</entry>
<entry><option>-XNoBangPatterns</option></entry>
</row>
<row>
<entry><option>-XBinaryLiterals</option></entry>
<entry>Enable support for <link linkend="binary-literals">binary literals</link>.</entry>
<entry>dynamic</entry>
<entry><option>-XNoBinaryLiterals</option></entry>
</row>
<row>
<entry><option>-XCApiFFI</option></entry>
<entry>Enable <link linkend="ffi-capi">the CAPI calling convention</link>.</entry>
......
......@@ -480,6 +480,26 @@ Indeed, the bindings can even be recursive.
</para>
</sect2>
<sect2 id="binary-literals">
<title>Binary integer literals</title>
<para>
Haskell 2010 and Haskell 98 allows for integer literals to
be given in decimal, octal (prefixed by
<literal>0o</literal> or <literal>0O</literal>), or
hexadecimal notation (prefixed by <literal>0x</literal> or
<literal>0X</literal>).
</para>
<para>
The language extension <option>-XBinaryLiterals</option>
adds support for expressing integer literals in binary
notation with the prefix <literal>0b</literal> or
<literal>0B</literal>. For instance, the binary integer
literal <literal>0b11001001</literal> will be desugared into
<literal>fromInteger 201</literal> when
<option>-XBinaryLiterals</option> is enabled.
</para>
</sect2>
<!-- ====================== HIERARCHICAL MODULES ======================= -->
......
......@@ -1016,6 +1016,9 @@ tests/overloadedlists/should_run/overloadedlistsrun05
tests/parser/should_compile/T5243
tests/parser/should_compile/T7476/Main.imports
tests/parser/should_compile/T7476/T7476
tests/parser/should_run/BinaryLiterals0
tests/parser/should_run/BinaryLiterals1
tests/parser/should_run/BinaryLiterals2
tests/parser/should_run/ParserMultiWayIf
tests/parser/should_run/T1344
tests/parser/should_run/operator
......
......@@ -32,6 +32,7 @@ check title expected got
expectedGhcOnlyExtensions :: [String]
expectedGhcOnlyExtensions = ["RelaxedLayout",
"AlternativeLayoutRule",
"BinaryLiterals",
"AlternativeLayoutRuleTransitional",
"JavaScriptFFI",
"PatternSynonyms"]
......
module ParserNoBinaryLiterals1 where
f :: Int -> ()
f 0b0 = ()
f _ = ()
ParserNoBinaryLiterals1.hs:4:1:
Equations for ‘f’ have different numbers of arguments
ParserNoBinaryLiterals1.hs:4:1-10
ParserNoBinaryLiterals1.hs:5:1-10
{-# LANGUAGE MagicHash #-}
module ParserNoBinaryLiterals2 where
import GHC.Types
f :: Word -> ()
f (W# 0b0##) = ()
f _ = ()
ParserNoBinaryLiterals2.hs:8:4:
Constructor ‘W#’ should have 1 argument, but has been given 2
In the pattern: W# 0 b0##
In an equation for ‘f’: f (W# 0 b0##) = ()
{-# LANGUAGE MagicHash #-}
module ParserNoBinaryLiterals3 where
import GHC.Types
f :: Int -> ()
f (I# 0b0#) = ()
f _ = ()
ParserNoBinaryLiterals3.hs:8:4:
Constructor ‘I#’ should have 1 argument, but has been given 2
In the pattern: I# 0 b0#
In an equation for ‘f’: f (I# 0 b0#) = ()
......@@ -75,6 +75,9 @@ test('readFailTraditionalRecords3', normal, compile_fail, [''])
test('ParserNoForallUnicode', normal, compile_fail, [''])
test('ParserNoLambdaCase', when(compiler_lt('ghc', '7.5'), skip), compile_fail, [''])
test('ParserNoMultiWayIf', when(compiler_lt('ghc', '7.5'), skip), compile_fail, [''])
test('ParserNoBinaryLiterals1', normal, compile_fail, [''])
test('ParserNoBinaryLiterals2', normal, compile_fail, [''])
test('ParserNoBinaryLiterals3', normal, compile_fail, [''])
test('T5425', normal, compile_fail, [''])
test('T984', normal, compile_fail, [''])
......
-- | Anti-Test for GHC 7.10+'s @BinaryLiterals@ extensions (see GHC #9224)
--
-- NB: This code won't compile with -XBinaryLiterals enabled
{-# LANGUAGE NegativeLiterals #-}
module Main where
main :: IO ()
main = print lst
where
-- "0b0" is to be parsed as "0 b0"
lst = [ (,) 0b0, (,) 0b1, (,) 0b10, (,) 0b11
, (,) -0b0, (,) -0b1, (,) -0b10, (,) -0b11
] :: [(Int,Int)]
b0 = 60
b1 = 61
b11 = 611
b10 = 610
[(0,60),(0,61),(0,610),(0,611),(0,60),(0,61),(0,610),(0,611)]
-- | Test for GHC 7.10+'s @BinaryLiterals@ extensions (see GHC #9224)
{-# LANGUAGE BinaryLiterals #-}
{-# LANGUAGE MagicHash #-}
module Main where
import GHC.Types
main = do
print [ I# 0b0#, I# -0b0#, I# 0b1#, I# -0b1#
, I# 0b00000000000000000000000000000000000000000000000000000000000000000000000000001#
, I# -0b00000000000000000000000000000000000000000000000000000000000000000000000000001#
, I# -0b11001001#, I# -0b11001001#
, I# -0b11111111#, I# -0b11111111#
]
print [ W# 0b0##, W# 0b1##, W# 0b11001001##, W# 0b11##, W# 0b11111111##
, W# 0b00000000000000000000000000000000000000000000000000000000000000000000000000001##
]
print [ 0b0, 0b1, 0b10, 0b11, 0b100, 0b101, 0b110, 0b111 :: Integer
, -0b0, -0b1, -0b10, -0b11, -0b100, -0b101, -0b110, -0b111
, 0b11111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111
, -0b11111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111
]
[0,0,1,-1,1,-1,-201,-201,-255,-255]
[0,1,201,3,255,1]
[0,1,2,3,4,5,6,7,0,-1,-2,-3,-4,-5,-6,-7,340282366920938463463374607431768211455,-340282366920938463463374607431768211455]
-- | Test for GHC 7.10+'s @BinaryLiterals@ extensions (see GHC #9224)
{-# LANGUAGE BinaryLiterals #-}
{-# LANGUAGE MagicHash #-}
{-# LANGUAGE NegativeLiterals #-}
module Main where
import GHC.Types
import GHC.Int
main = do
print [ I# 0B0#, I# -0B0#, I# 0B1#, I# -0B1#
, I# 0B00000000000000000000000000000000000000000000000000000000000000000000000000001#
, I# -0B00000000000000000000000000000000000000000000000000000000000000000000000000001#
, I# -0B11001001#, I# -0B11001001#
, I# -0B11111111#, I# -0B11111111#
]
print [ W# 0B0##, W# 0B1##, W# 0B11001001##, W# 0B11##, W# 0B11111111##
, W# 0B00000000000000000000000000000000000000000000000000000000000000000000000000001##
]
print [ 0B0, 0B1, 0B10, 0B11, 0B100, 0B101, 0B110, 0B111 :: Integer
, -0B0, -0B1, -0B10, -0B11, -0B100, -0B101, -0B110, -0B111
, 0B11111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111
, -0B11111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111
]
print [ I8# -0B10000000#, I8# 0B1111111# ]
[0,0,1,-1,1,-1,-201,-201,-255,-255]
[0,1,201,3,255,1]
[0,1,2,3,4,5,6,7,0,-1,-2,-3,-4,-5,-6,-7,340282366920938463463374607431768211455,-340282366920938463463374607431768211455]
[-128,127]
......@@ -6,3 +6,6 @@ test('T1344', normal, compile_and_run, [''])
test('operator', normal, compile_and_run, [''])
test('operator2', normal, compile_and_run, [''])
test('ParserMultiWayIf', when(compiler_lt('ghc', '7.5'), skip), compile_and_run, [''])
test('BinaryLiterals0', normal, compile_and_run, [''])
test('BinaryLiterals1', when(compiler_lt('ghc', '7.9'), skip), compile_and_run, [''])
test('BinaryLiterals2', when(compiler_lt('ghc', '7.9'), skip), compile_and_run, [''])
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment