diff --git a/.gitattributes b/.gitattributes index aa7ab64d11d6c3892597a41cbd35d76d0ff0daf4..454cd0e7dfad21ea5f38ec53e3686e74a631d8c2 100644 --- a/.gitattributes +++ b/.gitattributes @@ -2,3 +2,4 @@ # don't convert anything on checkout * text=auto eol=lf mk/win32-tarballs.md5sum text=auto eol=LF +testsuite/tests/parser/should_run/T25375.hs text=auto eol=crlf diff --git a/compiler/GHC/Parser/String.hs b/compiler/GHC/Parser/String.hs index 00687e3d84f43bd031a94dfad74e8d024a7b3786..5c226e8b62e63726c468bad352192ae8a9999acc 100644 --- a/compiler/GHC/Parser/String.hs +++ b/compiler/GHC/Parser/String.hs @@ -261,6 +261,7 @@ lexMultilineString = lexStringWith processChars processChars processChars :: HasChar c => [c] -> Either (c, LexErr) [c] processChars = collapseGaps -- Step 1 + >>> normalizeEOL >>> expandLeadingTabs -- Step 3 >>> rmCommonWhitespacePrefix -- Step 4 >>> collapseOnlyWsLines -- Step 5 @@ -268,6 +269,19 @@ lexMultilineString = lexStringWith processChars processChars >>> rmLastNewline -- Step 7b >>> resolveEscapes -- Step 8 + -- Normalize line endings to LF. The spec dictates that lines should be + -- split on newline characters and rejoined with ``\n``. But because we + -- aren't actually splitting/rejoining, we'll manually normalize here + normalizeEOL :: HasChar c => [c] -> [c] + normalizeEOL = + let go = \case + Char '\r' : c@(Char '\n') : cs -> c : go cs + c@(Char '\r') : cs -> setChar '\n' c : go cs + c@(Char '\f') : cs -> setChar '\n' c : go cs + c : cs -> c : go cs + [] -> [] + in go + -- expands all tabs, since the lexer will verify that tabs can only appear -- as leading indentation expandLeadingTabs :: HasChar c => [c] -> [c] diff --git a/docs/users_guide/exts/multiline_strings.rst b/docs/users_guide/exts/multiline_strings.rst index a3c71dd000c595863e654af906991bb576075059..e42853224eddf2ca0f8b8c9945f4bb7e58756821 100644 --- a/docs/users_guide/exts/multiline_strings.rst +++ b/docs/users_guide/exts/multiline_strings.rst @@ -14,7 +14,9 @@ With this extension, GHC now recognizes multiline string literals with ``"""`` d Normal string literals are lexed, then string gaps are collapsed, then escape characters are resolved. Multiline string literals add the following post-processing steps between collapsing string gaps and resolving escape characters: -#. Split the string by newlines +#. Split the string by newline characters + + * Includes ``\r\n``, ``\r``, ``\n``, ``\f`` #. Replace leading tabs with spaces up to the next tab stop @@ -24,7 +26,9 @@ Normal string literals are lexed, then string gaps are collapsed, then escape ch #. Join the string back with ``\n`` delimiters -#. If the first character of the string is a newline, remove it +#. If the first character of the string is ``\n``, remove it + +#. If the last character of the string is ``\n``, remove it Examples ~~~~~~~~ diff --git a/testsuite/tests/parser/should_run/T25375.hs b/testsuite/tests/parser/should_run/T25375.hs new file mode 100644 index 0000000000000000000000000000000000000000..1b516edabdc5843e815f7ae29ca37351da1dbcd0 --- /dev/null +++ b/testsuite/tests/parser/should_run/T25375.hs @@ -0,0 +1,38 @@ +{-# LANGUAGE MultilineStrings #-} + +str1 = unlines + [ "aaa" + , "bbb" + , "ccc" + ] + +str2 = "aaa\n\ + \bbb\n\ + \ccc\n" + +str3 = """ + aaa + bbb + ccc + """ + +str4 = """ + + aaa + bbb + ccc + + """ + +str5 = """ + aaa + bbb + ccc\n + """ + +main = do + print str1 + print str2 + print str3 + print str4 + print str5 diff --git a/testsuite/tests/parser/should_run/T25375.stdout b/testsuite/tests/parser/should_run/T25375.stdout new file mode 100644 index 0000000000000000000000000000000000000000..0cdf50b16d769a1a0830dafe58450ac6d7b23cc7 --- /dev/null +++ b/testsuite/tests/parser/should_run/T25375.stdout @@ -0,0 +1,5 @@ +"aaa\nbbb\nccc\n" +"aaa\nbbb\nccc\n" +"aaa\nbbb\nccc" +"\naaa\nbbb\nccc\n" +"aaa\nbbb\nccc\n" diff --git a/testsuite/tests/parser/should_run/all.T b/testsuite/tests/parser/should_run/all.T index 03f355d88e274e479a6bfc56c217315e7958bf82..df886c4a24f415e7ff2182cd0139323e1e4cdc15 100644 --- a/testsuite/tests/parser/should_run/all.T +++ b/testsuite/tests/parser/should_run/all.T @@ -23,3 +23,4 @@ test('RecordDotSyntax5', normal, compile_and_run, ['']) test('ListTuplePunsConstraints', extra_files(['ListTuplePunsConstraints.hs']), ghci_script, ['ListTuplePunsConstraints.script']) test('MultilineStrings', normal, compile_and_run, ['']) test('MultilineStringsOverloaded', normal, compile_and_run, ['']) +test('T25375', normal, compile_and_run, [''])