Commit 065be6e9 authored by alexbiehl's avatar alexbiehl Committed by Ben Gamari

Caret diag.: Avoid decoding whole module if only specific line is needed

Before we were decoding the whole file to get to the desired line. This
patch introduces a fast function which searches a StringBuffer for the
desired line so we only need to utf8 decode a little portion.

This is especially interesting if we have big modules with lots of
warnings.

Reviewers: austin, bgamari, Rufflewind, trofi

Reviewed By: Rufflewind, trofi

Subscribers: rwbarton, thomie

Differential Revision: https://phabricator.haskell.org/D3440
parent c87584f1
......@@ -64,7 +64,7 @@ import qualified PprColour as Col
import SrcLoc
import DynFlags
import FastString (unpackFS)
import StringBuffer (hGetStringBuffer, len, lexemeToString)
import StringBuffer (atLine, hGetStringBuffer, len, lexemeToString)
import Json
import System.Directory
......@@ -231,27 +231,26 @@ getSeverityColour _ = const mempty
getCaretDiagnostic :: Severity -> SrcSpan -> IO MsgDoc
getCaretDiagnostic _ (UnhelpfulSpan _) = pure empty
getCaretDiagnostic severity (RealSrcSpan span) = do
caretDiagnostic <$> getSrcLine (srcSpanFile span) (row - 1)
caretDiagnostic <$> getSrcLine (srcSpanFile span) row
where
getSrcLine fn i = do
(getLine i <$> readFile' (unpackFS fn))
`catchIOError` \ _ ->
getSrcLine fn i =
getLine i (unpackFS fn)
`catchIOError` \_ ->
pure Nothing
getLine i contents =
case drop i (lines contents) of
srcLine : _ -> Just srcLine
[] -> Nothing
readFile' fn = do
getLine i fn = do
-- StringBuffer has advantages over readFile:
-- (a) no lazy IO, otherwise IO exceptions may occur in pure code
-- (b) always UTF-8, rather than some system-dependent encoding
-- (Haskell source code must be UTF-8 anyway)
buf <- hGetStringBuffer fn
pure (fix <$> lexemeToString buf (len buf))
content <- hGetStringBuffer fn
case atLine i content of
Just at_line -> pure $
case lines (fix <$> lexemeToString at_line (len at_line)) of
srcLine : _ -> Just srcLine
_ -> Nothing
_ -> pure Nothing
-- allow user to visibly see that their code is incorrectly encoded
-- (StringBuffer.nextChar uses \0 to represent undecodable characters)
......
......@@ -6,7 +6,7 @@
Buffers for scanning string input stored in external arrays.
-}
{-# LANGUAGE CPP, MagicHash, UnboxedTuples #-}
{-# LANGUAGE BangPatterns, CPP, MagicHash, UnboxedTuples #-}
{-# OPTIONS_GHC -O #-}
-- We always optimise this, otherwise performance of a non-optimised
-- compiler is severely affected
......@@ -32,6 +32,7 @@ module StringBuffer
stepOn,
offsetBytes,
byteDiff,
atLine,
-- * Conversion
lexemeToString,
......@@ -240,6 +241,43 @@ byteDiff s1 s2 = cur s2 - cur s1
atEnd :: StringBuffer -> Bool
atEnd (StringBuffer _ l c) = l == c
-- | Computes a 'StringBuffer' which points to the first character of the
-- wanted line. Lines begin at 1.
atLine :: Int -> StringBuffer -> Maybe StringBuffer
atLine line sb@(StringBuffer buf len _) =
inlinePerformIO $
withForeignPtr buf $ \p -> do
p' <- skipToLine line len p
if p' == nullPtr
then return Nothing
else
let
delta = p' `minusPtr` p
in return $ Just (sb { cur = delta
, len = len - delta
})
skipToLine :: Int -> Int -> Ptr Word8 -> IO (Ptr Word8)
skipToLine !line !len !op0 = go 1 op0
where
!opend = op0 `plusPtr` len
go !i_line !op
| op >= opend = pure nullPtr
| i_line == line = pure op
| otherwise = do
w <- peek op :: IO Word8
case w of
10 -> go (i_line + 1) (plusPtr op 1)
13 -> do
-- this is safe because a 'StringBuffer' is
-- guaranteed to have 3 bytes sentinel values.
w' <- peek (plusPtr op 1) :: IO Word8
case w' of
10 -> go (i_line + 1) (plusPtr op 2)
_ -> go (i_line + 1) (plusPtr op 1)
_ -> go i_line (plusPtr op 1)
-- -----------------------------------------------------------------------------
-- Conversion
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment