Commit dbd81f7e authored by Tobias Dammers's avatar Tobias Dammers 🦈 Committed by Ben Gamari

Factor out readField (#14364)

Improves compiler performance of deriving Read instances, as suggested
in the issue.

Additionally, we introduce `readSymField`, a companion to `readField`
that parses symbol-type fields (where the field name is a symbol, e.g.
`(#)`, rather than an alphanumeric identifier. The decision between
these two functions is made a compile time, because we already know
which one we need based on the field name.

Reviewers: austin, hvr, bgamari, RyanGlScott

Reviewed By: bgamari

Subscribers: RyanGlScott, rwbarton, thomie

Differential Revision: https://phabricator.haskell.org/D4108
parent 4c06ccb7
......@@ -742,6 +742,10 @@ choose_RDR = varQual_RDR gHC_READ (fsLit "choose")
lexP_RDR = varQual_RDR gHC_READ (fsLit "lexP")
expectP_RDR = varQual_RDR gHC_READ (fsLit "expectP")
readField_RDR, readSymField_RDR :: RdrName
readField_RDR = varQual_RDR gHC_READ (fsLit "readField")
readSymField_RDR = varQual_RDR gHC_READ (fsLit "readSymField")
punc_RDR, ident_RDR, symbol_RDR :: RdrName
punc_RDR = dataQual_RDR lEX (fsLit "Punc")
ident_RDR = dataQual_RDR lEX (fsLit "Ident")
......
......@@ -900,9 +900,7 @@ instance Read T where
-- Record construction binds even more tightly than application
do expectP (Ident "T1")
expectP (Punc '{')
expectP (Ident "f1")
expectP (Punc '=')
x <- ReadP.reset Read.readPrec
x <- Read.readField "f1" (ReadP.reset readPrec)
expectP (Punc '}')
return (T1 { f1 = x }))
+++
......@@ -1068,21 +1066,28 @@ gen_Read_binds get_fixity loc tycon
read_arg a ty = ASSERT( not (isUnliftedType ty) )
noLoc (mkBindStmt (nlVarPat a) (nlHsVarApps step_RDR [readPrec_RDR]))
read_field lbl a = read_lbl lbl ++
[read_punc "=",
noLoc (mkBindStmt (nlVarPat a) (nlHsVarApps reset_RDR [readPrec_RDR]))]
-- When reading field labels we might encounter
-- a = 3
-- _a = 3
-- or (#) = 4
-- Note the parens!
read_lbl lbl | isSym lbl_str
= [read_punc "(", symbol_pat lbl_str, read_punc ")"]
| otherwise
= ident_h_pat lbl_str
where
lbl_str = unpackFS lbl
-- When reading field labels we might encounter
-- a = 3
-- _a = 3
-- or (#) = 4
-- Note the parens!
read_field lbl a =
[noLoc
(mkBindStmt
(nlVarPat a)
(nlHsApps
read_field
[ nlHsLit (mkHsString lbl_str)
, nlHsVarApps reset_RDR [readPrec_RDR]
]
)
)
]
where
lbl_str = unpackFS lbl
read_field
| isSym lbl_str = readSymField_RDR
| otherwise = readField_RDR
{-
************************************************************************
......
......@@ -36,6 +36,8 @@ module GHC.Read
, choose
, readListDefault, readListPrecDefault
, readNumber
, readField
, readSymField
-- Temporary
, readParen
......@@ -359,6 +361,50 @@ choose sps = foldr ((+++) . try_one) pfail sps
L.Symbol s' | s==s' -> p
_other -> pfail }
-- See Note [Why readField]
-- | 'Read' parser for a record field, of the form @fieldName=value@. The
-- @fieldName@ must be an alphanumeric identifier; for symbols (operator-style)
-- field names, e.g. @(#)@, use 'readSymField'). The second argument is a
-- parser for the field value.
readField :: String -> ReadPrec a -> ReadPrec a
readField fieldName readVal = do
expectP (L.Ident fieldName)
expectP (L.Punc "=")
readVal
{-# NOINLINE readField #-}
-- See Note [Why readField]
-- | 'Read' parser for a symbol record field, of the form @(###)=value@ (where
-- @###@ is the field name). The field name must be a symbol (operator-style),
-- e.g. @(#)@. For regular (alphanumeric) field names, use 'readField'. The
-- second argument is a parser for the field value.
readSymField :: String -> ReadPrec a -> ReadPrec a
readSymField fieldName readVal = do
expectP (L.Punc "(")
expectP (L.Symbol fieldName)
expectP (L.Punc ")")
expectP (L.Punc "=")
readVal
{-# NOINLINE readSymField #-}
-- Note [Why readField]
--
-- Previousy, the code for automatically deriving Read instance (in
-- typecheck/TcGenDeriv.hs) would generate inline code for parsing fields;
-- this, however, turned out to produce massive amounts of intermediate code,
-- and produced a considerable performance hit in the code generator.
-- Since Read instances are not generally supposed to be perfomance critical,
-- the readField and readSymField functions have been factored out, and the
-- code generator now just generates calls rather than manually inlining the
-- parsers. For large record types (e.g. 500 fields), this produces a
-- significant performance boost.
--
-- See also Trac #14364.
--------------------------------------------------------------
-- Simple instances of Read
--------------------------------------------------------------
......
......@@ -197,7 +197,7 @@ test('T3294',
# 2013-11-13: 1478325844 (x86/Windows, 64bit machine)
# 2014-01-12: 1565185140 (x86/Linux)
# 2013-04-04: 1377050640 (x86/Windows, 64bit machine)
(wordsize(64), 2253557280, 5)]),
(wordsize(64), 1858491504, 5)]),
# old: 1357587088 (amd64/Linux)
# 29/08/2012: 2961778696 (amd64/Linux)
# (^ increase due to new codegen, see #7198)
......@@ -212,6 +212,7 @@ test('T3294',
# 2016-07-11: 2739731144 (Windows) after fix for #12227 (ignoring)
# 2017-02-17: 2758641264 (amd64/Linux) (Type indexed Typeable)
# 2017-05-14: 2253557280 (amd64/Linux) Two-pass CmmLayoutStack
# 2017-10-24: 1858491504 (amd64/Linux) Improved linear regAlloc
conf_3294,
# Use `+RTS -G1` for more stable residency measurements. Note [residency].
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment