Commit 1d9a1d9f authored by Sylvain Henry's avatar Sylvain Henry Committed by Marge Bot

NCG: fast compilation of very large strings (#16190)

This patch adds an optimization into the NCG: for large strings
(threshold configurable via -fbinary-blob-threshold=NNN flag), instead
of printing `.asciz "..."` in the generated ASM source, we print
`.incbin "tmpXXX.dat"` and we dump the contents of the string into a
temporary "tmpXXX.dat" file.

See the note for more details.
parent 0f1eb88c
Pipeline #2202 passed with stages
in 311 minutes and 17 seconds
......@@ -911,6 +911,9 @@ data DynFlags = DynFlags {
specConstrCount :: Maybe Int, -- ^ Max number of specialisations for any one function
specConstrRecursive :: Int, -- ^ Max number of specialisations for recursive types
-- Not optional; otherwise ForceSpecConstr can diverge.
binBlobThreshold :: Word, -- ^ Binary literals (e.g. strings) whose size is above
-- this threshold will be dumped in a binary file
-- by the assembler code generator (0 to disable)
liberateCaseThreshold :: Maybe Int, -- ^ Threshold for LiberateCase
floatLamArgs :: Maybe Int, -- ^ Arg count for lambda floating
-- See CoreMonad.FloatOutSwitches
......@@ -1884,6 +1887,7 @@ defaultDynFlags mySettings (myLlvmTargets, myLlvmPasses) =
maxPmCheckIterations = 2000000,
ruleCheck = Nothing,
inlineCheck = Nothing,
binBlobThreshold = 500000, -- 500K is a good default (see #16190)
maxRelevantBinds = Just 6,
maxValidHoleFits = Just 6,
maxRefHoleFits = Just 6,
......@@ -3526,6 +3530,8 @@ dynamic_flags_deps = [
setOptLevel (mb_n `orElse` 1)))
-- If the number is missing, use 1
, make_ord_flag defFlag "fbinary-blob-threshold"
(intSuffix (\n d -> d { binBlobThreshold = fromIntegral n }))
, make_ord_flag defFlag "fmax-relevant-binds"
(intSuffix (\n d -> d { maxRelevantBinds = Just n }))
......
......@@ -125,8 +125,7 @@ pprDatas :: CmmStatics -> SDoc
-- Print the label first, then every static item, stacked vertically.
pprDatas (Statics label items) = vcat (pprLabel label : [pprData item | item <- items])
-- | Render a single static data item as assembler text.
pprData :: CmmStatic -> SDoc
-- NOTE(review): this listing looks like a diff whose +/- markers were stripped.
-- Presumably the two-line ".string" equation just below is the version the
-- commit removes and the 'pprBytes' equation after it is the replacement;
-- confirm against the original commit before treating both as live code.
pprData (CmmString str)
= text "\t.string" <+> doubleQuotes (pprASCII str)
-- String literals are delegated to 'pprBytes'.
pprData (CmmString str) = pprBytes str
-- Uninitialised data: a ".space" directive followed by the byte count.
pprData (CmmUninitialised bytes) = text ".space " <> int bytes
-- Any other static literal is delegated to 'pprDataItem'.
pprData (CmmStaticLit lit) = pprDataItem lit
......
......@@ -14,6 +14,7 @@ module PprBase (
floatToBytes,
doubleToBytes,
pprASCII,
pprBytes,
pprSectionHeader
)
......@@ -28,6 +29,7 @@ import DynFlags
import FastString
import Outputable
import Platform
import FileCleanup
import qualified Data.Array.Unsafe as U ( castSTUArray )
import Data.Array.ST
......@@ -40,6 +42,7 @@ import Data.ByteString (ByteString)
import qualified Data.ByteString as BS
import GHC.Exts
import GHC.Word
import System.IO.Unsafe
......@@ -125,6 +128,51 @@ pprASCII str
]
ord0 = 0x30 -- = ord '0'
-- | Pretty print binary data.
--
-- Use either the ".string" directive or a ".incbin" directive.
-- See Note [Embedding large binary blobs]
--
-- The ".incbin" form is chosen only when 'binBlobThreshold' is non-zero and
-- the blob is strictly larger than that threshold; otherwise the bytes are
-- rendered inline through 'pprASCII'.
--
-- A NULL byte is added after the binary data (in the ".incbin" case by an
-- explicit ".byte 0" directive).
--
pprBytes :: ByteString -> SDoc
pprBytes bs = sdocWithDynFlags $ \dflags ->
  if binBlobThreshold dflags == 0
     || fromIntegral (BS.length bs) <= binBlobThreshold dflags
    then text "\t.string " <> doubleQuotes (pprASCII bs)
    else unsafePerformIO $ do
      -- NOTE: the temporary file is created and written as a side effect
      -- hidden behind 'unsafePerformIO'; it happens when this part of the
      -- document is evaluated, not when 'pprBytes' is called.
      bFile <- newTempName dflags TFL_CurrentModule ".dat"
      BS.writeFile bFile bs
      return $ text "\t.incbin "
            <> doubleQuotes (text (escapePath bFile))
            <> text "\n\t.byte 0"
  where
    -- The path ends up inside a double-quoted assembler string, so a raw
    -- backslash (e.g. in Windows paths) would be read as the start of an
    -- escape sequence and a raw quote would end the string early.
    escapePath :: String -> String
    escapePath = concatMap escapeChar

    escapeChar :: Char -> String
    escapeChar '\\' = "\\\\"
    escapeChar '"'  = "\\\""
    escapeChar c    = [c]
{-
Note [Embedding large binary blobs]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
To embed a blob of binary data (e.g. a UTF-8 encoded string) into the generated
code object, we have several options:
1. Generate a ".byte" directive for each byte. This is what was done in the past
(see Note [Pretty print ASCII when AsmCodeGen]).
2. Generate a single ".string"/".asciz" directive for the whole sequence of
bytes. Bytes in the ASCII printable range are rendered as characters and
other values are escaped (e.g., "\t", "\077", etc.).
3. Create a temporary file into which we dump the binary data and generate a
single ".incbin" directive. The assembler will include the binary file for
us in the generated output object.
Now the code generator uses either (2) or (3), depending on the binary blob
size. Using (3) for small blobs adds too much overhead (see benchmark results
in #16190), so we only do it when the size is above a threshold (500K at the
time of writing).
The threshold is configurable via the `-fbinary-blob-threshold` flag.
-}
{-
Note [Pretty print ASCII when AsmCodeGen]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
......
......@@ -49,8 +49,6 @@ import Unique ( pprUniqueAlways )
import Outputable
import Platform
import FastString
import Data.Word
import qualified Data.ByteString as BS
-- -----------------------------------------------------------------------------
-- Printing this stuff out
......@@ -110,11 +108,7 @@ pprDatas :: CmmStatics -> SDoc
-- Print the label first, then every static item, stacked vertically.
pprDatas (Statics label items) = vcat (pprLabel label : [pprData item | item <- items])
-- | Render a single static data item as assembler text.
pprData :: CmmStatic -> SDoc
-- NOTE(review): this listing looks like a diff whose +/- markers were stripped.
-- Presumably the equation just below (with its 'do1' helper, one ".byte"
-- directive per byte plus a trailing 0) is the version the commit removes and
-- the 'pprBytes' equation after it is the replacement; confirm against the
-- original commit before treating both as live code.
pprData (CmmString str)
= vcat (map do1 (BS.unpack str)) $$ do1 0
where
-- Emit one ".byte" directive carrying the decimal value of the byte.
do1 :: Word8 -> SDoc
do1 w = text "\t.byte\t" <> int (fromIntegral w)
-- String literals are delegated to 'pprBytes'.
pprData (CmmString str) = pprBytes str
-- Uninitialised data: a ".skip" directive followed by the byte count.
pprData (CmmUninitialised bytes) = text ".skip " <> int bytes
-- Any other static literal is delegated to 'pprDataItem'.
pprData (CmmStaticLit lit) = pprDataItem lit
......
......@@ -47,7 +47,6 @@ import FastString
import Outputable
import Data.Word
import Data.Bits
-- -----------------------------------------------------------------------------
......@@ -154,8 +153,7 @@ pprDatas (align, (Statics lbl dats))
= vcat (pprAlign align : pprLabel lbl : map pprData dats)
pprData :: CmmStatic -> SDoc
pprData (CmmString str)
= ptext (sLit "\t.asciz ") <> doubleQuotes (pprASCII str)
pprData (CmmString str) = pprBytes str
pprData (CmmUninitialised bytes)
= sdocWithPlatform $ \platform ->
......
......@@ -1238,3 +1238,19 @@ by saying ``-fno-wombat``.
if a function definition will be inlined *at a call site*. The other option
determines if a function definition will be kept around at all for
potential inlining.
.. ghc-flag:: -fbinary-blob-threshold=⟨n⟩
:shortdesc: *default: 500K.* Tweak assembly generator for binary blobs.
:type: dynamic
:category: optimization
:default: 500000
The native code-generator can either dump binary blobs (e.g. string
literals) into the assembly file (by using ".asciz" or ".string" assembler
directives) or it can dump them as binary data into a temporary file which
is then included by the assembler (using the ".incbin" assembler directive).
This flag sets the size (in bytes) threshold above which the second approach
is used. You can disable the second approach entirely by setting the
threshold to 0.
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment