Commit 224a6b86 authored by Sylvain Henry's avatar Sylvain Henry Committed by Marge Bot

TH: support raw bytes literals (#14741)

GHC represents String literals as ByteString internally for efficiency
reasons. However, until now it wasn't possible to efficiently create
large string literals with TH (e.g. to embed a file in a binary, cf #14741):
TH code had to unpack the bytes into a [Word8] that GHC then had to re-pack
into a ByteString.

This patch adds the possibility to efficiently create a "string" literal
from raw bytes. We get the following compile times for different sizes
of TH created literals:

|| Size || Before || After  || Gain ||
|| 30K  || 2.307s || 2.299  || 0%   ||
|| 3M   || 3.073s || 2.400s || 21%  ||
|| 30M  || 8.517s || 3.390s || 60%  ||

Ticket #14741 can be fixed if the original code uses this new TH feature.
parent 5be7ad78
Pipeline #3274 passed with stages
in 357 minutes and 9 seconds
......@@ -188,6 +188,20 @@ Note [Natural literals]
~~~~~~~~~~~~~~~~~~~~~~~
Similar to Integer literals.
Note [String literals]
~~~~~~~~~~~~~~~~~~~~~~
String literals are UTF-8 encoded and stored into ByteStrings in the following
ASTs: Haskell, Core, Stg, Cmm. TH can also emit ByteString based string literals
with the BytesPrimL constructor (see #14741).
It wasn't true before as [Word8] was used in Cmm AST and in TH which was quite
bad for performance with large strings (see #16198 and #14741).
To include string literals into output objects, the assembler code generator has
to embed the UTF-8 encoded binary blob. See Note [Embedding large binary blobs]
for more details.
-}
instance Binary LitNumType where
......
......@@ -45,6 +45,9 @@ import Control.Monad( unless, liftM, ap )
import Data.Maybe( catMaybes, isNothing )
import Language.Haskell.TH as TH hiding (sigP)
import Language.Haskell.TH.Syntax as TH
import Foreign.ForeignPtr
import Foreign.Ptr
import System.IO.Unsafe
-------------------------------------------------------------------
-- The external interface
......@@ -1189,6 +1192,11 @@ cvtLit (StringL s) = do { let { s' = mkFastString s }
cvtLit (StringPrimL s) = do { let { s' = BS.pack s }
; force s'
; return $ HsStringPrim NoSourceText s' }
cvtLit (BytesPrimL (Bytes fptr off sz)) = do
let bs = unsafePerformIO $ withForeignPtr fptr $ \ptr ->
BS.packCStringLen (ptr `plusPtr` fromIntegral off, fromIntegral sz)
force bs
return $ HsStringPrim NoSourceText bs
cvtLit _ = panic "Convert.cvtLit: Unexpected literal"
-- cvtLit should not be called on IntegerL, RationalL
-- That precondition is established right here in
......
......@@ -10,6 +10,7 @@ module GHCi.TH.Binary () where
import Prelude -- See note [Why do we import Prelude here?]
import Data.Binary
import qualified Data.ByteString as B
import qualified Data.ByteString.Internal as B
import GHC.Serialized
import qualified Language.Haskell.TH as TH
import qualified Language.Haskell.TH.Syntax as TH
......@@ -72,3 +73,10 @@ instance Binary TH.PatSynArgs
instance Binary Serialized where
put (Serialized tyrep wds) = put tyrep >> put (B.pack wds)
get = Serialized <$> get <*> (B.unpack <$> get)
instance Binary TH.Bytes where
put (TH.Bytes ptr off sz) = put bs
where bs = B.PS ptr (fromIntegral off) (fromIntegral sz)
get = do
B.PS ptr off sz <- get
return (TH.Bytes ptr (fromIntegral off) (fromIntegral sz))
......@@ -26,7 +26,7 @@ module Language.Haskell.TH.Lib (
-- ** Constructors lifted to 'Q'
-- *** Literals
intPrimL, wordPrimL, floatPrimL, doublePrimL, integerL, rationalL,
charL, stringL, stringPrimL, charPrimL,
charL, stringL, stringPrimL, charPrimL, bytesPrimL, mkBytes,
-- *** Patterns
litP, varP, tupP, unboxedTupP, unboxedSumP, conP, uInfixP, parensP,
infixP, tildeP, bangP, asP, wildP, recP,
......@@ -157,6 +157,8 @@ import Language.Haskell.TH.Lib.Internal hiding
import Language.Haskell.TH.Syntax
import Control.Monad (liftM2)
import Foreign.ForeignPtr
import Data.Word
import Prelude
-- All definitions below represent the "old" API, since their definitions are
......@@ -303,3 +305,17 @@ standaloneDerivWithStrategyD mds ctxt ty = do
ctxt' <- ctxt
ty' <- ty
return $ StandaloneDerivD mds ctxt' ty'
-------------------------------------------------------------------------------
-- * Bytes literals
-- | Create a Bytes datatype representing raw bytes to be embedded into the
-- program/library binary.
--
-- @since 2.16.0.0
mkBytes
:: ForeignPtr Word8 -- ^ Pointer to the data
-> Word -- ^ Offset from the pointer
-> Word -- ^ Number of bytes
-> Bytes
mkBytes = Bytes
......@@ -86,6 +86,8 @@ stringL :: String -> Lit
stringL = StringL
stringPrimL :: [Word8] -> Lit
stringPrimL = StringPrimL
bytesPrimL :: Bytes -> Lit
bytesPrimL = BytesPrimL
rationalL :: Rational -> Lit
rationalL = RationalL
......
......@@ -268,6 +268,7 @@ pprLit _ (CharL c) = text (show c)
pprLit _ (CharPrimL c) = text (show c) <> char '#'
pprLit _ (StringL s) = pprString s
pprLit _ (StringPrimL s) = pprString (bytesToString s) <> char '#'
pprLit _ (BytesPrimL {}) = pprString "<binary data>"
pprLit i (RationalL rat) = parensIf (i > noPrec) $
integer (numerator rat) <+> char '/'
<+> integer (denominator rat)
......
......@@ -44,6 +44,7 @@ import GHC.ForeignSrcLang.Type
import Language.Haskell.TH.LanguageExtensions
import Numeric.Natural
import Prelude
import Foreign.ForeignPtr
import qualified Control.Monad.Fail as Fail
......@@ -1619,6 +1620,7 @@ data Lit = CharL Char
| FloatPrimL Rational
| DoublePrimL Rational
| StringPrimL [Word8] -- ^ A primitive C-style string, type Addr#
| BytesPrimL Bytes -- ^ Some raw bytes, type Addr#:
| CharPrimL Char
deriving( Show, Eq, Ord, Data, Generic )
......@@ -1626,6 +1628,24 @@ data Lit = CharL Char
-- but that could complicate the
-- supposedly-simple TH.Syntax literal type
-- | Raw bytes embedded into the binary.
--
-- Avoid using Bytes constructor directly as it is likely to change in the
-- future. Use helpers such as `mkBytes` in Language.Haskell.TH.Lib instead.
data Bytes = Bytes
{ bytesPtr :: ForeignPtr Word8 -- ^ Pointer to the data
, bytesOffset :: Word -- ^ Offset from the pointer
, bytesSize :: Word -- ^ Number of bytes
-- Maybe someday:
-- , bytesAlignement :: Word -- ^ Alignement constraint
-- , bytesReadOnly :: Bool -- ^ Shall we embed into a read-only
-- -- section or not
-- , bytesInitialized :: Bool -- ^ False: only use `bytesSize` to allocate
-- -- an uninitialized region
}
deriving (Eq,Ord,Data,Generic,Show)
-- | Pattern in Haskell given in @{}@
data Pat
= LitP Lit -- ^ @{ 5 or \'c\' }@
......
......@@ -8,6 +8,9 @@
* Add a `ForallVisT` constructor to `Type` to represent visible, dependent
quantification.
* Introduce support for `Bytes` literals (raw bytes embedded into the output
binary)
## 2.15.0.0 *TBA*
* In `Language.Haskell.TH.Syntax`, `DataInstD`, `NewTypeInstD`, `TySynEqn`,
......
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE MagicHash #-}
import Language.Haskell.TH.Lib
import Data.Word
import Foreign.ForeignPtr
import Foreign.Marshal.Array
import GHC.Exts
import System.Mem
import Control.Monad.IO.Class
import GHC.CString
ptr :: Ptr ()
ptr = Ptr $(do
-- create a buffer containing the "Hello World!" string
let xs = [72,101,108,108,111,32,87,111,114,108,100,33] :: [Word8]
fp <- liftIO $ mallocForeignPtrArray 25
liftIO $ withForeignPtr fp $ \p -> do
pokeArray p xs
-- create a "Bytes" literal with an offset and size to only include "World"
let bys = mkBytes fp 6 5
liftIO performGC -- check that the GC doesn't release our buffer too early
litE (bytesPrimL bys))
main :: IO ()
main = do
let s = case ptr of Ptr addr -> unpackNBytes# addr 5#
putStrLn s
......@@ -472,3 +472,4 @@ test('T16183', normal, compile, ['-v0 -ddump-splices -dsuppress-uniques'])
test('T16195', normal, multimod_compile, ['T16195.hs', '-v0'])
test('T16293b', normal, compile, [''])
test('T16326_TH', normal, compile, ['-v0 -ddump-splices -dsuppress-uniques'])
test('T14741', normal, compile_and_run, [''])
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment