Commit b7e88ee0 authored by Thijs Alkemade's avatar Thijs Alkemade Committed by Ben Gamari

Reduce the size of string literals in binaries.

Removed the alignment for strings and mark then as cstring sections in
the generated asm so the linker can merge duplicate sections.

Reviewers: rwbarton, trofi, austin, trommler, simonmar, hvr, bgamari

Reviewed By: hvr, bgamari

Subscribers: simonpj, hvr, thomie

Differential Revision: https://phabricator.haskell.org/D1290

GHC Trac Issues: #9577
parent eafa06dc
...@@ -1088,6 +1088,10 @@ pprCLabel platform (DeadStripPreventer lbl) ...@@ -1088,6 +1088,10 @@ pprCLabel platform (DeadStripPreventer lbl)
| cGhcWithNativeCodeGen == "YES" | cGhcWithNativeCodeGen == "YES"
= pprCLabel platform lbl <> text "_dsp" = pprCLabel platform lbl <> text "_dsp"
pprCLabel _ (StringLitLabel u)
| cGhcWithNativeCodeGen == "YES"
= pprUnique u <> ptext (sLit "_str")
pprCLabel platform lbl pprCLabel platform lbl
= getPprStyle $ \ sty -> = getPprStyle $ \ sty ->
if cGhcWithNativeCodeGen == "YES" && asmStyle sty if cGhcWithNativeCodeGen == "YES" && asmStyle sty
...@@ -1109,8 +1113,8 @@ pprAsmCLbl _ lbl ...@@ -1109,8 +1113,8 @@ pprAsmCLbl _ lbl
= pprCLbl lbl = pprCLbl lbl
pprCLbl :: CLabel -> SDoc pprCLbl :: CLabel -> SDoc
pprCLbl (StringLitLabel u) pprCLbl (StringLitLabel _)
= pprUnique u <> text "_str" = panic "pprCLbl StringLitLabel"
pprCLbl (CaseLabel u CaseReturnPt) pprCLbl (CaseLabel u CaseReturnPt)
= hcat [pprUnique u, text "_ret"] = hcat [pprUnique u, text "_ret"]
......
...@@ -172,6 +172,7 @@ data SectionType ...@@ -172,6 +172,7 @@ data SectionType
| RelocatableReadOnlyData | RelocatableReadOnlyData
| UninitialisedData | UninitialisedData
| ReadOnlyData16 -- .rodata.cst16 on x86_64, 16-byte aligned | ReadOnlyData16 -- .rodata.cst16 on x86_64, 16-byte aligned
| CString
| OtherSection String | OtherSection String
deriving (Show) deriving (Show)
......
...@@ -171,10 +171,13 @@ mkByteStringCLit :: Unique -> [Word8] -> (CmmLit, GenCmmDecl CmmStatics info stm ...@@ -171,10 +171,13 @@ mkByteStringCLit :: Unique -> [Word8] -> (CmmLit, GenCmmDecl CmmStatics info stm
-- We have to make a top-level decl for the string, -- We have to make a top-level decl for the string,
-- and return a literal pointing to it -- and return a literal pointing to it
mkByteStringCLit uniq bytes mkByteStringCLit uniq bytes
= (CmmLabel lbl, CmmData sec $ Statics lbl [CmmString bytes]) = (CmmLabel lbl, CmmData (Section sec lbl) $ Statics lbl [CmmString bytes])
where where
lbl = mkStringLitLabel uniq lbl = mkStringLitLabel uniq
sec = Section ReadOnlyData lbl -- This can not happen for String literals (as there \NUL is replaced by
-- C0 80). However, it can happen with Addr# literals.
sec = if 0 `elem` bytes then ReadOnlyData else CString
mkDataLits :: Section -> CLabel -> [CmmLit] -> GenCmmDecl CmmStatics info stmt mkDataLits :: Section -> CLabel -> [CmmLit] -> GenCmmDecl CmmStatics info stmt
-- Build a data-segment data block -- Build a data-segment data block
mkDataLits section lbl lits mkDataLits section lbl lits
......
...@@ -170,4 +170,5 @@ pprSectionType s = doubleQuotes (ptext t) ...@@ -170,4 +170,5 @@ pprSectionType s = doubleQuotes (ptext t)
RelocatableReadOnlyData RelocatableReadOnlyData
-> sLit "relreadonly" -> sLit "relreadonly"
UninitialisedData -> sLit "uninitialised" UninitialisedData -> sLit "uninitialised"
CString -> sLit "cstring"
OtherSection s' -> sLit s' -- Not actually a literal though. OtherSection s' -> sLit s' -- Not actually a literal though.
...@@ -59,6 +59,7 @@ isSecConstant (Section t _) = case t of ...@@ -59,6 +59,7 @@ isSecConstant (Section t _) = case t of
ReadOnlyData -> True ReadOnlyData -> True
RelocatableReadOnlyData -> True RelocatableReadOnlyData -> True
ReadOnlyData16 -> True ReadOnlyData16 -> True
CString -> True
Data -> False Data -> False
UninitialisedData -> False UninitialisedData -> False
(OtherSection _) -> False (OtherSection _) -> False
...@@ -72,6 +73,7 @@ llvmSectionType t = case t of ...@@ -72,6 +73,7 @@ llvmSectionType t = case t of
ReadOnlyData16 -> fsLit ".rodata.cst16" ReadOnlyData16 -> fsLit ".rodata.cst16"
Data -> fsLit ".data" Data -> fsLit ".data"
UninitialisedData -> fsLit ".bss" UninitialisedData -> fsLit ".bss"
CString -> fsLit ".cstring"
(OtherSection _) -> panic "llvmSectionType: unknown section type" (OtherSection _) -> panic "llvmSectionType: unknown section type"
-- | Format a Cmm Section into a LLVM section name -- | Format a Cmm Section into a LLVM section name
......
...@@ -348,6 +348,12 @@ pprAlignForSection seg = ...@@ -348,6 +348,12 @@ pprAlignForSection seg =
ReadOnlyData16 ReadOnlyData16
| osDarwin -> sLit ".align 4" | osDarwin -> sLit ".align 4"
| otherwise -> sLit ".align 4" | otherwise -> sLit ".align 4"
-- TODO: This is copied from the ReadOnlyData case, but it can likely be
-- made more efficient.
CString
| osDarwin -> sLit ".align 2"
| ppc64 -> sLit ".align 3"
| otherwise -> sLit ".align 2"
OtherSection _ -> panic "PprMach.pprSectionAlign: unknown section" OtherSection _ -> panic "PprMach.pprSectionAlign: unknown section"
pprDataItem :: CmmLit -> SDoc pprDataItem :: CmmLit -> SDoc
......
...@@ -107,6 +107,7 @@ pprGNUSectionHeader t suffix = sdocWithDynFlags $ \dflags -> ...@@ -107,6 +107,7 @@ pprGNUSectionHeader t suffix = sdocWithDynFlags $ \dflags ->
RelocatableReadOnlyData -> sLit ".data.rel.ro" RelocatableReadOnlyData -> sLit ".data.rel.ro"
UninitialisedData -> sLit ".bss" UninitialisedData -> sLit ".bss"
ReadOnlyData16 -> sLit ".rodata.cst16" ReadOnlyData16 -> sLit ".rodata.cst16"
CString -> sLit ".rodata.str1.1,\"aMS\",@progbits,1"
OtherSection _ -> OtherSection _ ->
panic "PprBase.pprGNUSectionHeader: unknown section type" panic "PprBase.pprGNUSectionHeader: unknown section type"
...@@ -119,6 +120,7 @@ pprXcoffSectionHeader t = text $ case t of ...@@ -119,6 +120,7 @@ pprXcoffSectionHeader t = text $ case t of
ReadOnlyData -> ".csect .text[PR] # ReadOnlyData" ReadOnlyData -> ".csect .text[PR] # ReadOnlyData"
RelocatableReadOnlyData -> ".csect .text[PR] # RelocatableReadOnlyData" RelocatableReadOnlyData -> ".csect .text[PR] # RelocatableReadOnlyData"
ReadOnlyData16 -> ".csect .text[PR] # ReadOnlyData16" ReadOnlyData16 -> ".csect .text[PR] # ReadOnlyData16"
CString -> ".csect .text[PR] # CString"
UninitialisedData -> ".csect .data[BS]" UninitialisedData -> ".csect .data[BS]"
OtherSection _ -> OtherSection _ ->
panic "PprBase.pprXcoffSectionHeader: unknown section type" panic "PprBase.pprXcoffSectionHeader: unknown section type"
...@@ -132,5 +134,6 @@ pprDarwinSectionHeader t = ...@@ -132,5 +134,6 @@ pprDarwinSectionHeader t =
RelocatableReadOnlyData -> sLit ".const_data" RelocatableReadOnlyData -> sLit ".const_data"
UninitialisedData -> sLit ".data" UninitialisedData -> sLit ".data"
ReadOnlyData16 -> sLit ".const" ReadOnlyData16 -> sLit ".const"
CString -> sLit ".section\t__TEXT,__cstring,cstring_literals"
OtherSection _ -> OtherSection _ ->
panic "PprBase.pprDarwinSectionHeader: unknown section type" panic "PprBase.pprDarwinSectionHeader: unknown section type"
...@@ -339,6 +339,9 @@ pprAlignForSection seg = ...@@ -339,6 +339,9 @@ pprAlignForSection seg =
-> sLit ".align 8" -> sLit ".align 8"
UninitialisedData -> sLit ".align 8" UninitialisedData -> sLit ".align 8"
ReadOnlyData16 -> sLit ".align 16" ReadOnlyData16 -> sLit ".align 16"
-- TODO: This is copied from the ReadOnlyData case, but it can likely be
-- made more efficient.
CString -> sLit ".align 8"
OtherSection _ -> panic "PprMach.pprSectionHeader: unknown section") OtherSection _ -> panic "PprMach.pprSectionHeader: unknown section")
-- | Pretty print a data item. -- | Pretty print a data item.
......
...@@ -44,6 +44,8 @@ import Outputable ...@@ -44,6 +44,8 @@ import Outputable
import Data.Word import Data.Word
import Data.Char
import Data.Bits import Data.Bits
-- ----------------------------------------------------------------------------- -- -----------------------------------------------------------------------------
...@@ -140,10 +142,10 @@ pprBasicBlock info_env (BasicBlock blockid instrs) ...@@ -140,10 +142,10 @@ pprBasicBlock info_env (BasicBlock blockid instrs)
pprDatas :: (Alignment, CmmStatics) -> SDoc pprDatas :: (Alignment, CmmStatics) -> SDoc
pprDatas (align, (Statics lbl dats)) pprDatas (align, (Statics lbl dats))
= vcat (pprAlign align : pprLabel lbl : map pprData dats) = vcat (pprAlign align : pprLabel lbl : map pprData dats)
-- TODO: could remove if align == 1
pprData :: CmmStatic -> SDoc pprData :: CmmStatic -> SDoc
pprData (CmmString str) = pprASCII str pprData (CmmString str)
= ptext (sLit "\t.asciz ") <> doubleQuotes (pprASCII str)
pprData (CmmUninitialised bytes) pprData (CmmUninitialised bytes)
= sdocWithPlatform $ \platform -> = sdocWithPlatform $ \platform ->
...@@ -172,10 +174,20 @@ pprLabel lbl = pprGloblDecl lbl ...@@ -172,10 +174,20 @@ pprLabel lbl = pprGloblDecl lbl
pprASCII :: [Word8] -> SDoc pprASCII :: [Word8] -> SDoc
pprASCII str pprASCII str
= vcat (map do1 str) $$ do1 0 = hcat (map (do1 . fromIntegral) str)
where where
do1 :: Word8 -> SDoc do1 :: Int -> SDoc
do1 w = text "\t.byte\t" <> int (fromIntegral w) do1 w | '\t' <- chr w = ptext (sLit "\\t")
do1 w | '\n' <- chr w = ptext (sLit "\\n")
do1 w | '"' <- chr w = ptext (sLit "\\\"")
do1 w | '\\' <- chr w = ptext (sLit "\\\\")
do1 w | isPrint (chr w) = char (chr w)
do1 w | otherwise = char '\\' <> octal w
octal :: Int -> SDoc
octal w = int ((w `div` 64) `mod` 8)
<> int ((w `div` 8) `mod` 8)
<> int (w `mod` 8)
pprAlign :: Int -> SDoc pprAlign :: Int -> SDoc
pprAlign bytes pprAlign bytes
...@@ -418,10 +430,12 @@ pprAlignForSection seg = ...@@ -418,10 +430,12 @@ pprAlignForSection seg =
| target32Bit platform -> | target32Bit platform ->
case seg of case seg of
ReadOnlyData16 -> int 4 ReadOnlyData16 -> int 4
CString -> int 1
_ -> int 2 _ -> int 2
| otherwise -> | otherwise ->
case seg of case seg of
ReadOnlyData16 -> int 4 ReadOnlyData16 -> int 4
CString -> int 1
_ -> int 3 _ -> int 3
-- Other: alignments are given as bytes. -- Other: alignments are given as bytes.
_ _
...@@ -429,10 +443,12 @@ pprAlignForSection seg = ...@@ -429,10 +443,12 @@ pprAlignForSection seg =
case seg of case seg of
Text -> text "4,0x90" Text -> text "4,0x90"
ReadOnlyData16 -> int 16 ReadOnlyData16 -> int 16
CString -> int 1
_ -> int 4 _ -> int 4
| otherwise -> | otherwise ->
case seg of case seg of
ReadOnlyData16 -> int 16 ReadOnlyData16 -> int 16
CString -> int 1
_ -> int 8 _ -> int 8
pprDataItem :: CmmLit -> SDoc pprDataItem :: CmmLit -> SDoc
......
{-# LANGUAGE MagicHash #-}
import T9577_A
import GHC.Exts (Ptr(..), Addr#)
main = print (foo == Ptr "foo"#)
{-# LANGUAGE MagicHash #-}
module T9577_A where
import GHC.Exts (Ptr(..), Addr#)
foo = Ptr "foo"#
...@@ -148,3 +148,4 @@ test('T12059', normal, compile_and_run, ['']) ...@@ -148,3 +148,4 @@ test('T12059', normal, compile_and_run, [''])
test('T12433', normal, compile_and_run, ['']) test('T12433', normal, compile_and_run, [''])
test('T12757', normal, compile_and_run, ['']) test('T12757', normal, compile_and_run, [''])
test('T12855', normal, compile_and_run, ['']) test('T12855', normal, compile_and_run, [''])
test('T9577', [ unless(arch('x86_64') or arch('i386'),skip), only_ways(['normal']) ], compile_and_run, [''])
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment