PprBase.hs 9.48 KB
Newer Older
Sylvain Henry's avatar
Sylvain Henry committed
1 2
{-# LANGUAGE MagicHash #-}

3 4 5 6 7 8 9 10 11
-----------------------------------------------------------------------------
--
-- Pretty-printing assembly language
--
-- (c) The University of Glasgow 1993-2005
--
-----------------------------------------------------------------------------

module PprBase (
12 13 14
        castFloatToWord8Array,
        castDoubleToWord8Array,
        floatToBytes,
15
        doubleToBytes,
16
        pprASCII,
17
        pprBytes,
18
        pprSectionHeader
19 20 21 22
)

where

23 24
import GhcPrelude

25
import AsmUtils
26 27 28 29 30 31
import CLabel
import Cmm
import DynFlags
import FastString
import Outputable
import Platform
32
import FileCleanup
33

34 35
import qualified Data.Array.Unsafe as U ( castSTUArray )
import Data.Array.ST
36

37 38 39
import Control.Monad.ST

import Data.Word
Sylvain Henry's avatar
Sylvain Henry committed
40
import Data.Bits
41 42
import Data.ByteString (ByteString)
import qualified Data.ByteString as BS
Sylvain Henry's avatar
Sylvain Henry committed
43 44
import GHC.Exts
import GHC.Word
45
import System.IO.Unsafe
46 47 48 49 50 51 52



-- -----------------------------------------------------------------------------
-- Converting floating-point literals to integrals for printing

-- | View a mutable unboxed array of 'Float' as an array of 'Word8'.
--
-- This simply delegates to the unsafe 'U.castSTUArray'; the caller is
-- responsible for only reading bytes that were actually written as floats.
castFloatToWord8Array :: STUArray s Int Float -> ST s (STUArray s Int Word8)
castFloatToWord8Array floats = U.castSTUArray floats
54 55

-- | View a mutable unboxed array of 'Double' as an array of 'Word8'.
--
-- This simply delegates to the unsafe 'U.castSTUArray'; the caller is
-- responsible for only reading bytes that were actually written as doubles.
castDoubleToWord8Array :: STUArray s Int Double -> ST s (STUArray s Int Word8)
castDoubleToWord8Array doubles = U.castSTUArray doubles
57 58

-- floatToBytes and doubleToBytes convert to the host's byte
-- order.  Provided that we're not cross-compiling for a
-- target with the opposite endianness, this should work ok
-- on all targets.

-- ToDo: this stuff is very similar to the shenanigans in PprAbs,
-- could they be merged?

-- | Split a 'Float' into its four bytes, each returned as an 'Int'.
--
-- The value is written to slot 0 of a scratch array, the array is
-- reinterpreted as bytes, and indices 0 to 3 are read back in that order.
-- The bytes therefore come out in whatever order they are stored in memory.
floatToBytes :: Float -> [Int]
floatToBytes f
   = runST (do
        -- Bounds (0,3) are also the indices read back after the cast.
        scratch <- newArray_ ((0::Int),3)
        writeArray scratch 0 f
        bytes <- castFloatToWord8Array scratch
        map fromIntegral <$> mapM (readArray bytes) [0 .. 3]
     )

-- | Split a 'Double' into its eight bytes, each returned as an 'Int'.
--
-- The value is written to slot 0 of a scratch array, the array is
-- reinterpreted as bytes, and indices 0 to 7 are read back in that order.
-- The bytes therefore come out in whatever order they are stored in memory.
doubleToBytes :: Double -> [Int]
doubleToBytes d
   = runST (do
        -- Bounds (0,7) are also the indices read back after the cast.
        scratch <- newArray_ ((0::Int),7)
        writeArray scratch 0 d
        bytes <- castDoubleToWord8Array scratch
        map fromIntegral <$> mapM (readArray bytes) [0 .. 7]
     )
95

96 97 98 99 100 101
-- ---------------------------------------------------------------------------
-- Printing ASCII strings.
--
-- Print as a string and escape non-printable characters.
-- This is similar to charToC in Utils.

102
-- | Render a literal 'ByteString' as the contents of an assembler string,
-- escaping every byte that is not a plain printable ASCII character.
--
-- The whole escaped 'String' is built first and wrapped in a single 'text'
-- document, instead of creating one document per byte.
-- See Trac #14741
-- and Note [Pretty print ASCII when AsmCodeGen]
pprASCII :: ByteString -> SDoc
pprASCII str = text (BS.foldr prependEscaped [] str)
  where
    -- Put the escaped form of one byte in front of the already-escaped tail.
    prependEscaped :: Word8 -> String -> String
    prependEscaped byte escapedTail = escapeByte byte ++ escapedTail

    escapeByte :: Word8 -> String
    escapeByte byte = case byte of
      0x09 -> "\\t"     -- horizontal tab
      0x0A -> "\\n"     -- line feed
      0x22 -> "\\\""    -- double quote
      0x5C -> "\\\\"    -- backslash
      _ | 0x20 <= byte, byte <= 0x7E -> [asciiChar byte] -- printable ASCII
        | otherwise                  -> '\\' : octalDigits byte

    -- Only ever applied to printable bytes and to the digits '0'..'7',
    -- so the range check performed by "chr" is bypassed.
    asciiChar :: Word8 -> Char
    asciiChar (W8# w#) = C# (chr# (word2Int# w#))

    -- Three octal digits, most significant first.
    octalDigits :: Word8 -> String
    octalDigits byte =
      [ asciiChar (zeroDigit + ((byte `unsafeShiftR` bitOffset) .&. 0x07))
      | bitOffset <- [6, 3, 0]
      ]

    zeroDigit :: Word8
    zeroDigit = 0x30 -- = ord '0'
130

131 132 133 134 135 136 137 138 139 140 141 142 143 144 145
-- | Pretty print binary data.
--
-- Emits either a ".string" directive or an ".incbin" directive, depending
-- on the blob size and the configured threshold.
-- See Note [Embedding large binary blobs]
--
-- A NULL byte is added after the binary data.
--
pprBytes :: ByteString -> SDoc
pprBytes bs = sdocWithDynFlags directiveFor
  where
    directiveFor dflags
      -- A threshold of 0 means the blob is always emitted inline.
      | threshold == 0 || blobSize <= threshold = inlineString
      | otherwise = unsafePerformIO (viaTempFile dflags)
      where
        threshold = binBlobThreshold dflags
        blobSize  = fromIntegral (BS.length bs)

    -- Small blob: escape it and emit it directly in the assembly.
    inlineString = text "\t.string " <> doubleQuotes (pprASCII bs)

    -- Large blob: dump it into a temporary file and let the assembler
    -- include that file; the trailing NULL byte is emitted explicitly.
    viaTempFile dflags = do
      blobFile <- newTempName dflags TFL_CurrentModule ".dat"
      BS.writeFile blobFile bs
      return $ text "\t.incbin "
         <> pprFilePathString blobFile -- proper escape (see #16389)
         <> text "\n\t.byte 0"
149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177

{-
Note [Embedding large binary blobs]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

To embed a blob of binary data (e.g. a UTF-8 encoded string) into the generated
code object, we have several options:

   1. Generate a ".byte" directive for each byte. This is what was done in the past
      (see Note [Pretty print ASCII when AsmCodeGen]).

   2. Generate a single ".string"/".asciz" directive for the whole sequence of
      bytes. Bytes in the ASCII printable range are rendered as characters and
      other values are escaped (e.g., "\t", "\077", etc.).

   3. Create a temporary file into which we dump the binary data and generate a
      single ".incbin" directive. The assembler will include the binary file for
      us in the generated output object.

Now the code generator uses either (2) or (3), depending on the binary blob
size.  Using (3) for small blobs adds too much overhead (see benchmark results
in #16190), so we only do it when the size is above a threshold (500K at the
time of writing).

The threshold is configurable via the `-fbinary-blob-threshold` flag.

-}


178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194
{-
Note [Pretty print ASCII when AsmCodeGen]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Previously, when generating assembly code, we created an SDoc with
`(ptext . sLit)` for every byte in the literal bytestring, then
combined them using `hcat`.

When handling literal bytestrings with millions of bytes,
millions of SDocs would be created and combined, leading to
high memory usage.

Now we escape the given bytestring to string directly and construct
SDoc only once. This improvement could dramatically decrease the
memory allocation from 4.7GB to 1.3GB when embedding a 3MB literal
string in source code. See Trac #14741 for profiling results.
-}

195 196 197 198 199 200
-- ----------------------------------------------------------------------------
-- Printing section headers.
--
-- If -split-sections was specified, include the suffix label, otherwise just
-- print the section type. For Darwin, where subsections-for-symbols are
-- used instead, only print section type.
--
-- For string literals, additional flags are specified to enable merging of
-- identical strings in the linker. With -split-sections each string also gets
-- a unique section to allow strings from unused code to be GC'd.
205 206 207 208

-- | Print the directive that opens the given section, choosing the syntax
-- from the platform's operating system.
pprSectionHeader :: Platform -> Section -> SDoc
pprSectionHeader platform (Section t suffix) = forOS (platformOS platform)
  where
    forOS OSAIX     = pprXcoffSectionHeader t
    forOS OSDarwin  = pprDarwinSectionHeader t
    forOS OSMinGW32 = gnuHeader '$'
    forOS _         = gnuHeader '.'

    -- GNU-style header; the character is the separator handed to
    -- 'pprGNUSectionHeader'.
    gnuHeader sepChar = pprGNUSectionHeader (char sepChar) t suffix
213

214 215
-- | Print a GNU-style @.section@ directive for the given section type.
--
-- The output is the section name, then (only when 'Opt_SplitSections' is
-- set) @sep@ followed by the @suffix@ label, then any extra section flags.
-- Section names differ when the target OS is 'OSMinGW32'.
--
-- Panics on 'OtherSection'.
pprGNUSectionHeader :: SDoc -> SectionType -> CLabel -> SDoc
pprGNUSectionHeader sep t suffix = sdocWithDynFlags directive
  where
    directive dflags =
      text ".section " <> ptext (baseName onWindows) <> subsection
                       <> extraFlags onWindows
      where
        onWindows = case platformOS (targetPlatform dflags) of
          OSMinGW32 -> True
          _         -> False
        subsection
          | gopt Opt_SplitSections dflags = sep <> ppr suffix
          | otherwise                     = empty

    -- Section name; the argument says whether the target OS is MinGW32.
    baseName onWindows = case t of
      Text -> sLit ".text"
      Data -> sLit ".data"
      ReadOnlyData
        | onWindows -> sLit ".rdata"
        | otherwise -> sLit ".rodata"
      RelocatableReadOnlyData
        -- Concept does not exist on Windows,
        -- so map these to R/O data.
        | onWindows -> sLit ".rdata$rel.ro"
        | otherwise -> sLit ".data.rel.ro"
      UninitialisedData -> sLit ".bss"
      ReadOnlyData16
        | onWindows -> sLit ".rdata$cst16"
        | otherwise -> sLit ".rodata.cst16"
      CString
        | onWindows -> sLit ".rdata"
        | otherwise -> sLit ".rodata.str"
      OtherSection _ ->
        panic "PprBase.pprGNUSectionHeader: unknown section type"

    -- Only C string sections on non-MinGW32 targets get extra flags.
    extraFlags onWindows = case t of
      CString
        | onWindows -> empty
        | otherwise -> text ",\"aMS\"," <> sectionType "progbits" <> text ",1"
      _ -> empty
249

250 251 252 253 254 255 256 257 258
-- | Print the @.csect@ directive for the given section type.
--
-- XCOFF doesn't support relocating label-differences, so we place all
-- RO sections into .text[PR] sections; the trailing @#@ comment records
-- which section type was requested.
--
-- Panics on 'OtherSection'.
pprXcoffSectionHeader :: SectionType -> SDoc
pprXcoffSectionHeader t = text (csect t)
  where
    csect Text                    = ".csect .text[PR]"
    csect Data                    = ".csect .data[RW]"
    csect ReadOnlyData            = ".csect .text[PR] # ReadOnlyData"
    csect RelocatableReadOnlyData = ".csect .text[PR] # RelocatableReadOnlyData"
    csect ReadOnlyData16          = ".csect .text[PR] # ReadOnlyData16"
    csect CString                 = ".csect .text[PR] # CString"
    csect UninitialisedData       = ".csect .data[BS]"
    csect (OtherSection _)        =
      panic "PprBase.pprXcoffSectionHeader: unknown section type"

264 265 266 267 268 269 270 271 272
-- | Print the Darwin section directive for the given section type.
--
-- Only the section type is printed. 'UninitialisedData' shares @.data@
-- with 'Data', and 'ReadOnlyData16' shares @.const@ with 'ReadOnlyData'.
--
-- Panics on 'OtherSection'.
pprDarwinSectionHeader :: SectionType -> SDoc
pprDarwinSectionHeader t = ptext (directive t)
  where
    directive Text                    = sLit ".text"
    directive Data                    = sLit ".data"
    directive ReadOnlyData            = sLit ".const"
    directive RelocatableReadOnlyData = sLit ".const_data"
    directive UninitialisedData       = sLit ".data"
    directive ReadOnlyData16          = sLit ".const"
    directive CString = sLit ".section\t__TEXT,__cstring,cstring_literals"
    directive (OtherSection _)        =
      panic "PprBase.pprDarwinSectionHeader: unknown section type"