Elf.hs 16.7 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38
{-
-----------------------------------------------------------------------------
--
-- (c) The University of Glasgow 2015
--
-- ELF format tools
--
-----------------------------------------------------------------------------
-}

module Elf (
    readElfSectionByName,
    readElfNoteAsString,
    makeElfNote
  ) where

import Exception
import DynFlags
import Platform
import ErrUtils
import Maybes     (MaybeT(..),runMaybeT)
import Util       (charToC)
import Outputable (text,hcat,SDoc)

import Control.Monad (when)
import Data.Binary.Get
import Data.Word
import Data.Char (ord)
import Data.ByteString.Lazy (ByteString)
import qualified Data.ByteString.Lazy as LBS
import qualified Data.ByteString.Lazy.Char8 as B8

{- Note [ELF specification]
   ~~~~~~~~~~~~~~~~~~~~~~~~

   ELF (Executable and Linking Format) is described in the System V Application
   Binary Interface (or ABI). The latter is composed of two parts: a generic
   part and a processor specific part. The generic ABI describes the parts of
Herbert Valerio Riedel's avatar
Herbert Valerio Riedel committed
39
   the interface that remain constant across all hardware implementations of
40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471
   System V.

   The latest release of the specification of the generic ABI is the version
   4.1 from March 18, 1997:

     - http://www.sco.com/developers/devspecs/gabi41.pdf

   Since 1997, snapshots of the draft for the "next" version are published:

     - http://www.sco.com/developers/gabi/

   Quoting the notice on the website: "There is more than one instance of these
   chapters to permit references to older instances to remain valid. All
   modifications to these chapters are forward-compatible, so that correct use
   of an older specification will not be invalidated by a newer instance.
   Approximately on a yearly basis, a new instance will be saved, as it reaches
   what appears to be a stable state."

   Nevertheless we will see that since 1998 it is not true for Note sections.

   Many ELF sections
   -----------------

   ELF-4.1: the normal section number fields in ELF are limited to 16 bits,
   which runs out of bits when you try to cram in more sections than that. Two
   fields are concerned: the one containing the number of the sections and the
   one containing the index of the section that contains section's names. (The
   same thing applies to the field containing the number of segments, but we
   don't care about it here).

   ELF-next: to solve this, theses fields in the ELF header have an escape
   value (different for each case), and the actual section number is stashed
   into unused fields in the first section header.

   We support this extension as it is forward-compatible with ELF-4.1.
   Moreover, GHC may generate objects with a lot of sections with the
   "function-sections" feature (one section per function).

   Note sections
   -------------

   Sections with type "note" (SHT_NOTE in the specification) are used to add
   arbitrary data into an ELF file. An entry in a note section is composed of a
   name, a type and a value.

   ELF-4.1: "The note information in sections and program header elements holds
   any number of entries, each of which is an array of 4-byte words in the
   format of the target processor." Each entry has the following format:
         | namesz |   Word32: size of the name string (including the ending \0)
         | descsz |   Word32: size of the value
         |  type  |   Word32: type of the note
         |  name  |   Name string (with \0 padding to ensure 4-byte alignment)
         |  ...   |
         |  desc  |   Value (with \0 padding to ensure 4-byte alignment)
         |  ...   |

   ELF-next: "The note information in sections and program header elements
   holds a variable amount of entries. In 64-bit objects (files with
   e_ident[EI_CLASS] equal to ELFCLASS64), each entry is an array of 8-byte
   words in the format of the target processor. In 32-bit objects (files with
   e_ident[EI_CLASS] equal to ELFCLASS32), each entry is an array of 4-byte
   words in the format of the target processor." (from 1998-2015 snapshots)

   This is not forward-compatible with ELF-4.1. In practice, for almost all
   platforms namesz, descz and type fields are 4-byte words for both 32-bit and
   64-bit objects (see elf.h and readelf source code).

   The only exception in readelf source code is for IA_64 machines with OpenVMS
   OS: "This OS has so many departures from the ELF standard that we test it at
   many places" (comment for is_ia64_vms() in readelf.c). In this case, namesz,
   descsz and type fields are 8-byte words and name and value fields are padded
   to ensure 8-byte alignment.

   We don't support this platform in the following code. Reading a note section
   could be done easily (by testing Machine and OS fields in the ELF header).
   Writing a note section, however, requires that we generate a different
   assembly code for GAS depending on the target platform and this is a little
   bit more involved.

-}


-- | ELF header
--
-- The ELF header indicates the native word size (32-bit or 64-bit) and the
-- endianness of the target machine. We directly store getters for words of
-- different sizes as it is more convenient to use. We also store the word size
-- as it is useful to skip some uninteresting fields.
--
-- Other information such as the target machine and OS are left out as we don't
-- use them yet. We could add them in the future if we ever need them.
data ElfHeader = ElfHeader
   { gw16     :: Get Word16   -- ^ Get a Word16 with the correct endianness
   , gw32     :: Get Word32   -- ^ Get a Word32 with the correct endianness
   , gwN      :: Get Word64   -- ^ Get a Word with the correct word size
                              --   and endianness
   , wordSize :: Int          -- ^ Word size in bytes
   }


-- | Read the ELF header
readElfHeader :: DynFlags -> ByteString -> IO (Maybe ElfHeader)
readElfHeader dflags bs = runGetOrThrow getHeader bs `catchIO` \_ -> do
    debugTraceMsg dflags 3 $
      text ("Unable to read ELF header")
    return Nothing
  where
    getHeader = do
      magic    <- getWord32be
      ws       <- getWord8
      endian   <- getWord8
      version  <- getWord8
      skip 9  -- skip OSABI, ABI version and padding
      when (magic /= 0x7F454C46 || version /= 1) $ fail "Invalid ELF header"

      case (ws, endian) of
          -- ELF 32, little endian
          (1,1) -> return . Just $ ElfHeader
                           getWord16le
                           getWord32le
                           (fmap fromIntegral getWord32le) 4
          -- ELF 32, big endian
          (1,2) -> return . Just $ ElfHeader
                           getWord16be
                           getWord32be
                           (fmap fromIntegral getWord32be) 4
          -- ELF 64, little endian
          (2,1) -> return . Just $ ElfHeader
                           getWord16le
                           getWord32le
                           (fmap fromIntegral getWord64le) 8
          -- ELF 64, big endian
          (2,2) -> return . Just $ ElfHeader
                           getWord16be
                           getWord32be
                           (fmap fromIntegral getWord64be) 8
          _     -> fail "Invalid ELF header"


------------------
-- SECTIONS
------------------


-- | Description of the section table
data SectionTable = SectionTable
  { sectionTableOffset :: Word64  -- ^ offset of the table describing sections
  , sectionEntrySize   :: Word16  -- ^ size of an entry in the section table
  , sectionEntryCount  :: Word64  -- ^ number of sections
  , sectionNameIndex   :: Word32  -- ^ index of a special section which
                                  --   contains section's names
  }

-- | Read the ELF section table
readElfSectionTable :: DynFlags
                    -> ElfHeader
                    -> ByteString
                    -> IO (Maybe SectionTable)

readElfSectionTable dflags hdr bs = action `catchIO` \_ -> do
    debugTraceMsg dflags 3 $
      text ("Unable to read ELF section table")
    return Nothing
  where
    getSectionTable :: Get SectionTable
    getSectionTable = do
      skip (24 + 2*wordSize hdr) -- skip header and some other fields
      secTableOffset <- gwN hdr
      skip 10
      entrySize      <- gw16 hdr
      entryCount     <- gw16 hdr
      secNameIndex   <- gw16 hdr
      return (SectionTable secTableOffset entrySize
                           (fromIntegral entryCount)
                           (fromIntegral secNameIndex))

    action = do
      secTable <- runGetOrThrow getSectionTable bs
      -- In some cases, the number of entries and the index of the section
      -- containing section's names must be found in unused fields of the first
      -- section entry (see Note [ELF specification])
      let
        offSize0 = fromIntegral $ sectionTableOffset secTable + 8
                                  + 3 * fromIntegral (wordSize hdr)
        offLink0 = fromIntegral $ offSize0 + fromIntegral (wordSize hdr)

      entryCount'     <- if sectionEntryCount secTable /= 0
                          then return (sectionEntryCount secTable)
                          else runGetOrThrow (gwN hdr) (LBS.drop offSize0 bs)
      entryNameIndex' <- if sectionNameIndex secTable /= 0xffff
                          then return (sectionNameIndex secTable)
                          else runGetOrThrow (gw32 hdr) (LBS.drop offLink0 bs)
      return (Just $ secTable
        { sectionEntryCount = entryCount'
        , sectionNameIndex  = entryNameIndex'
        })


-- | A section
data Section = Section
  { entryName :: ByteString   -- ^ Name of the section
  , entryBS   :: ByteString   -- ^ Content of the section
  }

-- | Read a ELF section
readElfSectionByIndex :: DynFlags
                      -> ElfHeader
                      -> SectionTable
                      -> Word64
                      -> ByteString
                      -> IO (Maybe Section)

readElfSectionByIndex dflags hdr secTable i bs = action `catchIO` \_ -> do
    debugTraceMsg dflags 3 $
      text ("Unable to read ELF section")
    return Nothing
  where
    -- read an entry from the section table
    getEntry = do
      nameIndex <- gw32 hdr
      skip (4+2*wordSize hdr)
      offset    <- fmap fromIntegral $ gwN hdr
      size      <- fmap fromIntegral $ gwN hdr
      let bs' = LBS.take size (LBS.drop offset bs)
      return (nameIndex,bs')

    -- read the entry with the given index in the section table
    getEntryByIndex x = runGetOrThrow getEntry bs'
      where
        bs' = LBS.drop off bs
        off = fromIntegral $ sectionTableOffset secTable +
                             x * fromIntegral (sectionEntrySize secTable)

    -- Get the name of a section
    getEntryName nameIndex = do
      let idx = fromIntegral (sectionNameIndex secTable)
      (_,nameTable) <- getEntryByIndex idx
      let bs' = LBS.drop nameIndex nameTable
      runGetOrThrow getLazyByteStringNul bs'

    action = do
      (nameIndex,bs') <- getEntryByIndex (fromIntegral i)
      name            <- getEntryName (fromIntegral nameIndex)
      return (Just $ Section name bs')


-- | Find a section from its name. Return the section contents.
--
-- We do not perform any check on the section type.
findSectionFromName :: DynFlags
                    -> ElfHeader
                    -> SectionTable
                    -> String
                    -> ByteString
                    -> IO (Maybe ByteString)
findSectionFromName dflags hdr secTable name bs =
    rec [0..sectionEntryCount secTable - 1]
  where
    -- convert the required section name into a ByteString to perform
    -- ByteString comparison instead of String comparison
    name' = B8.pack name

    -- compare recursively each section name and return the contents of
    -- the matching one, if any
    rec []     = return Nothing
    rec (x:xs) = do
      me <- readElfSectionByIndex dflags hdr secTable x bs
      case me of
        Just e | entryName e == name' -> return (Just (entryBS e))
        _                             -> rec xs


-- | Given a section name, read its contents as a ByteString.
--
-- If the section isn't found or if there is any parsing error, we return
-- Nothing
readElfSectionByName :: DynFlags
                     -> ByteString
                     -> String
                     -> IO (Maybe LBS.ByteString)

readElfSectionByName dflags bs name = action `catchIO` \_ -> do
    debugTraceMsg dflags 3 $
      text ("Unable to read ELF section \"" ++ name ++ "\"")
    return Nothing
  where
    action = runMaybeT $ do
      hdr      <- MaybeT $ readElfHeader dflags bs
      secTable <- MaybeT $ readElfSectionTable dflags hdr bs
      MaybeT $ findSectionFromName dflags hdr secTable name bs

------------------
-- NOTE SECTIONS
------------------

-- | read a Note as a ByteString
--
-- If you try to read a note from a section which does not support the Note
-- format, the parsing is likely to fail and Nothing will be returned
readElfNoteBS :: DynFlags
              -> ByteString
              -> String
              -> String
              -> IO (Maybe LBS.ByteString)

readElfNoteBS dflags bs sectionName noteId = action `catchIO`  \_ -> do
    debugTraceMsg dflags 3 $
         text ("Unable to read ELF note \"" ++ noteId ++
               "\" in section \"" ++ sectionName ++ "\"")
    return Nothing
  where
    -- align the getter on n bytes
    align n = do
      m <- bytesRead
      if m `mod` n == 0
        then return ()
        else skip 1 >> align n

    -- noteId as a bytestring
    noteId' = B8.pack noteId

    -- read notes recursively until the one with a valid identifier is found
    findNote hdr = do
      align 4
      namesz <- gw32 hdr
      descsz <- gw32 hdr
      _      <- gw32 hdr -- we don't use the note type
      name   <- if namesz == 0
                  then return LBS.empty
                  else getLazyByteStringNul
      align 4
      desc  <- if descsz == 0
                  then return LBS.empty
                  else getLazyByteString (fromIntegral descsz)
      if name == noteId'
        then return $ Just desc
        else findNote hdr


    action = runMaybeT $ do
      hdr  <- MaybeT $ readElfHeader dflags bs
      sec  <- MaybeT $ readElfSectionByName dflags bs sectionName
      MaybeT $ runGetOrThrow (findNote hdr) sec

-- | read a Note as a String
--
-- If you try to read a note from a section which does not support the Note
-- format, the parsing is likely to fail and Nothing will be returned
readElfNoteAsString :: DynFlags
                    -> FilePath
                    -> String
                    -> String
                    -> IO (Maybe String)

readElfNoteAsString dflags path sectionName noteId = action `catchIO`  \_ -> do
    debugTraceMsg dflags 3 $
         text ("Unable to read ELF note \"" ++ noteId ++
               "\" in section \"" ++ sectionName ++ "\"")
    return Nothing
  where
    action = do
      bs   <- LBS.readFile path
      note <- readElfNoteBS dflags bs sectionName noteId
      return (fmap B8.unpack note)


-- | Generate the GAS code to create a Note section
--
-- Header fields for notes are 32-bit long (see Note [ELF specification]).
--
-- It seems there is no easy way to force GNU AS to generate a 32-bit word in
-- every case. Hence we use .int directive to create them: however "The byte
-- order and bit size of the number depends on what kind of target the assembly
-- is for." (https://sourceware.org/binutils/docs/as/Int.html#Int)
--
-- If we add new target platforms, we need to check that the generated words
-- are 32-bit long, otherwise we need to use platform specific directives to
-- force 32-bit .int in asWord32.
makeElfNote :: DynFlags -> String -> String -> Word32 -> String -> SDoc
makeElfNote dflags sectionName noteName typ contents = hcat [
    text "\t.section ",
    text sectionName,
    text ",\"\",",
    text elfSectionNote,
    text "\n",

    -- note name length (+ 1 for ending \0)
    asWord32 (length noteName + 1),

    -- note contents size
    asWord32 (length contents),

    -- note type
    asWord32 typ,

    -- note name (.asciz for \0 ending string) + padding
    text "\t.asciz \"",
    text noteName,
    text "\"\n",
    text "\t.align 4\n",

    -- note contents (.ascii to avoid ending \0) + padding
    text "\t.ascii \"",
    text (escape contents),
    text "\"\n",
    text "\t.align 4\n"]
  where
    escape :: String -> String
    escape = concatMap (charToC.fromIntegral.ord)

    asWord32 :: Show a => a -> SDoc
    asWord32 x = hcat [
      text "\t.int ",
      text (show x),
      text "\n"]

    elfSectionNote :: String
    elfSectionNote = case platformArch (targetPlatform dflags) of
                             ArchARM _ _ _ -> "%note"
                             _             -> "@note"



------------------
-- Helpers
------------------

-- | runGet in IO monad that throws an IOException on failure
runGetOrThrow :: Get a -> LBS.ByteString -> IO a
runGetOrThrow g bs = case runGetOrFail g bs of
  Left _        -> fail "Error while reading file"
  Right (_,_,a) -> return a