Commit edd6d676 authored by Peter Wortmann's avatar Peter Wortmann Committed by Austin Seipp
Browse files

Generate DWARF unwind information

This tells debuggers such as GDB how to "unwind" a program state,
which allows them to walk the stack up.

Notes:

* The code is quite general, perhaps unnecessarily so. Unless we get
  more unwind information, only the first case of pprSetUnwind will
  get used - and pprUnwindExpr and pprUndefUnwind will never be
  called. It just so happens that this is a point where we can get a
  lot of features cheaply, even if we don't use them.

* When determining what location to show for a return address, most
  debuggers check the map for "rip-1", assuming that's where the
  "call" instruction is. For tables-next-to-code, that happens to
  always be the end of an info table. We therefore cheat a bit here by
  shifting .debug_frame information so it covers the end of the info
  table, as well as generating a .loc directive for the info table
  data.

  Debuggers will still show the wrong label for the return address,
  though.  Haven't found a way around that one yet.

(From Phabricator D396)
parent cc481ec8
......@@ -3,6 +3,7 @@ module Dwarf (
) where
import CLabel
import CmmExpr ( GlobalReg(..) )
import Config ( cProjectName, cProjectVersion )
import CoreSyn ( Tickish(..) )
import Debug
......@@ -18,6 +19,9 @@ import Dwarf.Constants
import Dwarf.Types
import Data.Maybe
import Data.List ( sortBy )
import Data.Ord ( comparing )
import qualified Data.Map as Map
import System.FilePath
import System.Directory ( getCurrentDirectory )
......@@ -62,7 +66,13 @@ dwarfGen df modLoc us blocks = do
let lineSct = dwarfLineSection $$
ptext dwarfLineLabel <> colon
return (infoSct $$ abbrevSct $$ lineSct, us')
-- .debug_frame section: Information about the layout of the GHC stack
let (framesU, us'') = takeUniqFromSupply us'
frameSct = dwarfFrameSection $$
ptext dwarfFrameLabel <> colon $$
pprDwarfFrame (debugFrame framesU procs)
return (infoSct $$ abbrevSct $$ lineSct $$ frameSct, us'')
-- | Header for a compilation unit, establishing global format
-- parameters
......@@ -118,3 +128,36 @@ blockToDwarf blk dws
, dwLabel = dblCLabel blk
, dwMarker = mkAsmTempLabel (dblLabel blk)
}
-- | Generates the data for the debug frame section, which encodes the
-- desired stack unwind behaviour for the debugger
debugFrame :: Unique -> [DebugBlock] -> DwarfFrame
debugFrame u procs
= DwarfFrame { dwCieLabel = mkAsmTempLabel u
, dwCieInit = initUws
, dwCieProcs = map (procToFrame initUws) procs
}
where initUws = Map.fromList [(Sp, UwReg Sp 0)]
-- | Generates unwind information for a procedure debug block
procToFrame :: UnwindTable -> DebugBlock -> DwarfFrameProc
procToFrame initUws blk
= DwarfFrameProc { dwFdeProc = dblCLabel blk
, dwFdeHasInfo = dblHasInfoTbl blk
, dwFdeBlocks = map (uncurry blockToFrame) blockUws
}
where blockUws :: [(DebugBlock, UnwindTable)]
blockUws = map snd $ sortBy (comparing fst) $ flatten initUws blk
flatten uws0 b@DebugBlock{ dblPosition=pos, dblUnwind=uws,
dblBlocks=blocks }
| Just p <- pos = (p, (b, uws')):nested
| otherwise = nested -- block was optimized out
where uws' = uws `Map.union` uws0
nested = concatMap (flatten uws') blocks
blockToFrame :: DebugBlock -> UnwindTable -> DwarfFrameBlock
blockToFrame blk uws
= DwarfFrameBlock { dwFdeBlock = mkAsmTempLabel $ dblLabel blk
, dwFdeBlkHasInfo = dblHasInfoTbl blk
, dwFdeUnwind = uws
}
......@@ -7,6 +7,9 @@ import FastString
import Platform
import Outputable
import Reg
import X86.Regs
import Data.Word
-- | Language ID used for Haskell.
......@@ -126,7 +129,66 @@ dwarfSection name = sdocWithPlatform $ \plat ->
".section .debug_" ++ name ++ ",\"\",@progbits"
-- | Dwarf section labels
dwarfInfoLabel, dwarfAbbrevLabel, dwarfLineLabel :: LitString
dwarfInfoLabel, dwarfAbbrevLabel, dwarfLineLabel, dwarfFrameLabel :: LitString
dwarfInfoLabel = sLit ".Lsection_info"
dwarfAbbrevLabel = sLit ".Lsection_abbrev"
dwarfLineLabel = sLit ".Lsection_line"
dwarfFrameLabel = sLit ".Lsection_frame"
-- | Mapping of registers to DWARF register numbers
dwarfRegNo :: Platform -> Reg -> Word8
dwarfRegNo p r = case platformArch p of
ArchX86
| r == eax -> 0
| r == ecx -> 1 -- yes, no typo
| r == edx -> 2
| r == ebx -> 3
| r == esp -> 4
| r == ebp -> 5
| r == esi -> 6
| r == edi -> 7
ArchX86_64
| r == rax -> 0
| r == rdx -> 1 -- this neither. The order GCC allocates registers in?
| r == rcx -> 2
| r == rbx -> 3
| r == rsi -> 4
| r == rdi -> 5
| r == rbp -> 6
| r == rsp -> 7
| r == r8 -> 8
| r == r9 -> 9
| r == r10 -> 10
| r == r11 -> 11
| r == r12 -> 12
| r == r13 -> 13
| r == r14 -> 14
| r == r15 -> 15
| r == xmm0 -> 17
| r == xmm1 -> 18
| r == xmm2 -> 19
| r == xmm3 -> 20
| r == xmm4 -> 21
| r == xmm5 -> 22
| r == xmm6 -> 23
| r == xmm7 -> 24
| r == xmm8 -> 25
| r == xmm9 -> 26
| r == xmm10 -> 27
| r == xmm11 -> 28
| r == xmm12 -> 29
| r == xmm13 -> 30
| r == xmm14 -> 31
| r == xmm15 -> 32
_other -> error "dwarfRegNo: Unsupported platform or unknown register!"
-- | Virtual register number to use for return address.
dwarfReturnRegNo :: Platform -> Word8
dwarfReturnRegNo p
-- We "overwrite" IP with our pseudo register - that makes sense, as
-- when using this mechanism gdb already knows the IP anyway. Clang
-- does this too, so it must be safe.
= case platformArch p of
ArchX86 -> 8 -- eip
ArchX86_64 -> 16 -- rip
_other -> error "dwarfReturnRegNo: Unsupported platform!"
module Dwarf.Types
( DwarfInfo(..)
( -- * Dwarf information
DwarfInfo(..)
, pprDwarfInfo
, pprAbbrevDecls
-- * Dwarf frame
, DwarfFrame(..), DwarfFrameProc(..), DwarfFrameBlock(..)
, pprDwarfFrame
-- * Utilities
, pprByte
, pprWord
, pprData4'
, pprDwWord
, pprWord
, pprLEBWord
, pprLEBInt
, wordAlign
)
where
import Debug
import CLabel
import CmmExpr ( GlobalReg(..) )
import FastString
import Outputable
import Platform
import Reg
import Dwarf.Constants
import Data.Bits
import Data.List ( mapAccumL )
import qualified Data.Map as Map
import Data.Word
import Data.Char
import CodeGen.Platform
-- | Individual dwarf records. Each one will be encoded as an entry in
-- the .debug_info section.
data DwarfInfo
......@@ -74,6 +88,7 @@ pprAbbrevDecls haveDebugLine =
, (dW_AT_external, dW_FORM_flag)
, (dW_AT_low_pc, dW_FORM_addr)
, (dW_AT_high_pc, dW_FORM_addr)
, (dW_AT_frame_base, dW_FORM_block1)
] $$
mkAbbrev DwAbbrBlock dW_TAG_lexical_block dW_CHILDREN_yes
[ (dW_AT_name, dW_FORM_string)
......@@ -107,6 +122,8 @@ pprDwarfInfoOpen _ (DwarfSubprogram _ name label) = sdocWithDynFlags $ \df ->
$$ pprFlag (externallyVisibleCLabel label)
$$ pprWord (ppr label)
$$ pprWord (ppr $ mkAsmTempEndLabel label)
$$ pprByte 1
$$ pprByte dW_OP_call_frame_cfa
pprDwarfInfoOpen _ (DwarfBlock _ label marker) = sdocWithDynFlags $ \df ->
pprAbbrev DwAbbrBlock
$$ pprString (renderWithStyle df (ppr label) (mkCodeStyle CStyle))
......@@ -117,6 +134,221 @@ pprDwarfInfoOpen _ (DwarfBlock _ label marker) = sdocWithDynFlags $ \df ->
pprDwarfInfoClose :: SDoc
pprDwarfInfoClose = pprAbbrev DwAbbrNull
-- | Information about unwind instructions for a procedure. This
-- corresponds to a "Common Information Entry" (CIE) in DWARF.
data DwarfFrame
= DwarfFrame
{ dwCieLabel :: CLabel
, dwCieInit :: UnwindTable
, dwCieProcs :: [DwarfFrameProc]
}
-- | Unwind instructions for an individual procedure. Corresponds to a
-- "Frame Description Entry" (FDE) in DWARF.
data DwarfFrameProc
= DwarfFrameProc
{ dwFdeProc :: CLabel
, dwFdeHasInfo :: Bool
, dwFdeBlocks :: [DwarfFrameBlock]
-- ^ List of blocks. Order must match asm!
}
-- | Unwind instructions for a block. Will become part of the
-- containing FDE.
data DwarfFrameBlock
= DwarfFrameBlock
{ dwFdeBlock :: CLabel
, dwFdeBlkHasInfo :: Bool
, dwFdeUnwind :: UnwindTable
}
-- | Header for the .debug_frame section. Here we emit the "Common
-- Information Entry" record that etablishes general call frame
-- parameters and the default stack layout.
pprDwarfFrame :: DwarfFrame -> SDoc
pprDwarfFrame DwarfFrame{dwCieLabel=cieLabel,dwCieInit=cieInit,dwCieProcs=procs}
= sdocWithPlatform $ \plat ->
let cieStartLabel= mkAsmTempDerivedLabel cieLabel (fsLit "_start")
cieEndLabel = mkAsmTempEndLabel cieLabel
length = ppr cieEndLabel <> char '-' <> ppr cieStartLabel
spReg = dwarfGlobalRegNo plat Sp
retReg = dwarfReturnRegNo plat
wordSize = platformWordSize plat
pprInit (g, uw) = pprSetUnwind plat g (Nothing, uw)
in vcat [ ppr cieLabel <> colon
, pprData4' length -- Length of CIE
, ppr cieStartLabel <> colon
, pprData4' (ptext (sLit "-1"))
-- Common Information Entry marker (-1 = 0xf..f)
, pprByte 3 -- CIE version (we require DWARF 3)
, pprByte 0 -- Augmentation (none)
, pprByte 1 -- Code offset multiplicator
, pprByte (128-fromIntegral wordSize)
-- Data offset multiplicator
-- (stacks grow down => "-w" in signed LEB128)
, pprByte retReg -- virtual register holding return address
] $$
-- Initial unwind table
vcat (map pprInit $ Map.toList cieInit) $$
vcat [ -- RET = *CFA
pprByte (dW_CFA_offset+retReg)
, pprByte 0
-- Sp' = CFA
-- (we need to set this manually as our Sp register is
-- often not the architecture's default stack register)
, pprByte dW_CFA_val_offset
, pprLEBWord (fromIntegral spReg)
, pprLEBWord 0
] $$
wordAlign $$
ppr cieEndLabel <> colon $$
-- Procedure unwind tables
vcat (map (pprFrameProc cieLabel cieInit) procs)
-- | Writes a "Frame Description Entry" for a procedure. This consists
-- mainly of referencing the CIE and writing state machine
-- instructions to describe how the frame base (CFA) changes.
pprFrameProc :: CLabel -> UnwindTable -> DwarfFrameProc -> SDoc
pprFrameProc frameLbl initUw (DwarfFrameProc procLbl hasInfo blocks)
= let fdeLabel = mkAsmTempDerivedLabel procLbl (fsLit "_fde")
fdeEndLabel = mkAsmTempDerivedLabel procLbl (fsLit "_fde_end")
procEnd = mkAsmTempEndLabel procLbl
ifInfo str = if hasInfo then text str else empty
-- see [Note: Info Offset]
in vcat [ pprData4' (ppr fdeEndLabel <> char '-' <> ppr fdeLabel)
, ppr fdeLabel <> colon
, pprData4' (ppr frameLbl <> char '-' <>
ptext dwarfFrameLabel) -- Reference to CIE
, pprWord (ppr procLbl <> ifInfo "-1") -- Code pointer
, pprWord (ppr procEnd <> char '-' <>
ppr procLbl <> ifInfo "+1") -- Block byte length
] $$
vcat (snd $ mapAccumL pprFrameBlock initUw blocks) $$
wordAlign $$
ppr fdeEndLabel <> colon
-- | Generates unwind information for a block. We only generate
-- instructions where unwind information actually changes. This small
-- optimisations saves a lot of space, as subsequent blocks often have
-- the same unwind information.
pprFrameBlock :: UnwindTable -> DwarfFrameBlock -> (UnwindTable, SDoc)
pprFrameBlock oldUws (DwarfFrameBlock blockLbl hasInfo uws)
| uws == oldUws
= (oldUws, empty)
| otherwise
= (,) uws $ sdocWithPlatform $ \plat ->
let lbl = ppr blockLbl <> if hasInfo then text "-1" else empty
-- see [Note: Info Offset]
isChanged g v | old == Just v = Nothing
| otherwise = Just (old, v)
where old = Map.lookup g oldUws
changed = Map.toList $ Map.mapMaybeWithKey isChanged uws
died = Map.toList $ Map.difference oldUws uws
in pprByte dW_CFA_set_loc $$ pprWord lbl $$
vcat (map (uncurry $ pprSetUnwind plat) changed) $$
vcat (map (pprUndefUnwind plat . fst) died)
-- [Note: Info Offset]
--
-- GDB was pretty much written with C-like programs in mind, and as a
-- result they assume that once you have a return address, it is a
-- good idea to look at (PC-1) to unwind further - as that's where the
-- "call" instruction is supposed to be.
--
-- Now on one hand, code generated by GHC looks nothing like what GDB
-- expects, and in fact going up from a return pointer is guaranteed
-- to land us inside an info table! On the other hand, that actually
-- gives us some wiggle room, as we expect IP to never *actually* end
-- up inside the info table, so we can "cheat" by putting whatever GDB
-- expects to see there. This is probably pretty safe, as GDB cannot
-- assume (PC-1) to be a valid code pointer in the first place - and I
-- have seen no code trying to correct this.
--
-- Note that this will not prevent GDB from failing to look-up the
-- correct function name for the frame, as that uses the symbol table,
-- which we can not manipulate as easily.
-- | Get DWARF register ID for a given GlobalReg
dwarfGlobalRegNo :: Platform -> GlobalReg -> Word8
dwarfGlobalRegNo p = maybe 0 (dwarfRegNo p . RegReal) . globalRegMaybe p
-- | Generate code for setting the unwind information for a register,
-- optimized using its known old value in the table. Note that "Sp" is
-- special: We see it as synonym for the CFA.
pprSetUnwind :: Platform -> GlobalReg -> (Maybe UnwindExpr, UnwindExpr) -> SDoc
pprSetUnwind _ Sp (Just (UwReg s _), UwReg s' o') | s == s'
= if o' >= 0
then pprByte dW_CFA_def_cfa_offset $$ pprLEBWord (fromIntegral o')
else pprByte dW_CFA_def_cfa_offset_sf $$ pprLEBInt o'
pprSetUnwind plat Sp (_, UwReg s' o')
= if o' >= 0
then pprByte dW_CFA_def_cfa $$
pprLEBWord (fromIntegral $ dwarfGlobalRegNo plat s') $$
pprLEBWord (fromIntegral o')
else pprByte dW_CFA_def_cfa_sf $$
pprLEBWord (fromIntegral $ dwarfGlobalRegNo plat s') $$
pprLEBInt o'
pprSetUnwind _ Sp (_, uw)
= pprByte dW_CFA_def_cfa_expression $$ pprUnwindExpr False uw
pprSetUnwind plat g (_, UwDeref (UwReg Sp o))
| o < 0 && ((-o) `mod` platformWordSize plat) == 0 -- expected case
= pprByte (dW_CFA_offset + dwarfGlobalRegNo plat g) $$
pprLEBWord (fromIntegral ((-o) `div` platformWordSize plat))
| otherwise
= pprByte dW_CFA_offset_extended_sf $$
pprLEBWord (fromIntegral (dwarfGlobalRegNo plat g)) $$
pprLEBInt o
pprSetUnwind plat g (_, UwDeref uw)
= pprByte dW_CFA_expression $$
pprLEBWord (fromIntegral (dwarfGlobalRegNo plat g)) $$
pprUnwindExpr True uw
pprSetUnwind plat g (_, uw)
= pprByte dW_CFA_val_expression $$
pprLEBWord (fromIntegral (dwarfGlobalRegNo plat g)) $$
pprUnwindExpr True uw
-- | Generates a DWARF expression for the given unwind expression. If
-- @spIsCFA@ is true, we see @Sp@ as the frame base CFA where it gets
-- mentioned.
pprUnwindExpr :: Bool -> UnwindExpr -> SDoc
pprUnwindExpr spIsCFA expr
= sdocWithPlatform $ \plat ->
let ppr (UwConst i)
| i >= 0 && i < 32 = pprByte (dW_OP_lit0 + fromIntegral i)
| otherwise = pprByte dW_OP_consts $$ pprLEBInt i -- lazy...
ppr (UwReg Sp i) | spIsCFA
= if i == 0
then pprByte dW_OP_call_frame_cfa
else ppr (UwPlus (UwReg Sp 0) (UwConst i))
ppr (UwReg g i) = pprByte (dW_OP_breg0+dwarfGlobalRegNo plat g) $$
pprLEBInt i
ppr (UwDeref u) = ppr u $$ pprByte dW_OP_deref
ppr (UwPlus u1 u2) = ppr u1 $$ ppr u2 $$ pprByte dW_OP_plus
ppr (UwMinus u1 u2) = ppr u1 $$ ppr u2 $$ pprByte dW_OP_minus
ppr (UwTimes u1 u2) = ppr u1 $$ ppr u2 $$ pprByte dW_OP_mul
in ptext (sLit "\t.byte 1f-.-1") $$
ppr expr $$
ptext (sLit "1:")
-- | Generate code for re-setting the unwind information for a
-- register to "undefined"
pprUndefUnwind :: Platform -> GlobalReg -> SDoc
pprUndefUnwind _ Sp = panic "pprUndefUnwind Sp" -- should never happen
pprUndefUnwind plat g = pprByte dW_CFA_undefined $$
pprLEBWord (fromIntegral $ dwarfGlobalRegNo plat g)
-- | Align assembly at (machine) word boundary
wordAlign :: SDoc
wordAlign = sdocWithPlatform $ \plat ->
ptext (sLit "\t.align ") <> case platformOS plat of
OSDarwin -> case platformWordSize plat of
8 -> text "3"
4 -> text "2"
_other -> error "wordAlign: Unsupported word size!"
_other -> ppr (platformWordSize plat)
-- | Assembly for a single byte of constant DWARF data
pprByte :: Word8 -> SDoc
pprByte x = ptext (sLit "\t.byte ") <> ppr (fromIntegral x :: Word)
......
......@@ -114,8 +114,14 @@ pprBasicBlock info_env (BasicBlock blockid instrs)
Nothing -> empty
Just (Statics info_lbl info) ->
pprSectionHeader Text $$
infoTableLoc $$
vcat (map pprData info) $$
pprLabel info_lbl
-- Make sure the info table has the right .loc for the block
-- coming right after it. See [Note: Info Offset]
infoTableLoc = case instrs of
(l@LOCATION{} : _) -> pprInstr l
_other -> empty
pprDatas :: (Alignment, CmmStatics) -> SDoc
pprDatas (align, (Statics lbl dats))
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment