DebuggerUtils.hs 5.02 KB
Newer Older
1 2
{-# LANGUAGE CPP #-}

3 4 5 6
module DebuggerUtils (
       dataConInfoPtrToName,
  ) where

7 8
import GhcPrelude

9
import GHCi.InfoTable
10
import CmmInfo ( stdInfoTableSizeB )
Ian Lynagh's avatar
Ian Lynagh committed
11
import DynFlags
12
import HscTypes
13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34
import FastString
import IfaceEnv
import Module
import OccName
import Name
import Outputable
import Util

import Data.Char
import Foreign
import Data.List

#include "HsVersions.h"

-- | Given a data constructor in the heap, find its Name.
--   The info tables for data constructors have a field which records
--   the source name of the constructor as a Ptr Word8 (UTF-8 encoded
--   string). The format is:
--
--   > Package:Module.Name
--
--   We use this string to lookup the interpreter's internal representation of the name
35
--   using the lookupOrig.
36
--
37 38 39 40 41 42 43
dataConInfoPtrToName :: HscEnv -> Ptr () -> IO Name
dataConInfoPtrToName hsc_env x = do
   let dflags = hsc_dflags hsc_env
   theString <- do
     let ptr = castPtr x :: Ptr StgInfoTable
     conDescAddress <- getConDescAddress dflags ptr
     peekArray0 0 conDescAddress
44
   let (pkg, mod, occ) = parse theString
45 46 47 48
       pkgFS = mkFastStringByteList pkg
       modFS = mkFastStringByteList mod
       occFS = mkFastStringByteList occ
       occName = mkOccNameFS OccName.dataName occFS
49
       modName = mkModule (fsToUnitId pkgFS) (mkModuleNameFS modFS)
50
   lookupOrigIO hsc_env modName occName
51 52 53

   where

54
   {- To find the string in the constructor's info table we need to consider
55 56 57
      the layout of info tables relative to the entry code for a closure.

      An info table can be next to the entry code for the closure, or it can
58 59
      be separate. The former (faster) is used in registerised versions of ghc,
      and the latter (portable) is for non-registerised versions.
60

61
      The diagrams below show where the string is to be found relative to
62 63 64 65 66 67 68 69 70 71 72
      the normal info table of the closure.

      1) Code next to table:

         --------------
         |            |   <- pointer to the start of the string
         --------------
         |            |   <- the (start of the) info table structure
         |            |
         |            |
         --------------
73
         | entry code |
74 75 76
         |    ....    |

         In this case the pointer to the start of the string can be found in
77
         the memory location _one word before_ the first entry in the normal info
78 79 80 81 82 83 84
         table.

      2) Code NOT next to table:

                                 --------------
         info table structure -> |     *------------------> --------------
                                 |            |             | entry code |
85
                                 |            |             |    ....    |
86
                                 --------------
87
         ptr to start of str ->  |            |
88 89 90 91 92 93
                                 --------------

         In this case the pointer to the start of the string can be found
         in the memory location: info_table_ptr + info_table_size
   -}

94 95
   getConDescAddress :: DynFlags -> Ptr StgInfoTable -> IO (Ptr Word8)
   getConDescAddress dflags ptr
96
    | ghciTablesNextToCode = do
97
       let ptr' = ptr `plusPtr` (- wORD_SIZE dflags)
98 99 100
       -- NB. the offset must be read as an Int32 not a Word32, so
       -- that the sign is preserved when converting to an Int.
       offsetToString <- fromIntegral <$> (peek ptr' :: IO Int32)
101
       return $ (ptr `plusPtr` stdInfoTableSizeB dflags) `plusPtr` offsetToString
102
    | otherwise =
103
       peek $ intPtrToPtr $ ptrToIntPtr ptr + fromIntegral (stdInfoTableSizeB dflags)
104
   -- parsing names is a little bit fiddly because we have a string in the form:
105 106 107 108 109 110 111
   -- pkg:A.B.C.foo, and we want to split it into three parts: ("pkg", "A.B.C", "foo").
   -- Thus we split at the leftmost colon and the rightmost occurrence of the dot.
   -- It would be easier if the string was in the form pkg:A.B.C:foo, but alas
   -- this is not the conventional way of writing Haskell names. We stick with
   -- convention, even though it makes the parsing code more troublesome.
   -- Warning: this code assumes that the string is well formed.
   parse :: [Word8] -> ([Word8], [Word8], [Word8])
112
   parse input
113
      = ASSERT(all (`lengthExceeds` 0) ([pkg, mod, occ])) (pkg, mod, occ)
114 115
      where
      dot = fromIntegral (ord '.')
116 117
      (pkg, rest1) = break (== fromIntegral (ord ':')) input
      (mod, occ)
118 119
         = (concat $ intersperse [dot] $ reverse modWords, occWord)
         where
120
         (modWords, occWord) = ASSERT(rest1 `lengthExceeds` 0) (parseModOcc [] (tail rest1))
121 122 123 124 125 126 127 128 129 130 131 132
      parseModOcc :: [[Word8]] -> [Word8] -> ([[Word8]], [Word8])
      -- We only look for dots if str could start with a module name,
      -- i.e. if it starts with an upper case character.
      -- Otherwise we might think that "X.:->" is the module name in
      -- "X.:->.+", whereas actually "X" is the module name and
      -- ":->.+" is a constructor name.
      parseModOcc acc str@(c : _)
       | isUpper $ chr $ fromIntegral c
         = case break (== dot) str of
              (top, []) -> (acc, top)
              (top, _ : bot) -> parseModOcc (top : acc) bot
      parseModOcc acc str = (acc, str)