StgCmmForeign.hs 19.1 KB
Newer Older
1 2 3 4 5 6 7 8 9
-----------------------------------------------------------------------------
--
-- Code generation for foreign calls.
--
-- (c) The University of Glasgow 2004-2006
--
-----------------------------------------------------------------------------

module StgCmmForeign (
10
  cgForeignCall,
11
  emitPrimCall, emitCCall,
12
  emitForeignCall,     -- For CmmParse
13 14 15 16 17 18
  emitSaveThreadState,
  saveThreadState,
  emitLoadThreadState,
  loadThreadState,
  emitOpenNursery,
  emitCloseNursery,
19 20 21 22 23
 ) where

#include "HsVersions.h"

import StgSyn
24
import StgCmmProf (storeCurCCS, ccsType, curCCS)
25 26 27 28
import StgCmmEnv
import StgCmmMonad
import StgCmmUtils
import StgCmmClosure
29
import StgCmmLayout
30

31
import Cmm
32
import CmmUtils
33
import MkGraph
34 35 36 37 38
import Type
import TysPrim
import CLabel
import SMRep
import ForeignCall
39
import DynFlags
40 41
import Maybes
import Outputable
Ian Lynagh's avatar
Ian Lynagh committed
42
import BasicTypes
43 44

import Control.Monad
45
import Prelude hiding( succ )
46 47 48 49 50

-----------------------------------------------------------------------------
-- Code generation for Foreign Calls
-----------------------------------------------------------------------------

51 52 53
-- | emit code for a foreign call, and return the results to the sequel.
--
cgForeignCall :: ForeignCall            -- the op
54
              -> [StgArg]               -- x,y    arguments
55
              -> Type                   -- result type
56
              -> FCode ReturnKind
57

58
cgForeignCall (CCall (CCallSpec target cconv safety)) stg_args res_ty
59 60 61 62 63 64 65 66 67 68 69
  = do  { dflags <- getDynFlags
        ; let -- in the stdcall calling convention, the symbol needs @size appended
              -- to it, where size is the total number of bytes of arguments.  We
              -- attach this info to the CLabel here, and the CLabel pretty printer
              -- will generate the suffix when the label is printed.
            call_size args
              | StdCallConv <- cconv = Just (sum (map arg_size args))
              | otherwise            = Nothing

              -- ToDo: this might not be correct for 64-bit API
            arg_size (arg, _) = max (widthInBytes $ typeWidth $ cmmExprType dflags arg)
70
                                     (wORD_SIZE dflags)
71
        ; cmm_args <- getFCallArgs stg_args
72
        ; (res_regs, res_hints) <- newUnboxedTupleRegs res_ty
73 74
        ; let ((call_args, arg_hints), cmm_target)
                = case target of
75 76 77
                   StaticTarget _   _      False ->
                       panic "cgForeignCall: unexpected FFI value import"
                   StaticTarget lbl mPkgId True
78 79 80 81
                     -> let labelSource
                                = case mPkgId of
                                        Nothing         -> ForeignLabelInThisPackage
                                        Just pkgId      -> ForeignLabelInPackage pkgId
82
                            size = call_size cmm_args
83 84 85 86
                        in  ( unzip cmm_args
                            , CmmLit (CmmLabel
                                        (mkForeignLabel lbl size labelSource IsFunction)))

87
                   DynamicTarget    ->  case cmm_args of
88 89
                                           (fn,_):rest -> (unzip rest, fn)
                                           [] -> panic "cgForeignCall []"
90
              fc = ForeignConvention cconv arg_hints res_hints CmmMayReturn
91
              call_target = ForeignTarget cmm_target fc
92

93 94 95 96 97 98 99 100 101 102 103 104
        -- we want to emit code for the call, and then emitReturn.
        -- However, if the sequel is AssignTo, we shortcut a little
        -- and generate a foreign call that assigns the results
        -- directly.  Otherwise we end up generating a bunch of
        -- useless "r = r" assignments, which are not merely annoying:
        -- they prevent the common block elimination from working correctly
        -- in the case of a safe foreign call.
        -- See Note [safe foreign call convention]
        --
        ; sequel <- getSequel
        ; case sequel of
            AssignTo assign_to_these _ ->
105
                emitForeignCall safety assign_to_these call_target call_args
106 107

            _something_else ->
108
                do { _ <- emitForeignCall safety res_regs call_target call_args
109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179
                   ; emitReturn (map (CmmReg . CmmLocal) res_regs)
                   }
         }

{- Note [safe foreign call convention]

The simple thing to do for a safe foreign call would be the same as an
unsafe one: just

    emitForeignCall ...
    emitReturn ...

but consider what happens in this case

   case foo x y z of
     (# s, r #) -> ...

The sequel is AssignTo [r].  The call to newUnboxedTupleRegs picks [r]
as the result reg, and we generate

  r = foo(x,y,z) returns to L1  -- emitForeignCall
 L1:
  r = r  -- emitReturn
  goto L2
L2:
  ...

Now L1 is a proc point (by definition, it is the continuation of the
safe foreign call).  If L2 does a heap check, then L2 will also be a
proc point.

Furthermore, the stack layout algorithm has to arrange to save r
somewhere between the call and the jump to L1, which is annoying: we
would have to treat r differently from the other live variables, which
have to be saved *before* the call.

So we adopt a special convention for safe foreign calls: the results
are copied out according to the NativeReturn convention by the call,
and the continuation of the call should copyIn the results.  (The
copyOut code is actually inserted when the safe foreign call is
lowered later).  The result regs attached to the safe foreign call are
only used temporarily to hold the results before they are copied out.

We will now generate this:

  r = foo(x,y,z) returns to L1
 L1:
  r = R1  -- copyIn, inserted by mkSafeCall
  goto L2
 L2:
  ... r ...

And when the safe foreign call is lowered later (see Note [lower safe
foreign calls]) we get this:

  suspendThread()
  r = foo(x,y,z)
  resumeThread()
  R1 = r  -- copyOut, inserted by lowerSafeForeignCall
  jump L1
 L1:
  r = R1  -- copyIn, inserted by mkSafeCall
  goto L2
 L2:
  ... r ...

Now consider what happens if L2 does a heap check: the Adams
optimisation kicks in and commons up L1 with the heap-check
continuation, resulting in just one proc point instead of two. Yay!
-}

180 181

emitCCall :: [(CmmFormal,ForeignHint)]
182 183 184
          -> CmmExpr
          -> [(CmmActual,ForeignHint)]
          -> FCode ()
185
emitCCall hinted_results fn hinted_args
186
  = void $ emitForeignCall PlayRisky results target args
187 188 189 190
  where
    (args, arg_hints) = unzip hinted_args
    (results, result_hints) = unzip hinted_results
    target = ForeignTarget fn fc
191
    fc = ForeignConvention CCallConv arg_hints result_hints CmmMayReturn
192

193

194
emitPrimCall :: [CmmFormal] -> CallishMachOp -> [CmmActual] -> FCode ()
195
emitPrimCall res op args
196
  = void $ emitForeignCall PlayRisky res (PrimTarget op) args
197 198 199

-- alternative entry point, used by CmmParse
emitForeignCall
200 201 202 203
        :: Safety
        -> [CmmFormal]          -- where to put the results
        -> ForeignTarget        -- the op
        -> [CmmActual]          -- arguments
204
        -> FCode ReturnKind
205
emitForeignCall safety results target args
206
  | not (playSafe safety) = do
207 208
    dflags <- getDynFlags
    let (caller_save, caller_load) = callerSaveVolatileRegs dflags
209
    emit caller_save
210 211 212
    target' <- load_target_into_temp target
    args' <- mapM maybe_assign_temp args
    emit $ mkUnsafeCall target' results args'
213
    emit caller_load
214
    return AssignedDirectly
215 216

  | otherwise = do
217
    dflags <- getDynFlags
218
    updfr_off <- getUpdFrameOff
219 220
    target' <- load_target_into_temp target
    args' <- mapM maybe_assign_temp args
221
    k <- newLabelC
222
    let (off, _, copyout) = copyInOflow dflags NativeReturn (Young k) results []
223 224
       -- see Note [safe foreign call convention]
    emit $
225
           (    mkStore (CmmStackSlot (Young k) (widthInBytes (wordWidth dflags)))
226
                        (CmmLit (CmmBlock k))
227
            <*> mkLast (CmmForeignCall { tgt  = target'
228
                                       , res  = results
229
                                       , args = args'
230
                                       , succ = k
231 232
                                       , ret_args = off
                                       , ret_off = updfr_off
233 234 235 236 237
                                       , intrbl = playInterruptible safety })
            <*> mkLabel k
            <*> copyout
           )
    return (ReturnedTo k off)
238

239
load_target_into_temp :: ForeignTarget -> FCode ForeignTarget
240
load_target_into_temp (ForeignTarget expr conv) = do
241
  tmp <- maybe_assign_temp expr
242 243
  return (ForeignTarget tmp conv)
load_target_into_temp other_target@(PrimTarget _) =
244 245
  return other_target

246 247 248 249 250 251 252 253 254 255 256
-- What we want to do here is create a new temporary for the foreign
-- call argument if it is not safe to use the expression directly,
-- because the expression mentions caller-saves GlobalRegs (see
-- Note [Register Parameter Passing]).
--
-- However, we can't pattern-match on the expression here, because
-- this is used in a loop by CmmParse, and testing the expression
-- results in a black hole.  So we always create a temporary, and rely
-- on CmmSink to clean it up later.  (Yuck, ToDo).  The generated code
-- ends up being the same, at least for the RTS .cmm code.
--
Ian Lynagh's avatar
Ian Lynagh committed
257
maybe_assign_temp :: CmmExpr -> FCode CmmExpr
258 259 260 261 262
maybe_assign_temp e = do
  dflags <- getDynFlags
  reg <- newTemp (cmmExprType dflags e)
  emitAssign (CmmLocal reg) e
  return (CmmReg (CmmLocal reg))
263 264 265 266 267 268 269

-- -----------------------------------------------------------------------------
-- Save/restore the thread state in the TSO

-- This stuff can't be done in suspendThread/resumeThread, because it
-- refers to global registers which aren't available in the C world.

270 271
emitSaveThreadState :: FCode ()
emitSaveThreadState = do
272
  dflags <- getDynFlags
273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293
  tso <- newTemp (gcWord dflags)
  cn <- newTemp (bWord dflags)
  emit $ saveThreadState dflags tso cn


-- saveThreadState must be usable from the stack layout pass, where we
-- don't have FCode.  Therefore it takes LocalRegs as arguments, so
-- the caller can create these.
saveThreadState :: DynFlags -> LocalReg -> LocalReg -> CmmAGraph
saveThreadState dflags tso cn =
  catAGraphs [
    -- tso = CurrentTSO;
    mkAssign (CmmLocal tso) stgCurrentTSO,
    -- tso->stackobj->sp = Sp;
    mkStore (cmmOffset dflags (CmmLoad (cmmOffset dflags (CmmReg (CmmLocal tso)) (tso_stackobj dflags)) (bWord dflags)) (stack_SP dflags)) stgSp,
    closeNursery dflags tso cn,
    -- and save the current cost centre stack in the TSO when profiling:
    if gopt Opt_SccProfilingOn dflags then
        mkStore (cmmOffset dflags (CmmReg (CmmLocal tso)) (tso_CCCS dflags)) curCCS
      else mkNop
    ]
294

295 296
emitCloseNursery :: FCode ()
emitCloseNursery = do
297 298 299 300 301 302 303 304 305 306 307
  dflags <- getDynFlags
  tso <- newTemp (gcWord dflags)
  cn <- newTemp (bWord dflags)
  emit $ mkAssign (CmmLocal tso) stgCurrentTSO <*>
         closeNursery dflags tso cn

{-
Closing the nursery corresponds to the following code:

  tso = CurrentTSO;
  cn = CurrentNuresry;
308

309 310 311 312 313
  // Update the allocation limit for the current thread.  We don't
  // check to see whether it has overflowed at this point, that check is
  // made when we run out of space in the current heap block (stg_gc_noregs)
  // and in the scheduler when context switching (schedulePostRunThread).
  tso->alloc_limit -= Hp + WDS(1) - cn->start;
314

315 316 317 318 319 320 321 322 323 324
  // Set cn->free to the next unoccupied word in the block
  cn->free = Hp + WDS(1);
-}

closeNursery :: DynFlags -> LocalReg -> LocalReg -> CmmAGraph
closeNursery df tso cn =
  let
      tsoreg     = CmmLocal tso
      cnreg      = CmmLocal cn
  in
325
  catAGraphs [
326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344
    mkAssign cnreg stgCurrentNursery,

    let alloc =
           CmmMachOp (mo_wordSub df)
              [ cmmOffsetW df stgHp 1
              , CmmLoad (nursery_bdescr_start df cnreg) (bWord df)
              ]

        alloc_limit = cmmOffset df (CmmReg tsoreg) (tso_alloc_limit df)
    in

    -- tso->alloc_limit += alloc
    mkStore alloc_limit (CmmMachOp (mo_wordSub df)
                               [ CmmLoad alloc_limit b64
                               , alloc ]),

    -- CurrentNursery->free = Hp+1;
    mkStore (nursery_bdescr_free df cnreg) (cmmOffsetW df stgHp 1)
   ]
345 346 347 348

emitLoadThreadState :: FCode ()
emitLoadThreadState = do
  dflags <- getDynFlags
349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385
  tso <- newTemp (gcWord dflags)
  stack <- newTemp (gcWord dflags)
  cn <- newTemp (bWord dflags)
  bdfree <- newTemp (bWord dflags)
  bdstart <- newTemp (bWord dflags)
  emit $ loadThreadState dflags tso stack cn bdfree bdstart

-- loadThreadState must be usable from the stack layout pass, where we
-- don't have FCode.  Therefore it takes LocalRegs as arguments, so
-- the caller can create these.
loadThreadState :: DynFlags
                -> LocalReg -> LocalReg -> LocalReg -> LocalReg -> LocalReg
                -> CmmAGraph
loadThreadState dflags tso stack cn bdfree bdstart =
  catAGraphs [
    -- tso = CurrentTSO;
    mkAssign (CmmLocal tso) stgCurrentTSO,
    -- stack = tso->stackobj;
    mkAssign (CmmLocal stack) (CmmLoad (cmmOffset dflags (CmmReg (CmmLocal tso)) (tso_stackobj dflags)) (bWord dflags)),
    -- Sp = stack->sp;
    mkAssign sp (CmmLoad (cmmOffset dflags (CmmReg (CmmLocal stack)) (stack_SP dflags)) (bWord dflags)),
    -- SpLim = stack->stack + RESERVED_STACK_WORDS;
    mkAssign spLim (cmmOffsetW dflags (cmmOffset dflags (CmmReg (CmmLocal stack)) (stack_STACK dflags))
                                (rESERVED_STACK_WORDS dflags)),
    -- HpAlloc = 0;
    --   HpAlloc is assumed to be set to non-zero only by a failed
    --   a heap check, see HeapStackCheck.cmm:GC_GENERIC
    mkAssign hpAlloc (zeroExpr dflags),
    openNursery dflags tso cn bdfree bdstart,
    -- and load the current cost centre stack from the TSO when profiling:
    if gopt Opt_SccProfilingOn dflags
       then storeCurCCS
              (CmmLoad (cmmOffset dflags (CmmReg (CmmLocal tso))
                 (tso_CCCS dflags)) (ccsType dflags))
       else mkNop
   ]

386 387 388

emitOpenNursery :: FCode ()
emitOpenNursery = do
389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464
  dflags <- getDynFlags
  tso <- newTemp (gcWord dflags)
  cn <- newTemp (bWord dflags)
  bdfree <- newTemp (bWord dflags)
  bdstart <- newTemp (bWord dflags)
  emit $ mkAssign (CmmLocal tso) stgCurrentTSO <*>
         openNursery dflags tso cn bdfree bdstart

{-
Opening the nursery corresponds to the following code:

   tso = CurrentTSO;
   cn = CurrentNursery;
   bdfree = CurrentNuresry->free;
   bdstart = CurrentNuresry->start;

   // We *add* the currently occupied portion of the nursery block to
   // the allocation limit, because we will subtract it again in
   // closeNursery.
   tso->alloc_limit += bdfree - bdstart;

   // Set Hp to the last occupied word of the heap block.  Why not the
   // next unocupied word?  Doing it this way means that we get to use
   // an offset of zero more often, which might lead to slightly smaller
   // code on some architectures.
   Hp = bdfree - WDS(1);

   // Set HpLim to the end of the current nursery block (note that this block
   // might be a block group, consisting of several adjacent blocks.
   HpLim = bdstart + CurrentNursery->blocks*BLOCK_SIZE_W - 1;
-}

openNursery :: DynFlags
            -> LocalReg -> LocalReg -> LocalReg -> LocalReg
            -> CmmAGraph
openNursery df tso cn bdfree bdstart =
  let
      tsoreg     = CmmLocal tso
      cnreg      = CmmLocal cn
      bdfreereg  = CmmLocal bdfree
      bdstartreg = CmmLocal bdstart
  in
  catAGraphs [
     mkAssign cnreg stgCurrentNursery,
     mkAssign bdfreereg  (CmmLoad (nursery_bdescr_free df cnreg)  (bWord df)),
     mkAssign bdstartreg (CmmLoad (nursery_bdescr_start df cnreg) (bWord df)),

     -- alloc = bd->free - bd->start
     let alloc =
           CmmMachOp (mo_wordSub df) [CmmReg bdfreereg, CmmReg bdstartreg]

         alloc_limit = cmmOffset df (CmmReg tsoreg) (tso_alloc_limit df)
     in

     -- tso->alloc_limit += alloc
     mkStore alloc_limit (CmmMachOp (mo_wordAdd df)
                               [ CmmLoad alloc_limit b64
                               , alloc ]),

     -- Hp = CurrentNursery->free - 1;
     mkAssign hp (cmmOffsetW df (CmmReg bdfreereg) (-1)),

     -- HpLim = CurrentNursery->start +
     --              CurrentNursery->blocks*BLOCK_SIZE_W - 1;
     mkAssign hpLim
         (cmmOffsetExpr df
             (CmmReg bdstartreg)
             (cmmOffset df
               (CmmMachOp (mo_wordMul df) [
                 CmmMachOp (MO_SS_Conv W32 (wordWidth df))
                   [CmmLoad (nursery_bdescr_blocks df cnreg) b32],
                 mkIntExpr df (bLOCK_SIZE df)
                ])
               (-1)
             )
         )
465 466
   ]

467 468 469 470 471 472 473 474
nursery_bdescr_free, nursery_bdescr_start, nursery_bdescr_blocks
  :: DynFlags -> CmmReg -> CmmExpr
nursery_bdescr_free   dflags cn =
  cmmOffset dflags (CmmReg cn) (oFFSET_bdescr_free dflags)
nursery_bdescr_start  dflags cn =
  cmmOffset dflags (CmmReg cn) (oFFSET_bdescr_start dflags)
nursery_bdescr_blocks dflags cn =
  cmmOffset dflags (CmmReg cn) (oFFSET_bdescr_blocks dflags)
475

476
tso_stackobj, tso_CCCS, tso_alloc_limit, stack_STACK, stack_SP :: DynFlags -> ByteOff
477
tso_stackobj dflags = closureField dflags (oFFSET_StgTSO_stackobj dflags)
478
tso_alloc_limit dflags = closureField dflags (oFFSET_StgTSO_alloc_limit dflags)
479 480 481
tso_CCCS     dflags = closureField dflags (oFFSET_StgTSO_cccs dflags)
stack_STACK  dflags = closureField dflags (oFFSET_StgStack_stack dflags)
stack_SP     dflags = closureField dflags (oFFSET_StgStack_sp dflags)
482

483

484
closureField :: DynFlags -> ByteOff -> ByteOff
485
closureField dflags off = off + fixedHdrSize dflags
486

Ian Lynagh's avatar
Ian Lynagh committed
487
stgSp, stgHp, stgCurrentTSO, stgCurrentNursery :: CmmExpr
488 489 490
stgSp             = CmmReg sp
stgHp             = CmmReg hp
stgCurrentTSO     = CmmReg currentTSO
491 492
stgCurrentNursery = CmmReg currentNursery

493
sp, spLim, hp, hpLim, currentTSO, currentNursery, hpAlloc :: CmmReg
494 495 496 497 498 499
sp                = CmmGlobal Sp
spLim             = CmmGlobal SpLim
hp                = CmmGlobal Hp
hpLim             = CmmGlobal HpLim
currentTSO        = CmmGlobal CurrentTSO
currentNursery    = CmmGlobal CurrentNursery
500
hpAlloc           = CmmGlobal HpAlloc
501 502 503 504 505 506 507 508

-- -----------------------------------------------------------------------------
-- For certain types passed to foreign calls, we adjust the actual
-- value passed to the call.  For ByteArray#/Array# we pass the
-- address of the actual array, not the address of the heap object.

getFCallArgs :: [StgArg] -> FCode [(CmmExpr, ForeignHint)]
-- (a) Drop void args
509
-- (b) Add foreign-call shim code
510 511 512
-- It's (b) that makes this differ from getNonVoidArgAmodes

getFCallArgs args
513 514
  = do  { mb_cmms <- mapM get args
        ; return (catMaybes mb_cmms) }
515
  where
516 517 518 519
    get arg | isVoidRep arg_rep
            = return Nothing
            | otherwise
            = do { cmm <- getArgAmode (NonVoid arg)
520 521
                 ; dflags <- getDynFlags
                 ; return (Just (add_shim dflags arg_ty cmm, hint)) }
522 523 524 525
            where
              arg_ty  = stgArgType arg
              arg_rep = typePrimRep arg_ty
              hint    = typeForeignHint arg_ty
526

527 528
add_shim :: DynFlags -> Type -> CmmExpr -> CmmExpr
add_shim dflags arg_ty expr
529
  | tycon == arrayPrimTyCon || tycon == mutableArrayPrimTyCon
530
  = cmmOffsetB dflags expr (arrPtrsHdrSize dflags)
531

532 533 534
  | tycon == smallArrayPrimTyCon || tycon == smallMutableArrayPrimTyCon
  = cmmOffsetB dflags expr (smallArrPtrsHdrSize dflags)

535
  | tycon == byteArrayPrimTyCon || tycon == mutableByteArrayPrimTyCon
536
  = cmmOffsetB dflags expr (arrWordsHdrSize dflags)
537 538

  | otherwise = expr
539
  where
540 541
    UnaryRep rep_ty = repType arg_ty
    tycon           = tyConAppTyCon rep_ty
542
        -- should be a tycon app, since this is a foreign call