{-# LANGUAGE GADTs #-}
module CmmSink (
     cmmSink
  ) where

import CodeGen.Platform (callerSaves)

import Cmm
import CmmOpt
import BlockId
import CmmLive
import CmmUtils
import Hoopl

import DynFlags
import UniqFM
import PprCmm ()

import Data.List (partition)
import qualified Data.Set as Set

-- -----------------------------------------------------------------------------
-- Sinking and inlining

-- This is an optimisation pass that
--  (a) moves assignments closer to their uses, to reduce register pressure
--  (b) pushes assignments into a single branch of a conditional if possible
--  (c) inlines assignments to registers that are mentioned only once
--  (d) discards dead assignments
--
-- This tightens up lots of register-heavy code.  It is particularly
-- helpful in the Cmm generated by the Stg->Cmm code generator, in
-- which every function starts with a copyIn sequence like:
--
--    x1 = R1
--    x2 = Sp[8]
--    x3 = Sp[16]
--    if (Sp - 32 < SpLim) then L1 else L2
--
-- we really want to push the x1..x3 assignments into the L2 branch.
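--
-- For example, after sinking we would like to end up with something
-- like this (an illustrative sketch, not actual output of the pass):
--
--    if (Sp - 32 < SpLim) then L1 else L2
--    ...
--  L2:
--    x1 = R1
--    x2 = Sp[8]
--    x3 = Sp[16]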
--
-- Algorithm:
--
--  * Start by doing liveness analysis.
--
--  * Keep a list of assignments A; earlier ones may refer to later ones
--
--  * Walk forwards through the graph, look at each node N:
--    * If any assignments in A (1) occur only once in N, and (2) are
--      not live after N, inline the assignment and remove it
--      from A.
--    * If N is an assignment:
--      * If the register is not live after N, discard it
--      * otherwise pick up the assignment and add it to A
--    * If N is a non-assignment node:
--      * remove any assignments from A that conflict with N, and
--        place them before N in the current block.  (we call this
--        "dropping" the assignments).
--      * An assignment conflicts with N if it:
--        - assigns to a register mentioned in N
--        - mentions a register assigned by N
--        - reads from memory written by N
--      * do this recursively, dropping dependent assignments
--    * At a multi-way branch:
--      * drop any assignments that are live on more than one branch
--      * if any successor has more than one predecessor (a
--        join-point), drop everything live in that successor
-- 
-- As a side-effect we'll delete some dead assignments (transitively,
-- even).  This isn't as good as removeDeadAssignments, but it's much
-- cheaper.

-- If we do this *before* stack layout, we might be able to avoid
-- saving some things across calls/procpoints.
--
-- *but*, that will invalidate the liveness analysis, and we'll have
-- to re-do it.

-- -----------------------------------------------------------------------------
-- things that we aren't optimising very well yet.
--
-- -----------
-- (1) From GHC's FastString.hashStr:
--
--  s2ay:
--      if ((_s2an::I64 == _s2ao::I64) >= 1) goto c2gn; else goto c2gp;
--  c2gn:
--      R1 = _s2au::I64;
--      call (I64[Sp])(R1) args: 8, res: 0, upd: 8;
--  c2gp:
--      _s2cO::I64 = %MO_S_Rem_W64(%MO_UU_Conv_W8_W64(I8[_s2aq::I64 + (_s2an::I64 << 0)]) + _s2au::I64 * 128,
--                                 4091);
--      _s2an::I64 = _s2an::I64 + 1;
--      _s2au::I64 = _s2cO::I64;
--      goto s2ay;
--
-- a nice loop, but we didn't eliminate the silly assignment at the end.
-- See Note [dependent assignments], which would probably fix this.
--
-- -----------
-- (2) From stg_atomically_frame in PrimOps.cmm
--
-- We have a diamond control flow:
--
--     x = ...
--       |
--      / \
--     A   B
--      \ /
--       |
--    use of x
--
-- Now x won't be sunk down to its use, because we won't push it into
-- both branches of the conditional.  We certainly do have to check
-- that we can sink it past all the code in both A and B, but having
-- discovered that, we could sink it to its use.
--

-- -----------------------------------------------------------------------------

type Assignment = (LocalReg, CmmExpr, AbsMem)
  -- Assignment caches AbsMem, an abstraction of the memory read by
  -- the RHS of the assignment.

cmmSink :: DynFlags -> CmmGraph -> CmmGraph
cmmSink dflags graph = ofBlockList (g_entry graph) $ sink mapEmpty $ blocks
  where
  liveness = cmmLocalLiveness dflags graph
  getLive l = mapFindWithDefault Set.empty l liveness

  blocks = postorderDfs graph

  join_pts = findJoinPoints blocks

  sink :: BlockEnv [Assignment] -> [CmmBlock] -> [CmmBlock]
  sink _ [] = []
  sink sunk (b:bs) =
    -- pprTrace "sink" (ppr lbl) $
    blockJoin first final_middle final_last : sink sunk' bs
    where
      lbl = entryLabel b
      (first, middle, last) = blockSplit b

      succs = successors last

      -- Annotate the middle nodes with the registers live *after*
      -- the node.  This will help us decide whether we can inline
      -- an assignment in the current node or not.
      live = Set.unions (map getLive succs)
      live_middle = gen_kill dflags last live
      ann_middles = annotate dflags live_middle (blockToList middle)

      -- Now sink and inline in this block
      (middle', assigs) = walk dflags ann_middles (mapFindWithDefault [] lbl sunk)
      fold_last = constantFold dflags last
      (final_last, assigs') = tryToInline dflags live fold_last assigs

      -- We cannot sink into join points (successors with more than
      -- one predecessor), so identify the join points and the set
      -- of registers live in them.
      (joins, nonjoins) = partition (`mapMember` join_pts) succs
      live_in_joins = Set.unions (map getLive joins)

      -- We do not want to sink an assignment into multiple branches,
      -- so identify the set of registers live in multiple successors.
      -- This is made more complicated because when we sink an assignment
      -- into one branch, this might change the set of registers that are
      -- now live in multiple branches.
      init_live_sets = map getLive nonjoins
      live_in_multi live_sets r =
         case filter (Set.member r) live_sets of
           (_one:_two:_) -> True
           _ -> False

      -- Now, drop any assignments that we will not sink any further.
      (dropped_last, assigs'') = dropAssignments dflags drop_if init_live_sets assigs'

      drop_if a@(r,rhs,_) live_sets = (should_drop, live_sets')
          where
            should_drop =  conflicts dflags a final_last
                        || not (isTrivial rhs) && live_in_multi live_sets r
                        || r `Set.member` live_in_joins

            live_sets' | should_drop = live_sets
                       | otherwise   = map upd live_sets

            upd set | r `Set.member` set = set `Set.union` live_rhs
                    | otherwise          = set

            live_rhs = foldRegsUsed dflags extendRegSet emptyRegSet rhs

      final_middle = foldl blockSnoc middle' dropped_last

      sunk' = mapUnion sunk $
                 mapFromList [ (l, filterAssignments dflags (getLive l) assigs'')
                             | l <- succs ]

{- TODO: enable this later, when we have some good tests in place to
   measure the effect and tune it.

-- small: an expression we don't mind duplicating
isSmall :: CmmExpr -> Bool
isSmall (CmmReg (CmmLocal _)) = True  -- not globals, we want to coalesce them instead
isSmall (CmmLit _) = True
isSmall (CmmMachOp (MO_Add _) [x,y]) = isTrivial x && isTrivial y
isSmall (CmmRegOff (CmmLocal _) _) = True
isSmall _ = False
-}

isTrivial :: CmmExpr -> Bool
isTrivial (CmmReg (CmmLocal _)) = True
-- isTrivial (CmmLit _) = True
isTrivial _ = False

--
-- annotate each node with the set of registers live *after* the node
--
annotate :: DynFlags -> LocalRegSet -> [CmmNode O O] -> [(LocalRegSet, CmmNode O O)]
annotate dflags live nodes = snd $ foldr ann (live,[]) nodes
  where ann n (live,nodes) = (gen_kill dflags n live, (live,n) : nodes)

--
-- Find the blocks that have more than one predecessor (join points)
--
findJoinPoints :: [CmmBlock] -> BlockEnv Int
findJoinPoints blocks = mapFilter (>1) succ_counts
 where
  all_succs = concatMap successors blocks

  succ_counts :: BlockEnv Int
  succ_counts = foldr (\l -> mapInsertWith (+) l 1) mapEmpty all_succs
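-- For example (illustrative): if two different blocks both end in
--     goto L;
-- then L occurs twice in all_succs, succ_counts maps L to 2, and L is
-- kept as a join point.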

--
-- filter the list of assignments to remove any assignments that
-- are not live in a continuation.
--
filterAssignments :: DynFlags -> LocalRegSet -> [Assignment] -> [Assignment]
filterAssignments dflags live assigs = reverse (go assigs [])
  where go []             kept = kept
        go (a@(r,_,_):as) kept | needed    = go as (a:kept)
                               | otherwise = go as kept
           where
              needed = r `Set.member` live
                       || any (conflicts dflags a) (map toNode kept)
                       --  Note that we must keep assignments that are
                       -- referred to by other assignments we have
                       -- already kept.

-- -----------------------------------------------------------------------------
-- Walk through the nodes of a block, sinking and inlining assignments
-- as we go.

walk :: DynFlags
     -> [(LocalRegSet, CmmNode O O)]    -- nodes of the block, annotated with
                                        -- the set of registers live *after*
                                        -- this node.

     -> [Assignment]                    -- The current list of
                                        -- assignments we are sinking.
                                        -- Later assignments may refer
                                        -- to earlier ones.

     -> ( Block CmmNode O O             -- The new block
        , [Assignment]                  -- Assignments to sink further
        )

walk dflags nodes assigs = go nodes emptyBlock assigs
 where
   go []               block as = (block, as)
   go ((live,node):ns) block as
    | shouldDiscard node live    = go ns block as
    | Just a <- shouldSink dflags node2 = go ns block (a : as1)
    | otherwise                         = go ns block' as'
    where
      node1 = constantFold dflags node

      (node2, as1) = tryToInline dflags live node1 as

      (dropped, as') = dropAssignmentsSimple dflags
                          (\a -> conflicts dflags a node2) as1

      block' = foldl blockSnoc block dropped `blockSnoc` node2


constantFold :: DynFlags -> CmmNode e x -> CmmNode e x
constantFold dflags node = mapExpDeep f node
  where f (CmmMachOp op args) = cmmMachOpFold dflags op args
        f (CmmRegOff r 0) = CmmReg r
        f e = e

--
-- Heuristic to decide whether to pick up and sink an assignment
-- Currently we pick up all assignments to local registers.  It might
-- be profitable to sink assignments to global regs too, but the
-- liveness analysis doesn't track those (yet) so we can't.
--
shouldSink :: DynFlags -> CmmNode e x -> Maybe Assignment
shouldSink dflags (CmmAssign (CmmLocal r) e) | no_local_regs = Just (r, e, exprMem dflags e)
  where no_local_regs = True -- foldRegsUsed (\_ _ -> False) True e
shouldSink _ _other = Nothing

--
-- discard dead assignments.  This doesn't do as good a job as
-- removeDeadAssignments, because it would need multiple passes
-- to get all the dead code, but it catches the common case of
-- superfluous reloads from the stack that the stack allocator
-- leaves behind.
--
-- Also we catch "r = r" here.  You might think it would fall
-- out of inlining, but the inliner will see that r is live
-- after the instruction and choose not to inline r in the rhs.
--
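-- For example (illustrative): a reload
--    _x::I64 = I64[Sp + 8]
-- where _x is not live afterwards, and a self-assignment
--    _y::I64 = _y::I64
-- are both discarded here.
--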
shouldDiscard :: CmmNode e x -> LocalRegSet -> Bool
shouldDiscard node live
   = case node of
       CmmAssign r (CmmReg r') | r == r' -> True
       CmmAssign (CmmLocal r) _ -> not (r `Set.member` live)
       _otherwise -> False
  

toNode :: Assignment -> CmmNode O O
toNode (r,rhs,_) = CmmAssign (CmmLocal r) rhs

dropAssignmentsSimple :: DynFlags -> (Assignment -> Bool) -> [Assignment]
                      -> ([CmmNode O O], [Assignment])
dropAssignmentsSimple dflags f = dropAssignments dflags (\a _ -> (f a, ())) ()

dropAssignments :: DynFlags -> (Assignment -> s -> (Bool, s)) -> s -> [Assignment]
                -> ([CmmNode O O], [Assignment])
dropAssignments dflags should_drop state assigs
 = (dropped, reverse kept)
 where
   (dropped,kept) = go state assigs [] []

   go _ []             dropped kept = (dropped, kept)
   go state (assig : rest) dropped kept
      | conflict  = go state' rest (toNode assig : dropped) kept
      | otherwise = go state' rest dropped (assig:kept)
      where
        (dropit, state') = should_drop assig state
        conflict = dropit || any (conflicts dflags assig) dropped


-- -----------------------------------------------------------------------------
-- Try to inline assignments into a node.

tryToInline
   :: DynFlags
   -> LocalRegSet               -- set of registers live after this
                                -- node.  We cannot inline anything
                                -- that is live after the node, unless
                                -- it is small enough to duplicate.
   -> CmmNode O x               -- The node to inline into
   -> [Assignment]              -- Assignments to inline
   -> (
        CmmNode O x             -- New node
      , [Assignment]            -- Remaining assignments
      )

tryToInline dflags live node assigs = go usages node [] assigs
 where
  usages :: UniqFM Int
  usages = foldRegsUsed dflags addUsage emptyUFM node

  go _usages node _skipped [] = (node, [])

  go usages node skipped (a@(l,rhs,_) : rest)
   | cannot_inline           = dont_inline
   | occurs_once             = inline_and_discard
   | isTrivial rhs           = inline_and_keep
   | otherwise               = dont_inline
   where
        inline_and_discard = go usages' inl_node skipped rest
          where usages' = foldRegsUsed dflags addUsage usages rhs

        dont_inline        = keep node  -- don't inline the assignment, keep it
        inline_and_keep    = keep inl_node -- inline the assignment, keep it

        keep node' = (final_node, a : rest')
          where (final_node, rest') = go usages' node' (l:skipped) rest
                usages' = foldLocalRegsUsed dflags (\m r -> addToUFM m r 2) usages rhs
                -- we must not inline anything that is mentioned in the RHS
                -- of a binding that we have already skipped, so we set the
                -- usages of the regs on the RHS to 2.

        cannot_inline = skipped `regsUsedIn` rhs -- Note [dependent assignments]
                        || l `elem` skipped
                        || not (okToInline dflags rhs node)

        occurs_once = not (l `elemRegSet` live)
                      && lookupUFM usages l == Just 1

        inl_node = mapExpDeep inline node   -- mapExpDeep is where the inlining actually takes place!
           where inline (CmmReg    (CmmLocal l'))     | l == l' = rhs
                 inline (CmmRegOff (CmmLocal l') off) | l == l'
                    = cmmOffset dflags rhs off
                    -- re-constant fold after inlining
                 inline (CmmMachOp op args) = cmmMachOpFold dflags op args
                 inline other = other

-- Note [dependent assignments]
--
-- If our assignment list looks like
--
--    [ y = e,  x = ... y ... ]
--
-- We cannot inline x.  Remember this list is really in reverse order,
-- so it means  x = ... y ...; y = e
--
-- Hence if we inline x, the outer assignment to y will capture the
-- reference in x's right hand side.
--
-- In this case we should rename the y in x's right-hand side,
-- i.e. change the list to [ y = e, x = ... y1 ..., y1 = y ]
-- Now we can go ahead and inline x.
--
-- For now we do nothing, because this would require putting
-- everything inside UniqSM.
--
-- One more variant of this (#7366):
--
--   [ y = e, y = z ]
--
-- If we don't want to inline y = e, because y is used many times, we
-- might still be tempted to inline y = z (because we always inline
-- trivial rhs's).  But of course we can't, because y is equal to e,
-- not z.

addUsage :: UniqFM Int -> LocalReg -> UniqFM Int
addUsage m r = addToUFM_C (+) m r 1

regsUsedIn :: [LocalReg] -> CmmExpr -> Bool
regsUsedIn [] _ = False
regsUsedIn ls e = wrapRecExpf f e False
  where f (CmmReg (CmmLocal l))      _ | l `elem` ls = True
        f (CmmRegOff (CmmLocal l) _) _ | l `elem` ls = True
        f _ z = z

-- we don't inline into CmmUnsafeForeignCall if the expression refers
-- to global registers.  This is a HACK to avoid global registers
-- clashing with C argument-passing registers, really the back-end
-- ought to be able to handle it properly, but currently neither PprC
-- nor the NCG can do it.  See Note [Register parameter passing]
-- See also StgCmmForeign:load_args_into_temps.
okToInline :: DynFlags -> CmmExpr -> CmmNode e x -> Bool
okToInline dflags expr CmmUnsafeForeignCall{} = not (anyCallerSavesRegs dflags expr)
okToInline _ _ _ = True

-- -----------------------------------------------------------------------------

-- | @conflicts (r,e) stmt@ is @False@ if and only if the assignment
-- @r = e@ can be safely commuted past @stmt@.
--
-- We only sink "r = G" assignments right now, so conflicts is very simple:
--
conflicts :: DynFlags -> Assignment -> CmmNode O x -> Bool
conflicts dflags (r, rhs, addr) node

  -- (1) an assignment to a register conflicts with a use of the register
  | CmmAssign reg  _ <- node, reg `regUsedIn` rhs                 = True
  | foldRegsUsed dflags (\b r' -> r == r' || b) False node        = True

  -- (2) a store to an address conflicts with a read of the same memory
  | CmmStore addr' e <- node
  , memConflicts addr (loadAddr dflags addr' (cmmExprWidth dflags e)) = True

  -- (3) an assignment to Hp/Sp conflicts with a heap/stack read respectively
  | HeapMem    <- addr, CmmAssign (CmmGlobal Hp) _ <- node        = True
  | StackMem   <- addr, CmmAssign (CmmGlobal Sp) _ <- node        = True
  | SpMem{}    <- addr, CmmAssign (CmmGlobal Sp) _ <- node        = True

  -- (4) assignments that read caller-saves GlobalRegs conflict with a
  -- foreign call.  See Note [foreign calls clobber GlobalRegs].
  | CmmUnsafeForeignCall{} <- node, anyCallerSavesRegs dflags rhs = True

  -- (5) foreign calls clobber heap: see Note [foreign calls clobber heap]
  | CmmUnsafeForeignCall{} <- node, memConflicts addr AnyMem      = True

  -- (6) native calls clobber any memory
  | CmmCall{} <- node, memConflicts addr AnyMem                   = True

  -- (7) otherwise, no conflict
  | otherwise = False
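-- For example (illustrative): an assignment x = I64[Sp + 8] (whose
-- AbsMem is SpMem 8 8) conflicts with the store I64[Sp + 8] = _y::I64
-- by case (2), but not with I64[Sp + 16] = _y::I64: the two 8-byte
-- stack slots do not overlap, and the store neither assigns to nor
-- reads the register x.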


anyCallerSavesRegs :: DynFlags -> CmmExpr -> Bool
anyCallerSavesRegs dflags e = wrapRecExpf f e False
  where f (CmmReg (CmmGlobal r)) _
         | callerSaves (targetPlatform dflags) r = True
        f _ z = z

-- An abstraction of memory read or written.
data AbsMem
  = NoMem            -- no memory accessed
  | AnyMem           -- arbitrary memory
  | HeapMem          -- definitely heap memory
  | StackMem         -- definitely stack memory
  | SpMem            -- <size>[Sp+n]
       {-# UNPACK #-} !Int
       {-# UNPACK #-} !Int

-- Having SpMem is important because it lets us float loads from Sp
-- past stores to Sp as long as they don't overlap, and this helps to
-- unravel some long sequences of
--    x1 = [Sp + 8]
--    x2 = [Sp + 16]
--    ...
--    [Sp + 8]  = xi
--    [Sp + 16] = xj
--
-- Note that SpMem is invalidated if Sp is changed, but the definition
-- of 'conflicts' above handles that.

-- ToDo: this won't currently fix the following commonly occurring code:
--    x1 = [R1 + 8]
--    x2 = [R1 + 16]
--    ..
--    [Hp - 8] = x1
--    [Hp - 16] = x2
--    ..

-- because [R1 + 8] and [Hp - 8] are both HeapMem.  We know that
-- assignments to [Hp + n] do not conflict with any other heap memory,
-- but this is tricky to nail down.  What if we had
--
--   x = Hp + n
--   [x] = ...
--
--  the store to [x] should be "new heap", not "old heap".
--  Furthermore, you could imagine that if we started inlining
--  functions in Cmm then there might well be reads of heap memory
--  that was written in the same basic block.  To take advantage of
--  non-aliasing of heap memory we will have to be more clever.

-- Note [foreign calls clobber heap]
--
-- It is tempting to say that foreign calls clobber only
-- non-heap/stack memory, but unfortunately we break this invariant in
-- the RTS.  For example, in stg_catch_retry_frame we call
-- stmCommitNestedTransaction() which modifies the contents of the
-- TRec it is passed (this actually caused incorrect code to be
-- generated).
--
-- Since the invariant is true for the majority of foreign calls,
-- perhaps we ought to have a special annotation for calls that can
-- modify heap/stack memory.  For now we just use the conservative
-- definition here.


bothMems :: AbsMem -> AbsMem -> AbsMem
bothMems NoMem    x         = x
bothMems x        NoMem     = x
bothMems HeapMem  HeapMem   = HeapMem
bothMems StackMem StackMem  = StackMem
bothMems (SpMem o1 w1) (SpMem o2 w2)
  | o1 == o2  = SpMem o1 (max w1 w2)
  | otherwise = StackMem
bothMems SpMem{}  StackMem  = StackMem
bothMems StackMem SpMem{}   = StackMem
bothMems _         _        = AnyMem
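-- Some illustrative cases, worked out from the equations above:
--   bothMems (SpMem 8 8) (SpMem 8 4)    -->  SpMem 8 8   (same offset, max width)
--   bothMems (SpMem 8 8) (SpMem 16 8)   -->  StackMem    (different offsets)
--   bothMems HeapMem     StackMem       -->  AnyMem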

memConflicts :: AbsMem -> AbsMem -> Bool
memConflicts NoMem      _          = False
memConflicts _          NoMem      = False
memConflicts HeapMem    StackMem   = False
memConflicts StackMem   HeapMem    = False
memConflicts SpMem{}    HeapMem    = False
memConflicts HeapMem    SpMem{}    = False
memConflicts (SpMem o1 w1) (SpMem o2 w2)
  | o1 < o2   = o1 + w1 > o2
  | otherwise = o2 + w2 > o1
memConflicts _         _         = True
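-- Some illustrative cases, worked out from the equations above:
--   memConflicts (SpMem 8 8) (SpMem 16 8)  -->  False   (disjoint stack slots)
--   memConflicts (SpMem 8 8) (SpMem 12 8)  -->  True    (overlapping slots)
--   memConflicts HeapMem     StackMem      -->  False
--   memConflicts HeapMem     AnyMem        -->  True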

exprMem :: DynFlags -> CmmExpr -> AbsMem
exprMem dflags (CmmLoad addr w)  = bothMems (loadAddr dflags addr (typeWidth w)) (exprMem dflags addr)
exprMem dflags (CmmMachOp _ es)  = foldr bothMems NoMem (map (exprMem dflags) es)
exprMem _      _                 = NoMem
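-- For example (illustrative, on a 64-bit target):
--   exprMem dflags (CmmLoad (CmmRegOff (CmmGlobal Sp) 8) b64)  -->  SpMem 8 8
--   exprMem dflags (CmmLoad (CmmReg (CmmGlobal Hp)) b64)       -->  HeapMem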

loadAddr :: DynFlags -> CmmExpr -> Width -> AbsMem
loadAddr dflags e w =
  case e of
   CmmReg r       -> regAddr dflags r 0 w
   CmmRegOff r i  -> regAddr dflags r i w
   _other | CmmGlobal Sp `regUsedIn` e -> StackMem
          | otherwise -> AnyMem

regAddr :: DynFlags -> CmmReg -> Int -> Width -> AbsMem
regAddr _      (CmmGlobal Sp) i w = SpMem i (widthInBytes w)
regAddr _      (CmmGlobal Hp) _ _ = HeapMem
regAddr _      (CmmGlobal CurrentTSO) _ _ = HeapMem -- important for PrimOps
regAddr dflags r _ _ | isGcPtrType (cmmRegType dflags r) = HeapMem -- yay! GCPtr pays for itself
regAddr _      _ _ _ = AnyMem