CmmOpt.hs 16.2 KB
Newer Older
1
2
{-# LANGUAGE CPP #-}

3
4
5
6
7
8
-- The default iteration limit is a bit too low for the definitions
-- in this module.
#if __GLASGOW_HASKELL__ >= 800
{-# OPTIONS_GHC -fmax-pmcheck-iterations=10000000 #-}
#endif

9
10
11
12
13
14
15
16
17
-----------------------------------------------------------------------------
--
-- Cmm optimisation
--
-- (c) The University of Glasgow 2006
--
-----------------------------------------------------------------------------

module CmmOpt (
18
19
        constantFoldNode,
        constantFoldExpr,
20
        cmmMachOpFold,
21
        cmmMachOpFoldM
22
23
24
25
 ) where

#include "HsVersions.h"

26
import CmmUtils
27
import Cmm
Ian Lynagh's avatar
Ian Lynagh committed
28
import DynFlags
29
import Util
30
31

import Outputable
32
import Platform
33

Simon Marlow's avatar
Simon Marlow committed
34
import Data.Bits
35
import Data.Maybe
36

37
38
39
40
41
42
43
44
45
46

constantFoldNode :: DynFlags -> CmmNode e x -> CmmNode e x
constantFoldNode dflags = mapExp (constantFoldExpr dflags)

constantFoldExpr :: DynFlags -> CmmExpr -> CmmExpr
constantFoldExpr dflags = wrapRecExp f
  where f (CmmMachOp op args) = cmmMachOpFold dflags op args
        f (CmmRegOff r 0) = CmmReg r
        f e = e

47
48
49
50
51
52
53
-- -----------------------------------------------------------------------------
-- MachOp constant folder

-- Now, try to constant-fold the MachOps.  The arguments have already
-- been optimized and folded.

cmmMachOpFold
54
    :: DynFlags
Ian Lynagh's avatar
Ian Lynagh committed
55
    -> MachOp       -- The operation from an CmmMachOp
56
    -> [CmmExpr]    -- The optimized arguments
57
58
    -> CmmExpr

59
cmmMachOpFold dflags op args = fromMaybe (CmmMachOp op args) (cmmMachOpFoldM dflags op args)
60
61
62
63

-- Returns Nothing if no changes, useful for Hoopl, also reduces
-- allocation!
cmmMachOpFoldM
64
    :: DynFlags
Ian Lynagh's avatar
Ian Lynagh committed
65
    -> MachOp
66
67
68
    -> [CmmExpr]
    -> Maybe CmmExpr

Ian Lynagh's avatar
Ian Lynagh committed
69
cmmMachOpFoldM _ op [CmmLit (CmmInt x rep)]
70
  = Just $ case op of
Ian Lynagh's avatar
Ian Lynagh committed
71
72
      MO_S_Neg _ -> CmmLit (CmmInt (-x) rep)
      MO_Not _   -> CmmLit (CmmInt (complement x) rep)
73

Jan Stolarek's avatar
Jan Stolarek committed
74
        -- these are interesting: we must first narrow to the
75
76
77
        -- "from" type, in order to truncate to the correct size.
        -- The final narrow/widen to the destination type
        -- is implicit in the CmmLit.
Ian Lynagh's avatar
Ian Lynagh committed
78
79
80
      MO_SF_Conv _from to -> CmmLit (CmmFloat (fromInteger x) to)
      MO_SS_Conv  from to -> CmmLit (CmmInt (narrowS from x) to)
      MO_UU_Conv  from to -> CmmLit (CmmInt (narrowU from x) to)
81

82
      _ -> panic $ "cmmMachOpFoldM: unknown unary op: " ++ show op
83
84
85


-- Eliminate conversion NOPs
Ian Lynagh's avatar
Ian Lynagh committed
86
87
cmmMachOpFoldM _ (MO_SS_Conv rep1 rep2) [x] | rep1 == rep2 = Just x
cmmMachOpFoldM _ (MO_UU_Conv rep1 rep2) [x] | rep1 == rep2 = Just x
88
89

-- Eliminate nested conversions where possible
90
cmmMachOpFoldM dflags conv_outer [CmmMachOp conv_inner [x]]
91
92
93
  | Just (rep1,rep2,signed1) <- isIntConversion conv_inner,
    Just (_,   rep3,signed2) <- isIntConversion conv_outer
  = case () of
94
95
96
97
98
99
        -- widen then narrow to the same size is a nop
      _ | rep1 < rep2 && rep1 == rep3 -> Just x
        -- Widen then narrow to different size: collapse to single conversion
        -- but remember to use the signedness from the widening, just in case
        -- the final conversion is a widen.
        | rep1 < rep2 && rep2 > rep3 ->
100
            Just $ cmmMachOpFold dflags (intconv signed1 rep1 rep3) [x]
101
102
        -- Nested widenings: collapse if the signedness is the same
        | rep1 < rep2 && rep2 < rep3 && signed1 == signed2 ->
103
            Just $ cmmMachOpFold dflags (intconv signed1 rep1 rep3) [x]
104
105
        -- Nested narrowings: collapse
        | rep1 > rep2 && rep2 > rep3 ->
106
            Just $ cmmMachOpFold dflags (MO_UU_Conv rep1 rep3) [x]
107
108
        | otherwise ->
            Nothing
109
  where
Jan Stolarek's avatar
Jan Stolarek committed
110
        isIntConversion (MO_UU_Conv rep1 rep2)
111
112
113
114
          = Just (rep1,rep2,False)
        isIntConversion (MO_SS_Conv rep1 rep2)
          = Just (rep1,rep2,True)
        isIntConversion _ = Nothing
115

116
117
        intconv True  = MO_SS_Conv
        intconv False = MO_UU_Conv
118
119
120
121
122

-- ToDo: a narrow of a load can be collapsed into a narrow load, right?
-- but what if the architecture only supports word-sized loads, should
-- we do the transformation anyway?

123
cmmMachOpFoldM dflags mop [CmmLit (CmmInt x xrep), CmmLit (CmmInt y _)]
124
  = case mop of
125
126
        -- for comparisons: don't forget to narrow the arguments before
        -- comparing, since they might be out of range.
127
128
        MO_Eq _   -> Just $ CmmLit (CmmInt (if x_u == y_u then 1 else 0) (wordWidth dflags))
        MO_Ne _   -> Just $ CmmLit (CmmInt (if x_u /= y_u then 1 else 0) (wordWidth dflags))
129

130
131
132
133
        MO_U_Gt _ -> Just $ CmmLit (CmmInt (if x_u >  y_u then 1 else 0) (wordWidth dflags))
        MO_U_Ge _ -> Just $ CmmLit (CmmInt (if x_u >= y_u then 1 else 0) (wordWidth dflags))
        MO_U_Lt _ -> Just $ CmmLit (CmmInt (if x_u <  y_u then 1 else 0) (wordWidth dflags))
        MO_U_Le _ -> Just $ CmmLit (CmmInt (if x_u <= y_u then 1 else 0) (wordWidth dflags))
134

135
136
137
138
        MO_S_Gt _ -> Just $ CmmLit (CmmInt (if x_s >  y_s then 1 else 0) (wordWidth dflags))
        MO_S_Ge _ -> Just $ CmmLit (CmmInt (if x_s >= y_s then 1 else 0) (wordWidth dflags))
        MO_S_Lt _ -> Just $ CmmLit (CmmInt (if x_s <  y_s then 1 else 0) (wordWidth dflags))
        MO_S_Le _ -> Just $ CmmLit (CmmInt (if x_s <= y_s then 1 else 0) (wordWidth dflags))
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155

        MO_Add r -> Just $ CmmLit (CmmInt (x + y) r)
        MO_Sub r -> Just $ CmmLit (CmmInt (x - y) r)
        MO_Mul r -> Just $ CmmLit (CmmInt (x * y) r)
        MO_U_Quot r | y /= 0 -> Just $ CmmLit (CmmInt (x_u `quot` y_u) r)
        MO_U_Rem  r | y /= 0 -> Just $ CmmLit (CmmInt (x_u `rem`  y_u) r)
        MO_S_Quot r | y /= 0 -> Just $ CmmLit (CmmInt (x `quot` y) r)
        MO_S_Rem  r | y /= 0 -> Just $ CmmLit (CmmInt (x `rem` y) r)

        MO_And   r -> Just $ CmmLit (CmmInt (x .&. y) r)
        MO_Or    r -> Just $ CmmLit (CmmInt (x .|. y) r)
        MO_Xor   r -> Just $ CmmLit (CmmInt (x `xor` y) r)

        MO_Shl   r -> Just $ CmmLit (CmmInt (x `shiftL` fromIntegral y) r)
        MO_U_Shr r -> Just $ CmmLit (CmmInt (x_u `shiftR` fromIntegral y) r)
        MO_S_Shr r -> Just $ CmmLit (CmmInt (x `shiftR` fromIntegral y) r)

Ian Lynagh's avatar
Ian Lynagh committed
156
        _          -> Nothing
157
158

   where
159
160
161
162
163
        x_u = narrowU xrep x
        y_u = narrowU xrep y
        x_s = narrowS xrep x
        y_s = narrowS xrep y

164
165
166
167
168
169

-- When possible, shift the constants to the right-hand side, so that we
-- can match for strength reductions.  Note that the code generator will
-- also assume that constants have been shifted to the right when
-- possible.

170
cmmMachOpFoldM dflags op [x@(CmmLit _), y]
171
   | not (isLit y) && isCommutableMachOp op
172
   = Just (cmmMachOpFold dflags op [y, x])
173
174
175
176
177
178
179
180
181
182
183
184
185
186

-- Turn (a+b)+c into a+(b+c) where possible.  Because literals are
-- moved to the right, it is more likely that we will find
-- opportunities for constant folding when the expression is
-- right-associated.
--
-- ToDo: this appears to introduce a quadratic behaviour due to the
-- nested cmmMachOpFold.  Can we fix this?
--
-- Why do we check isLit arg1?  If arg1 is a lit, it means that arg2
-- is also a lit (otherwise arg1 would be on the right).  If we
-- put arg1 on the left of the rearranged expression, we'll get into a
-- loop:  (x1+x2)+x3 => x1+(x2+x3)  => (x2+x3)+x1 => x2+(x3+x1) ...
--
187
188
189
-- Also don't do it if arg1 is PicBaseReg, so that we don't separate the
-- PicBaseReg from the corresponding label (or label difference).
--
190
cmmMachOpFoldM dflags mop1 [CmmMachOp mop2 [arg1,arg2], arg3]
191
   | mop2 `associates_with` mop1
192
     && not (isLit arg1) && not (isPicReg arg1)
193
   = Just (cmmMachOpFold dflags mop2 [arg1, cmmMachOpFold dflags mop1 [arg2,arg3]])
194
195
196
197
198
199
   where
     MO_Add{} `associates_with` MO_Sub{} = True
     mop1 `associates_with` mop2 =
        mop1 == mop2 && isAssociativeMachOp mop1

-- special case: (a - b) + c  ==>  a + (c - b)
200
cmmMachOpFoldM dflags mop1@(MO_Add{}) [CmmMachOp mop2@(MO_Sub{}) [arg1,arg2], arg3]
201
   | not (isLit arg1) && not (isPicReg arg1)
202
   = Just (cmmMachOpFold dflags mop1 [arg1, cmmMachOpFold dflags mop2 [arg3,arg2]])
203

Simon Marlow's avatar
Simon Marlow committed
204
205
206
207
208
209
210
211
212
213
214
215
216
217
-- special case: (PicBaseReg + lit) + N  ==>  PicBaseReg + (lit+N)
--
-- this is better because lit+N is a single link-time constant (e.g. a
-- CmmLabelOff), so the right-hand expression needs only one
-- instruction, whereas the left needs two.  This happens when pointer
-- tagging gives us label+offset, and PIC turns the label into
-- PicBaseReg + label.
--
cmmMachOpFoldM _ MO_Add{} [ CmmMachOp op@MO_Add{} [pic, CmmLit lit]
                          , CmmLit (CmmInt n rep) ]
  | isPicReg pic
  = Just $ CmmMachOp op [pic, CmmLit $ cmmOffsetLit lit off ]
  where off = fromIntegral (narrowS rep n)

218
-- Make a RegOff if we can
Ian Lynagh's avatar
Ian Lynagh committed
219
cmmMachOpFoldM _ (MO_Add _) [CmmReg reg, CmmLit (CmmInt n rep)]
220
  = Just $ cmmRegOff reg (fromIntegral (narrowS rep n))
Ian Lynagh's avatar
Ian Lynagh committed
221
cmmMachOpFoldM _ (MO_Add _) [CmmRegOff reg off, CmmLit (CmmInt n rep)]
222
  = Just $ cmmRegOff reg (off + fromIntegral (narrowS rep n))
Ian Lynagh's avatar
Ian Lynagh committed
223
cmmMachOpFoldM _ (MO_Sub _) [CmmReg reg, CmmLit (CmmInt n rep)]
224
  = Just $ cmmRegOff reg (- fromIntegral (narrowS rep n))
Ian Lynagh's avatar
Ian Lynagh committed
225
cmmMachOpFoldM _ (MO_Sub _) [CmmRegOff reg off, CmmLit (CmmInt n rep)]
226
  = Just $ cmmRegOff reg (off - fromIntegral (narrowS rep n))
227
228
229

-- Fold label(+/-)offset into a CmmLit where possible

230
231
232
233
234
235
cmmMachOpFoldM _ (MO_Add _) [CmmLit lit, CmmLit (CmmInt i rep)]
  = Just $ CmmLit (cmmOffsetLit lit (fromIntegral (narrowU rep i)))
cmmMachOpFoldM _ (MO_Add _) [CmmLit (CmmInt i rep), CmmLit lit]
  = Just $ CmmLit (cmmOffsetLit lit (fromIntegral (narrowU rep i)))
cmmMachOpFoldM _ (MO_Sub _) [CmmLit lit, CmmLit (CmmInt i rep)]
  = Just $ CmmLit (cmmOffsetLit lit (fromIntegral (negate (narrowU rep i))))
236

237

238
239
240
241
242
243
-- Comparison of literal with widened operand: perform the comparison
-- at the smaller width, as long as the literal is within range.

-- We can't do the reverse trick, when the operand is narrowed:
-- narrowing throws away bits from the operand, there's no way to do
-- the same comparison at the larger size.
244

245
cmmMachOpFoldM dflags cmp [CmmMachOp conv [x], CmmLit (CmmInt i _)]
Ian Lynagh's avatar
Ian Lynagh committed
246
  |     -- powerPC NCG has a TODO for I8/I16 comparisons, so don't try
247
    platformArch (targetPlatform dflags) `elem` [ArchX86, ArchX86_64],
Ian Lynagh's avatar
Ian Lynagh committed
248
        -- if the operand is widened:
249
250
251
252
253
254
    Just (rep, signed, narrow_fn) <- maybe_conversion conv,
        -- and this is a comparison operation:
    Just narrow_cmp <- maybe_comparison cmp rep signed,
        -- and the literal fits in the smaller size:
    i == narrow_fn rep i
        -- then we can do the comparison at the smaller size
255
  = Just (cmmMachOpFold dflags narrow_cmp [x, CmmLit (CmmInt i rep)])
256
 where
257
    maybe_conversion (MO_UU_Conv from to)
258
259
        | to > from
        = Just (from, False, narrowU)
260
261
    maybe_conversion (MO_SS_Conv from to)
        | to > from
262
        = Just (from, True, narrowS)
263

Simon Marlow's avatar
Simon Marlow committed
264
265
        -- don't attempt to apply this optimisation when the source
        -- is a float; see #1916
266
    maybe_conversion _ = Nothing
267

268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
        -- careful (#2080): if the original comparison was signed, but
        -- we were doing an unsigned widen, then we must do an
        -- unsigned comparison at the smaller size.
    maybe_comparison (MO_U_Gt _) rep _     = Just (MO_U_Gt rep)
    maybe_comparison (MO_U_Ge _) rep _     = Just (MO_U_Ge rep)
    maybe_comparison (MO_U_Lt _) rep _     = Just (MO_U_Lt rep)
    maybe_comparison (MO_U_Le _) rep _     = Just (MO_U_Le rep)
    maybe_comparison (MO_Eq   _) rep _     = Just (MO_Eq   rep)
    maybe_comparison (MO_S_Gt _) rep True  = Just (MO_S_Gt rep)
    maybe_comparison (MO_S_Ge _) rep True  = Just (MO_S_Ge rep)
    maybe_comparison (MO_S_Lt _) rep True  = Just (MO_S_Lt rep)
    maybe_comparison (MO_S_Le _) rep True  = Just (MO_S_Le rep)
    maybe_comparison (MO_S_Gt _) rep False = Just (MO_U_Gt rep)
    maybe_comparison (MO_S_Ge _) rep False = Just (MO_U_Ge rep)
    maybe_comparison (MO_S_Lt _) rep False = Just (MO_U_Lt rep)
    maybe_comparison (MO_S_Le _) rep False = Just (MO_U_Le rep)
    maybe_comparison _ _ _ = Nothing
285

286
287
-- We can often do something with constants of 0 and 1 ...

288
cmmMachOpFoldM dflags mop [x, y@(CmmLit (CmmInt 0 _))]
289
  = case mop of
Ian Lynagh's avatar
Ian Lynagh committed
290
291
292
293
294
295
296
297
298
299
300
301
302
        MO_Add   _ -> Just x
        MO_Sub   _ -> Just x
        MO_Mul   _ -> Just y
        MO_And   _ -> Just y
        MO_Or    _ -> Just x
        MO_Xor   _ -> Just x
        MO_Shl   _ -> Just x
        MO_S_Shr _ -> Just x
        MO_U_Shr _ -> Just x
        MO_Ne    _ | isComparisonExpr x -> Just x
        MO_Eq    _ | Just x' <- maybeInvertCmmExpr x -> Just x'
        MO_U_Gt  _ | isComparisonExpr x -> Just x
        MO_S_Gt  _ | isComparisonExpr x -> Just x
303
304
305
306
        MO_U_Lt  _ | isComparisonExpr x -> Just $ CmmLit (CmmInt 0 (wordWidth dflags))
        MO_S_Lt  _ | isComparisonExpr x -> Just $ CmmLit (CmmInt 0 (wordWidth dflags))
        MO_U_Ge  _ | isComparisonExpr x -> Just $ CmmLit (CmmInt 1 (wordWidth dflags))
        MO_S_Ge  _ | isComparisonExpr x -> Just $ CmmLit (CmmInt 1 (wordWidth dflags))
Ian Lynagh's avatar
Ian Lynagh committed
307
308
309
310
        MO_U_Le  _ | Just x' <- maybeInvertCmmExpr x -> Just x'
        MO_S_Le  _ | Just x' <- maybeInvertCmmExpr x -> Just x'
        _ -> Nothing

311
cmmMachOpFoldM dflags mop [x, (CmmLit (CmmInt 1 rep))]
312
  = case mop of
Ian Lynagh's avatar
Ian Lynagh committed
313
314
315
316
317
318
319
320
321
        MO_Mul    _ -> Just x
        MO_S_Quot _ -> Just x
        MO_U_Quot _ -> Just x
        MO_S_Rem  _ -> Just $ CmmLit (CmmInt 0 rep)
        MO_U_Rem  _ -> Just $ CmmLit (CmmInt 0 rep)
        MO_Ne    _ | Just x' <- maybeInvertCmmExpr x -> Just x'
        MO_Eq    _ | isComparisonExpr x -> Just x
        MO_U_Lt  _ | Just x' <- maybeInvertCmmExpr x -> Just x'
        MO_S_Lt  _ | Just x' <- maybeInvertCmmExpr x -> Just x'
322
323
324
325
        MO_U_Gt  _ | isComparisonExpr x -> Just $ CmmLit (CmmInt 0 (wordWidth dflags))
        MO_S_Gt  _ | isComparisonExpr x -> Just $ CmmLit (CmmInt 0 (wordWidth dflags))
        MO_U_Le  _ | isComparisonExpr x -> Just $ CmmLit (CmmInt 1 (wordWidth dflags))
        MO_S_Le  _ | isComparisonExpr x -> Just $ CmmLit (CmmInt 1 (wordWidth dflags))
Ian Lynagh's avatar
Ian Lynagh committed
326
327
328
        MO_U_Ge  _ | isComparisonExpr x -> Just x
        MO_S_Ge  _ | isComparisonExpr x -> Just x
        _ -> Nothing
329
330
331

-- Now look for multiplication/division by powers of 2 (integers).

332
cmmMachOpFoldM dflags mop [x, (CmmLit (CmmInt n _))]
333
  = case mop of
334
335
        MO_Mul rep
           | Just p <- exactLog2 n ->
336
                 Just (cmmMachOpFold dflags (MO_Shl rep) [x, CmmLit (CmmInt p rep)])
337
338
        MO_U_Quot rep
           | Just p <- exactLog2 n ->
339
                 Just (cmmMachOpFold dflags (MO_U_Shr rep) [x, CmmLit (CmmInt p rep)])
340
        MO_S_Quot rep
Jan Stolarek's avatar
Jan Stolarek committed
341
           | Just p <- exactLog2 n,
342
343
344
345
346
347
348
349
             CmmReg _ <- x ->   -- We duplicate x below, hence require
                                -- it is a reg.  FIXME: remove this restriction.
                -- shift right is not the same as quot, because it rounds
                -- to minus infinity, whereasq quot rounds toward zero.
                -- To fix this up, we add one less than the divisor to the
                -- dividend if it is a negative number.
                --
                -- to avoid a test/jump, we use the following sequence:
Ian Lynagh's avatar
Ian Lynagh committed
350
                --      x1 = x >> word_size-1  (all 1s if -ve, all 0s if +ve)
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
                --      x2 = y & (divisor-1)
                --      result = (x+x2) >>= log2(divisor)
                -- this could be done a bit more simply using conditional moves,
                -- but we're processor independent here.
                --
                -- we optimise the divide by 2 case slightly, generating
                --      x1 = x >> word_size-1  (unsigned)
                --      return = (x + x1) >>= log2(divisor)
                let
                    bits = fromIntegral (widthInBits rep) - 1
                    shr = if p == 1 then MO_U_Shr rep else MO_S_Shr rep
                    x1 = CmmMachOp shr [x, CmmLit (CmmInt bits rep)]
                    x2 = if p == 1 then x1 else
                         CmmMachOp (MO_And rep) [x1, CmmLit (CmmInt (n-1) rep)]
                    x3 = CmmMachOp (MO_Add rep) [x, x2]
                in
367
                Just (cmmMachOpFold dflags (MO_S_Shr rep) [x3, CmmLit (CmmInt p rep)])
Ian Lynagh's avatar
Ian Lynagh committed
368
        _ -> Nothing
369

Simon Marlow's avatar
comment    
Simon Marlow committed
370
371
372
373
374
-- ToDo (#7116): optimise floating-point multiplication, e.g. x*2.0 -> x+x
-- Unfortunately this needs a unique supply because x might not be a
-- register.  See #2253 (program 6) for an example.


375
376
-- Anything else is just too hard.

Ian Lynagh's avatar
Ian Lynagh committed
377
cmmMachOpFoldM _ _ _ = Nothing
378
379
380
381

-- -----------------------------------------------------------------------------
-- Utils

Ian Lynagh's avatar
Ian Lynagh committed
382
isLit :: CmmExpr -> Bool
383
384
385
386
387
isLit (CmmLit _) = True
isLit _          = False

isComparisonExpr :: CmmExpr -> Bool
isComparisonExpr (CmmMachOp op _) = isComparisonMachOp op
Ian Lynagh's avatar
Ian Lynagh committed
388
isComparisonExpr _                  = False
389

Ian Lynagh's avatar
Ian Lynagh committed
390
isPicReg :: CmmExpr -> Bool
391
isPicReg (CmmReg (CmmGlobal PicBaseReg)) = True
392
isPicReg _ = False