TrivColorable.hs 11.4 KB
Newer Older
1
{-# LANGUAGE CPP #-}
2

Sylvain Henry's avatar
Sylvain Henry committed
3
module GHC.CmmToAsm.Reg.Graph.TrivColorable (
4
        trivColorable,
5
)
6 7 8 9 10

where

#include "HsVersions.h"

11 12
import GhcPrelude

Sylvain Henry's avatar
Sylvain Henry committed
13 14
import GHC.Platform.Reg.Class
import GHC.Platform.Reg
15

16
import GraphBase
17

David Feuer's avatar
David Feuer committed
18
import UniqSet
John Ericson's avatar
John Ericson committed
19
import GHC.Platform
20
import Panic
21 22 23 24

-- trivColorable ---------------------------------------------------------------

-- trivColorable function for the graph coloring allocator
25
--
26 27
--      This gets hammered by scanGraph during register allocation,
--      so needs to be fairly efficient.
28
--
29
--      NOTE:   This only works for architectures with just RcInteger and RcDouble
30
--              (which are disjoint) ie. x86, x86_64 and ppc
31
--
32
--      The number of allocatable regs is hard coded in here so we can do
Gabor Greif's avatar
Gabor Greif committed
33
--              a fast comparison in trivColorable.
34
--
35 36 37
--      It's ok if these numbers are _less_ than the actual number of free
--              regs, but they can't be more or the register conflict
--              graph won't color.
38
--
39 40
--      If the graph doesn't color then the allocator will panic, but it won't
--              generate bad object code or anything nasty like that.
41
--
42 43 44 45
--      There is an allocatableRegsInClass :: RegClass -> Int, but doing
--      the unboxing is too slow for us here.
--      TODO: Is that still true? Could we use allocatableRegsInClass
--      without losing performance now?
46
--
47
--      Look at includes/stg/MachRegs.h to get the numbers.
48 49 50 51 52
--


-- Disjoint registers ----------------------------------------------------------
--
53 54 55 56 57
--      The definition has been unfolded into individual cases for speed.
--      Each architecture has a different register setup, so we use a
--      different regSqueeze function for each.
--
accSqueeze
58 59 60
        :: Int
        -> Int
        -> (reg -> Int)
David Feuer's avatar
David Feuer committed
61
        -> UniqSet reg
62
        -> Int
63

David Feuer's avatar
David Feuer committed
64
accSqueeze count maxCount squeeze us = acc count (nonDetEltsUniqSet us)
65
  -- See Note [Unique Determinism and code generation]
66
  where acc count [] = count
67 68
        acc count _ | count >= maxCount = count
        acc count (r:rs) = acc (count + squeeze r) rs
69 70 71 72 73

{- Note [accSqueeze]
~~~~~~~~~~~~~~~~~~~~
BL 2007/09
Doing a nice fold over the UniqSet makes trivColorable use
74
32% of total compile time and 42% of total alloc when compiling SHA1.hs from darcs.
75 76 77 78
Therefore the UniqFM is made non-abstract and we use custom fold.

MS 2010/04
When converting UniqFM to use Data.IntMap, the fold cannot use UniqFM internal
79
representation any more. But it is imperative that the accSqueeze stops
80 81
the folding if the count gets greater or equal to maxCount. We thus convert
UniqFM to a (lazy) list, do the fold and stops if necessary, which was
82
the most efficient variant tried. Benchmark compiling 10-times SHA1.hs follows.
83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100
(original = previous implementation, folding = fold of the whole UFM,
 lazyFold = the current implementation,
 hackFold = using internal representation of Data.IntMap)

                                 original  folding   hackFold  lazyFold
 -O -fasm (used everywhere)      31.509s   30.387s   30.791s   30.603s
                                 100.00%   96.44%    97.72%    97.12%
 -fregs-graph                    67.938s   74.875s   62.673s   64.679s
                                 100.00%   110.21%   92.25%    95.20%
 -fregs-iterative                89.761s   143.913s  81.075s   86.912s
                                 100.00%   160.33%   90.32%    96.83%
 -fnew-codegen                   38.225s   37.142s   37.551s   37.119s
                                 100.00%   97.17%    98.24%    97.11%
 -fnew-codegen -fregs-graph      91.786s   91.51s    87.368s   86.88s
                                 100.00%   99.70%    95.19%    94.65%
 -fnew-codegen -fregs-iterative  206.72s   343.632s  194.694s  208.677s
                                 100.00%   166.23%   94.18%    100.95%
-}
101 102

trivColorable
103
        :: Platform
104 105
        -> (RegClass -> VirtualReg -> Int)
        -> (RegClass -> RealReg    -> Int)
106
        -> Triv VirtualReg RegClass RealReg
107

108
trivColorable platform virtualRegSqueeze realRegSqueeze RcInteger conflicts exclusions
109 110
        | let cALLOCATABLE_REGS_INTEGER
                  =        (case platformArch platform of
111 112 113 114
                            ArchX86       -> 3
                            ArchX86_64    -> 5
                            ArchPPC       -> 16
                            ArchSPARC     -> 14
115
                            ArchSPARC64   -> panic "trivColorable ArchSPARC64"
116
                            ArchPPC_64 _  -> 15
117
                            ArchARM _ _ _ -> panic "trivColorable ArchARM"
Colin Watson's avatar
Colin Watson committed
118
                            ArchARM64     -> panic "trivColorable ArchARM64"
119 120 121
                            ArchAlpha     -> panic "trivColorable ArchAlpha"
                            ArchMipseb    -> panic "trivColorable ArchMipseb"
                            ArchMipsel    -> panic "trivColorable ArchMipsel"
122
                            ArchS390X     -> panic "trivColorable ArchS390X"
thoughtpolice's avatar
thoughtpolice committed
123
                            ArchJavaScript-> panic "trivColorable ArchJavaScript"
124
                            ArchUnknown   -> panic "trivColorable ArchUnknown")
125
        , count2        <- accSqueeze 0 cALLOCATABLE_REGS_INTEGER
126 127 128
                                (virtualRegSqueeze RcInteger)
                                conflicts

129
        , count3        <- accSqueeze  count2    cALLOCATABLE_REGS_INTEGER
130 131
                                (realRegSqueeze   RcInteger)
                                exclusions
132

133
        = count3 < cALLOCATABLE_REGS_INTEGER
134

135
trivColorable platform virtualRegSqueeze realRegSqueeze RcFloat conflicts exclusions
136 137
        | let cALLOCATABLE_REGS_FLOAT
                  =        (case platformArch platform of
138 139 140 141
                    -- On x86_64 and x86, Float and RcDouble
                    -- use the same registers,
                    -- so we only use RcDouble to represent the
                    -- register allocation problem on those types.
142 143 144 145
                            ArchX86       -> 0
                            ArchX86_64    -> 0
                            ArchPPC       -> 0
                            ArchSPARC     -> 22
146
                            ArchSPARC64   -> panic "trivColorable ArchSPARC64"
147
                            ArchPPC_64 _  -> 0
148
                            ArchARM _ _ _ -> panic "trivColorable ArchARM"
Colin Watson's avatar
Colin Watson committed
149
                            ArchARM64     -> panic "trivColorable ArchARM64"
150 151 152
                            ArchAlpha     -> panic "trivColorable ArchAlpha"
                            ArchMipseb    -> panic "trivColorable ArchMipseb"
                            ArchMipsel    -> panic "trivColorable ArchMipsel"
153
                            ArchS390X     -> panic "trivColorable ArchS390X"
thoughtpolice's avatar
thoughtpolice committed
154
                            ArchJavaScript-> panic "trivColorable ArchJavaScript"
155
                            ArchUnknown   -> panic "trivColorable ArchUnknown")
156
        , count2        <- accSqueeze 0 cALLOCATABLE_REGS_FLOAT
157 158 159
                                (virtualRegSqueeze RcFloat)
                                conflicts

160
        , count3        <- accSqueeze  count2    cALLOCATABLE_REGS_FLOAT
161 162
                                (realRegSqueeze   RcFloat)
                                exclusions
163

164
        = count3 < cALLOCATABLE_REGS_FLOAT
165

166
trivColorable platform virtualRegSqueeze realRegSqueeze RcDouble conflicts exclusions
167 168
        | let cALLOCATABLE_REGS_DOUBLE
                  =        (case platformArch platform of
169 170 171 172 173 174 175 176
                            ArchX86       -> 8
                            -- in x86 32bit mode sse2 there are only
                            -- 8 XMM registers xmm0 ... xmm7
                            ArchX86_64    -> 10
                            -- in x86_64 there are 16 XMM registers
                            -- xmm0 .. xmm15, here 10 is a
                            -- "dont need to solve conflicts" count that
                            -- was chosen at some point in the past.
177 178
                            ArchPPC       -> 26
                            ArchSPARC     -> 11
179
                            ArchSPARC64   -> panic "trivColorable ArchSPARC64"
180
                            ArchPPC_64 _  -> 20
181
                            ArchARM _ _ _ -> panic "trivColorable ArchARM"
Colin Watson's avatar
Colin Watson committed
182
                            ArchARM64     -> panic "trivColorable ArchARM64"
183 184 185
                            ArchAlpha     -> panic "trivColorable ArchAlpha"
                            ArchMipseb    -> panic "trivColorable ArchMipseb"
                            ArchMipsel    -> panic "trivColorable ArchMipsel"
186
                            ArchS390X     -> panic "trivColorable ArchS390X"
thoughtpolice's avatar
thoughtpolice committed
187
                            ArchJavaScript-> panic "trivColorable ArchJavaScript"
188
                            ArchUnknown   -> panic "trivColorable ArchUnknown")
189
        , count2        <- accSqueeze 0 cALLOCATABLE_REGS_DOUBLE
190 191
                                (virtualRegSqueeze RcDouble)
                                conflicts
192

193
        , count3        <- accSqueeze  count2    cALLOCATABLE_REGS_DOUBLE
194 195 196
                                (realRegSqueeze   RcDouble)
                                exclusions

197
        = count3 < cALLOCATABLE_REGS_DOUBLE
198

199 200


201

202 203
-- Specification Code ----------------------------------------------------------
--
204 205
--      The trivColorable function for each particular architecture should
--      implement the following function, but faster.
206 207
--

208 209 210 211 212
{-
trivColorable :: RegClass -> UniqSet Reg -> UniqSet Reg -> Bool
trivColorable classN conflicts exclusions
 = let

213 214 215 216 217 218
        acc :: Reg -> (Int, Int) -> (Int, Int)
        acc r (cd, cf)
         = case regClass r of
                RcInteger       -> (cd+1, cf)
                RcFloat         -> (cd,   cf+1)
                _               -> panic "Regs.trivColorable: reg class not handled"
219

niteria's avatar
niteria committed
220 221
        tmp                     = nonDetFoldUFM acc (0, 0) conflicts
        (countInt,  countFloat) = nonDetFoldUFM acc tmp    exclusions
222

223 224
        squeese         = worst countInt   classN RcInteger
                        + worst countFloat classN RcFloat
225

226
   in   squeese < allocatableRegsInClass classN
227 228

-- | Worst case displacement
229
--      node N of classN has n neighbors of class C.
230
--
231 232
--      We currently only have RcInteger and RcDouble, which don't conflict at all.
--      This is a bit boring compared to what's in RegArchX86.
233 234 235 236
--
worst :: Int -> RegClass -> RegClass -> Int
worst n classN classC
 = case classN of
237 238 239 240 241 242 243 244 245
        RcInteger
         -> case classC of
                RcInteger       -> min n (allocatableRegsInClass RcInteger)
                RcFloat         -> 0

        RcDouble
         -> case classC of
                RcFloat         -> min n (allocatableRegsInClass RcFloat)
                RcInteger       -> 0
246

247 248 249 250 251
-- allocatableRegs is allMachRegNos with the fixed-use regs removed.
-- i.e., these are the regs for which we are prepared to allow the
-- register allocator to attempt to map VRegs to.
allocatableRegs :: [RegNo]
allocatableRegs
thomie's avatar
thomie committed
252
   = let isFree i = freeReg i
253
     in  filter isFree allMachRegNos
254 255


256
-- | The number of regs in each class.
257 258
--      We go via top level CAFs to ensure that we're not recomputing
--      the length of these lists each time the fn is called.
259 260 261
allocatableRegsInClass :: RegClass -> Int
allocatableRegsInClass cls
 = case cls of
262 263
        RcInteger       -> allocatableRegsInteger
        RcFloat         -> allocatableRegsDouble
264

265
allocatableRegsInteger :: Int
266 267 268
allocatableRegsInteger
        = length $ filter (\r -> regClass r == RcInteger)
                 $ map RealReg allocatableRegs
269

270 271
allocatableRegsFloat :: Int
allocatableRegsFloat
272 273
        = length $ filter (\r -> regClass r == RcFloat
                 $ map RealReg allocatableRegs
274
-}