UniqDFM.hs 14.5 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27
{-
(c) Bartosz Nitka, Facebook, 2015

UniqDFM: Specialised deterministic finite maps, for things with @Uniques@.

Basically, the things need to be in class @Uniquable@, and we use the
@getUnique@ method to grab their @Uniques@.

This is very similar to @UniqFM@, the major difference being that the order of
folding is not dependent on @Unique@ ordering, giving determinism.
Currently the ordering is determined by insertion order.

See Note [Unique Determinism] in Unique for explanation why @Unique@ ordering
is not deterministic.
-}

{-# LANGUAGE DeriveDataTypeable #-}
{-# LANGUAGE DeriveFunctor #-}
{-# LANGUAGE FlexibleContexts #-}
{-# OPTIONS_GHC -Wall #-}

module UniqDFM (
        -- * Unique-keyed deterministic mappings
        UniqDFM,       -- abstract type

        -- ** Manipulating those mappings
        emptyUDFM,
28
        unitUDFM,
29
        addToUDFM,
30
        addToUDFM_C,
niteria's avatar
niteria committed
31
        addListToUDFM,
32
        delFromUDFM,
33 34 35 36
        delListFromUDFM,
        adjustUDFM,
        alterUDFM,
        mapUDFM,
37
        plusUDFM,
niteria's avatar
niteria committed
38
        plusUDFM_C,
niteria's avatar
niteria committed
39
        lookupUDFM, lookupUDFM_Directly,
40
        elemUDFM,
41 42
        foldUDFM,
        eltsUDFM,
niteria's avatar
niteria committed
43
        filterUDFM, filterUDFM_Directly,
44 45
        isNullUDFM,
        sizeUDFM,
niteria's avatar
niteria committed
46
        intersectUDFM, udfmIntersectUFM,
47
        intersectsUDFM,
48
        disjointUDFM, disjointUdfmUfm,
49
        minusUDFM,
50
        listToUDFM,
51
        udfmMinusUFM,
52
        partitionUDFM,
niteria's avatar
niteria committed
53 54
        anyUDFM, allUDFM,
        pprUDFM,
55

56
        udfmToList,
57
        udfmToUfm,
niteria's avatar
niteria committed
58
        nonDetFoldUDFM,
59
        alwaysUnsafeUfmToUdfm,
60 61 62 63 64 65 66 67 68
    ) where

import Unique           ( Uniquable(..), Unique, getKey )
import Outputable

import qualified Data.IntMap as M
import Data.Data
import Data.List (sortBy)
import Data.Function (on)
69
import UniqFM (UniqFM, listToUFM_Directly, ufmToList, ufmToIntMap)
70 71 72

-- Note [Deterministic UniqFM]
-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~
73 74 75 76 77 78 79 80 81 82 83 84 85 86
-- A @UniqDFM@ is just like @UniqFM@ with the following additional
-- property: the function `udfmToList` returns the elements in some
-- deterministic order not depending on the Unique key for those elements.
--
-- If the client of the map performs operations on the map in deterministic
-- order then `udfmToList` returns them in deterministic order.
--
-- There is an implementation cost: each element is given a serial number
-- as it is added, and `udfmToList` sorts it's result by this serial
-- number. So you should only use `UniqDFM` if you need the deterministic
-- property.
--
-- `foldUDFM` also preserves determinism.
--
87 88 89 90 91 92
-- Normal @UniqFM@ when you turn it into a list will use
-- Data.IntMap.toList function that returns the elements in the order of
-- the keys. The keys in @UniqFM@ are always @Uniques@, so you end up with
-- with a list ordered by @Uniques@.
-- The order of @Uniques@ is known to be not stable across rebuilds.
-- See Note [Unique Determinism] in Unique.
93 94
--
--
95 96 97 98 99 100 101 102 103 104
-- There's more than one way to implement this. The implementation here tags
-- every value with the insertion time that can later be used to sort the
-- values when asked to convert to a list.
--
-- An alternative would be to have
--
--   data UniqDFM ele = UDFM (M.IntMap ele) [ele]
--
-- where the list determines the order. This makes deletion tricky as we'd
-- only accumulate elements in that list, but makes merging easier as you
105 106 107 108 109 110 111 112 113
-- can just merge both structures independently.
-- Deletion can probably be done in amortized fashion when the size of the
-- list is twice the size of the set.

-- | A type of values tagged with insertion time
data TaggedVal val =
  TaggedVal
    val
    {-# UNPACK #-} !Int -- ^ insertion time
114
  deriving Data
115 116 117 118 119 120 121 122 123 124 125 126 127

taggedFst :: TaggedVal val -> val
taggedFst (TaggedVal v _) = v

taggedSnd :: TaggedVal val -> Int
taggedSnd (TaggedVal _ i) = i

instance Eq val => Eq (TaggedVal val) where
  (TaggedVal v1 _) == (TaggedVal v2 _) = v1 == v2

instance Functor TaggedVal where
  fmap f (TaggedVal val i) = TaggedVal (f val) i

128 129 130 131 132 133 134 135 136
-- | Type of unique deterministic finite maps
data UniqDFM ele =
  UDFM
    !(M.IntMap (TaggedVal ele)) -- A map where keys are Unique's values and
                                -- values are tagged with insertion time.
                                -- The invariant is that all the tags will
                                -- be distinct within a single map
    {-# UNPACK #-} !Int         -- Upper bound on the values' insertion
                                -- time. See Note [Overflow on plusUDFM]
137
  deriving (Data, Functor)
138 139 140 141

emptyUDFM :: UniqDFM elt
emptyUDFM = UDFM M.empty 0

142 143 144
unitUDFM :: Uniquable key => key -> elt -> UniqDFM elt
unitUDFM k v = UDFM (M.singleton (getKey $ getUnique k) (TaggedVal v 0)) 1

145 146 147 148
addToUDFM :: Uniquable key => UniqDFM elt -> key -> elt  -> UniqDFM elt
addToUDFM (UDFM m i) k v =
  UDFM (M.insert (getKey $ getUnique k) (TaggedVal v i) m) (i + 1)

149 150 151 152
addToUDFM_Directly :: UniqDFM elt -> Unique -> elt -> UniqDFM elt
addToUDFM_Directly (UDFM m i) u v =
  UDFM (M.insert (getKey u) (TaggedVal v i) m) (i + 1)

niteria's avatar
niteria committed
153 154 155 156 157 158 159
addToUDFM_Directly_C
  :: (elt -> elt -> elt) -> UniqDFM elt -> Unique -> elt -> UniqDFM elt
addToUDFM_Directly_C f (UDFM m i) u v =
  UDFM (M.insertWith tf (getKey u) (TaggedVal v i) m) (i + 1)
  where
  tf (TaggedVal a j) (TaggedVal b _) = TaggedVal (f a b) j

niteria's avatar
niteria committed
160 161 162
addListToUDFM :: Uniquable key => UniqDFM elt -> [(key,elt)] -> UniqDFM elt
addListToUDFM = foldl (\m (k, v) -> addToUDFM m k v)

163 164 165 166 167 168 169 170 171 172 173 174
addToUDFM_C
  :: Uniquable key => (elt -> elt -> elt) -- old -> new -> result
  -> UniqDFM elt -- old
  -> key -> elt -- new
  -> UniqDFM elt -- result
addToUDFM_C f (UDFM m i) k v =
  UDFM (M.insertWith tf (getKey $ getUnique k) (TaggedVal v i) m) (i + 1)
  where
  tf (TaggedVal a j) (TaggedVal b _) = TaggedVal (f b a) j
                                       -- Flip the arguments, just like
                                       -- addToUFM_C does.

175 176 177
addListToUDFM_Directly :: UniqDFM elt -> [(Unique,elt)] -> UniqDFM elt
addListToUDFM_Directly = foldl (\m (k, v) -> addToUDFM_Directly m k v)

niteria's avatar
niteria committed
178 179 180 181
addListToUDFM_Directly_C
  :: (elt -> elt -> elt) -> UniqDFM elt -> [(Unique,elt)] -> UniqDFM elt
addListToUDFM_Directly_C f = foldl (\m (k, v) -> addToUDFM_Directly_C f m k v)

182 183 184
delFromUDFM :: Uniquable key => UniqDFM elt -> key -> UniqDFM elt
delFromUDFM (UDFM m i) k = UDFM (M.delete (getKey $ getUnique k) m) i

niteria's avatar
niteria committed
185 186 187 188 189 190 191
plusUDFM_C :: (elt -> elt -> elt) -> UniqDFM elt -> UniqDFM elt -> UniqDFM elt
plusUDFM_C f udfml@(UDFM _ i) udfmr@(UDFM _ j)
  -- we will use the upper bound on the tag as a proxy for the set size,
  -- to insert the smaller one into the bigger one
  | i > j = insertUDFMIntoLeft_C f udfml udfmr
  | otherwise = insertUDFMIntoLeft_C f udfmr udfml

192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234
-- Note [Overflow on plusUDFM]
-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~
-- There are multiple ways of implementing plusUDFM.
-- The main problem that needs to be solved is overlap on times of
-- insertion between different keys in two maps.
-- Consider:
--
-- A = fromList [(a, (x, 1))]
-- B = fromList [(b, (y, 1))]
--
-- If you merge them naively you end up with:
--
-- C = fromList [(a, (x, 1)), (b, (y, 1))]
--
-- Which loses information about ordering and brings us back into
-- non-deterministic world.
--
-- The solution I considered before would increment the tags on one of the
-- sets by the upper bound of the other set. The problem with this approach
-- is that you'll run out of tags for some merge patterns.
-- Say you start with A with upper bound 1, you merge A with A to get A' and
-- the upper bound becomes 2. You merge A' with A' and the upper bound
-- doubles again. After 64 merges you overflow.
-- This solution would have the same time complexity as plusUFM, namely O(n+m).
--
-- The solution I ended up with has time complexity of
-- O(m log m + m * min (n+m, W)) where m is the smaller set.
-- It simply inserts the elements of the smaller set into the larger
-- set in the order that they were inserted into the smaller set. That's
-- O(m log m) for extracting the elements from the smaller set in the
-- insertion order and O(m * min(n+m, W)) to insert them into the bigger
-- set.

plusUDFM :: UniqDFM elt -> UniqDFM elt -> UniqDFM elt
plusUDFM udfml@(UDFM _ i) udfmr@(UDFM _ j)
  -- we will use the upper bound on the tag as a proxy for the set size,
  -- to insert the smaller one into the bigger one
  | i > j = insertUDFMIntoLeft udfml udfmr
  | otherwise = insertUDFMIntoLeft udfmr udfml

insertUDFMIntoLeft :: UniqDFM elt -> UniqDFM elt -> UniqDFM elt
insertUDFMIntoLeft udfml udfmr = addListToUDFM_Directly udfml $ udfmToList udfmr

niteria's avatar
niteria committed
235 236 237 238 239
insertUDFMIntoLeft_C
  :: (elt -> elt -> elt) -> UniqDFM elt -> UniqDFM elt -> UniqDFM elt
insertUDFMIntoLeft_C f udfml udfmr =
  addListToUDFM_Directly_C f udfml $ udfmToList udfmr

240 241 242
lookupUDFM :: Uniquable key => UniqDFM elt -> key -> Maybe elt
lookupUDFM (UDFM m _i) k = taggedFst `fmap` M.lookup (getKey $ getUnique k) m

niteria's avatar
niteria committed
243 244 245
lookupUDFM_Directly :: UniqDFM elt -> Unique -> Maybe elt
lookupUDFM_Directly (UDFM m _i) k = taggedFst `fmap` M.lookup (getKey k) m

246 247 248 249 250
elemUDFM :: Uniquable key => key -> UniqDFM elt -> Bool
elemUDFM k (UDFM m _i) = M.member (getKey $ getUnique k) m

-- | Performs a deterministic fold over the UniqDFM.
-- It's O(n log n) while the corresponding function on `UniqFM` is O(n).
251 252 253
foldUDFM :: (elt -> a -> a) -> a -> UniqDFM elt -> a
foldUDFM k z m = foldr k z (eltsUDFM m)

niteria's avatar
niteria committed
254 255 256 257 258 259 260
-- | Performs a nondeterministic fold over the UniqDFM.
-- It's O(n), same as the corresponding function on `UniqFM`.
-- If you use this please provide a justification why it doesn't introduce
-- nondeterminism.
nonDetFoldUDFM :: (elt -> a -> a) -> a -> UniqDFM elt -> a
nonDetFoldUDFM k z (UDFM m _i) = foldr k z $ map taggedFst $ M.elems m

261 262 263 264
eltsUDFM :: UniqDFM elt -> [elt]
eltsUDFM (UDFM m _i) =
  map taggedFst $ sortBy (compare `on` taggedSnd) $ M.elems m

265 266 267
filterUDFM :: (elt -> Bool) -> UniqDFM elt -> UniqDFM elt
filterUDFM p (UDFM m i) = UDFM (M.filter (\(TaggedVal v _) -> p v) m) i

niteria's avatar
niteria committed
268 269 270 271 272
filterUDFM_Directly :: (Unique -> elt -> Bool) -> UniqDFM elt -> UniqDFM elt
filterUDFM_Directly p (UDFM m i) = UDFM (M.filterWithKey p' m) i
  where
  p' k (TaggedVal v _) = p (getUnique k) v

273 274
-- | Converts `UniqDFM` to a list, with elements in deterministic order.
-- It's O(n log n) while the corresponding function on `UniqFM` is O(n).
275 276 277 278 279
udfmToList :: UniqDFM elt -> [(Unique, elt)]
udfmToList (UDFM m _i) =
  [ (getUnique k, taggedFst v)
  | (k, v) <- sortBy (compare `on` (taggedSnd . snd)) $ M.toList m ]

280 281 282 283 284 285 286 287 288 289 290
isNullUDFM :: UniqDFM elt -> Bool
isNullUDFM (UDFM m _) = M.null m

sizeUDFM :: UniqDFM elt -> Int
sizeUDFM (UDFM m _i) = M.size m

intersectUDFM :: UniqDFM elt -> UniqDFM elt -> UniqDFM elt
intersectUDFM (UDFM x i) (UDFM y _j) = UDFM (M.intersection x y) i
  -- M.intersection is left biased, that means the result will only have
  -- a subset of elements from the left set, so `i` is a good upper bound.

niteria's avatar
niteria committed
291 292 293 294 295
udfmIntersectUFM :: UniqDFM elt -> UniqFM elt -> UniqDFM elt
udfmIntersectUFM (UDFM x i) y = UDFM (M.intersection x (ufmToIntMap y)) i
  -- M.intersection is left biased, that means the result will only have
  -- a subset of elements from the left set, so `i` is a good upper bound.

296 297 298
intersectsUDFM :: UniqDFM elt -> UniqDFM elt -> Bool
intersectsUDFM x y = isNullUDFM (x `intersectUDFM` y)

299 300 301
disjointUDFM :: UniqDFM elt -> UniqDFM elt -> Bool
disjointUDFM (UDFM x _i) (UDFM y _j) = M.null (M.intersection x y)

302 303 304
disjointUdfmUfm :: UniqDFM elt -> UniqFM elt2 -> Bool
disjointUdfmUfm (UDFM x _i) y = M.null (M.intersection x (ufmToIntMap y))

305 306 307 308 309
minusUDFM :: UniqDFM elt1 -> UniqDFM elt2 -> UniqDFM elt1
minusUDFM (UDFM x i) (UDFM y _j) = UDFM (M.difference x y) i
  -- M.difference returns a subset of a left set, so `i` is a good upper
  -- bound.

310 311 312 313 314
udfmMinusUFM :: UniqDFM elt1 -> UniqFM elt2 -> UniqDFM elt1
udfmMinusUFM (UDFM x i) y = UDFM (M.difference x (ufmToIntMap y)) i
  -- M.difference returns a subset of a left set, so `i` is a good upper
  -- bound.

315 316 317 318 319 320 321 322 323 324
-- | Partition UniqDFM into two UniqDFMs according to the predicate
partitionUDFM :: (elt -> Bool) -> UniqDFM elt -> (UniqDFM elt, UniqDFM elt)
partitionUDFM p (UDFM m i) =
  case M.partition (p . taggedFst) m of
    (left, right) -> (UDFM left i, UDFM right i)

-- | Delete a list of elements from a UniqDFM
delListFromUDFM  :: Uniquable key => UniqDFM elt -> [key] -> UniqDFM elt
delListFromUDFM = foldl delFromUDFM

325 326 327 328 329
-- | This allows for lossy conversion from UniqDFM to UniqFM
udfmToUfm :: UniqDFM elt -> UniqFM elt
udfmToUfm (UDFM m _i) =
  listToUFM_Directly [(getUnique k, taggedFst tv) | (k, tv) <- M.toList m]

330 331 332
listToUDFM :: Uniquable key => [(key,elt)] -> UniqDFM elt
listToUDFM = foldl (\m (k, v) -> addToUDFM m k v) emptyUDFM

333 334 335
listToUDFM_Directly :: [(Unique, elt)] -> UniqDFM elt
listToUDFM_Directly = foldl (\m (u, v) -> addToUDFM_Directly m u v) emptyUDFM

336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361
-- | Apply a function to a particular element
adjustUDFM :: Uniquable key => (elt -> elt) -> UniqDFM elt -> key -> UniqDFM elt
adjustUDFM f (UDFM m i) k = UDFM (M.adjust (fmap f) (getKey $ getUnique k) m) i

-- | The expression (alterUDFM f k map) alters value x at k, or absence
-- thereof. alterUDFM can be used to insert, delete, or update a value in
-- UniqDFM. Use addToUDFM, delFromUDFM or adjustUDFM when possible, they are
-- more efficient.
alterUDFM
  :: Uniquable key
  => (Maybe elt -> Maybe elt)  -- How to adjust
  -> UniqDFM elt               -- old
  -> key                       -- new
  -> UniqDFM elt               -- result
alterUDFM f (UDFM m i) k =
  UDFM (M.alter alterf (getKey $ getUnique k) m) (i + 1)
  where
  alterf Nothing = inject $ f Nothing
  alterf (Just (TaggedVal v _)) = inject $ f (Just v)
  inject Nothing = Nothing
  inject (Just v) = Just $ TaggedVal v i

-- | Map a function over every value in a UniqDFM
mapUDFM :: (elt1 -> elt2) -> UniqDFM elt1 -> UniqDFM elt2
mapUDFM f (UDFM m i) = UDFM (M.map (fmap f) m) i

362 363 364
anyUDFM :: (elt -> Bool) -> UniqDFM elt -> Bool
anyUDFM p (UDFM m _i) = M.fold ((||) . p . taggedFst) False m

niteria's avatar
niteria committed
365 366 367
allUDFM :: (elt -> Bool) -> UniqDFM elt -> Bool
allUDFM p (UDFM m _i) = M.fold ((&&) . p . taggedFst) True m

368 369 370 371
instance Monoid (UniqDFM a) where
  mempty = emptyUDFM
  mappend = plusUDFM

372 373 374 375 376
-- This should not be used in commited code, provided for convenience to
-- make ad-hoc conversions when developing
alwaysUnsafeUfmToUdfm :: UniqFM elt -> UniqDFM elt
alwaysUnsafeUfmToUdfm = listToUDFM_Directly . ufmToList

377 378 379 380 381 382 383 384
-- Output-ery

instance Outputable a => Outputable (UniqDFM a) where
    ppr ufm = pprUniqDFM ppr ufm

pprUniqDFM :: (a -> SDoc) -> UniqDFM a -> SDoc
pprUniqDFM ppr_elt ufm
  = brackets $ fsep $ punctuate comma $
385
    [ ppr uq <+> text ":->" <+> ppr_elt elt
386
    | (uq, elt) <- udfmToList ufm ]
niteria's avatar
niteria committed
387 388 389 390 391 392

pprUDFM :: UniqDFM a    -- ^ The things to be pretty printed
       -> ([a] -> SDoc) -- ^ The pretty printing function to use on the elements
       -> SDoc          -- ^ 'SDoc' where the things have been pretty
                        -- printed
pprUDFM ufm pp = pp (eltsUDFM ufm)