Commit 6664ab83 authored by niteria's avatar niteria Committed by Ben Gamari

Add DVarSet - a deterministic set of Vars

This implements `DVarSet`, a deterministic set of Vars, with an
interface very similar to `VarSet` with a couple of functions missing.

I will need this in changes that follow, one of them will be about
changing the type of the set of Vars that `RuleInfo` holds to make the
free variable computation deterministic.

Test Plan:
./validate
I can add new tests if anyone wants me to.

Reviewers: simonpj, simonmar, austin, bgamari

Reviewed By: simonmar, bgamari

Subscribers: thomie

Differential Revision: https://phabricator.haskell.org/D1487

GHC Trac Issues: #4012
parent 192dd068
......@@ -19,7 +19,20 @@ module VarSet (
minusVarSet, foldVarSet, filterVarSet,
transCloVarSet, fixVarSet,
lookupVarSet, mapVarSet, sizeVarSet, seqVarSet,
elemVarSetByKey, partitionVarSet
elemVarSetByKey, partitionVarSet,
-- * Deterministic Var set types
DVarSet, DIdSet, DTyVarSet,
-- ** Manipulating these sets
emptyDVarSet, unitDVarSet, mkDVarSet,
extendDVarSet,
elemDVarSet, dVarSetElems, subDVarSet,
unionDVarSet, unionDVarSets, mapUnionDVarSet,
intersectDVarSet,
isEmptyDVarSet, delDVarSet,
minusDVarSet, foldDVarSet, filterDVarSet,
sizeDVarSet, seqDVarSet,
) where
#include "HsVersions.h"
......@@ -27,6 +40,7 @@ module VarSet (
import Var ( Var, TyVar, CoVar, Id )
import Unique
import UniqSet
import UniqDSet
import UniqFM( disjointUFM )
{-
......@@ -113,7 +127,7 @@ subVarSet s1 s2 = isEmptyVarSet (s1 `minusVarSet` s2)
fixVarSet :: (VarSet -> VarSet) -- Map the current set to a new set
-> VarSet -> VarSet
-- (fixVarSet f s) repeatedly applies f to the set s,
-- (fixVarSet f s) repeatedly applies f to the set s,
-- until it reaches a fixed point.
fixVarSet fn vars
| new_vars `subVarSet` vars = vars
......@@ -149,3 +163,66 @@ transCloVarSet fn seeds
seqVarSet :: VarSet -> ()
seqVarSet s = sizeVarSet s `seq` ()
-- Deterministic VarSet
-- See Note [Deterministic UniqFM] in UniqDFM for explanation why we need
-- DVarSet.
type DVarSet = UniqDSet Var
type DIdSet = UniqDSet Id
type DTyVarSet = UniqDSet TyVar
emptyDVarSet :: DVarSet
emptyDVarSet = emptyUniqDSet
unitDVarSet :: Var -> DVarSet
unitDVarSet = unitUniqDSet
mkDVarSet :: [Var] -> DVarSet
mkDVarSet = mkUniqDSet
extendDVarSet :: DVarSet -> Var -> DVarSet
extendDVarSet = addOneToUniqDSet
elemDVarSet :: Var -> DVarSet -> Bool
elemDVarSet = elementOfUniqDSet
dVarSetElems :: DVarSet -> [Var]
dVarSetElems = uniqDSetToList
subDVarSet :: DVarSet -> DVarSet -> Bool
subDVarSet s1 s2 = isEmptyDVarSet (s1 `minusDVarSet` s2)
unionDVarSet :: DVarSet -> DVarSet -> DVarSet
unionDVarSet = unionUniqDSets
unionDVarSets :: [DVarSet] -> DVarSet
unionDVarSets = unionManyUniqDSets
-- | Map the function over the list, and union the results
mapUnionDVarSet :: (a -> DVarSet) -> [a] -> DVarSet
mapUnionDVarSet get_set xs = foldr (unionDVarSet . get_set) emptyDVarSet xs
intersectDVarSet :: DVarSet -> DVarSet -> DVarSet
intersectDVarSet = intersectUniqDSets
isEmptyDVarSet :: DVarSet -> Bool
isEmptyDVarSet = isEmptyUniqDSet
delDVarSet :: DVarSet -> Var -> DVarSet
delDVarSet = delOneFromUniqDSet
minusDVarSet :: DVarSet -> DVarSet -> DVarSet
minusDVarSet = minusUniqDSet
foldDVarSet :: (Var -> a -> a) -> a -> DVarSet -> a
foldDVarSet = foldUniqDSet
filterDVarSet :: (Var -> Bool) -> DVarSet -> DVarSet
filterDVarSet = filterUniqDSet
sizeDVarSet :: DVarSet -> Int
sizeDVarSet = sizeUniqDSet
seqDVarSet :: DVarSet -> ()
seqDVarSet s = sizeDVarSet s `seq` ()
......@@ -468,8 +468,9 @@ Library
State
Stream
StringBuffer
UniqFM
UniqDFM
UniqDSet
UniqFM
UniqSet
Util
Vectorise.Builtins.Base
......
......@@ -595,8 +595,9 @@ compiler_stage2_dll0_MODULES = \
TysPrim \
TysWiredIn \
Unify \
UniqFM \
UniqDFM \
UniqDSet \
UniqFM \
UniqSet \
UniqSupply \
Unique \
......
......@@ -25,11 +25,23 @@ module UniqDFM (
-- ** Manipulating those mappings
emptyUDFM,
unitUDFM,
addToUDFM,
delFromUDFM,
plusUDFM,
lookupUDFM,
elemUDFM,
foldUDFM,
eltsUDFM,
filterUDFM,
isNullUDFM,
sizeUDFM,
intersectUDFM,
minusUDFM,
udfmToList,
udfmToUfm,
alwaysUnsafeUfmToUdfm,
) where
import FastString
......@@ -41,16 +53,32 @@ import Data.Typeable
import Data.Data
import Data.List (sortBy)
import Data.Function (on)
import UniqFM (UniqFM, listToUFM_Directly, ufmToList)
-- Note [Deterministic UniqFM]
-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~
-- A @UniqDFM@ is just like @UniqFM@ with the following additional
-- property: the function `udfmToList` returns the elements in some
-- deterministic order not depending on the Unique key for those elements.
--
-- If the client of the map performs operations on the map in deterministic
-- order then `udfmToList` returns them in deterministic order.
--
-- There is an implementation cost: each element is given a serial number
-- as it is added, and `udfmToList` sorts it's result by this serial
-- number. So you should only use `UniqDFM` if you need the deterministic
-- property.
--
-- `foldUDFM` also preserves determinism.
--
-- Normal @UniqFM@ when you turn it into a list will use
-- Data.IntMap.toList function that returns the elements in the order of
-- the keys. The keys in @UniqFM@ are always @Uniques@, so you end up with
-- with a list ordered by @Uniques@.
-- The order of @Uniques@ is known to be not stable across rebuilds.
-- See Note [Unique Determinism] in Unique.
--
--
-- There's more than one way to implement this. The implementation here tags
-- every value with the insertion time that can later be used to sort the
-- values when asked to convert to a list.
......@@ -61,12 +89,15 @@ import Data.Function (on)
--
-- where the list determines the order. This makes deletion tricky as we'd
-- only accumulate elements in that list, but makes merging easier as you
-- don't have to renumber everything.
-- I've tested both approaches by replacing UniqFM and the cost was about
-- the same for both. We don't need merging nor deletion yet, but when we
-- do it might be worth to reevaluate the trade-offs here.
data TaggedVal val = TaggedVal val {-# UNPACK #-} !Int
-- can just merge both structures independently.
-- Deletion can probably be done in amortized fashion when the size of the
-- list is twice the size of the set.
-- | A type of values tagged with insertion time
data TaggedVal val =
TaggedVal
val
{-# UNPACK #-} !Int -- ^ insertion time
deriving (Data, Typeable)
taggedFst :: TaggedVal val -> val
......@@ -81,19 +112,88 @@ instance Eq val => Eq (TaggedVal val) where
instance Functor TaggedVal where
fmap f (TaggedVal val i) = TaggedVal (f val) i
data UniqDFM ele = UDFM !(M.IntMap (TaggedVal ele)) {-# UNPACK #-} !Int
-- | Type of unique deterministic finite maps
data UniqDFM ele =
UDFM
!(M.IntMap (TaggedVal ele)) -- A map where keys are Unique's values and
-- values are tagged with insertion time.
-- The invariant is that all the tags will
-- be distinct within a single map
{-# UNPACK #-} !Int -- Upper bound on the values' insertion
-- time. See Note [Overflow on plusUDFM]
deriving (Data, Typeable, Functor)
emptyUDFM :: UniqDFM elt
emptyUDFM = UDFM M.empty 0
unitUDFM :: Uniquable key => key -> elt -> UniqDFM elt
unitUDFM k v = UDFM (M.singleton (getKey $ getUnique k) (TaggedVal v 0)) 1
addToUDFM :: Uniquable key => UniqDFM elt -> key -> elt -> UniqDFM elt
addToUDFM (UDFM m i) k v =
UDFM (M.insert (getKey $ getUnique k) (TaggedVal v i) m) (i + 1)
addToUDFM_Directly :: UniqDFM elt -> Unique -> elt -> UniqDFM elt
addToUDFM_Directly (UDFM m i) u v =
UDFM (M.insert (getKey u) (TaggedVal v i) m) (i + 1)
addListToUDFM_Directly :: UniqDFM elt -> [(Unique,elt)] -> UniqDFM elt
addListToUDFM_Directly = foldl (\m (k, v) -> addToUDFM_Directly m k v)
delFromUDFM :: Uniquable key => UniqDFM elt -> key -> UniqDFM elt
delFromUDFM (UDFM m i) k = UDFM (M.delete (getKey $ getUnique k) m) i
-- Note [Overflow on plusUDFM]
-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~
-- There are multiple ways of implementing plusUDFM.
-- The main problem that needs to be solved is overlap on times of
-- insertion between different keys in two maps.
-- Consider:
--
-- A = fromList [(a, (x, 1))]
-- B = fromList [(b, (y, 1))]
--
-- If you merge them naively you end up with:
--
-- C = fromList [(a, (x, 1)), (b, (y, 1))]
--
-- Which loses information about ordering and brings us back into
-- non-deterministic world.
--
-- The solution I considered before would increment the tags on one of the
-- sets by the upper bound of the other set. The problem with this approach
-- is that you'll run out of tags for some merge patterns.
-- Say you start with A with upper bound 1, you merge A with A to get A' and
-- the upper bound becomes 2. You merge A' with A' and the upper bound
-- doubles again. After 64 merges you overflow.
-- This solution would have the same time complexity as plusUFM, namely O(n+m).
--
-- The solution I ended up with has time complexity of
-- O(m log m + m * min (n+m, W)) where m is the smaller set.
-- It simply inserts the elements of the smaller set into the larger
-- set in the order that they were inserted into the smaller set. That's
-- O(m log m) for extracting the elements from the smaller set in the
-- insertion order and O(m * min(n+m, W)) to insert them into the bigger
-- set.
plusUDFM :: UniqDFM elt -> UniqDFM elt -> UniqDFM elt
plusUDFM udfml@(UDFM _ i) udfmr@(UDFM _ j)
-- we will use the upper bound on the tag as a proxy for the set size,
-- to insert the smaller one into the bigger one
| i > j = insertUDFMIntoLeft udfml udfmr
| otherwise = insertUDFMIntoLeft udfmr udfml
insertUDFMIntoLeft :: UniqDFM elt -> UniqDFM elt -> UniqDFM elt
insertUDFMIntoLeft udfml udfmr = addListToUDFM_Directly udfml $ udfmToList udfmr
lookupUDFM :: Uniquable key => UniqDFM elt -> key -> Maybe elt
lookupUDFM (UDFM m _i) k = taggedFst `fmap` M.lookup (getKey $ getUnique k) m
elemUDFM :: Uniquable key => key -> UniqDFM elt -> Bool
elemUDFM k (UDFM m _i) = M.member (getKey $ getUnique k) m
-- | Performs a deterministic fold over the UniqDFM.
-- It's O(n log n) while the corresponding function on `UniqFM` is O(n).
foldUDFM :: (elt -> a -> a) -> a -> UniqDFM elt -> a
foldUDFM k z m = foldr k z (eltsUDFM m)
......@@ -101,11 +201,45 @@ eltsUDFM :: UniqDFM elt -> [elt]
eltsUDFM (UDFM m _i) =
map taggedFst $ sortBy (compare `on` taggedSnd) $ M.elems m
filterUDFM :: (elt -> Bool) -> UniqDFM elt -> UniqDFM elt
filterUDFM p (UDFM m i) = UDFM (M.filter (\(TaggedVal v _) -> p v) m) i
-- | Converts `UniqDFM` to a list, with elements in deterministic order.
-- It's O(n log n) while the corresponding function on `UniqFM` is O(n).
udfmToList :: UniqDFM elt -> [(Unique, elt)]
udfmToList (UDFM m _i) =
[ (getUnique k, taggedFst v)
| (k, v) <- sortBy (compare `on` (taggedSnd . snd)) $ M.toList m ]
isNullUDFM :: UniqDFM elt -> Bool
isNullUDFM (UDFM m _) = M.null m
sizeUDFM :: UniqDFM elt -> Int
sizeUDFM (UDFM m _i) = M.size m
intersectUDFM :: UniqDFM elt -> UniqDFM elt -> UniqDFM elt
intersectUDFM (UDFM x i) (UDFM y _j) = UDFM (M.intersection x y) i
-- M.intersection is left biased, that means the result will only have
-- a subset of elements from the left set, so `i` is a good upper bound.
minusUDFM :: UniqDFM elt1 -> UniqDFM elt2 -> UniqDFM elt1
minusUDFM (UDFM x i) (UDFM y _j) = UDFM (M.difference x y) i
-- M.difference returns a subset of a left set, so `i` is a good upper
-- bound.
-- | This allows for lossy conversion from UniqDFM to UniqFM
udfmToUfm :: UniqDFM elt -> UniqFM elt
udfmToUfm (UDFM m _i) =
listToUFM_Directly [(getUnique k, taggedFst tv) | (k, tv) <- M.toList m]
listToUDFM_Directly :: [(Unique, elt)] -> UniqDFM elt
listToUDFM_Directly = foldl (\m (u, v) -> addToUDFM_Directly m u v) emptyUDFM
-- This should not be used in commited code, provided for convenience to
-- make ad-hoc conversions when developing
alwaysUnsafeUfmToUdfm :: UniqFM elt -> UniqDFM elt
alwaysUnsafeUfmToUdfm = listToUDFM_Directly . ufmToList
-- Output-ery
instance Outputable a => Outputable (UniqDFM a) where
......
-- (c) Bartosz Nitka, Facebook, 2015
-- |
-- Specialised deterministic sets, for things with @Uniques@
--
-- Based on @UniqDFMs@ (as you would expect).
-- See Note [Deterministic UniqFM] in UniqDFM for explanation why we need it.
--
-- Basically, the things need to be in class @Uniquable@.
module UniqDSet (
-- * Unique set type
UniqDSet, -- type synonym for UniqFM a
-- ** Manipulating these sets
delOneFromUniqDSet,
emptyUniqDSet,
unitUniqDSet,
mkUniqDSet,
addOneToUniqDSet, addListToUniqDSet,
unionUniqDSets, unionManyUniqDSets,
minusUniqDSet,
intersectUniqDSets,
foldUniqDSet,
elementOfUniqDSet,
filterUniqDSet,
sizeUniqDSet,
isEmptyUniqDSet,
lookupUniqDSet,
uniqDSetToList,
) where
import UniqDFM
import Unique
type UniqDSet a = UniqDFM a
emptyUniqDSet :: UniqDSet a
emptyUniqDSet = emptyUDFM
unitUniqDSet :: Uniquable a => a -> UniqDSet a
unitUniqDSet x = unitUDFM x x
mkUniqDSet :: Uniquable a => [a] -> UniqDSet a
mkUniqDSet = foldl addOneToUniqDSet emptyUniqDSet
addOneToUniqDSet :: Uniquable a => UniqDSet a -> a -> UniqDSet a
addOneToUniqDSet set x = addToUDFM set x x
addListToUniqDSet :: Uniquable a => UniqDSet a -> [a] -> UniqDSet a
addListToUniqDSet = foldl addOneToUniqDSet
delOneFromUniqDSet :: Uniquable a => UniqDSet a -> a -> UniqDSet a
delOneFromUniqDSet = delFromUDFM
unionUniqDSets :: UniqDSet a -> UniqDSet a -> UniqDSet a
unionUniqDSets = plusUDFM
unionManyUniqDSets :: [UniqDSet a] -> UniqDSet a
unionManyUniqDSets [] = emptyUniqDSet
unionManyUniqDSets sets = foldr1 unionUniqDSets sets
minusUniqDSet :: UniqDSet a -> UniqDSet a -> UniqDSet a
minusUniqDSet = minusUDFM
intersectUniqDSets :: UniqDSet a -> UniqDSet a -> UniqDSet a
intersectUniqDSets = intersectUDFM
foldUniqDSet :: (a -> b -> b) -> b -> UniqDSet a -> b
foldUniqDSet = foldUDFM
elementOfUniqDSet :: Uniquable a => a -> UniqDSet a -> Bool
elementOfUniqDSet = elemUDFM
filterUniqDSet :: (a -> Bool) -> UniqDSet a -> UniqDSet a
filterUniqDSet = filterUDFM
sizeUniqDSet :: UniqDSet a -> Int
sizeUniqDSet = sizeUDFM
isEmptyUniqDSet :: UniqDSet a -> Bool
isEmptyUniqDSet = isNullUDFM
lookupUniqDSet :: Uniquable a => UniqDSet a -> a -> Maybe a
lookupUniqDSet = lookupUDFM
uniqDSetToList :: UniqDSet a -> [a]
uniqDSetToList = eltsUDFM
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment