Commit cbddd8cb authored by Herbert Valerio Riedel's avatar Herbert Valerio Riedel 🕺
Browse files

Optimise `Version` representation

This optimises the `[Int]` representation to a 16-byte heap object for
~99% of version numbers (up to 4 components, each within a [0..0xfffe]
value range) occurring on Hackage.

One noteworthy improvement of this optimisation is a significant reduction
of the size of the 01-index.cache file from previously 6299700 bytes
(before #3905) down to 3408864 bytes, i.e. to ~54% of its previous size.

Also, this reduces the memory footprint and GC overhead a bit for
e.g. `cabal info zzz` (which reads in the index cache) from

    cabal.0: There is no package named 'zzz'.
    You may need to run 'cabal update' to get the latest list of available
    packages.
         859,337,368 bytes allocated in the heap
         447,261,128 bytes copied during GC
          37,385,208 bytes maximum residency (19 sample(s))
           1,311,136 bytes maximum slop
                 103 MB total memory in use (0 MB lost due to fragmentation)

                                         Tot time (elapsed)  Avg pause  Max pause
      Gen  0      1613 colls,     0 par    0.268s   0.268s     0.0002s    0.0012s
      Gen  1        19 colls,     0 par    0.227s   0.227s     0.0119s    0.0506s

      TASKS: 4 (1 bound, 3 peak workers (3 total), using -N1)

      SPARKS: 0 (0 converted, 0 overflowed, 0 dud, 0 GC'd, 0 fizzled)

      INIT    time    0.001s  (  0.001s elapsed)
      MUT     time    0.431s  (  0.758s elapsed)
      GC      time    0.495s  (  0.495s elapsed)
      EXIT    time    0.006s  (  0.005s elapsed)
      Total   time    0.934s  (  1.259s elapsed)

      Alloc rate    1,991,870,623 bytes per MUT second

      Productivity  46.9% of total user, 34.8% of total elapsed

to

    cabal.1: There is no package named 'zzz'.
    You may need to run 'cabal update' to get the latest list of available
    packages.
         834,314,392 bytes allocated in the heap
         440,791,176 bytes copied during GC
          36,663,112 bytes maximum residency (19 sample(s))
           2,225,040 bytes maximum slop
                  96 MB total memory in use (0 MB lost due to fragmentation)

                                         Tot time (elapsed)  Avg pause  Max pause
      Gen  0      1574 colls,     0 par    0.254s   0.254s     0.0002s    0.0007s
      Gen  1        19 colls,     0 par    0.223s   0.223s     0.0118s    0.0474s

      TASKS: 4 (1 bound, 3 peak workers (3 total), using -N1)

      SPARKS: 0 (0 converted, 0 overflowed, 0 dud, 0 GC'd, 0 fizzled)

      INIT    time    0.001s  (  0.001s elapsed)
      MUT     time    0.383s  (  0.699s elapsed)
      GC      time    0.477s  (  0.477s elapsed)
      EXIT    time    0.005s  (  0.005s elapsed)
      Total   time    0.869s  (  1.182s elapsed)

      Alloc rate    2,175,866,164 bytes per MUT second

      Productivity  44.9% of total user, 33.0% of total elapsed
parent d900c87d
......@@ -82,6 +82,8 @@ module Distribution.Version (
import Prelude ()
import Distribution.Compat.Prelude
import qualified Data.Version as Base
import Data.Bits (shiftL, shiftR, (.|.), (.&.))
import Data.Word (Word64)
import Distribution.Text
import qualified Distribution.Compat.ReadP as Parse
......@@ -105,13 +107,28 @@ import Control.Exception (assert)
-- 'Binary' instance using a different (and more compact) encoding.
--
-- @since 2.0
data Version = Version [Int]
deriving (Data,Eq,Ord,Generic,Show,Read,Typeable)
data Version = PV0 {-# UNPACK #-} !Word64
| PV1 !Int [Int]
-- PV0 is the packed form: up to four components, 16 bits each, with
-- the first component in the most significant bits. Each component
-- is stored as @component + 1@ so that a field value of 0 marks an
-- absent component (see 'mkVersion' and 'versionNumbers').
-- PV1 is the general form: the first component plus the list of the
-- remaining ones.
--
-- NOTE: If a version fits into the packed Word64
-- representation (i.e. at most four version components
-- which all fall into the [0..0xfffe] range), then PV0
-- MUST be used. This is essential for the 'Eq' instance
-- to work: 'Eq' is derived structurally, so the same version held
-- in both representations would compare as unequal.
deriving (Data,Eq,Generic,Show,Read,Typeable)
instance Ord Version where
    -- Two packed versions are ordered by their raw words: the first
    -- component occupies the most significant bits and an absent
    -- component is encoded as 0, below any present one. Every other
    -- combination is ordered by the unpacked component lists.
    compare lhs rhs = case (lhs, rhs) of
        (PV0 a, PV0 b) -> compare a b
        _              -> compare (versionNumbers lhs) (versionNumbers rhs)

    -- Same dispatch as 'compare', specialised to '<='.
    lhs <= rhs = case (lhs, rhs) of
        (PV0 a, PV0 b) -> a <= b
        _              -> versionNumbers lhs <= versionNumbers rhs
instance Binary Version
instance NFData Version where
rnf = rnf . versionNumbers
-- The packed word is a strict field, so matching on PV0 already
-- leaves nothing unevaluated.
rnf (PV0 _) = ()
-- The first component is a strict field; only the lazy list of the
-- remaining components still has to be forced.
rnf (PV1 _ ns) = rnf ns
instance Text Version where
disp ver
......@@ -138,7 +155,24 @@ instance Text Version where
mkVersion :: [Int] -> Version
-- TODO: add validity check; disallow 'mkVersion []' (we have
-- 'nullVersion' for that)
mkVersion = Version
mkVersion ns = case ns of
    -- The empty list is the special /null/ version.
    [] -> nullVersion
    -- One to four components that are all packable MUST take the packed
    -- form (see the NOTE on the 'Version' type). Each component is
    -- stored as @v + 1@ so that a field value of 0 means "absent".
    [v1]
      | packable v1
      -> PV0 (mkW64 (v1+1) 0 0 0)
    [v1,v2]
      | packable v1, packable v2
      -> PV0 (mkW64 (v1+1) (v2+1) 0 0)
    [v1,v2,v3]
      | packable v1, packable v2, packable v3
      -> PV0 (mkW64 (v1+1) (v2+1) (v3+1) 0)
    [v1,v2,v3,v4]
      | packable v1, packable v2, packable v3, packable v4
      -> PV0 (mkW64 (v1+1) (v2+1) (v3+1) (v4+1))
    -- Everything else (more than four components, or any component
    -- outside the packable range) keeps the plain list representation.
    v1:vs -> PV1 v1 vs
  where
    -- A component fits a 16-bit field only if it lies in [0..0xfffe].
    -- The lower bound matters: -1 would be stored as 0 and read back as
    -- "absent", and anything smaller wraps around in 'fromIntegral' to a
    -- 'Word64' with the high bits set, corrupting neighbouring fields.
    packable :: Int -> Bool
    packable v = v >= 0 && v <= 0xfffe

    -- Lay out four already-biased fields, first component in the most
    -- significant 16 bits.
    {-# INLINABLE mkW64 #-}
    mkW64 :: Int -> Int -> Int -> Int -> Word64
    mkW64 f1 f2 f3 f4 =     (fromIntegral f1 `shiftL` 48)
                        .|. (fromIntegral f2 `shiftL` 32)
                        .|. (fromIntegral f3 `shiftL` 16)
                        .|.  fromIntegral f4
-- | Variant of 'mkVersion' which converts a "Data.Version" 'Version'
-- into Cabal's 'Version' type.
......@@ -155,7 +189,19 @@ mkVersion' = mkVersion . Base.versionBranch
--
-- @since 2.0
versionNumbers :: Version -> [Int]
versionNumbers (Version vs) = vs
-- General form: the first component is stored separately from the rest.
versionNumbers (PV1 n ns) = n : ns
-- Packed form: decode the four 16-bit fields from most to least
-- significant and stop at the first absent one. A field holds
-- @component + 1@, so an absent field (0) decodes to -1.
versionNumbers (PV0 w) = takeWhile (>= 0) (map component [48, 32, 16, 0])
  where
    -- Extract the 16-bit field starting at the given bit offset and
    -- undo the +1 bias.
    component :: Int -> Int
    component offset = fromIntegral ((w `shiftR` offset) .&. 0xffff) - 1
-- | Constant representing the special /null/ 'Version'
--
......@@ -166,7 +212,7 @@ versionNumbers (Version vs) = vs
nullVersion :: Version
-- TODO: at some point, 'mkVersion' may disallow creating /null/
-- 'Version's
nullVersion = Version []
-- All four packed fields are 0 ("absent"), so 'versionNumbers' yields [].
nullVersion = PV0 0
-- | Apply function to list of version number components
--
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment