Commit 98e494af authored by Edward Z. Yang's avatar Edward Z. Yang

Improve Haddock documentation for compact.

Test Plan: none

Reviewers: simonmar, bgamari, austin

Subscribers: thomie

Differential Revision: https://phabricator.haskell.org/D3148
parent 42ff5d97
......@@ -15,10 +15,48 @@
-- Stability : unstable
-- Portability : non-portable (GHC Extensions)
--
-- This module provides a data structure, called a Compact, for
-- holding fully evaluated data in a consecutive block of memory.
-- This module provides a data structure, called a 'Compact', for
-- holding immutable, fully evaluated data in a consecutive block of memory.
-- Compact regions are good for two things:
--
-- /Since: 1.0.0/
-- 1. Data in a compact region is not traversed during GC; any
-- incoming pointer to a compact region keeps the entire region
-- live. Thus, if you put a long-lived data structure in a compact
-- region, you may save a lot of cycles during major collections,
-- since you will no longer be (uselessly) retraversing this
-- data structure.
--
-- 2. Because the data is stored contiguously, you can easily
-- dump the memory to disk and/or send it over the network.
-- For applications that are not bandwidth bound (GHC's heap
-- representation can be as much of a x4 expansion over a
-- binary serialization), this can lead to substantial speed ups.
--
-- For example, suppose you have a function @loadBigStruct :: IO BigStruct@,
-- which loads a large data structure from the file system. First,
-- ensure that @BigStruct@ is immutable by defining an 'NFData' instance
-- for it. Then, you can "compact" the structure with the following code:
--
-- @
-- do r <- 'compact' =<< loadBigStruct
-- let x = 'getCompact' r :: BigStruct
-- -- Do things with x
-- @
--
-- Note that 'compact' will not preserve internal sharing; use
-- 'compactWithSharing' (which is 10x slower) if you have cycles and/or
-- must preserve sharing. The 'Compact' pointer @r@ can be used
-- to add more data to a compact region; see 'compactAdd' or
-- 'compactAddWithSharing'.
--
-- The implementation of compact regions is described by:
--
-- * Edward Z. Yang, Giovanni Campagna, Ömer Ağacan, Ahmed El-Hassany, Abhishek
-- Kulkarni, Ryan Newton. \"/Efficient communication and Collection with Compact
-- Normal Forms/\". In Proceedings of the 20th ACM SIGPLAN International
-- Conference on Functional Programming. September 2015. <http://ezyang.com/compact.html>
--
-- This library is supported by GHC 8.2 and later.
module Data.Compact (
-- * The Compact type
......@@ -47,15 +85,21 @@ import GHC.Types
import Data.Compact.Internal as Internal
-- | Retrieve the object that was stored in a 'Compact'
-- | Retrieve a direct pointer to the value pointed at by a 'Compact' reference.
-- If you have used 'compactAdd', there may be multiple 'Compact' references
-- into the same compact region. Upholds the property:
--
-- > inCompact c (getCompact c) == True
--
getCompact :: Compact a -> a
getCompact (Compact _ obj _) = obj
-- | Compact a value. /O(size of unshared data)/
--
-- If the structure contains any internal sharing, the shared data
-- will be duplicated during the compaction process. Loops if the
-- structure contains cycles.
-- will be duplicated during the compaction process. This will
-- not terminate if the structure contains cycles (use 'compactWithSharing'
-- instead).
--
-- The NFData constraint is just to ensure that the object contains no
-- functions, 'compact' does not actually use it. If your object
......@@ -80,39 +124,56 @@ compact = Internal.compactSized 31268 False
compactWithSharing :: NFData a => a -> IO (Compact a)
compactWithSharing = Internal.compactSized 31268 True
-- | Add a value to an existing 'Compact'. Behaves exactly like
-- 'compact' with respect to sharing and the 'NFData' constraint.
-- | Add a value to an existing 'Compact'. This will help you avoid
-- copying when the value contains pointers into the compact region,
-- but remember that after compaction this value will only be deallocated
-- with the entire compact region.
--
-- Behaves exactly like 'compact' with respect to sharing and the 'NFData'
-- constraint.
--
compactAdd :: NFData a => Compact b -> a -> IO (Compact a)
compactAdd (Compact compact# _ lock) a = withMVar lock $ \_ -> IO $ \s ->
case compactAdd# compact# a s of { (# s1, pk #) ->
(# s1, Compact compact# pk lock #) }
-- | Add a value to an existing 'Compact'. Behaves exactly like
-- 'compactWithSharing' with respect to sharing and the 'NFData'
-- constraint.
-- | Add a value to an existing 'Compact', like 'compactAdd', but
-- behaving exactly like 'compactWithSharing' with respect to
-- sharing and the 'NFData' constraint.
--
compactAddWithSharing :: NFData a => Compact b -> a -> IO (Compact a)
compactAddWithSharing (Compact compact# _ lock) a =
withMVar lock $ \_ -> IO $ \s ->
case compactAddWithSharing# compact# a s of { (# s1, pk #) ->
(# s1, Compact compact# pk lock #) }
-- | Check if the second argument is inside the 'Compact'
-- | Check if the second argument is inside the passed 'Compact'.
--
inCompact :: Compact b -> a -> IO Bool
inCompact (Compact buffer _ _) !val =
IO (\s -> case compactContains# buffer val s of
(# s', v #) -> (# s', isTrue# v #) )
-- | Check if the argument is in any 'Compact'
-- | Check if the argument is in any 'Compact'. If true, the value in question
-- is also fully evaluated, since any value in a compact region must
-- be fully evaluated.
--
isCompact :: a -> IO Bool
isCompact !val =
IO (\s -> case compactContainsAny# val s of
(# s', v #) -> (# s', isTrue# v #) )
-- | Returns the size in bytes of the compact region.
--
compactSize :: Compact a -> IO Word
compactSize (Compact buffer _ lock) = withMVar lock $ \_ -> IO $ \s0 ->
case compactSize# buffer s0 of (# s1, sz #) -> (# s1, W# sz #)
-- | *Experimental.* This function doesn't actually resize a compact
-- region; rather, it changes the default block size which we allocate
-- when the current block runs out of space, and also appends a block
-- to the compact region.
--
compactResize :: Compact a -> Word -> IO ()
compactResize (Compact oldBuffer _ lock) (W# new_size) =
withMVar lock $ \_ -> IO $ \s ->
......
......@@ -13,11 +13,9 @@
-- Stability : unstable
-- Portability : non-portable (GHC Extensions)
--
-- This module provides a data structure, called a Compact, for
-- holding fully evaluated data in a consecutive block of memory.
--
-- This is a private implementation detail of the package and should
-- not be imported directly.
-- This module provides some internal functions and representation for dealing
-- with compact regions, which may be of interest if you need higher
-- performance.
--
-- /Since: 1.0.0/
......@@ -50,13 +48,20 @@ import GHC.Types
-- separate copy of the data.
--
-- The cost of compaction is similar to the cost of GC for the same
-- data, but it is perfomed only once. However, retainining internal
-- sharing during the compaction process is very costly, so it is
-- optional; there are two ways to create a 'Compact': 'compact' and
-- 'compactWithSharing'.
--
-- Data can be added to an existing 'Compact' with 'compactAdd' or
-- 'compactAddWithSharing'.
-- data, but it is perfomed only once. However, because
-- "Data.Compact.compact" does not stop-the-world, retaining internal
-- sharing during the compaction process is very costly. The user
-- can choose wether to 'compact' or 'compactWithSharing'.
--
-- When you have a @'Compact' a@, you can get a pointer to the actual object
-- in the region using "Data.Compact.getCompact". The 'Compact' type
-- serves as handle on the region itself; you can use this handle
-- to add data to a specific 'Compact' with 'compactAdd' or
-- 'compactAddWithSharing' (giving you a new handle which corresponds
-- to the same compact region, but points to the newly added object
-- in the region). At the moment, due to technical reasons,
-- it's not possible to get the @'Compact' a@ if you only have an @a@,
-- so make sure you hold on to the handle as necessary.
--
-- Data in a compact doesn't ever move, so compacting data is also a
-- way to pin arbitrary data structures in memory.
......@@ -70,8 +75,8 @@ import GHC.Types
-- address (the address might be stored in a C data structure, for
-- example), so we can't make a copy of it to store in the 'Compact'.
--
-- * Mutable objects also cannot be compacted, because subsequent
-- mutation would destroy the property that a compact is
-- * Objects with mutable pointer fields also cannot be compacted,
-- because subsequent mutation would destroy the property that a compact is
-- self-contained.
--
-- If compaction encounters any of the above, a 'CompactionFailed'
......@@ -83,6 +88,10 @@ data Compact a = Compact Compact# a (MVar ())
-- The MVar here is to enforce mutual exclusion among writers.
-- Note: the MVar protects the Compact# only, not the pure value 'a'
-- | Make a new 'Compact' object, given a pointer to the true
-- underlying region. You must uphold the invariant that @a@ lives
-- in the compact region.
--
mkCompact
:: Compact# -> a -> State# RealWorld -> (# State# RealWorld, Compact a #)
mkCompact compact# a s =
......@@ -91,6 +100,11 @@ mkCompact compact# a s =
where
unIO (IO a) = a
-- | Transfer @a@ into a new compact region, with a preallocated size,
-- possibly preserving sharing or not. If you know how big the data
-- structure in question is, you can save time by picking an appropriate
-- block size for the compact region.
--
compactSized :: NFData a => Int -> Bool -> a -> IO (Compact a)
compactSized (I# size) share a = IO $ \s0 ->
case compactNew# (int2Word# size) s0 of { (# s1, compact# #) ->
......
......@@ -14,9 +14,6 @@
-- Stability : unstable
-- Portability : non-portable (GHC Extensions)
--
-- This module provides a data structure, called a Compact, for
-- holding fully evaluated data in a consecutive block of memory.
--
-- This module contains support for serializing a Compact for network
-- transmission and on-disk storage.
--
......@@ -45,7 +42,7 @@ import Control.DeepSeq(NFData, force)
import Data.Compact.Internal
-- |A serialized version of the 'Compact' metadata (each block with
-- | A serialized version of the 'Compact' metadata (each block with
-- address and size and the address of the root). This structure is
-- meant to be sent alongside the actual 'Compact' data. It can be
-- sent out of band in advance if the data is to be sent over RDMA
......@@ -58,7 +55,6 @@ data SerializedCompact a = SerializedCompact
addrIsNull :: Addr# -> Bool
addrIsNull addr = isTrue# (nullAddr# `eqAddr#` addr)
compactGetFirstBlock :: Compact# -> IO (Ptr a, Word)
compactGetFirstBlock buffer =
IO (\s -> case compactGetFirstBlock# buffer s of
......@@ -85,10 +81,11 @@ mkBlockList buffer = compactGetFirstBlock buffer >>= go
-- before func had a chance to copy everything into its own
-- buffers/sockets/whatever
-- |Serialize the 'Compact', and call the provided function with
-- | Serialize the 'Compact', and call the provided function with
-- with the 'Compact' serialized representation. The resulting
-- action will be executed synchronously before this function
-- completes.
--
{-# NOINLINE withSerializedCompact #-}
withSerializedCompact :: NFData c => Compact a ->
(SerializedCompact a -> IO c) -> IO c
......@@ -115,7 +112,7 @@ fixupPointers firstBlock rootAddr s =
(# root #) -> case mkCompact buffer root s' of
(# s'', c #) -> (# s'', Just c #)
-- |Deserialize a 'SerializedCompact' into a in-memory 'Compact'. The
-- | Deserialize a 'SerializedCompact' into a in-memory 'Compact'. The
-- provided function will be called with the address and size of each
-- newly allocated block in succession, and should fill the memory
-- from the external source (eg. by reading from a socket or from disk)
......@@ -191,6 +188,9 @@ sanityCheckByteStrings (SerializedCompact scl _) bsl = go scl bsl
go ((_, size):scs) (bs:bss) =
fromIntegral size == ByteString.length bs && go scs bss
-- | Convenience function for importing a compact region that is represented
-- by a list of strict 'ByteString's.
--
importCompactByteStrings :: SerializedCompact a -> [ByteString.ByteString] ->
IO (Maybe (Compact a))
importCompactByteStrings serialized stringList =
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment