From ebb8287d9d67ad3df0eea2e18588dc25892cdb69 Mon Sep 17 00:00:00 2001
From: Simon Marlow
Date: Wed, 3 Nov 2010 12:13:50 +0000
Subject: [PATCH] add transclos program
---
parallel/transclos/Main.hs | 123 +++++++++++++++++++++++
parallel/transclos/Makefile | 19 ++++
parallel/transclos/TransClos.hs | 150 ++++++++++++++++++++++++++++
parallel/transclos/transclos.stdout | 3 +
4 files changed, 295 insertions(+)
create mode 100644 parallel/transclos/Main.hs
create mode 100644 parallel/transclos/Makefile
create mode 100644 parallel/transclos/TransClos.hs
create mode 100644 parallel/transclos/transclos.stdout
diff --git a/parallel/transclos/Main.hs b/parallel/transclos/Main.hs
new file mode 100644
index 00000000..ae597970
--- /dev/null
+++ b/parallel/transclos/Main.hs
@@ -0,0 +1,123 @@
+{-# OPTIONS_GHC -XFlexibleInstances -XBangPatterns #-}
+-- Time-stamp: <2010-11-03 12:01:15 simonmar>
+--
+-- Test wrapper for (parallel) transitive closure computation.
+-- The main parallel version is: TRANSCL_NESTED
+-- Other versions are:
+-- TRANSCL ... seq, circular implementation over lists
+-- TRANSCL_SET ... seq, circular implementation over sets
+-----------------------------------------------------------------------------
+
+module Main where
+
+import System.Environment(getArgs)
+import Data.List
+#if defined(STRATEGIES)
+import Control.Parallel
+import Control.DeepSeq
+import Control.Parallel.Strategies
+#else
+import GHC.Conc -- hiding (pseq,par)
+#endif
+--import Random
+import Control.Monad
+import TransClos
+import qualified Data.Set
+
+
+{-
+evalKlustered :: Kluster c => Int -> Strategy (c (c a)) -> Strategy (c a)
+evalKlustered n strat xs = return (dekluster (kluster n xs `using` strat))
+
+parChunkN :: (Kluster c, Traversable c) => Int -> Int -> Strategy a -> Strategy (c a)
+parChunkN n m strat = evalKlustered m (evalDepthN n (evalTraversable (rpar `dot` evalTraversable strat)))
+-}
+
+#define TRANS_CLOS 1
+
+main = do
+ args <- getArgs
+#ifndef TRANS_CLOS
+ when (length args < 4) $
+ error "Usage: Main "
+ let [v,n,z,m] = (map read args) :: [Int]
+ {- test parBuffer -}
+ let (strat, str) = case v of
+ 1 -> (parList rnf, "parList rnf: expect "++(show n)++" converted sparks")
+ 2 -> (parListChunk z rnf, "parListChunk: expect "++(show (n `div` z))++" converted sparks")
+ 3 -> (parListChunk_ z rnf, "parListChunk_: expect "++(show (n `div` z))++" converted sparks")
+ 4 -> (parListChunkS z rnf, "parListChunkS: expect "++(show (n
+`div` z))++" converted sparks")
+ 5 -> (parBuffer' z rnf, "parBuffer': expect "++(show n)++" converted sparks, at most "++(show z)++" running at the same time")
+ 6 -> (parBuffer z rnf, "parBuffer: expect "++(show n)++" converted sparks, at most "++(show z)++" running at the same time")
+ 7 -> (parBuffer_ z rnf, "parBuffer_: expect "++(show n)++" converted sparks, at most "++(show z)++" running at the same time")
+ 8 -> (evalBuffer_ z (rpar `dot` rnf), "evalBuffer_: expect "++(show n)++" converted sparks, at most "++(show z)++" running at the same time")
+ 9 -> (parBufferChunk_ 2 z rnf , "parBufferChunk_: chunksize 2; expect "++(show (n `div` 2))++" converted sparks, at most "++(show z)++" running at the same time")
+ 10 -> (evalBufferChunk 2 z (rpar `dot` seqList rnf) , "parBufferChunk: chunksize 2; expect "++(show (n `div` 2))++" converted sparks, at most "++(show z)++" running at the same time")
+ _ -> error "Unknown version"
+ let res = map nfib (take n (repeat m)) `using` strat
+ putStrLn ("Computing: map nfib (take n (repeat m)) for n = "++(show n)++" and m = "++(show m))
+ putStrLn ("Version: "++str)
+ putStrLn ("Res: "++(show res))
+#else
+ when (length args < 5) $
+ error "Usage: Main "
+ let [v,n,z,m,d] = (map read args) :: [Int]
+-- g <- newStdGen
+ let seeds = [1..10] -- take 10 $ randomRs (0::Int,m) g
+ let rel_one = \ n -> nfib d `pseq` n+1
+ let rel_list = \ n -> nfib ((d-1) `min` (n `max` d)) `pseq` [n+1..n+11]
+ let rel_set = \n -> nfib d `pseq` Data.Set.fromList [n+1..n+11]
+#if defined(TRANSCL)
+ let zs = {- take n $ -} transcl rel_list seeds -- list-based with 1-to-n rel
+#elif defined(TRANSCL_NESTED)
+ let zs = {- take n $ -} transcl_nested rel_list seeds -- list-based with 1-to-n rel; main PARALLEL version
+#elif defined(TRANSCL_SET)
+ let zs = Data.Set.toList $ {- take n $ -} transcl_set rel_set (Data.Set.fromList seeds) -- set-based with 1-to-n rel
+#else
+ let zs = {- take n $ -} transcl rel_list seeds -- default: seq, circular, with a 1-to-n list-based relation
+ -- unused verions
+ -- let zs = {- take n $ -} transcl_dup rel_one seeds -- no elim of duplicates; good parallelism but stupid
+ -- let zs = {- take n $ -} transcl_simp rel_one seeds -- list-based with 1-to-1 rel
+#endif
+ let (strat, str) = case v of
+ {- temp out of order
+ 1 -> (\ _ -> parListN n rnf (drop (length seeds) zs), "parListN with n = "++(show n))
+ 2 -> (\ _ -> parListChunkK z rnf (drop (length seeds) zs), "parListChunkK with z = "++(show z))
+ 3 -> (\ _ -> parListChunkN z n rnf (drop (length seeds) zs), "parListChunkN with blocksize z = "++(show z)++" and length n = "++(show n))
+ -}
+ 4 -> (\ _ -> error "parBuffer' ", "parBuffer with buffer size "++(show n))
+ -- 5 -> (\ _ -> parBufferLChunk n z (ins rnf) (drop (length seeds) zs), "parBufferLChunk with buffer size "++(show n)++" chunk size size "++(show z))
+ -- 6 -> (\ _ -> parBufferQChunk n z (ins rnf) (drop (length seeds) zs), "parBufferQChunk with buffer size "++(show n)++" chunk size size "++(show z))
+ -- 7 -> (\ _ -> parBufferAChunk n z (ins rnf) (drop (length seeds) zs), "parBufferAChunk with buffer size "++(show n)++" chunk size size "++(show z))
+ -- 10 -> (\ _ -> parBufferChunk_ z n rnf (drop (length seeds) zs), "parBufferChunk with buffer size "++(show n)++" chunk size size "++(show z))
+ -- 11 -> (\ _ -> evalBufferChunk z n (rpar `dot` seqList rnf) (drop (length seeds) zs), "evalBufferChunk with buffer size "++(show n)++" chunk size size "++(show z))
+ -- 12 -> (\ _ -> parBufferLSliceChunk n z z (rpar `dot` seqList (ins rnf)) (drop (length seeds) zs), "parBufferLSliceChunk with buffer size "++(show n)++" stride "++(show z)++" chunk size "++(show z))
+ -- 13 -> (\ _ -> parBufferQSliceChunk n z z (rpar `dot` seqList (ins rnf)) (drop (length seeds) zs), "parBufferQSliceChunk with buffer size "++(show n)++" stride "++(show z)++" chunk size "++(show z))
+ -- 14 -> (\ _ -> parBufferASliceChunk n z z (rpar `dot` seqList (ins rnf)) (drop (length seeds) zs), "parBufferASliceChunk with buffer size "++(show n)++" stride "++(show z)++" chunk size "++(show z))
+ -- 13 -> (\ b -> parBuffer_ z (drop (length seeds) zs) >> return b, "parBuffer_ with buffer size "++(show z))
+ v' -> error $ "Unknown version "++(show v')
+#if defined(TRANSCL)
+ let res = m `elem` zs -- NO: parallelism is hopeless on this one: `using` strat)
+#elif defined(TRANSCL_NESTED)
+ let res = if (v==4) -- special case for parBuffer (not of strategy type!)
+ then m `elem` (nub (concat (runEval $ do let (first, rest) = splitAt (length seeds) zs
+ rest' <- parBuffer n rdeepseq rest
+ return (first ++ rest') ))) -- main PARALLEL version
+ else m `elem` (nub (concat (zs `using` strat))) -- main PARALLEL version
+#elif defined(TRANSCL_SET)
+ let res = m `elem` zs -- default: seq, circular, with a 1-to-n list-based relation
+#else
+ let res = m `elem` zs -- default: seq, circular, with a 1-to-n list-based relation
+#endif
+ putStrLn ("Searching for value "++(show m)++" in transitive closure of relation \\ n -> [n+1..n+11] with seeds "++(show seeds))
+ putStrLn ("Version: "++str)
+ putStrLn ("Res: "++(show res))
+#endif
+
+
+nfib :: Int -> Int
+nfib 0 = 1
+nfib 1 = 1
+nfib n = nfib (n-1) + nfib (n-2) + 1
+
diff --git a/parallel/transclos/Makefile b/parallel/transclos/Makefile
new file mode 100644
index 00000000..95a7d506
--- /dev/null
+++ b/parallel/transclos/Makefile
@@ -0,0 +1,19 @@
+TOP = ../..
+include $(TOP)/mk/boilerplate.mk
+
+SRC_HC_OPTS += -cpp -DSTRATEGIES -DTRANSCL_NESTED -package random -package parallel
+
+# XXX: only speeds up without optimisation. This is bad. Could be
+# due to the nfib delay mucking up load-balancing.
+SRC_HC_OPTS += -O0
+
+#
+# ver = 4 (always)
+# buffer = parBuffer size.
+# 10 is about optimal for 7.1, greater degrades perf (less so for local-gc)
+# dummy = 999 (always)
+# delay = larger for
+PROG_ARGS = 4 10 10 999 24
+
+include $(TOP)/mk/target.mk
+
diff --git a/parallel/transclos/TransClos.hs b/parallel/transclos/TransClos.hs
new file mode 100644
index 00000000..b54c3001
--- /dev/null
+++ b/parallel/transclos/TransClos.hs
@@ -0,0 +1,150 @@
+-- Time-stamp: <2010-11-03 11:40:43 simonmar>
+-- Versions of computing a transitive closure to a given relation.
+-- Based on this lecture (in German, my apologies):
+-- http://www2.tcs.ifi.lmu.de/lehre/SS09/Fun/AFP_04.pdf
+-- Exercises 'Blatt 4' corresponding to AFP_04.tex:
+-- http://www2.tcs.ifi.lmu.de/lehre/SS09/Fun/Exc04.pdf
+-----------------------------------------------------------------------------
+
+module TransClos where
+
+-- import BinTree
+import Data.List as List
+import qualified Data.Set
+-- import CircPrgs(nub2)
+
+-- P-11
+
+-- sieve of Erathostenes; generates a lot of intermediate lists
+sieve :: [Integer]
+sieve = sieve' [2..]
+ where sieve' (x:xs) = x:(sieve' . filter (\n -> n `mod` x /= 0) $ xs)
+
+-- sum (take 5555 sieve)
+-- 143086552
+-- (14.00 secs, 940991964 bytes)
+-- ca. 940MB total
+
+-- circular program computing all prime numbers (AFP_04)
+-- is circular, but generates some intermediate lists when checking a candidate
+primes1 :: [Integer]
+primes1 = 2:[ n | n <- [3,5..], all (\ p -> n `mod` p /= 0) . takeWhile (\p -> p^2 <= n) $ primes1 ]
+
+-- sum (take 5555 primes1)
+-- 143086552
+-- (0.63 secs, 57357568 bytes)
+-- ca. 57MB total
+
+-- circular, without generating intermediate lists
+primes2 :: [Integer]
+primes2 = 2:(filter (not . multiple primes2) [3,5..])
+ where multiple (x:xs) n | x*x > n = False
+ | n `mod` x == 0 = True
+ | otherwise = multiple xs n
+-- sum (take 5555 primes2)
+-- 143086552
+-- (0.56 secs, 22138868 bytes)
+-- ca. 22MB total
+
+-----------------------------------------------------------------------------
+-- P-12
+
+-- naive, non-circular version
+transcl' :: (Eq a) => (a -> [a]) -> [a] -> [a]
+transcl' r xs = if xs==xs'
+ then xs
+ else transcl' r xs'
+ where xs' = foldl union xs (map r xs)
+
+-- transcl' (r1 444) [1]
+-- (5.65 secs, 1135677448 bytes)
+-- ca 1.1GB
+
+-- simple circular version
+-- the basic idea is shown by this simplified prg using a 1-to-1 relation only
+-- the list comp picks an elem from earlier in the *result* list, feeds it through
+-- the relation and adds it to the result list if it's not there already
+-- of course, we must make sure that elem doesn't search further in the list than the current elem!
+transcl_simp :: (Eq a) => (a -> a) -> [a] -> [a]
+transcl_simp r xs = zs
+ where zs = xs ++ [ x' | (n,x) <- zip [1..] zs, let x' = r x, not (x' `elem` take n zs) ]
+ -- possibly restrict the initial segment being searched, to increase parallelism: ^^^ (take (n `div` 2) zs)) ]
+
+-- version that does not check for duplicates! -- , not (x' `elem` (take n zs)) ]
+transcl_dup :: (Eq a) => (a -> a) -> [a] -> [a]
+transcl_dup r xs = zs
+ where zs = xs ++ [ x' | (n,x) <- zip [1..] zs, let x' = r x ]
+
+-- main parallel version:
+-- producing a list-of-list improves parallelism, since the position of an element
+-- does not depend on all the previous elements
+transcl_nested :: (Eq a) => (a -> [a]) -> [a] -> [[a]] {- [a] -}
+transcl_nested r xs = {- (nub . concat) -} zss
+ where -- zss :: [[a]]
+ zss = xs:(build 1 zss)
+ -- build :: Int -> [[a]] -> [[a]]
+ build j [] = []
+ build j (xs:xss) = zss' ++ build (j+length zss') xss
+ where zss' = [ filter (not . (`elem` (concat (take j zss)))) xs' | x <- xs, let xs' = r x ]
+ -- where zss' = [ filter (not . or . (map (`elem` (take j zss)))) xs' | x <- xs, let xs' = r x ]
+
+-- main circular version (seq)
+transcl :: (Eq a) => (a -> [a]) -> [a] -> [a]
+transcl r xs = xs'
+ where
+ xs' = xs ++ build 0 (length xs)
+ -- m and n is the interval that is used to generate new elements
+ build m n = if List.null ys'
+ then []
+ else ys' ++ build n (n + length ys')
+ where ys' = filter (not . (`elem` (take (n-1) xs'))) $ foldl union [] [ ys | y <- take (n-m) (drop m xs'), let ys = r y ]
+
+-- transcl (r1 444) [1]
+-- (0.02 secs, 3367572 bytes)
+-- ca 3.4MB
+-- transcl (r1 666) [1]
+-- (0.03 secs, 6617576 bytes)
+-- ca 6.6MB
+
+-- circular version, using sets rather than lists
+transcl_set :: (Ord a, Eq a) => (a -> Data.Set.Set a) -> Data.Set.Set a -> Data.Set.Set a
+transcl_set r xs = foldl Data.Set.union Data.Set.empty xs'
+ where
+ xs' = [xs] ++ build xs 1
+ -- build :: (Ord a, Eq a) => Data.Set.Set a -> Int -> [Data.Set.Set a]
+ build s n = if Data.Set.null ys'
+ then []
+ else [ys'] ++ build ys' (n+1)
+ where ys' = Data.Set.filter (is_new ys0) $
+ foldl Data.Set.union Data.Set.empty [ ys | y <- Data.Set.toList s, let ys = r y ]
+ ys0 = take n xs'
+
+ is_new ([]) y = True
+ is_new (xs:xss) y | y `Data.Set.member` xs = False
+ | otherwise = is_new xss y
+
+
+-- transcl_set (r1_set 444) (Data.Set.fromList [1])
+-- (0.07 secs, 3884380 bytes)
+-- ca 3.8MB
+
+-- this version tracks the interval which generated a list element
+-- t1 :: (Eq a) => (a -> [a]) -> [a] -> [a]
+transcl_dbg r xs = xs'
+ where
+ xs' = [ (x,0,0) | x <- xs ] ++ build 0 (length xs)
+ build m n = if List.null ys'
+ then []
+ else ys' ++ build n (n + length ys')
+ where ys' = filter (not . (`elem` (take (n-1) xs'))) $ foldl union [] [ ys | (y,_,_) <- take (n-m) (drop m xs'), let ys = [ (y,m,n) | y <- r y ] ]
+
+r1 b n | n** [n+1..n+11] with seeds [1,2,3,4,5,6,7,8,9,10]
+Version: parBuffer with buffer size 10
+Res: True
--
2.22.0
**