Commit 379fdc74 authored by Simon Marlow's avatar Simon Marlow

Add some SMP and threading benchmarks I have lying around

parent 50bba2ac
TOP = ..
include $(TOP)/mk/boilerplate.mk
SUBDIRS = \
sieve \
stm001 \
callback001 \
threads001 \
threads003
# later:
# stm02
# Not a good benchmark, can go really slowly for random reasons:
# threads002
include $(TOP)/mk/target.mk
{-# OPTIONS_GHC -fffi #-}
-- This benchmark is also ffi014 in the test suite.
module Main where
import Control.Concurrent
import Control.Monad
import Foreign.Ptr
import Data.IORef
import System.Environment
import System.IO
main = do
[s] <- getArgs
let n = read s :: Int
sem <- newQSemN 0
replicateM n (putStr "." >> hFlush stdout >> forkOS (thread sem) >> thread sem)
waitQSemN sem (n*2)
thread sem = do
var <- newIORef 0
let f = modifyIORef var (1+)
callC =<< mkFunc f
signalQSemN sem 1
type FUNC = IO ()
foreign import ccall unsafe "wrapper"
mkFunc :: FUNC -> IO (FunPtr FUNC)
foreign import ccall threadsafe "cbits.h callC"
callC:: FunPtr FUNC -> IO ()
TOP = ../..
include $(TOP)/mk/boilerplate.mk
FAST_OPTS = 100
NORM_OPTS = 200
SLOW_OPTS = 500
OBJS += Main_stub.o
include $(TOP)/mk/target.mk
........................................................................................................................................................................................................
\ No newline at end of file
#include "cbits.h"
void callC( FUNC* f) {
int i;
for(i=0;i<1000;i++) f();
}
typedef void FUNC();
void callC( FUNC* f);
TOP = ../..
include $(TOP)/mk/boilerplate.mk
FAST_OPTS = 10
NORM_OPTS = 100
SLOW_OPTS = 500
include $(TOP)/mk/target.mk
We get very poor speedup with -N2 on this example:
~/builds/64smp/nofib/smp/sieve > time ./sieve 100 +RTS -N1
1,422,224,616 bytes allocated in the heap
45,371,544 bytes copied during GC (scavenged)
1,644,576 bytes copied during GC (not scavenged)
85,608 bytes maximum residency (8 sample(s))
...
2.16s real 2.16s user 0.00s system 99% ./sieve 100 +RTS -N1 -sstderr
~/builds/64smp/nofib/smp/sieve > time ./sieve 100 +RTS -N2
1,422,223,024 bytes allocated in the heap
936,650,456 bytes copied during GC (scavenged)
52,740,464 bytes copied during GC (not scavenged)
4,002,560 bytes maximum residency (154 sample(s))
...
6.48s real 7.58s user 0.03s system 117% ./sieve 100 +RTS -N2 -sstderr
A lot more bytes shifted during GC. If we up the heap size:
~/builds/64smp/nofib/smp/sieve > time ./sieve 100 +RTS -N2 -sstderr -H32m
./sieve 100 +RTS -N2 -sstderr -H32m
1,422,261,808 bytes allocated in the heap
47,046,320 bytes copied during GC (scavenged)
1,277,408 bytes copied during GC (not scavenged)
657,848 bytes maximum residency (12 sample(s))
...
1.68s real 2.90s user 0.06s system 175% ./sieve 100 +RTS -N2 -sstderr -H32m
A lot of stuff moving into the old generation, perhaps?
This is not due to old-gen updates, because we have lock-free old-gen
updates now.
-------------------------------------------------------------------------------
-- $Id: Primes.hs#1 2005/06/13 15:48:09 REDMOND\\satnams $
-------------------------------------------------------------------------------
-- Satnam reported that this didn't show any speedup up from -N1 to -N4
module Main where
import System.Time
import Control.Concurrent
import System.Environment
-- how many primes to calculate in each thread
n_primes :: Int
n_primes = 500
primes1 n done
= do --putStrLn (show ((sieve [n..])!!n_primes))
show ((sieve [n..])!!n_primes) `seq` return ()
putMVar done ()
sieve (p:xs) = p : sieve [x | x <- xs, not (x `mod` p == 0)]
main
= do
[str] <- getArgs
let instances = read str :: Int
dones <- sequence (replicate instances newEmptyMVar)
sequence_ [forkIO (primes1 (i+2) (dones!!i)) | i <- [0..instances-1]]
sequence_ [takeMVar (dones!!i) | i <- [0..instances-1]]
#! /bin/sh
make clean boot
make -k |& tee log-normal
make clean boot
make -k EXTRA_HC_OPTS=-threaded |& tee log-threaded
make clean boot
make -k EXTRA_HC_OPTS=-smp |& tee log-smp-N1
make -k EXTRA_HC_OPTS=-smp EXTRA_RUNTEST_OPTS='+RTS -N2 -RTS' |& tee log-smp-N2
make -k EXTRA_HC_OPTS=-smp EXTRA_RUNTEST_OPTS='+RTS -N8 -RTS' |& tee log-smp-N8
make -k EXTRA_HC_OPTS=-smp EXTRA_RUNTEST_OPTS='+RTS -N16 -RTS' |& tee log-smp-N16
TOP = ../..
include $(TOP)/mk/boilerplate.mk
SRC_HC_OPTS += -package stm
include $(TOP)/mk/target.mk
module Main where
import Control.Concurrent
import Control.Concurrent.STM
import System.IO
data State = State {
vt :: TVar Int,
vm :: MVar Int,
chan :: TChan (),
count :: TVar Int
}
loopmax = 100000
numthreads = 50
main
= do t <- atomically (newTVar 0)
m <- newEmptyMVar
putMVar m 0
c <- atomically (newTChan)
cnt <- atomically (newTVar 0)
let st = State t m c cnt
forkIter numthreads (proc st domv loopmax)
atomically (readTChan c)
return ()
proc :: State -> (State -> IO ()) -> Int -> IO ()
proc st w 0 = do c <- atomically (do cnt <- readTVar (count st)
writeTVar (count st) (cnt+1)
if cnt+1 >= numthreads
then writeTChan (chan st) ()
else return ()
return cnt)
return ()
proc st w i
= do w st
proc st w (i-1)
dotv :: State -> IO ()
dotv st
= do n <- atomically (do n <- readTVar (vt st)
writeTVar (vt st) (n+1)
return n)
return ()
domv :: State -> IO ()
domv st
= do n <- takeMVar (vm st)
putMVar (vm st) (n+1)
return ()
forkIter :: Int -> IO () -> IO ()
forkIter n p
= iter n (do forkIO p
return ())
iter :: Int -> IO () -> IO ()
iter 0 _ = return ()
iter n f
= do f
iter (n-1) f
module Main where
import Control.Concurrent
import Control.Concurrent.STM
import System.IO
data StateM = StateM {
i :: Int,
s :: String,
b :: Bool
}
data State = State {
ti :: TVar Int,
ts :: TVar String,
tb :: TVar Bool,
mv :: MVar StateM,
chan :: TChan (),
count :: TVar Int
}
loopmax = 100000
numthreads = 10
main
= do i <- atomically (newTVar 0)
s <- atomically (newTVar "1")
b <- atomically (newTVar False)
m <- newEmptyMVar
let sm = StateM 0 "1" False
putMVar m sm
c <- atomically (newTChan)
cnt <- atomically (newTVar 0)
let st = State i s b m c cnt
{-
forkIter numthreads (proc st tvir loopmax)
forkIter numthreads (proc st tvirw loopmax)
forkIter numthreads (proc st tvsr loopmax)
forkIter numthreads (proc st tvsrw loopmax)
forkIter numthreads (proc st tvbr loopmax)
forkIter numthreads (proc st tvbrw loopmax)
-}
forkIter numthreads (proc st mvir loopmax)
forkIter numthreads (proc st mvirw loopmax)
forkIter numthreads (proc st mvsr loopmax)
forkIter numthreads (proc st mvsrw loopmax)
forkIter numthreads (proc st mvbr loopmax)
forkIter numthreads (proc st mvbrw loopmax)
atomically (readTChan c)
return ()
proc :: State -> (State -> IO ()) -> Int -> IO ()
proc st w 0 = do c <- atomically (do cnt <- readTVar (count st)
writeTVar (count st) (cnt+1)
if cnt+1 >= (numthreads*6)
then writeTChan (chan st) ()
else return ()
return cnt)
return ()
proc st w i
= do w st
proc st w (i-1)
tvir :: State -> IO ()
tvir st
= do n <- atomically (readTVar (ti st))
return ()
tvirw :: State -> IO ()
tvirw st
= do n <- atomically (do n <- readTVar (ti st)
writeTVar (ti st) (n+1)
return n)
return ()
tvsr :: State -> IO ()
tvsr st
= do s <- atomically (readTVar (ts st))
return ()
tvsrw :: State -> IO ()
tvsrw st
= do s <- atomically (do s <- readTVar (ts st)
writeTVar (ts st) (randomString s)
return s)
return ()
tvbr :: State -> IO ()
tvbr st
= do b <- atomically (readTVar (tb st))
return ()
tvbrw :: State -> IO ()
tvbrw st
= do b <- atomically (do b <- readTVar (tb st)
writeTVar (tb st) (not b)
return b)
return ()
mvir :: State -> IO ()
mvir st
= do m <- takeMVar (mv st)
let i2 = (i m)
putMVar (mv st) m
return ()
mvirw :: State -> IO ()
mvirw st
= do m <- takeMVar (mv st)
let i2 = (i m)
m2 = StateM (i2+1) (s m) (b m)
putMVar (mv st) m2
return ()
mvsr :: State -> IO ()
mvsr st
= do m <- takeMVar (mv st)
let s2 = (s m)
putMVar (mv st) m
return ()
mvsrw :: State -> IO ()
mvsrw st
= do m <- takeMVar (mv st)
let s2 = (s m)
m2 = StateM (i m) (randomString s2) (b m)
putMVar (mv st) m2
return ()
mvbr :: State -> IO ()
mvbr st
= do m <- takeMVar (mv st)
let b2 = (b m)
putMVar (mv st) m
return ()
mvbrw :: State -> IO ()
mvbrw st
= do m <- takeMVar (mv st)
let b2 = (b m)
m2 = StateM (i m) (s m) (not b2)
putMVar (mv st) m2
return ()
forkIter :: Int -> IO () -> IO ()
forkIter n p
= iter n (do forkIO p
return ())
iter :: Int -> IO () -> IO ()
iter 0 _ = return ()
iter n f
= do f
iter (n-1) f
randomString :: String -> String
randomString str
= case str of
"1" -> "2"
"2" -> "3"
"3" -> "1"
-------------------------------------------------------------------------------
--- $Id: Bench1.hs#4 2005/06/14 01:10:17 REDMOND\\satnams $
-------------------------------------------------------------------------------
module Main
where
import System.Time
import System.Random
import Control.Concurrent
systolicFilter :: [Double] -> [Double] -> [Double]
systolicFilter weights inputStream
= [sum [a*x | (a,x) <- zip weights xs]
| xs <- staggerBy (length weights) inputStream]
staggerBy n list | length list <= n = []
staggerBy n list
= take n list : staggerBy n (tail list)
applyFilter rgen resultMV
= do let weights = take 10 (randomRs (0.0, 10.0) rgen)
let inputStream = take 2000 (randomRs (0.0, 100.0) rgen)
let result = last (systolicFilter weights inputStream)
putMVar resultMV result
rgens 0 _ = []
rgens n rgen
= nextGen : rgens (n-1) nextGen
where
(_, nextGen) = split rgen
instances = 1000
main
= do putStrLn "SMP Systolic Filter Benchmarks"
dones <- sequence (replicate instances newEmptyMVar)
rgen1 <- getStdGen
let gens = rgens instances rgen1
t1 <- getClockTime
sequence [forkIO (applyFilter (gens!!i) (dones!!i)) |
i <- [0..instances-1]]
rs <- sequence [takeMVar (dones!!i) | i <- [0..instances-1]]
sequence [putStrLn (show (rs!!i)) | i <- [0..instances-1]]
t2 <- getClockTime
putStrLn ("Time: " ++ (timeDiffToString (diffClockTimes t2 t1)))
\ No newline at end of file
module Main where
-- Test thread creation.
-- (from: Einar Wolfgang Karlsen <ewk@Informatik.Uni-Bremen.DE>)
-- This test is essentially single-threaded, there is no parallelism
-- available. It just tests how quickly we can create a new thread
-- and context switch to it, many times.
import Control.Concurrent
import System.Environment
main :: IO ()
main = do
[n] <- getArgs
mvar <- newEmptyMVar
let
spawner :: (IO () -> IO ThreadId) -> Int -> IO ()
spawner c 0 = putMVar mvar ()
spawner c n = do { c (spawner c (n-1)); return ()}
spawner forkIO (read n :: Int)
takeMVar mvar
putStr "done"
TOP = ../..
include $(TOP)/mk/boilerplate.mk
FAST_OPTS = 100000
NORM_OPTS = 1000000
SLOW_OPTS = 10000000
include $(TOP)/mk/target.mk
done
\ No newline at end of file
module Main where
-- a variant of thread001, this one creates N threads as fast as
-- possible. The threads all signal a single QSemN, which the
-- main thread waits for.
-- If we are unlucky, the program can take a *long* time. This is
-- because if a thread yields while holding the semaphore, it will
-- prevent all other threads from finishing, and we get into a
-- situation where there are a lot of blocked threads, and the number
-- of threads being created outnumbers those being retired. The run
-- queue has two threads at any one time: the main thread, busy
-- creating new threads, and a single thread that has been unblocked.
-- Each pass over the run queue creates a bunch of new threads which
-- will all immediately block, and unblocks a single thread. Having
-- two processors helps, because it means we can unblock threads more
-- quickly.
import Control.Concurrent
import System.Environment
main :: IO ()
main = do
[s] <- getArgs
let n = read s :: Int
sem <- newQSemN 0
let
spawner :: (IO () -> IO ThreadId) -> Int -> IO ()
spawner c 0 = return ()
spawner c n = do { c (signalQSemN sem 1); spawner c (n-1); }
spawner forkIO n
waitQSemN sem n
TOP = ../..
include $(TOP)/mk/boilerplate.mk
FAST_OPTS = 100000
NORM_OPTS = 1000000
SLOW_OPTS = 10000000
include $(TOP)/mk/target.mk
-- $Id: message-ghc-2.code,v 1.3 2005/09/17 04:36:26 bfulgham Exp $
-- The Great Computer Language Shootout
-- http://shootout.alioth.debian.org/
-- Contributed by Einar Karttunen
-- Modified by Simon Marlow
-- This is the shootout "cheap concurrency" benchmark, modified
-- slightly. Modification noted below (***) to add more concurrency
-- and make a speedup on multiple processors available.
-- Creates 500 threads arranged in a sequence where each takes a value
-- from the left, adds 1, and passes it to the right (via MVars).
-- N more threads pump zeros in at the left. A sub-thread
-- takes N values from the right and sums them.
--
import Control.Concurrent
import Control.Monad
import System
thread :: MVar Int -> MVar Int -> IO ()
thread inp out = do x <- takeMVar inp; putMVar out $! x+1; thread inp out
spawn cur n = do next <- newEmptyMVar
forkIO $ thread cur next
return next
main = do n <- getArgs >>= readIO.head
s <- newEmptyMVar
e <- foldM spawn s [1..500]
f <- newEmptyMVar
forkIO $ replicateM n (takeMVar e) >>= putMVar f . sum
replicateM n (forkIO $ putMVar s 0)
-- *** replicateM n (putMVar s 0)
takeMVar f
-- vim: ts=4 ft=haskell
TOP = ../..
include $(TOP)/mk/boilerplate.mk
FAST_OPTS = 5000
NORM_OPTS = 10000
SLOW_OPTS = 20000
include $(TOP)/mk/target.mk
-- $Id: message-ghc-2.code,v 1.3 2005/09/17 04:36:26 bfulgham Exp $
-- The Great Computer Language Shootout
-- http://shootout.alioth.debian.org/
-- Contributed by Einar Karttunen
-- Modified by Simon Marlow
-- A Modification of threads003, using explicit assigning of threads to CPUs
-- (assumes 2 CPUs). This version can go faster with -N2 than -N1.
--
-- NB. don't forget to run it with +RTS -qm, to disable automatic migration.
import Control.Concurrent
import Control.Monad
import System
import GHC.Conc (forkOnIO)
thread :: MVar Int -> MVar Int -> IO ()
thread inp out = do x <- takeMVar inp; putMVar out $! x+1; thread inp out
spawn cur n = do next <- newEmptyMVar
forkOnIO (if (n <= 1000) then 0 else 1) $ thread cur next
return next
main = do n <- getArgs >>= readIO.head
s <- newEmptyMVar
e <- foldM spawn s [1..2000]
f <- newEmptyMVar
forkOnIO 1 $ replicateM n (takeMVar e) >>= putMVar f . sum
replicateM n (putMVar s 0)
takeMVar f
-- vim: ts=4 ft=haskell
TOP = ../..
include $(TOP)/mk/boilerplate.mk
FAST_OPTS = 5000
NORM_OPTS = 10000
SLOW_OPTS = 20000
include $(TOP)/mk/target.mk
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment