Commit 3f1d50e4 authored by tibbe's avatar tibbe

Add the fasta shootout benchmark

The benchmark is not enabled by default as the runstdtest.prl script
doesn't find the automated stdout files on the first run, thus requiring
make to be run twice.
parent eb9f4a3f
......@@ -53,6 +53,7 @@ real/veritas/veritas
shootout/binary-trees/binary-trees
shootout/fannkuch-redux/fannkuch-redux
shootout/fasta/fasta
shootout/n-body/n-body
shootout/pidigits/pidigits
shootout/spectral-norm/spectral-norm
......
{- The Computer Language Benchmarks Game
http://benchmarkgame.alioth.debian.org/
contributed by Bryan O'Sullivan
-}
import Control.Monad
import Data.ByteString.Unsafe
import Foreign.Ptr
import Foreign.Storable
import System.Environment
import qualified Data.ByteString.Char8 as B
import qualified Data.ByteString.Lazy.Char8 as L
main = do
n <- getArgs >>= readIO.head
writeAlu ">ONE Homo sapiens alu" (L.take (fromIntegral n*2) (L.cycle alu))
make ">TWO IUB ambiguity codes" (n*3) iub 42 >>=
void . make ">THREE Homo sapiens frequency" (n*5) homosapiens
writeAlu name s0 = B.putStrLn name >> go s0
where go s = L.putStrLn h >> unless (L.null t) (go t)
where (h,t) = L.splitAt 60 s
make name n0 tbl seed0 = do
B.putStrLn name
let modulus = 139968
fill ((c,p):cps) j =
let !k = min modulus (floor (fromIntegral modulus * (p::Float) + 1))
in B.replicate (k - j) c : fill cps k
fill _ _ = []
lookupTable = B.concat $ fill (scanl1 (\(_,p) (c,q) -> (c,p+q)) tbl) 0
line = B.replicate 60 '\0'
unsafeUseAsCString line $ \ptr -> do
let make' n !i seed
| n > (0::Int) = do
let newseed = rem (seed * 3877 + 29573) modulus
plusPtr ptr i `poke` unsafeIndex lookupTable newseed
if i+1 >= 60
then puts line 60 >> make' (n-1) 0 newseed
else make' (n-1) (i+1) newseed
| otherwise = when (i > 0) (puts line i) >> return seed
make' n0 0 seed0
alu = "GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGG\
\TCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGG\
\CGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGC\
\GGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAA"
iub = [('a',0.27),('c',0.12),('g',0.12),('t',0.27),('B',0.02)
,('D',0.02),('H',0.02),('K',0.02),('M',0.02),('N',0.02)
,('R',0.02),('S',0.02),('V',0.02),('W',0.02),('Y',0.02)]
homosapiens = [('a',0.3029549426680),('c',0.1979883004921)
,('g',0.1975473066391),('t',0.3015094502008)]
puts bs n = B.putStrLn (B.take n bs)
TOP = ../..
include $(TOP)/mk/boilerplate.mk
# Override default SRCS; the default is all source files, but
# we don't want to include fasta-c.c
SRCS = Main.hs
FAST_OPTS = 250000
NORM_OPTS = 2500000
SLOW_OPTS = 25000000 # official shootout setting
# The benchmark game also uses -fllvm, which we can't since it might
# not be available on the developer's machine.
HC_OPTS += -O2 -XBangPatterns -XOverloadedStrings -package bytestring
#------------------------------------------------------------------
# Create output to validate against
# FIXME: You have to run make twice for the runstdtest.prl script to
# find the various stdout files correctly.
fasta-c : fasta-c.o
gcc $< -o $@
fasta.faststdout : fasta-c
./fasta-c $(FAST_OPTS) > $@
fasta.stdout : fasta-c
./fasta-c $(NORM_OPTS) > $@
fasta.slowstdout : fasta-c
./fasta-c $(SLOW_OPTS) > $@
STDOUT_FILES = fasta.faststdout fasta.stdout fasta.slowstdout
all boot :: $(STDOUT_FILES)
include $(TOP)/mk/target.mk
/* The Computer Language Benchmarks Game
* http://benchmarksgame.alioth.debian.org/
*
* contributed by Mr Ledrug
*/
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <unistd.h>
typedef struct {
float p;
char c;
} amino;
amino iub[] = {
{ 0.27, 'a' }, { 0.12, 'c' }, { 0.12, 'g' },
{ 0.27, 't' }, { 0.02, 'B' }, { 0.02, 'D' },
{ 0.02, 'H' }, { 0.02, 'K' }, { 0.02, 'M' },
{ 0.02, 'N' }, { 0.02, 'R' }, { 0.02, 'S' },
{ 0.02, 'V' }, { 0.02, 'W' }, { 0.02, 'Y' },
{ 0, 0 }
};
amino homosapiens[] = {
{0.3029549426680, 'a'},
{0.1979883004921, 'c'},
{0.1975473066391, 'g'},
{0.3015094502008, 't'},
{0, 0}
};
#define RMAX 139968U
#define RA 3877U
#define RC 29573U
#define WIDTH 60
#define LENGTH(a) (sizeof(a)/sizeof(a[0]))
inline void str_write(char *s) {
write(fileno(stdout), s, strlen(s));
}
void str_repeat(char *s, int outlen) {
int len = strlen(s) * (1 + WIDTH);
outlen += outlen / WIDTH;
char *ss = s;
char *buf = malloc(len);
int pos = 0;
while (pos < len) {
if (!*ss) ss = s;
buf[pos++] = *ss++;
if (pos >= len) break;
if (pos % (WIDTH + 1) == WIDTH)
buf[pos++] = '\n';
}
int fd = fileno(stdout);
int l = 0;
while (outlen > 0) {
l = outlen > len ? len : outlen;
write(fd, buf, l);
outlen -= len;
}
if (buf[l-1] != '\n') str_write("\n");
free(buf);
}
static char *alu =
"GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGG"
"GAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGA"
"CCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAAT"
"ACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCA"
"GCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGG"
"AGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCC"
"AGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAA";
inline unsigned int rnd(void) {
static unsigned rseed = 42;
return rseed = (rseed * RA + RC) % RMAX;
}
char lookup[RMAX];
void rand_fasta(amino *s, size_t outlen) {
int fd = fileno(stdout);
char buf[WIDTH+1];
int i, j, k;
float sum = 0;
for (i = j = k = 0; s[i].p && k < RMAX; i++) {
if (s[i].p) {
sum += s[i].p;
k = RMAX * sum + 1;
}
else
k = RMAX;
if (k > RMAX) k = RMAX;
memset(lookup + j, s[i].c, k - j);
j = k;
}
i = 0;
buf[WIDTH] = '\n';
while (outlen--) {
buf[i++] = lookup[rnd()];
if (i == WIDTH) {
write(fd, buf, WIDTH + 1);
i = 0;
}
}
if (i) {
buf[i] = '\n';
write(fd, buf, i + 1);
}
}
int main(int argc, char **argv) {
int n;
if (argc < 2 || (n = atoi(argv[1])) <= 0) {
printf("usage: %s length\n", argv[0]);
return 0;
}
str_write(">ONE Homo sapiens alu\n");
str_repeat(alu, n * 2);
str_write(">TWO IUB ambiguity codes\n");
rand_fasta(iub, n * 3);
str_write(">THREE Homo sapiens frequency\n");
rand_fasta(homosapiens, n * 5);
return 0;
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment