diff --git a/.gitignore b/.gitignore index a778995f49b51c4c5a25b4e6e9f15ed03c97bbcb..c3908ab20f7d36b50ca8f31668205151d88f590e 100644 --- a/.gitignore +++ b/.gitignore @@ -53,6 +53,7 @@ real/veritas/veritas shootout/binary-trees/binary-trees shootout/fannkuch-redux/fannkuch-redux +shootout/fasta/fasta shootout/n-body/n-body shootout/pidigits/pidigits shootout/spectral-norm/spectral-norm diff --git a/shootout/fasta/Main.hs b/shootout/fasta/Main.hs new file mode 100644 index 0000000000000000000000000000000000000000..4bd08492f6c9d7f68b391bf1543688466495e0b1 --- /dev/null +++ b/shootout/fasta/Main.hs @@ -0,0 +1,58 @@ +{- The Computer Language Benchmarks Game + + http://benchmarkgame.alioth.debian.org/ + + contributed by Bryan O'Sullivan +-} + +import Control.Monad +import Data.ByteString.Unsafe +import Foreign.Ptr +import Foreign.Storable +import System.Environment +import qualified Data.ByteString.Char8 as B +import qualified Data.ByteString.Lazy.Char8 as L + +main = do + n <- getArgs >>= readIO.head + writeAlu ">ONE Homo sapiens alu" (L.take (fromIntegral n*2) (L.cycle alu)) + make ">TWO IUB ambiguity codes" (n*3) iub 42 >>= + void . make ">THREE Homo sapiens frequency" (n*5) homosapiens + +writeAlu name s0 = B.putStrLn name >> go s0 + where go s = L.putStrLn h >> unless (L.null t) (go t) + where (h,t) = L.splitAt 60 s + +make name n0 tbl seed0 = do + B.putStrLn name + let modulus = 139968 + fill ((c,p):cps) j = + let !k = min modulus (floor (fromIntegral modulus * (p::Float) + 1)) + in B.replicate (k - j) c : fill cps k + fill _ _ = [] + lookupTable = B.concat $ fill (scanl1 (\(_,p) (c,q) -> (c,p+q)) tbl) 0 + line = B.replicate 60 '\0' + unsafeUseAsCString line $ \ptr -> do + let make' n !i seed + | n > (0::Int) = do + let newseed = rem (seed * 3877 + 29573) modulus + plusPtr ptr i `poke` unsafeIndex lookupTable newseed + if i+1 >= 60 + then puts line 60 >> make' (n-1) 0 newseed + else make' (n-1) (i+1) newseed + | otherwise = when (i > 0) (puts line i) >> return seed + make' n0 0 seed0 + +alu = "GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGG\ + \TCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGG\ + \CGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGC\ + \GGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAA" + +iub = [('a',0.27),('c',0.12),('g',0.12),('t',0.27),('B',0.02) + ,('D',0.02),('H',0.02),('K',0.02),('M',0.02),('N',0.02) + ,('R',0.02),('S',0.02),('V',0.02),('W',0.02),('Y',0.02)] + +homosapiens = [('a',0.3029549426680),('c',0.1979883004921) + ,('g',0.1975473066391),('t',0.3015094502008)] + +puts bs n = B.putStrLn (B.take n bs) diff --git a/shootout/fasta/Makefile b/shootout/fasta/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..fe286524387d28a001c3d2bd0d91c25b23fa10b5 --- /dev/null +++ b/shootout/fasta/Makefile @@ -0,0 +1,38 @@ +TOP = ../.. +include $(TOP)/mk/boilerplate.mk + +# Override default SRCS; the default is all source files, but +# we don't want to include fasta-c.c +SRCS = Main.hs + +FAST_OPTS = 250000 +NORM_OPTS = 2500000 +SLOW_OPTS = 25000000 # official shootout setting + +# The benchmark game also uses -fllvm, which we can't since it might +# not be available on the developer's machine. +HC_OPTS += -O2 -XBangPatterns -XOverloadedStrings -package bytestring + +#------------------------------------------------------------------ +# Create output to validate against + +# FIXME: You have to run make twice for the runstdtest.prl script to +# find the various stdout files correctly. + +fasta-c : fasta-c.o + gcc $< -o $@ + +fasta.faststdout : fasta-c + ./fasta-c $(FAST_OPTS) > $@ + +fasta.stdout : fasta-c + ./fasta-c $(NORM_OPTS) > $@ + +fasta.slowstdout : fasta-c + ./fasta-c $(SLOW_OPTS) > $@ + +STDOUT_FILES = fasta.faststdout fasta.stdout fasta.slowstdout + +all boot :: $(STDOUT_FILES) + +include $(TOP)/mk/target.mk diff --git a/shootout/fasta/fasta-c.c b/shootout/fasta/fasta-c.c new file mode 100644 index 0000000000000000000000000000000000000000..5779316face9fc5c917e6f92c778fd7b4fe5b3b1 --- /dev/null +++ b/shootout/fasta/fasta-c.c @@ -0,0 +1,137 @@ +/* The Computer Language Benchmarks Game + * http://benchmarksgame.alioth.debian.org/ + * + * contributed by Mr Ledrug + */ + +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <unistd.h> + +typedef struct { + float p; + char c; +} amino; + +amino iub[] = { + { 0.27, 'a' }, { 0.12, 'c' }, { 0.12, 'g' }, + { 0.27, 't' }, { 0.02, 'B' }, { 0.02, 'D' }, + { 0.02, 'H' }, { 0.02, 'K' }, { 0.02, 'M' }, + { 0.02, 'N' }, { 0.02, 'R' }, { 0.02, 'S' }, + { 0.02, 'V' }, { 0.02, 'W' }, { 0.02, 'Y' }, + { 0, 0 } +}; + +amino homosapiens[] = { + {0.3029549426680, 'a'}, + {0.1979883004921, 'c'}, + {0.1975473066391, 'g'}, + {0.3015094502008, 't'}, + {0, 0} +}; + +#define RMAX 139968U +#define RA 3877U +#define RC 29573U +#define WIDTH 60 +#define LENGTH(a) (sizeof(a)/sizeof(a[0])) + +inline void str_write(char *s) { + write(fileno(stdout), s, strlen(s)); +} + +void str_repeat(char *s, int outlen) { + int len = strlen(s) * (1 + WIDTH); + outlen += outlen / WIDTH; + + char *ss = s; + char *buf = malloc(len); + int pos = 0; + + while (pos < len) { + if (!*ss) ss = s; + buf[pos++] = *ss++; + if (pos >= len) break; + if (pos % (WIDTH + 1) == WIDTH) + buf[pos++] = '\n'; + } + + int fd = fileno(stdout); + int l = 0; + while (outlen > 0) { + l = outlen > len ? len : outlen; + write(fd, buf, l); + outlen -= len; + } + if (buf[l-1] != '\n') str_write("\n"); + + free(buf); +} + +static char *alu = + "GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGG" + "GAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGA" + "CCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAAT" + "ACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCA" + "GCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGG" + "AGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCC" + "AGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAA"; + +inline unsigned int rnd(void) { + static unsigned rseed = 42; + return rseed = (rseed * RA + RC) % RMAX; +} + +char lookup[RMAX]; +void rand_fasta(amino *s, size_t outlen) { + int fd = fileno(stdout); + char buf[WIDTH+1]; + + int i, j, k; + float sum = 0; + for (i = j = k = 0; s[i].p && k < RMAX; i++) { + if (s[i].p) { + sum += s[i].p; + k = RMAX * sum + 1; + } + else + k = RMAX; + if (k > RMAX) k = RMAX; + memset(lookup + j, s[i].c, k - j); + j = k; + } + + i = 0; + buf[WIDTH] = '\n'; + while (outlen--) { + buf[i++] = lookup[rnd()]; + if (i == WIDTH) { + write(fd, buf, WIDTH + 1); + i = 0; + } + } + if (i) { + buf[i] = '\n'; + write(fd, buf, i + 1); + } +} + +int main(int argc, char **argv) { + int n; + if (argc < 2 || (n = atoi(argv[1])) <= 0) { + printf("usage: %s length\n", argv[0]); + return 0; + } + + str_write(">ONE Homo sapiens alu\n"); + str_repeat(alu, n * 2); + + str_write(">TWO IUB ambiguity codes\n"); + rand_fasta(iub, n * 3); + + str_write(">THREE Homo sapiens frequency\n"); + rand_fasta(homosapiens, n * 5); + + return 0; +}