Util.lhs 36.3 KB
Newer Older
1
%
Simon Marlow's avatar
Simon Marlow committed
2
% (c) The University of Glasgow 2006
3 4 5
%

\begin{code}
6
{-# LANGUAGE CPP #-}
Ian Lynagh's avatar
Ian Lynagh committed
7

batterseapower's avatar
batterseapower committed
8
-- | Highly random utility functions
dterei's avatar
dterei committed
9
--
10
module Util (
batterseapower's avatar
batterseapower committed
11
        -- * Flags dependent on the compiler build
12
        ghciSupported, debugIsOn, ncgDebugIsOn,
13
        ghciTablesNextToCode,
14
        isWindowsHost, isDarwinHost,
sof's avatar
sof committed
15

batterseapower's avatar
batterseapower committed
16
        -- * General list processing
Ian Lynagh's avatar
Ian Lynagh committed
17
        zipEqual, zipWithEqual, zipWith3Equal, zipWith4Equal,
18
        zipLazy, stretchZipWith, zipWithAndUnzip,
dterei's avatar
dterei committed
19

batterseapower's avatar
batterseapower committed
20
        unzipWith,
dterei's avatar
dterei committed
21

22
        mapFst, mapSnd, chkAppend,
23
        mapAndUnzip, mapAndUnzip3, mapAccumL2,
Ian Lynagh's avatar
Ian Lynagh committed
24
        nOfThem, filterOut, partitionWith, splitEithers,
dterei's avatar
dterei committed
25

batterseapower's avatar
batterseapower committed
26
        foldl1', foldl2, count, all2,
27

Ian Lynagh's avatar
Ian Lynagh committed
28 29 30 31 32 33 34
        lengthExceeds, lengthIs, lengthAtLeast,
        listLengthCmp, atLength, equalLength, compareLength,

        isSingleton, only, singleton,
        notNull, snocView,

        isIn, isn'tIn,
35

36 37
        -- * Tuples
        fstOf3, sndOf3, thirdOf3,
batterseapower's avatar
batterseapower committed
38
        firstM, first3M,
39
        third3,
40
        uncurry3,
41

batterseapower's avatar
batterseapower committed
42 43
        -- * List operations controlled by another list
        takeList, dropList, splitAtList, split,
44
        dropTail,
batterseapower's avatar
batterseapower committed
45 46

        -- * For loop
Ian Lynagh's avatar
Ian Lynagh committed
47
        nTimes,
sof's avatar
sof committed
48

batterseapower's avatar
batterseapower committed
49
        -- * Sorting
50
        sortWith, minWith, nubSort,
51

batterseapower's avatar
batterseapower committed
52
        -- * Comparisons
53
        isEqual, eqListBy, eqMaybeBy,
54
        thenCmp, cmpList,
Ian Lynagh's avatar
Ian Lynagh committed
55
        removeSpaces,
dterei's avatar
dterei committed
56

57
        -- * Edit distance
58
        fuzzyMatch, fuzzyLookup,
59

batterseapower's avatar
batterseapower committed
60 61
        -- * Transitive closures
        transitiveClosure,
62

batterseapower's avatar
batterseapower committed
63 64
        -- * Strictness
        seqList,
65

batterseapower's avatar
batterseapower committed
66
        -- * Module names
Ian Lynagh's avatar
Ian Lynagh committed
67
        looksLikeModuleName,
68

batterseapower's avatar
batterseapower committed
69
        -- * Argument processing
Ian Lynagh's avatar
Ian Lynagh committed
70
        getCmd, toCmdArgs, toArgs,
71

batterseapower's avatar
batterseapower committed
72
        -- * Floating point
Ian Lynagh's avatar
Ian Lynagh committed
73
        readRational,
74

75
        -- * read helpers
76
        maybeRead, maybeReadFuzzy,
77

batterseapower's avatar
batterseapower committed
78
        -- * IO-ish utilities
Ian Lynagh's avatar
Ian Lynagh committed
79
        doesDirNameExist,
80
        getModificationUTCTime,
Ian Lynagh's avatar
Ian Lynagh committed
81
        modificationTimeIfExists,
82

83
        global, consIORef, globalM,
batterseapower's avatar
batterseapower committed
84 85

        -- * Filenames and paths
Ian Lynagh's avatar
Ian Lynagh committed
86 87 88 89
        Suffix,
        splitLongestPrefix,
        escapeSpaces,
        parseSearchPath,
90
        Direction(..), reslash,
91
        makeRelativeTo,
92 93

        -- * Utils for defining Data instances
94 95 96
        abstractConstr, abstractDataType, mkNoRepType,

        -- * Utils for printing C code
97 98 99 100
        charToC,

        -- * Hashing
        hashString,
101 102
    ) where

103 104
#include "HsVersions.h"

105
import Exception
106
import Panic
107

108
import Data.Data
Thomas Schilling's avatar
Thomas Schilling committed
109
import Data.IORef       ( IORef, newIORef, atomicModifyIORef )
Ian Lynagh's avatar
Ian Lynagh committed
110
import System.IO.Unsafe ( unsafePerformIO )
111
import Data.List        hiding (group)
112

113
#ifdef DEBUG
114
import FastTypes
115
#endif
sof's avatar
sof committed
116

117
import Control.Monad    ( liftM )
118
import System.IO.Error as IO ( isDoesNotExistError )
119
import System.Directory ( doesDirectoryExist, getModificationTime )
120
import System.FilePath
121

122
import Data.Char        ( isUpper, isAlphaNum, isSpace, chr, ord, isDigit )
123
import Data.Int
Ian Lynagh's avatar
Ian Lynagh committed
124
import Data.Ratio       ( (%) )
125 126 127 128
import Data.Ord         ( comparing )
import Data.Bits
import Data.Word
import qualified Data.IntMap as IM
129
import qualified Data.Set as Set
130

131 132
import Data.Time

133
infixr 9 `thenCmp`
134 135
\end{code}

136 137
%************************************************************************
%*                                                                      *
138
\subsection{Is DEBUG on, are we on Windows, etc?}
139 140 141
%*                                                                      *
%************************************************************************

142 143 144 145 146 147 148 149 150
These booleans are global constants, set by CPP flags.  They allow us to
recompile a single module (this one) to change whether or not debug output
appears. They sometimes let us avoid even running CPP elsewhere.

It's important that the flags are literal constants (True/False). Then,
with -0, tests of the flags in other modules will simplify to the correct
branch of the conditional, thereby dropping debug code altogether when
the flags are off.

151
\begin{code}
152 153 154 155 156 157 158
ghciSupported :: Bool
#ifdef GHCI
ghciSupported = True
#else
ghciSupported = False
#endif

159 160 161 162 163 164
debugIsOn :: Bool
#ifdef DEBUG
debugIsOn = True
#else
debugIsOn = False
#endif
165

166 167 168 169 170 171 172
ncgDebugIsOn :: Bool
#ifdef NCG_DEBUG
ncgDebugIsOn = True
#else
ncgDebugIsOn = False
#endif

Ian Lynagh's avatar
Ian Lynagh committed
173 174 175 176 177 178 179
ghciTablesNextToCode :: Bool
#ifdef GHCI_TABLES_NEXT_TO_CODE
ghciTablesNextToCode = True
#else
ghciTablesNextToCode = False
#endif

180 181 182 183 184 185
isWindowsHost :: Bool
#ifdef mingw32_HOST_OS
isWindowsHost = True
#else
isWindowsHost = False
#endif
Ian Lynagh's avatar
Ian Lynagh committed
186

187 188 189
isDarwinHost :: Bool
#ifdef darwin_HOST_OS
isDarwinHost = True
Ian Lynagh's avatar
Ian Lynagh committed
190
#else
191
isDarwinHost = False
Ian Lynagh's avatar
Ian Lynagh committed
192
#endif
193 194
\end{code}

195
%************************************************************************
Ian Lynagh's avatar
Ian Lynagh committed
196
%*                                                                      *
197
\subsection{A for loop}
Ian Lynagh's avatar
Ian Lynagh committed
198
%*                                                                      *
199 200 201
%************************************************************************

\begin{code}
batterseapower's avatar
batterseapower committed
202
-- | Compose a function with itself n times.  (nth rather than twice)
203 204 205 206 207 208
nTimes :: Int -> (a -> a) -> (a -> a)
nTimes 0 _ = id
nTimes 1 f = f
nTimes n f = f . nTimes (n-1) f
\end{code}

209
\begin{code}
dterei's avatar
dterei committed
210 211 212
fstOf3   :: (a,b,c) -> a
sndOf3   :: (a,b,c) -> b
thirdOf3 :: (a,b,c) -> c
213 214 215
fstOf3      (a,_,_) =  a
sndOf3      (_,b,_) =  b
thirdOf3    (_,_,c) =  c
216

217 218 219
third3 :: (c -> d) -> (a, b, c) -> (a, b, d)
third3 f (a, b, c) = (a, b, f c)

220 221
uncurry3 :: (a -> b -> c -> d) -> (a, b, c) -> d
uncurry3 f (a, b, c) = f a b c
222 223
\end{code}

batterseapower's avatar
batterseapower committed
224 225 226 227 228 229 230 231
\begin{code}
firstM :: Monad m => (a -> m c) -> (a, b) -> m (c, b)
firstM f (x, y) = liftM (\x' -> (x', y)) (f x)

first3M :: Monad m => (a -> m d) -> (a, b, c) -> m (d, b, c)
first3M f (x, y, z) = liftM (\x' -> (x', y, z)) (f x)
\end{code}

232
%************************************************************************
Ian Lynagh's avatar
Ian Lynagh committed
233
%*                                                                      *
234
\subsection[Utils-lists]{General list processing}
Ian Lynagh's avatar
Ian Lynagh committed
235
%*                                                                      *
236 237
%************************************************************************

238 239
\begin{code}
filterOut :: (a->Bool) -> [a] -> [a]
batterseapower's avatar
batterseapower committed
240
-- ^ Like filter, only it reverses the sense of the test
Ian Lynagh's avatar
Ian Lynagh committed
241
filterOut _ [] = []
242
filterOut p (x:xs) | p x       = filterOut p xs
Ian Lynagh's avatar
Ian Lynagh committed
243
                   | otherwise = x : filterOut p xs
244 245

partitionWith :: (a -> Either b c) -> [a] -> ([b], [c])
batterseapower's avatar
batterseapower committed
246
-- ^ Uses a function to determine which of two output lists an input element should join
Ian Lynagh's avatar
Ian Lynagh committed
247
partitionWith _ [] = ([],[])
248
partitionWith f (x:xs) = case f x of
Ian Lynagh's avatar
Ian Lynagh committed
249 250 251
                         Left  b -> (b:bs, cs)
                         Right c -> (bs, c:cs)
    where (bs,cs) = partitionWith f xs
252

253
splitEithers :: [Either a b] -> ([a], [b])
batterseapower's avatar
batterseapower committed
254
-- ^ Teases a list of 'Either's apart into two lists
255 256
splitEithers [] = ([],[])
splitEithers (e : es) = case e of
Ian Lynagh's avatar
Ian Lynagh committed
257 258 259
                        Left x -> (x:xs, ys)
                        Right y -> (xs, y:ys)
    where (xs,ys) = splitEithers es
260 261 262 263 264 265 266

chkAppend :: [a] -> [a] -> [a]
-- Checks for the second arguemnt being empty
-- Used in situations where that situation is common
chkAppend xs ys 
  | null ys   = xs
  | otherwise = xs ++ ys
267 268
\end{code}

269 270 271
A paranoid @zip@ (and some @zipWith@ friends) that checks the lists
are of equal length.  Alastair Reid thinks this should only happen if
DEBUGging on; hey, why not?
272 273

\begin{code}
Ian Lynagh's avatar
Ian Lynagh committed
274 275 276 277
zipEqual        :: String -> [a] -> [b] -> [(a,b)]
zipWithEqual    :: String -> (a->b->c) -> [a]->[b]->[c]
zipWith3Equal   :: String -> (a->b->c->d) -> [a]->[b]->[c]->[d]
zipWith4Equal   :: String -> (a->b->c->d->e) -> [a]->[b]->[c]->[d]->[e]
278 279

#ifndef DEBUG
280 281 282 283
zipEqual      _ = zip
zipWithEqual  _ = zipWith
zipWith3Equal _ = zipWith3
zipWith4Equal _ = zipWith4
284
#else
285
zipEqual _   []     []     = []
286
zipEqual msg (a:as) (b:bs) = (a,b) : zipEqual msg as bs
287
zipEqual msg _      _      = panic ("zipEqual: unequal lists:"++msg)
288 289

zipWithEqual msg z (a:as) (b:bs)=  z a b : zipWithEqual msg z as bs
290
zipWithEqual _   _ [] []        =  []
Ian Lynagh's avatar
Ian Lynagh committed
291
zipWithEqual msg _ _ _          =  panic ("zipWithEqual: unequal lists:"++msg)
292 293

zipWith3Equal msg z (a:as) (b:bs) (c:cs)
Ian Lynagh's avatar
Ian Lynagh committed
294
                                =  z a b c : zipWith3Equal msg z as bs cs
295
zipWith3Equal _   _ [] []  []   =  []
Ian Lynagh's avatar
Ian Lynagh committed
296
zipWith3Equal msg _ _  _   _    =  panic ("zipWith3Equal: unequal lists:"++msg)
297 298

zipWith4Equal msg z (a:as) (b:bs) (c:cs) (d:ds)
Ian Lynagh's avatar
Ian Lynagh committed
299
                                =  z a b c d : zipWith4Equal msg z as bs cs ds
300
zipWith4Equal _   _ [] [] [] [] =  []
Ian Lynagh's avatar
Ian Lynagh committed
301
zipWith4Equal msg _ _  _  _  _  =  panic ("zipWith4Equal: unequal lists:"++msg)
302 303 304
#endif
\end{code}

305
\begin{code}
batterseapower's avatar
batterseapower committed
306
-- | 'zipLazy' is a kind of 'zip' that is lazy in the second list (observe the ~)
307
zipLazy :: [a] -> [b] -> [(a,b)]
Ian Lynagh's avatar
Ian Lynagh committed
308
zipLazy []     _       = []
Ian Lynagh's avatar
Ian Lynagh committed
309
zipLazy (x:xs) ~(y:ys) = (x,y) : zipLazy xs ys
310 311
\end{code}

312 313

\begin{code}
314
stretchZipWith :: (a -> Bool) -> b -> (a->b->c) -> [a] -> [b] -> [c]
batterseapower's avatar
batterseapower committed
315 316
-- ^ @stretchZipWith p z f xs ys@ stretches @ys@ by inserting @z@ in
-- the places where @p@ returns @True@
317

Ian Lynagh's avatar
Ian Lynagh committed
318
stretchZipWith _ _ _ []     _ = []
319 320 321
stretchZipWith p z f (x:xs) ys
  | p x       = f x z : stretchZipWith p z f xs ys
  | otherwise = case ys of
Ian Lynagh's avatar
Ian Lynagh committed
322 323
                []     -> []
                (y:ys) -> f x y : stretchZipWith p z f xs ys
324 325 326
\end{code}


327
\begin{code}
328 329 330 331 332 333
mapFst :: (a->c) -> [(a,b)] -> [(c,b)]
mapSnd :: (b->c) -> [(a,b)] -> [(a,c)]

mapFst f xys = [(f x, y) | (x,y) <- xys]
mapSnd f xys = [(x, f y) | (x,y) <- xys]

334 335
mapAndUnzip :: (a -> (b, c)) -> [a] -> ([b], [c])

Ian Lynagh's avatar
Ian Lynagh committed
336
mapAndUnzip _ [] = ([], [])
337
mapAndUnzip f (x:xs)
Ian Lynagh's avatar
Ian Lynagh committed
338 339
  = let (r1,  r2)  = f x
        (rs1, rs2) = mapAndUnzip f xs
340 341
    in
    (r1:rs1, r2:rs2)
342 343 344

mapAndUnzip3 :: (a -> (b, c, d)) -> [a] -> ([b], [c], [d])

Ian Lynagh's avatar
Ian Lynagh committed
345
mapAndUnzip3 _ [] = ([], [], [])
346
mapAndUnzip3 f (x:xs)
Ian Lynagh's avatar
Ian Lynagh committed
347 348
  = let (r1,  r2,  r3)  = f x
        (rs1, rs2, rs3) = mapAndUnzip3 f xs
349 350
    in
    (r1:rs1, r2:rs2, r3:rs3)
351

352 353 354 355 356 357 358 359
zipWithAndUnzip :: (a -> b -> (c,d)) -> [a] -> [b] -> ([c],[d])
zipWithAndUnzip f (a:as) (b:bs)
  = let (r1,  r2)  = f a b
        (rs1, rs2) = zipWithAndUnzip f as bs
    in
    (r1:rs1, r2:rs2)
zipWithAndUnzip _ _ _ = ([],[])

360 361 362 363 364
mapAccumL2 :: (s1 -> s2 -> a -> (s1, s2, b)) -> s1 -> s2 -> [a] -> (s1, s2, [b])
mapAccumL2 f s1 s2 xs = (s1', s2', ys)
  where ((s1', s2'), ys) = mapAccumL (\(s1, s2) x -> case f s1 s2 x of
                                                       (s1', s2', y) -> ((s1', s2'), y))
                                     (s1, s2) xs
365 366
\end{code}

367 368
\begin{code}
nOfThem :: Int -> a -> [a]
sof's avatar
sof committed
369
nOfThem n thing = replicate n thing
370

batterseapower's avatar
batterseapower committed
371
-- | @atLength atLen atEnd ls n@ unravels list @ls@ to position @n@. Precisely:
sof's avatar
sof committed
372
--
batterseapower's avatar
batterseapower committed
373
-- @
sof's avatar
sof committed
374 375 376 377
--  atLength atLenPred atEndPred ls n
--   | n < 0         = atLenPred n
--   | length ls < n = atEndPred (n - length ls)
--   | otherwise     = atLenPred (drop n ls)
batterseapower's avatar
batterseapower committed
378
-- @
sof's avatar
sof committed
379 380 381 382 383
atLength :: ([a] -> b)
         -> (Int -> b)
         -> [a]
         -> Int
         -> b
Ian Lynagh's avatar
Ian Lynagh committed
384 385
atLength atLenPred atEndPred ls n
  | n < 0     = atEndPred n
sof's avatar
sof committed
386 387 388 389 390 391
  | otherwise = go n ls
  where
    go n [] = atEndPred n
    go 0 ls = atLenPred ls
    go n (_:xs) = go (n-1) xs

batterseapower's avatar
batterseapower committed
392 393
-- Some special cases of atLength:

394
lengthExceeds :: [a] -> Int -> Bool
batterseapower's avatar
batterseapower committed
395
-- ^ > (lengthExceeds xs n) = (length xs > n)
sof's avatar
sof committed
396
lengthExceeds = atLength notNull (const False)
sof's avatar
sof committed
397 398

lengthAtLeast :: [a] -> Int -> Bool
sof's avatar
sof committed
399
lengthAtLeast = atLength notNull (== 0)
sof's avatar
sof committed
400 401 402 403

lengthIs :: [a] -> Int -> Bool
lengthIs = atLength null (==0)

Ian Lynagh's avatar
Ian Lynagh committed
404 405
listLengthCmp :: [a] -> Int -> Ordering
listLengthCmp = atLength atLen atEnd
sof's avatar
sof committed
406 407 408 409 410 411 412 413
 where
  atEnd 0      = EQ
  atEnd x
   | x > 0     = LT -- not yet seen 'n' elts, so list length is < n.
   | otherwise = GT

  atLen []     = EQ
  atLen _      = GT
414

415
equalLength :: [a] -> [b] -> Bool
Ian Lynagh's avatar
Ian Lynagh committed
416
equalLength []     []     = True
417
equalLength (_:xs) (_:ys) = equalLength xs ys
Ian Lynagh's avatar
Ian Lynagh committed
418
equalLength _      _      = False
419 420

compareLength :: [a] -> [b] -> Ordering
Ian Lynagh's avatar
Ian Lynagh committed
421
compareLength []     []     = EQ
422
compareLength (_:xs) (_:ys) = compareLength xs ys
Ian Lynagh's avatar
Ian Lynagh committed
423 424
compareLength []     _      = LT
compareLength _      []     = GT
425 426

----------------------------
427 428 429
singleton :: a -> [a]
singleton x = [x]

430
isSingleton :: [a] -> Bool
Ian Lynagh's avatar
Ian Lynagh committed
431 432
isSingleton [_] = True
isSingleton _   = False
433

sof's avatar
sof committed
434 435 436 437
notNull :: [a] -> Bool
notNull [] = False
notNull _  = True

438 439 440 441 442 443
only :: [a] -> a
#ifdef DEBUG
only [a] = a
#else
only (a:_) = a
#endif
Ian Lynagh's avatar
Ian Lynagh committed
444
only _ = panic "Util: only"
445 446 447
\end{code}

Debugging/specialising versions of \tr{elem} and \tr{notElem}
448

449
\begin{code}
Ian Lynagh's avatar
Ian Lynagh committed
450
isIn, isn'tIn :: Eq a => String -> a -> [a] -> Bool
451 452

# ifndef DEBUG
453 454
isIn    _msg x ys = x `elem` ys
isn'tIn _msg x ys = x `notElem` ys
455

ross's avatar
ross committed
456
# else /* DEBUG */
457
isIn msg x ys
458
  = elem100 (_ILIT(0)) x ys
459
  where
460 461
    elem100 _ _ []        = False
    elem100 i x (y:ys)
462
      | i ># _ILIT(100) = trace ("Over-long elem in " ++ msg)
463 464
                                (x `elem` (y:ys))
      | otherwise       = x == y || elem100 (i +# _ILIT(1)) x ys
465 466

isn'tIn msg x ys
467
  = notElem100 (_ILIT(0)) x ys
468
  where
469 470
    notElem100 _ _ [] =  True
    notElem100 i x (y:ys)
471
      | i ># _ILIT(100) = trace ("Over-long notElem in " ++ msg)
472 473
                                (x `notElem` (y:ys))
      | otherwise      =  x /= y && notElem100 (i +# _ILIT(1)) x ys
ross's avatar
ross committed
474
# endif /* DEBUG */
475 476 477
\end{code}

%************************************************************************
Ian Lynagh's avatar
Ian Lynagh committed
478
%*                                                                      *
Ian Lynagh's avatar
Ian Lynagh committed
479
\subsubsection{Sort utils}
Ian Lynagh's avatar
Ian Lynagh committed
480
%*                                                                      *
481 482 483
%************************************************************************

\begin{code}
484
sortWith :: Ord b => (a->b) -> [a] -> [a]
Ian Lynagh's avatar
Ian Lynagh committed
485
sortWith get_key xs = sortBy (comparing get_key) xs
486

487 488 489
minWith :: Ord b => (a -> b) -> [a] -> a
minWith get_key xs = ASSERT( not (null xs) )
                     head (sortWith get_key xs)
490 491 492

nubSort :: Ord a => [a] -> [a]
nubSort = Set.toAscList . Set.fromList
493 494 495
\end{code}

%************************************************************************
Ian Lynagh's avatar
Ian Lynagh committed
496
%*                                                                      *
497
\subsection[Utils-transitive-closure]{Transitive closure}
Ian Lynagh's avatar
Ian Lynagh committed
498
%*                                                                      *
499 500 501 502 503
%************************************************************************

This algorithm for transitive closure is straightforward, albeit quadratic.

\begin{code}
Ian Lynagh's avatar
Ian Lynagh committed
504 505 506 507
transitiveClosure :: (a -> [a])         -- Successor function
                  -> (a -> a -> Bool)   -- Equality predicate
                  -> [a]
                  -> [a]                -- The transitive closure
508 509

transitiveClosure succ eq xs
510
 = go [] xs
511
 where
Ian Lynagh's avatar
Ian Lynagh committed
512
   go done []                      = done
513
   go done (x:xs) | x `is_in` done = go done xs
Ian Lynagh's avatar
Ian Lynagh committed
514
                  | otherwise      = go (x:done) (succ x ++ xs)
515

Ian Lynagh's avatar
Ian Lynagh committed
516
   _ `is_in` []                 = False
517
   x `is_in` (y:ys) | eq x y    = True
Ian Lynagh's avatar
Ian Lynagh committed
518
                    | otherwise = x `is_in` ys
519 520 521
\end{code}

%************************************************************************
Ian Lynagh's avatar
Ian Lynagh committed
522
%*                                                                      *
523
\subsection[Utils-accum]{Accumulating}
Ian Lynagh's avatar
Ian Lynagh committed
524
%*                                                                      *
525 526
%************************************************************************

527 528 529 530
A combination of foldl with zip.  It works with equal length lists.

\begin{code}
foldl2 :: (acc -> a -> b -> acc) -> acc -> [a] -> [b] -> acc
Ian Lynagh's avatar
Ian Lynagh committed
531
foldl2 _ z [] [] = z
532
foldl2 k z (a:as) (b:bs) = foldl2 k (k z a b) as bs
Ian Lynagh's avatar
Ian Lynagh committed
533
foldl2 _ _ _      _      = panic "Util: foldl2"
534 535

all2 :: (a -> b -> Bool) -> [a] -> [b] -> Bool
Ian Lynagh's avatar
Ian Lynagh committed
536
-- True if the lists are the same length, and
537
-- all corresponding elements satisfy the predicate
Ian Lynagh's avatar
Ian Lynagh committed
538
all2 _ []     []     = True
539
all2 p (x:xs) (y:ys) = p x y && all2 p xs ys
Ian Lynagh's avatar
Ian Lynagh committed
540
all2 _ _      _      = False
541 542 543 544 545 546
\end{code}

Count the number of times a predicate is true

\begin{code}
count :: (a -> Bool) -> [a] -> Int
Ian Lynagh's avatar
Ian Lynagh committed
547
count _ [] = 0
548
count p (x:xs) | p x       = 1 + count p xs
Ian Lynagh's avatar
Ian Lynagh committed
549
               | otherwise = count p xs
550 551
\end{code}

sof's avatar
sof committed
552 553 554 555 556 557
@splitAt@, @take@, and @drop@ but with length of another
list giving the break-off point:

\begin{code}
takeList :: [b] -> [a] -> [a]
takeList [] _ = []
Ian Lynagh's avatar
Ian Lynagh committed
558
takeList (_:xs) ls =
sof's avatar
sof committed
559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575
   case ls of
     [] -> []
     (y:ys) -> y : takeList xs ys

dropList :: [b] -> [a] -> [a]
dropList [] xs    = xs
dropList _  xs@[] = xs
dropList (_:xs) (_:ys) = dropList xs ys


splitAtList :: [b] -> [a] -> ([a], [a])
splitAtList [] xs     = ([], xs)
splitAtList _ xs@[]   = (xs, xs)
splitAtList (_:xs) (y:ys) = (y:ys', ys'')
    where
      (ys', ys'') = splitAtList xs ys

576 577
-- drop from the end of a list
dropTail :: Int -> [a] -> [a]
578 579 580 581 582 583 584 585 586
-- Specification: dropTail n = reverse . drop n . reverse
-- Better implemention due to Joachim Breitner
-- http://www.joachim-breitner.de/blog/archives/600-On-taking-the-last-n-elements-of-a-list.html
dropTail n xs
  = go (drop n xs) xs
  where
    go (_:ys) (x:xs) = x : go ys xs 
    go _      _      = []  -- Stop when ys runs out
                           -- It'll always run out before xs does
587

588
snocView :: [a] -> Maybe ([a],a)
Ian Lynagh's avatar
Ian Lynagh committed
589
        -- Split off the last element
590 591
snocView [] = Nothing
snocView xs = go [] xs
Ian Lynagh's avatar
Ian Lynagh committed
592 593 594 595 596
            where
                -- Invariant: second arg is non-empty
              go acc [x]    = Just (reverse acc, x)
              go acc (x:xs) = go (x:acc) xs
              go _   []     = panic "Util: snocView"
597

598 599
split :: Char -> String -> [String]
split c s = case rest of
Ian Lynagh's avatar
Ian Lynagh committed
600 601
                []     -> [chunk]
                _:rest -> chunk : split c rest
602
  where (chunk, rest) = break (==c) s
sof's avatar
sof committed
603 604
\end{code}

605

606
%************************************************************************
Ian Lynagh's avatar
Ian Lynagh committed
607
%*                                                                      *
608
\subsection[Utils-comparison]{Comparisons}
Ian Lynagh's avatar
Ian Lynagh committed
609
%*                                                                      *
610 611
%************************************************************************

612
\begin{code}
613 614 615 616 617 618 619 620
isEqual :: Ordering -> Bool
-- Often used in (isEqual (a `compare` b))
isEqual GT = False
isEqual EQ = True
isEqual LT = False

thenCmp :: Ordering -> Ordering -> Ordering
{-# INLINE thenCmp #-}
Ian Lynagh's avatar
Ian Lynagh committed
621 622
thenCmp EQ       ordering = ordering
thenCmp ordering _        = ordering
623

624
eqListBy :: (a->a->Bool) -> [a] -> [a] -> Bool
Ian Lynagh's avatar
Ian Lynagh committed
625
eqListBy _  []     []     = True
626
eqListBy eq (x:xs) (y:ys) = eq x y && eqListBy eq xs ys
Ian Lynagh's avatar
Ian Lynagh committed
627
eqListBy _  _      _      = False
628

629 630 631 632 633
eqMaybeBy :: (a ->a->Bool) -> Maybe a -> Maybe a -> Bool
eqMaybeBy _  Nothing  Nothing  = True
eqMaybeBy eq (Just x) (Just y) = eq x y
eqMaybeBy _  _        _        = False

634
cmpList :: (a -> a -> Ordering) -> [a] -> [a] -> Ordering
635 636
    -- `cmpList' uses a user-specified comparer

Ian Lynagh's avatar
Ian Lynagh committed
637 638 639
cmpList _   []     [] = EQ
cmpList _   []     _  = LT
cmpList _   _      [] = GT
640
cmpList cmp (a:as) (b:bs)
641
  = case cmp a b of { EQ -> cmpList cmp as bs; xxx -> xxx }
642 643
\end{code}

644
\begin{code}
645 646
removeSpaces :: String -> String
removeSpaces = reverse . dropWhile isSpace . reverse . dropWhile isSpace
647 648
\end{code}

649 650 651 652 653 654 655
%************************************************************************
%*                                                                      *
\subsection{Edit distance}
%*                                                                      *
%************************************************************************

\begin{code}
656 657 658 659 660 661
-- | Find the "restricted" Damerau-Levenshtein edit distance between two strings.
-- See: <http://en.wikipedia.org/wiki/Damerau-Levenshtein_distance>.
-- Based on the algorithm presented in "A Bit-Vector Algorithm for Computing
-- Levenshtein and Damerau Edit Distances" in PSC'02 (Heikki Hyyro).
-- See http://www.cs.uta.fi/~helmu/pubs/psc02.pdf and
--     http://www.cs.uta.fi/~helmu/pubs/PSCerr.html for an explanation
662
restrictedDamerauLevenshteinDistance :: String -> String -> Int
663 664
restrictedDamerauLevenshteinDistance str1 str2
  = restrictedDamerauLevenshteinDistanceWithLengths m n str1 str2
665 666 667 668
  where
    m = length str1
    n = length str2

669 670
restrictedDamerauLevenshteinDistanceWithLengths
  :: Int -> Int -> String -> String -> Int
671
restrictedDamerauLevenshteinDistanceWithLengths m n str1 str2
672 673 674 675 676 677 678 679 680 681 682
  | m <= n
  = if n <= 32 -- n must be larger so this check is sufficient
    then restrictedDamerauLevenshteinDistance' (undefined :: Word32) m n str1 str2
    else restrictedDamerauLevenshteinDistance' (undefined :: Integer) m n str1 str2

  | otherwise
  = if m <= 32 -- m must be larger so this check is sufficient
    then restrictedDamerauLevenshteinDistance' (undefined :: Word32) n m str2 str1
    else restrictedDamerauLevenshteinDistance' (undefined :: Integer) n m str2 str1

restrictedDamerauLevenshteinDistance'
683
  :: (Bits bv, Num bv) => bv -> Int -> Int -> String -> String -> Int
dterei's avatar
dterei committed
684
restrictedDamerauLevenshteinDistance' _bv_dummy m n str1 str2
685
  | [] <- str1 = n
686 687 688 689 690 691 692 693 694 695
  | otherwise  = extractAnswer $
                 foldl' (restrictedDamerauLevenshteinDistanceWorker
                             (matchVectors str1) top_bit_mask vector_mask)
                        (0, 0, m_ones, 0, m) str2
  where
    m_ones@vector_mask = (2 ^ m) - 1
    top_bit_mask = (1 `shiftL` (m - 1)) `asTypeOf` _bv_dummy
    extractAnswer (_, _, _, _, distance) = distance

restrictedDamerauLevenshteinDistanceWorker
696
      :: (Bits bv, Num bv) => IM.IntMap bv -> bv -> bv
697 698 699 700 701 702 703
      -> (bv, bv, bv, bv, Int) -> Char -> (bv, bv, bv, bv, Int)
restrictedDamerauLevenshteinDistanceWorker str1_mvs top_bit_mask vector_mask
                                           (pm, d0, vp, vn, distance) char2
  = seq str1_mvs $ seq top_bit_mask $ seq vector_mask $
    seq pm' $ seq d0' $ seq vp' $ seq vn' $
    seq distance'' $ seq char2 $
    (pm', d0', vp', vn', distance'')
704 705
  where
    pm' = IM.findWithDefault 0 (ord char2) str1_mvs
dterei's avatar
dterei committed
706

707
    d0' = ((((sizedComplement vector_mask d0) .&. pm') `shiftL` 1) .&. pm)
708
      .|. ((((pm' .&. vp) + vp) .&. vector_mask) `xor` vp) .|. pm' .|. vn
709 710
          -- No need to mask the shiftL because of the restricted range of pm

711 712
    hp' = vn .|. sizedComplement vector_mask (d0' .|. vp)
    hn' = d0' .&. vp
dterei's avatar
dterei committed
713

714 715 716 717
    hp'_shift = ((hp' `shiftL` 1) .|. 1) .&. vector_mask
    hn'_shift = (hn' `shiftL` 1) .&. vector_mask
    vp' = hn'_shift .|. sizedComplement vector_mask (d0' .|. hp'_shift)
    vn' = d0' .&. hp'_shift
dterei's avatar
dterei committed
718

719 720 721 722 723 724
    distance' = if hp' .&. top_bit_mask /= 0 then distance + 1 else distance
    distance'' = if hn' .&. top_bit_mask /= 0 then distance' - 1 else distance'

sizedComplement :: Bits bv => bv -> bv -> bv
sizedComplement vector_mask vect = vector_mask `xor` vect

725
matchVectors :: (Bits bv, Num bv) => String -> IM.IntMap bv
726 727 728 729 730 731 732
matchVectors = snd . foldl' go (0 :: Int, IM.empty)
  where
    go (ix, im) char = let ix' = ix + 1
                           im' = IM.insertWith (.|.) (ord char) (2 ^ ix) im
                       in seq ix' $ seq im' $ (ix', im')

#ifdef __GLASGOW_HASKELL__
733 734 735 736 737 738 739 740 741 742 743 744 745
{-# SPECIALIZE INLINE restrictedDamerauLevenshteinDistance'
                      :: Word32 -> Int -> Int -> String -> String -> Int #-}
{-# SPECIALIZE INLINE restrictedDamerauLevenshteinDistance'
                      :: Integer -> Int -> Int -> String -> String -> Int #-}

{-# SPECIALIZE restrictedDamerauLevenshteinDistanceWorker
               :: IM.IntMap Word32 -> Word32 -> Word32
               -> (Word32, Word32, Word32, Word32, Int)
               -> Char -> (Word32, Word32, Word32, Word32, Int) #-}
{-# SPECIALIZE restrictedDamerauLevenshteinDistanceWorker
               :: IM.IntMap Integer -> Integer -> Integer
               -> (Integer, Integer, Integer, Integer, Int)
               -> Char -> (Integer, Integer, Integer, Integer, Int) #-}
746 747 748 749 750 751 752 753 754

{-# SPECIALIZE INLINE sizedComplement :: Word32 -> Word32 -> Word32 #-}
{-# SPECIALIZE INLINE sizedComplement :: Integer -> Integer -> Integer #-}

{-# SPECIALIZE matchVectors :: String -> IM.IntMap Word32 #-}
{-# SPECIALIZE matchVectors :: String -> IM.IntMap Integer #-}
#endif

fuzzyMatch :: String -> [String] -> [String]
755 756 757 758 759 760 761 762 763 764 765 766
fuzzyMatch key vals = fuzzyLookup key [(v,v) | v <- vals]

-- | Search for possible matches to the users input in the given list,
-- returning a small number of ranked results
fuzzyLookup :: String -> [(String,a)] -> [a]
fuzzyLookup user_entered possibilites
  = map fst $ take mAX_RESULTS $ sortBy (comparing snd)
    [ (poss_val, distance) | (poss_str, poss_val) <- possibilites
                       , let distance = restrictedDamerauLevenshteinDistance
                                            poss_str user_entered
                       , distance <= fuzzy_threshold ]
  where
dterei's avatar
dterei committed
767 768
    -- Work out an approriate match threshold:
    -- We report a candidate if its edit distance is <= the threshold,
769
    -- The threshhold is set to about a quarter of the # of characters the user entered
dterei's avatar
dterei committed
770 771 772 773 774 775 776
    --   Length    Threshold
    --     1         0          -- Don't suggest *any* candidates
    --     2         1          -- for single-char identifiers
    --     3         1
    --     4         1
    --     5         1
    --     6         2
777 778
    --
    fuzzy_threshold = truncate $ fromIntegral (length user_entered + 2) / (4 :: Rational)
779
    mAX_RESULTS = 3
780 781
\end{code}

782
%************************************************************************
Ian Lynagh's avatar
Ian Lynagh committed
783
%*                                                                      *
784
\subsection[Utils-pairs]{Pairs}
Ian Lynagh's avatar
Ian Lynagh committed
785
%*                                                                      *
786 787 788 789 790 791 792
%************************************************************************

\begin{code}
unzipWith :: (a -> b -> c) -> [(a, b)] -> [c]
unzipWith f pairs = map ( \ (a, b) -> f a b ) pairs
\end{code}

793 794 795 796
\begin{code}
seqList :: [a] -> b -> b
seqList [] b = b
seqList (x:xs) b = x `seq` seqList xs b
797
\end{code}
798 799 800 801 802 803 804

Global variables:

\begin{code}
global :: a -> IORef a
global a = unsafePerformIO (newIORef a)
\end{code}
805

806 807 808
\begin{code}
consIORef :: IORef [a] -> a -> IO ()
consIORef var x = do
Thomas Schilling's avatar
Thomas Schilling committed
809
  atomicModifyIORef var (\xs -> (x:xs,()))
810 811
\end{code}

812
\begin{code}
813 814
globalM :: IO a -> IORef a
globalM ma = unsafePerformIO (ma >>= newIORef)
815 816
\end{code}

817 818 819
Module names:

\begin{code}
Ian Lynagh's avatar
Ian Lynagh committed
820
looksLikeModuleName :: String -> Bool
821
looksLikeModuleName [] = False
822 823
looksLikeModuleName (c:cs) = isUpper c && go cs
  where go [] = True
Ian Lynagh's avatar
Ian Lynagh committed
824
        go ('.':cs) = looksLikeModuleName cs
825
        go (c:cs)   = (isAlphaNum c || c == '_' || c == '\'') && go cs
826
\end{code}
sof's avatar
sof committed
827

828
Akin to @Prelude.words@, but acts like the Bourne shell, treating
Ian Lynagh's avatar
Ian Lynagh committed
829 830
quoted strings as Haskell Strings, and also parses Haskell [String]
syntax.
sof's avatar
sof committed
831 832

\begin{code}
Ian Lynagh's avatar
Ian Lynagh committed
833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857
getCmd :: String -> Either String             -- Error
                           (String, String) -- (Cmd, Rest)
getCmd s = case break isSpace $ dropWhile isSpace s of
           ([], _) -> Left ("Couldn't find command in " ++ show s)
           res -> Right res

toCmdArgs :: String -> Either String             -- Error
                              (String, [String]) -- (Cmd, Args)
toCmdArgs s = case getCmd s of
              Left err -> Left err
              Right (cmd, s') -> case toArgs s' of
                                 Left err -> Left err
                                 Right args -> Right (cmd, args)

toArgs :: String -> Either String   -- Error
                           [String] -- Args
toArgs str
    = case dropWhile isSpace str of
      s@('[':_) -> case reads s of
                   [(args, spaces)]
                    | all isSpace spaces ->
                       Right args
                   _ ->
                       Left ("Couldn't read " ++ show str ++ "as [String]")
      s -> toArgs' s
sof's avatar
sof committed
858
 where
Ian Lynagh's avatar
Ian Lynagh committed
859 860 861 862 863 864 865 866 867 868 869 870 871 872 873
  toArgs' s = case dropWhile isSpace s of
              [] -> Right []
              ('"' : _) -> case reads s of
                           [(arg, rest)]
                              -- rest must either be [] or start with a space
                            | all isSpace (take 1 rest) ->
                               case toArgs' rest of
                               Left err -> Left err
                               Right args -> Right (arg : args)
                           _ ->
                               Left ("Couldn't read " ++ show s ++ "as String")
              s' -> case break isSpace s' of
                    (arg, s'') -> case toArgs' s'' of
                                  Left err -> Left err
                                  Right args -> Right (arg : args)
sof's avatar
sof committed
874
\end{code}
875 876 877 878 879 880

-- -----------------------------------------------------------------------------
-- Floats

\begin{code}
readRational__ :: ReadS Rational -- NB: doesn't handle leading "-"
Ian Lynagh's avatar
Ian Lynagh committed
881
readRational__ r = do
882 883 884 885 886
     (n,d,s) <- readFix r
     (k,t)   <- readExp s
     return ((n%1)*10^^(k-d), t)
 where
     readFix r = do
Ian Lynagh's avatar
Ian Lynagh committed
887 888 889
        (ds,s)  <- lexDecDigits r
        (ds',t) <- lexDotDigits s
        return (read (ds++ds'), length ds', t)
890 891

     readExp (e:s) | e `elem` "eE" = readExp' s
Ian Lynagh's avatar
Ian Lynagh committed
892
     readExp s                     = return (0,s)
893 894

     readExp' ('+':s) = readDec s
Ian Lynagh's avatar
Ian Lynagh committed
895 896 897
     readExp' ('-':s) = do (k,t) <- readDec s
                           return (-k,t)
     readExp' s       = readDec s
898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919

     readDec s = do
        (ds,r) <- nonnull isDigit s
        return (foldl1 (\n d -> n * 10 + d) [ ord d - ord '0' | d <- ds ],
                r)

     lexDecDigits = nonnull isDigit

     lexDotDigits ('.':s) = return (span isDigit s)
     lexDotDigits s       = return ("",s)

     nonnull p s = do (cs@(_:_),t) <- return (span p s)
                      return (cs,t)

readRational :: String -> Rational -- NB: *does* handle a leading "-"
readRational top_s
  = case top_s of
      '-' : xs -> - (read_me xs)
      xs       -> read_me xs
  where
    read_me s
      = case (do { (x,"") <- readRational__ s ; return x }) of
Ian Lynagh's avatar
Ian Lynagh committed
920 921 922
          [x] -> x
          []  -> error ("readRational: no parse:"        ++ top_s)
          _   -> error ("readRational: ambiguous parse:" ++ top_s)
923 924


925 926 927
-----------------------------------------------------------------------------
-- read helpers