Hash.c 10.8 KB
Newer Older
1 2 3 4 5 6 7 8 9 10
/*-----------------------------------------------------------------------------
 *
 * (c) The AQUA Project, Glasgow University, 1995-1998
 * (c) The GHC Team, 1999
 *
 * Dynamically expanding linear hash tables, as described in
 * Per-\AAke Larson, ``Dynamic Hash Tables,'' CACM 31(4), April 1988,
 * pp. 446 -- 457.
 * -------------------------------------------------------------------------- */

11
#include "PosixSource.h"
12
#include "Rts.h"
Simon Marlow's avatar
Simon Marlow committed
13

14 15 16
#include "Hash.h"
#include "RtsUtils.h"

17 18
#include <string.h>

19
#define HSEGSIZE    1024    /* Size of a single hash table segment */
20
                            /* Also the minimum size of a hash table */
21
#define HDIRSIZE    1024    /* Size of the segment directory */
22 23
                            /* Maximum hash table size is HSEGSIZE * HDIRSIZE */
#define HLOAD       5       /* Maximum average load of a single hash bucket */
24

25 26
#define HCHUNK      (1024 * sizeof(W_) / sizeof(HashList))
                            /* Number of HashList cells to allocate in one go */
27 28 29


/* Linked list of (key, data) pairs for separate chaining */
30
typedef struct hashlist {
31 32 33
    StgWord key;
    void *data;
    struct hashlist *next;  /* Next cell in bucket chain (same hash value) */
34
} HashList;
35

36 37 38 39
typedef struct chunklist {
  HashList *chunk;
  struct chunklist *next;
} HashListChunk;
40 41

struct hashtable {
42 43 44 45 46 47 48
    int split;              /* Next bucket to split when expanding */
    int max;                /* Max bucket of smaller table */
    int mask1;              /* Mask for doing the mod of h_1 (smaller table) */
    int mask2;              /* Mask for doing the mod of h_2 (larger table) */
    int kcount;             /* Number of keys */
    int bcount;             /* Number of buckets */
    HashList **dir[HDIRSIZE];   /* Directory of segments */
49 50 51
    HashList *freeList;         /* free list of HashLists */
    HashListChunk *chunks;
    HashFunction *hash;         /* hash function */
52
    CompareFunction *compare;   /* key comparison function */
53 54 55 56 57 58 59
};

/* -----------------------------------------------------------------------------
 * Hash first using the smaller table.  If the bucket is less than the
 * next bucket to be split, re-hash using the larger table.
 * -------------------------------------------------------------------------- */

60
int
61
hashWord(HashTable *table, StgWord key)
62 63 64 65 66 67 68 69 70 71
{
    int bucket;

    /* Strip the boring zero bits */
    key /= sizeof(StgWord);

    /* Mod the size of the hash table (a power of 2) */
    bucket = key & table->mask1;

    if (bucket < table->split) {
72 73
        /* Mod the size of the expanded hash table (also a power of 2) */
        bucket = key & table->mask2;
74 75 76 77
    }
    return bucket;
}

78
int
79 80 81 82 83 84 85
hashStr(HashTable *table, char *key)
{
    int h, bucket;
    char *s;

    s = key;
    for (h=0; *s; s++) {
86 87 88
        h *= 128;
        h += *s;
        h = h % 1048583;        /* some random large prime */
89 90 91 92 93 94
    }

    /* Mod the size of the hash table (a power of 2) */
    bucket = h & table->mask1;

    if (bucket < table->split) {
95 96
        /* Mod the size of the expanded hash table (also a power of 2) */
        bucket = h & table->mask2;
97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114
    }

    return bucket;
}

static int
compareWord(StgWord key1, StgWord key2)
{
    return (key1 == key2);
}

static int
compareStr(StgWord key1, StgWord key2)
{
    return (strcmp((char *)key1, (char *)key2) == 0);
}


115 116 117 118 119 120 121
/* -----------------------------------------------------------------------------
 * Allocate a new segment of the dynamically growing hash table.
 * -------------------------------------------------------------------------- */

static void
allocSegment(HashTable *table, int segment)
{
122 123
    table->dir[segment] = stgMallocBytes(HSEGSIZE * sizeof(HashList *),
                                         "allocSegment");
124 125
}

126

127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145
/* -----------------------------------------------------------------------------
 * Expand the larger hash table by one bucket, and split one bucket
 * from the smaller table into two parts.  Only the bucket referenced
 * by @table->split@ is affected by the expansion.
 * -------------------------------------------------------------------------- */

static void
expand(HashTable *table)
{
    int oldsegment;
    int oldindex;
    int newbucket;
    int newsegment;
    int newindex;
    HashList *hl;
    HashList *next;
    HashList *old, *new;

    if (table->split + table->max >= HDIRSIZE * HSEGSIZE)
146 147
        /* Wow!  That's big.  Too big, so don't expand. */
        return;
148 149 150 151 152 153 154 155 156 157 158 159

    /* Calculate indices of bucket to split */
    oldsegment = table->split / HSEGSIZE;
    oldindex = table->split % HSEGSIZE;

    newbucket = table->max + table->split;

    /* And the indices of the new bucket */
    newsegment = newbucket / HSEGSIZE;
    newindex = newbucket % HSEGSIZE;

    if (newindex == 0)
160
        allocSegment(table, newsegment);
161 162

    if (++table->split == table->max) {
163 164 165 166
        table->split = 0;
        table->max *= 2;
        table->mask1 = table->mask2;
        table->mask2 = table->mask2 << 1 | 1;
167 168 169 170 171 172 173
    }
    table->bcount++;

    /* Split the bucket, paying no attention to the original order */

    old = new = NULL;
    for (hl = table->dir[oldsegment][oldindex]; hl != NULL; hl = next) {
174 175 176 177 178 179 180 181
        next = hl->next;
        if (table->hash(table, hl->key) == newbucket) {
            hl->next = new;
            new = hl;
        } else {
            hl->next = old;
            old = hl;
        }
182 183 184 185 186 187 188 189 190 191 192 193 194 195 196
    }
    table->dir[oldsegment][oldindex] = old;
    table->dir[newsegment][newindex] = new;

    return;
}

void *
lookupHashTable(HashTable *table, StgWord key)
{
    int bucket;
    int segment;
    int index;
    HashList *hl;

197
    bucket = table->hash(table, key);
198 199 200 201
    segment = bucket / HSEGSIZE;
    index = bucket % HSEGSIZE;

    for (hl = table->dir[segment][index]; hl != NULL; hl = hl->next)
202 203
        if (table->compare(hl->key, key))
            return hl->data;
204 205 206 207 208

    /* It's not there */
    return NULL;
}

209 210 211 212 213 214 215
/* -----------------------------------------------------------------------------
 * We allocate the hashlist cells in large chunks to cut down on malloc
 * overhead.  Although we keep a free list of hashlist cells, we make
 * no effort to actually return the space to the malloc arena.
 * -------------------------------------------------------------------------- */

static HashList *
216
allocHashList (HashTable *table)
217 218
{
    HashList *hl, *p;
219
    HashListChunk *cl;
220

221 222
    if ((hl = table->freeList) != NULL) {
        table->freeList = hl->next;
223 224
    } else {
        hl = stgMallocBytes(HCHUNK * sizeof(HashList), "allocHashList");
225
        cl = stgMallocBytes(sizeof (*cl), "allocHashList: chunkList");
226 227 228
        cl->chunk = hl;
        cl->next = table->chunks;
        table->chunks = cl;
229

230 231
        table->freeList = hl + 1;
        for (p = table->freeList; p < hl + HCHUNK - 1; p++)
232 233
            p->next = p + 1;
        p->next = NULL;
234 235 236 237 238
    }
    return hl;
}

static void
239
freeHashList (HashTable *table, HashList *hl)
240
{
241 242
    hl->next = table->freeList;
    table->freeList = hl;
243 244
}

245 246 247 248 249 250 251 252
void
insertHashTable(HashTable *table, StgWord key, void *data)
{
    int bucket;
    int segment;
    int index;
    HashList *hl;

253 254 255
    // Disable this assert; sometimes it's useful to be able to
    // overwrite entries in the hash table.
    // ASSERT(lookupHashTable(table, key) == NULL);
256

257 258
    /* When the average load gets too high, we expand the table */
    if (++table->kcount >= HLOAD * table->bcount)
259
        expand(table);
260

261
    bucket = table->hash(table, key);
262 263 264
    segment = bucket / HSEGSIZE;
    index = bucket % HSEGSIZE;

265
    hl = allocHashList(table);
266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282

    hl->key = key;
    hl->data = data;
    hl->next = table->dir[segment][index];
    table->dir[segment][index] = hl;

}

void *
removeHashTable(HashTable *table, StgWord key, void *data)
{
    int bucket;
    int segment;
    int index;
    HashList *hl;
    HashList *prev = NULL;

283
    bucket = table->hash(table, key);
284 285 286 287
    segment = bucket / HSEGSIZE;
    index = bucket % HSEGSIZE;

    for (hl = table->dir[segment][index]; hl != NULL; hl = hl->next) {
288 289 290 291 292
        if (table->compare(hl->key,key) && (data == NULL || hl->data == data)) {
            if (prev == NULL)
                table->dir[segment][index] = hl->next;
            else
                prev->next = hl->next;
293
            freeHashList(table,hl);
294 295 296 297
            table->kcount--;
            return hl->data;
        }
        prev = hl;
298 299 300 301 302 303 304 305 306 307 308 309 310 311
    }

    /* It's not there */
    ASSERT(data == NULL);
    return NULL;
}

/* -----------------------------------------------------------------------------
 * When we free a hash table, we are also good enough to free the
 * data part of each (key, data) pair, as long as our caller can tell
 * us how to do it.
 * -------------------------------------------------------------------------- */

void
312
freeHashTable(HashTable *table, void (*freeDataFun)(void *) )
313 314 315 316 317
{
    long segment;
    long index;
    HashList *hl;
    HashList *next;
318
    HashListChunk *cl, *cl_next;
319 320 321 322

    /* The last bucket with something in it is table->max + table->split - 1 */
    segment = (table->max + table->split - 1) / HSEGSIZE;
    index = (table->max + table->split - 1) % HSEGSIZE;
323

324
    while (segment >= 0) {
325 326 327 328 329
        while (index >= 0) {
            for (hl = table->dir[segment][index]; hl != NULL; hl = next) {
                next = hl->next;
                if (freeDataFun != NULL)
                    (*freeDataFun)(hl->data);
330
            }
331 332 333 334 335
            index--;
        }
        stgFree(table->dir[segment]);
        segment--;
        index = HSEGSIZE - 1;
336
    }
337 338 339 340 341
    for (cl = table->chunks; cl != NULL; cl = cl_next) {
        cl_next = cl->next;
        stgFree(cl->chunk);
        stgFree(cl);
    }
sof's avatar
sof committed
342
    stgFree(table);
343 344 345 346 347 348 349
}

/* -----------------------------------------------------------------------------
 * When we initialize a hash table, we set up the first segment as well,
 * initializing all of the first segment's hash buckets to NULL.
 * -------------------------------------------------------------------------- */

350
HashTable *
351
allocHashTable_(HashFunction *hash, CompareFunction *compare)
352 353 354 355 356 357 358 359 360
{
    HashTable *table;
    HashList **hb;

    table = stgMallocBytes(sizeof(HashTable),"allocHashTable");

    allocSegment(table, 0);

    for (hb = table->dir[0]; hb < table->dir[0] + HSEGSIZE; hb++)
361
        *hb = NULL;
362 363 364 365 366 367 368

    table->split = 0;
    table->max = HSEGSIZE;
    table->mask1 = HSEGSIZE - 1;
    table->mask2 = 2 * HSEGSIZE - 1;
    table->kcount = 0;
    table->bcount = HSEGSIZE;
369 370
    table->freeList = NULL;
    table->chunks = NULL;
371 372
    table->hash = hash;
    table->compare = compare;
373 374 375

    return table;
}
376 377 378 379 380 381 382 383 384 385

HashTable *
allocHashTable(void)
{
    return allocHashTable_(hashWord, compareWord);
}

HashTable *
allocStrHashTable(void)
{
386 387
    return allocHashTable_((HashFunction *)hashStr,
                           (CompareFunction *)compareStr);
388
}
389 390 391 392

void
exitHashTable(void)
{
393
    /* nothing to do */
394
}
395 396 397 398 399

int keyCountHashTable (HashTable *table)
{
    return table->kcount;
}
400 401 402 403 404 405 406 407

// Local Variables:
// mode: C
// fill-column: 80
// indent-tabs-mode: nil
// c-basic-offset: 4
// buffer-file-coding-system: utf-8-unix
// End: