Hash.c 11.4 KB
Newer Older
1 2 3 4 5 6 7 8 9 10
/*-----------------------------------------------------------------------------
 *
 * (c) The AQUA Project, Glasgow University, 1995-1998
 * (c) The GHC Team, 1999
 *
 * Dynamically expanding linear hash tables, as described in
 * Per-\AAke Larson, ``Dynamic Hash Tables,'' CACM 31(4), April 1988,
 * pp. 446 -- 457.
 * -------------------------------------------------------------------------- */

11
#include "PosixSource.h"
12
#include "Rts.h"
Simon Marlow's avatar
Simon Marlow committed
13

14 15 16
#include "Hash.h"
#include "RtsUtils.h"

17 18
#include <string.h>

19
#define HSEGSIZE    1024    /* Size of a single hash table segment */
20
                            /* Also the minimum size of a hash table */
21
#define HDIRSIZE    1024    /* Size of the segment directory */
22 23
                            /* Maximum hash table size is HSEGSIZE * HDIRSIZE */
#define HLOAD       5       /* Maximum average load of a single hash bucket */
24

25 26
#define HCHUNK      (1024 * sizeof(W_) / sizeof(HashList))
                            /* Number of HashList cells to allocate in one go */
27 28 29


/* Linked list of (key, data) pairs for separate chaining */
30
typedef struct hashlist {
31 32 33
    StgWord key;
    void *data;
    struct hashlist *next;  /* Next cell in bucket chain (same hash value) */
34
} HashList;
35

36 37 38 39
typedef struct chunklist {
  HashList *chunk;
  struct chunklist *next;
} HashListChunk;
40 41

struct hashtable {
42 43 44 45 46 47 48
    int split;              /* Next bucket to split when expanding */
    int max;                /* Max bucket of smaller table */
    int mask1;              /* Mask for doing the mod of h_1 (smaller table) */
    int mask2;              /* Mask for doing the mod of h_2 (larger table) */
    int kcount;             /* Number of keys */
    int bcount;             /* Number of buckets */
    HashList **dir[HDIRSIZE];   /* Directory of segments */
49 50 51
    HashList *freeList;         /* free list of HashLists */
    HashListChunk *chunks;
    HashFunction *hash;         /* hash function */
52
    CompareFunction *compare;   /* key comparison function */
53 54 55 56 57 58 59
};

/* -----------------------------------------------------------------------------
 * Hash first using the smaller table.  If the bucket is less than the
 * next bucket to be split, re-hash using the larger table.
 * -------------------------------------------------------------------------- */

60
int
61
hashWord(HashTable *table, StgWord key)
62 63 64 65 66 67 68 69 70 71
{
    int bucket;

    /* Strip the boring zero bits */
    key /= sizeof(StgWord);

    /* Mod the size of the hash table (a power of 2) */
    bucket = key & table->mask1;

    if (bucket < table->split) {
72 73
        /* Mod the size of the expanded hash table (also a power of 2) */
        bucket = key & table->mask2;
74 75 76 77
    }
    return bucket;
}

78
int
79 80 81 82 83 84 85
hashStr(HashTable *table, char *key)
{
    int h, bucket;
    char *s;

    s = key;
    for (h=0; *s; s++) {
86 87 88
        h *= 128;
        h += *s;
        h = h % 1048583;        /* some random large prime */
89 90 91 92 93 94
    }

    /* Mod the size of the hash table (a power of 2) */
    bucket = h & table->mask1;

    if (bucket < table->split) {
95 96
        /* Mod the size of the expanded hash table (also a power of 2) */
        bucket = h & table->mask2;
97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114
    }

    return bucket;
}

static int
compareWord(StgWord key1, StgWord key2)
{
    return (key1 == key2);
}

static int
compareStr(StgWord key1, StgWord key2)
{
    return (strcmp((char *)key1, (char *)key2) == 0);
}


115 116 117 118 119 120 121
/* -----------------------------------------------------------------------------
 * Allocate a new segment of the dynamically growing hash table.
 * -------------------------------------------------------------------------- */

static void
allocSegment(HashTable *table, int segment)
{
122 123
    table->dir[segment] = stgMallocBytes(HSEGSIZE * sizeof(HashList *),
                                         "allocSegment");
124 125
}

126

127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145
/* -----------------------------------------------------------------------------
 * Expand the larger hash table by one bucket, and split one bucket
 * from the smaller table into two parts.  Only the bucket referenced
 * by @table->split@ is affected by the expansion.
 * -------------------------------------------------------------------------- */

static void
expand(HashTable *table)
{
    int oldsegment;
    int oldindex;
    int newbucket;
    int newsegment;
    int newindex;
    HashList *hl;
    HashList *next;
    HashList *old, *new;

    if (table->split + table->max >= HDIRSIZE * HSEGSIZE)
146 147
        /* Wow!  That's big.  Too big, so don't expand. */
        return;
148 149 150 151 152 153 154 155 156 157 158 159

    /* Calculate indices of bucket to split */
    oldsegment = table->split / HSEGSIZE;
    oldindex = table->split % HSEGSIZE;

    newbucket = table->max + table->split;

    /* And the indices of the new bucket */
    newsegment = newbucket / HSEGSIZE;
    newindex = newbucket % HSEGSIZE;

    if (newindex == 0)
160
        allocSegment(table, newsegment);
161 162

    if (++table->split == table->max) {
163 164 165 166
        table->split = 0;
        table->max *= 2;
        table->mask1 = table->mask2;
        table->mask2 = table->mask2 << 1 | 1;
167 168 169 170 171 172 173
    }
    table->bcount++;

    /* Split the bucket, paying no attention to the original order */

    old = new = NULL;
    for (hl = table->dir[oldsegment][oldindex]; hl != NULL; hl = next) {
174 175 176 177 178 179 180 181
        next = hl->next;
        if (table->hash(table, hl->key) == newbucket) {
            hl->next = new;
            new = hl;
        } else {
            hl->next = old;
            old = hl;
        }
182 183 184 185 186 187 188 189 190 191 192 193 194 195 196
    }
    table->dir[oldsegment][oldindex] = old;
    table->dir[newsegment][newindex] = new;

    return;
}

void *
lookupHashTable(HashTable *table, StgWord key)
{
    int bucket;
    int segment;
    int index;
    HashList *hl;

197
    bucket = table->hash(table, key);
198 199 200 201
    segment = bucket / HSEGSIZE;
    index = bucket % HSEGSIZE;

    for (hl = table->dir[segment][index]; hl != NULL; hl = hl->next)
202 203
        if (table->compare(hl->key, key))
            return hl->data;
204 205 206 207 208

    /* It's not there */
    return NULL;
}

209
// Puts up to szKeys keys of the hash table into the given array. Returns the
Facundo Domínguez's avatar
Facundo Domínguez committed
210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231
// actual amount of keys that have been retrieved.
//
// If the table is modified concurrently, the function behavior is undefined.
//
int keysHashTable(HashTable *table, StgWord keys[], int szKeys) {
    int segment;
    int k = 0;
    for(segment=0;segment<HDIRSIZE && table->dir[segment];segment+=1) {
        int index;
        for(index=0;index<HSEGSIZE;index+=1) {
            HashList *hl;
            for(hl=table->dir[segment][index];hl;hl=hl->next) {
                if (k == szKeys)
                  return k;
                keys[k] = hl->key;
                k += 1;
            }
        }
    }
    return k;
}

232 233 234 235 236 237 238
/* -----------------------------------------------------------------------------
 * We allocate the hashlist cells in large chunks to cut down on malloc
 * overhead.  Although we keep a free list of hashlist cells, we make
 * no effort to actually return the space to the malloc arena.
 * -------------------------------------------------------------------------- */

static HashList *
239
allocHashList (HashTable *table)
240 241
{
    HashList *hl, *p;
242
    HashListChunk *cl;
243

244 245
    if ((hl = table->freeList) != NULL) {
        table->freeList = hl->next;
246 247
    } else {
        hl = stgMallocBytes(HCHUNK * sizeof(HashList), "allocHashList");
248
        cl = stgMallocBytes(sizeof (*cl), "allocHashList: chunkList");
249 250 251
        cl->chunk = hl;
        cl->next = table->chunks;
        table->chunks = cl;
252

253 254
        table->freeList = hl + 1;
        for (p = table->freeList; p < hl + HCHUNK - 1; p++)
255 256
            p->next = p + 1;
        p->next = NULL;
257 258 259 260 261
    }
    return hl;
}

static void
262
freeHashList (HashTable *table, HashList *hl)
263
{
264 265
    hl->next = table->freeList;
    table->freeList = hl;
266 267
}

268 269 270 271 272 273 274 275
void
insertHashTable(HashTable *table, StgWord key, void *data)
{
    int bucket;
    int segment;
    int index;
    HashList *hl;

276 277 278
    // Disable this assert; sometimes it's useful to be able to
    // overwrite entries in the hash table.
    // ASSERT(lookupHashTable(table, key) == NULL);
279

280 281
    /* When the average load gets too high, we expand the table */
    if (++table->kcount >= HLOAD * table->bcount)
282
        expand(table);
283

284
    bucket = table->hash(table, key);
285 286 287
    segment = bucket / HSEGSIZE;
    index = bucket % HSEGSIZE;

288
    hl = allocHashList(table);
289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305

    hl->key = key;
    hl->data = data;
    hl->next = table->dir[segment][index];
    table->dir[segment][index] = hl;

}

void *
removeHashTable(HashTable *table, StgWord key, void *data)
{
    int bucket;
    int segment;
    int index;
    HashList *hl;
    HashList *prev = NULL;

306
    bucket = table->hash(table, key);
307 308 309 310
    segment = bucket / HSEGSIZE;
    index = bucket % HSEGSIZE;

    for (hl = table->dir[segment][index]; hl != NULL; hl = hl->next) {
311 312 313 314 315
        if (table->compare(hl->key,key) && (data == NULL || hl->data == data)) {
            if (prev == NULL)
                table->dir[segment][index] = hl->next;
            else
                prev->next = hl->next;
316
            freeHashList(table,hl);
317 318 319 320
            table->kcount--;
            return hl->data;
        }
        prev = hl;
321 322 323 324 325 326 327 328 329 330 331 332 333 334
    }

    /* It's not there */
    ASSERT(data == NULL);
    return NULL;
}

/* -----------------------------------------------------------------------------
 * When we free a hash table, we are also good enough to free the
 * data part of each (key, data) pair, as long as our caller can tell
 * us how to do it.
 * -------------------------------------------------------------------------- */

void
335
freeHashTable(HashTable *table, void (*freeDataFun)(void *) )
336 337 338 339 340
{
    long segment;
    long index;
    HashList *hl;
    HashList *next;
341
    HashListChunk *cl, *cl_next;
342 343 344 345

    /* The last bucket with something in it is table->max + table->split - 1 */
    segment = (table->max + table->split - 1) / HSEGSIZE;
    index = (table->max + table->split - 1) % HSEGSIZE;
346

347
    while (segment >= 0) {
348 349 350 351 352
        while (index >= 0) {
            for (hl = table->dir[segment][index]; hl != NULL; hl = next) {
                next = hl->next;
                if (freeDataFun != NULL)
                    (*freeDataFun)(hl->data);
353
            }
354 355 356 357 358
            index--;
        }
        stgFree(table->dir[segment]);
        segment--;
        index = HSEGSIZE - 1;
359
    }
360 361 362 363 364
    for (cl = table->chunks; cl != NULL; cl = cl_next) {
        cl_next = cl->next;
        stgFree(cl->chunk);
        stgFree(cl);
    }
sof's avatar
sof committed
365
    stgFree(table);
366 367 368 369 370 371 372
}

/* -----------------------------------------------------------------------------
 * When we initialize a hash table, we set up the first segment as well,
 * initializing all of the first segment's hash buckets to NULL.
 * -------------------------------------------------------------------------- */

373
HashTable *
374
allocHashTable_(HashFunction *hash, CompareFunction *compare)
375 376 377 378 379 380 381 382 383
{
    HashTable *table;
    HashList **hb;

    table = stgMallocBytes(sizeof(HashTable),"allocHashTable");

    allocSegment(table, 0);

    for (hb = table->dir[0]; hb < table->dir[0] + HSEGSIZE; hb++)
384
        *hb = NULL;
385 386 387 388 389 390 391

    table->split = 0;
    table->max = HSEGSIZE;
    table->mask1 = HSEGSIZE - 1;
    table->mask2 = 2 * HSEGSIZE - 1;
    table->kcount = 0;
    table->bcount = HSEGSIZE;
392 393
    table->freeList = NULL;
    table->chunks = NULL;
394 395
    table->hash = hash;
    table->compare = compare;
396 397 398

    return table;
}
399 400 401 402 403 404 405 406 407 408

HashTable *
allocHashTable(void)
{
    return allocHashTable_(hashWord, compareWord);
}

HashTable *
allocStrHashTable(void)
{
409 410
    return allocHashTable_((HashFunction *)hashStr,
                           (CompareFunction *)compareStr);
411
}
412 413 414 415

void
exitHashTable(void)
{
416
    /* nothing to do */
417
}
418 419 420 421 422

int keyCountHashTable (HashTable *table)
{
    return table->kcount;
}