/* -----------------------------------------------------------------------------
2 3
 *
 * (c) The GHC Team 1998-1999
4 5 6 7 8 9 10
 *
 * MegaBlock Allocator Interface.  This file contains all the dirty
 * architecture-dependent hackery required to get a chunk of aligned
 * memory from the operating system.
 *
 * ---------------------------------------------------------------------------*/

11 12
/* This is non-posix compliant. */
/* #include "PosixSource.h" */
13 14 15 16 17 18 19

#include "Rts.h"
#include "RtsUtils.h"
#include "RtsFlags.h"
#include "MBlock.h"
#include "BlockAlloc.h"

20 21 22
#ifdef HAVE_STDLIB_H
#include <stdlib.h>
#endif
23 24 25
#ifdef HAVE_STRING_H
#include <string.h>
#endif
26 27 28 29 30 31
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#ifdef HAVE_SYS_TYPES_H
#include <sys/types.h>
#endif
32
#ifndef mingw32_HOST_OS
sof's avatar
sof committed
33 34 35
# ifdef HAVE_SYS_MMAN_H
# include <sys/mman.h>
# endif
36 37 38 39
#endif
#ifdef HAVE_FCNTL_H
#include <fcntl.h>
#endif
sof's avatar
sof committed
40
#if HAVE_WINDOWS_H
41 42
#include <windows.h>
#endif
43
#if darwin_HOST_OS
44 45
#include <mach/vm_map.h>
#endif
46

47 48
#include <errno.h>

49 50
lnat mblocks_allocated = 0;

51 52 53 54
/* -----------------------------------------------------------------------------
   The MBlock Map: provides our implementation of HEAP_ALLOCED()
   -------------------------------------------------------------------------- */

55
#if SIZEOF_VOID_P == 4
56
StgWord8 mblock_map[MBLOCK_MAP_SIZE]; // initially all zeros
57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89
#elif SIZEOF_VOID_P == 8
static MBlockMap dummy_mblock_map;
MBlockMap *mblock_cache = &dummy_mblock_map;
int mblock_map_count = 0;
MBlockMap **mblock_maps = NULL;

static MBlockMap *
findMBlockMap(void *p)
{
    int i;
    StgWord32 hi = (StgWord32) (((StgWord)p) >> 32);
    for( i = 0; i < mblock_map_count; i++ )
    {
        if(mblock_maps[i]->addrHigh32 == hi)
        {
	    return mblock_maps[i];
	}
    }
    return NULL;
}

// Slow path of HEAP_ALLOCED(): look up the map covering p's 4GB
// region and consult its per-megablock table.
// Side effect: caches the map that was found, so subsequent lookups
// in the same region take the fast path.
StgBool
slowIsHeapAlloced(void *p)
{
    MBlockMap *found = findMBlockMap(p);

    if (found == NULL) {
        return 0;
    }
    mblock_cache = found;
    return found->mblocks[MBLOCK_MAP_ENTRY(p)];
}
90
#endif
91

92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111
/* Record that the megablock at p is part of the heap, so that
 * HEAP_ALLOCED() returns true for addresses inside it.
 *
 * On 32-bit platforms this sets a bit in a single flat map; on 64-bit
 * platforms we keep one map per 4GB region, created lazily here.
 */
static void
markHeapAlloced(void *p)
{
#if SIZEOF_VOID_P == 4
    mblock_map[MBLOCK_MAP_ENTRY(p)] = 1;
#elif SIZEOF_VOID_P == 8
    MBlockMap *map = findMBlockMap(p);
    if (map == NULL)
    {
        // No map covers this 4GB region yet: grow the table and
        // allocate a fresh, zeroed map for the region.  Check the
        // allocation results (the old code dereferenced the result of
        // an unchecked realloc/calloc), and don't clobber mblock_maps
        // until realloc has succeeded.
        MBlockMap **new_maps;
        new_maps = realloc(mblock_maps,
                           sizeof(MBlockMap*) * (mblock_map_count + 1));
        if (new_maps == NULL) {
            barf("markHeapAlloced: out of memory");
        }
        mblock_maps = new_maps;
        map = calloc(1, sizeof(MBlockMap));
        if (map == NULL) {
            barf("markHeapAlloced: out of memory");
        }
        mblock_maps[mblock_map_count++] = map;
        map->addrHigh32 = (StgWord32) (((StgWord)p) >> 32);
    }
    map->mblocks[MBLOCK_MAP_ENTRY(p)] = 1;
    mblock_cache = map;
#endif
}

112 113 114 115
/* -----------------------------------------------------------------------------
   Allocate new mblock(s)
   -------------------------------------------------------------------------- */

116 117 118 119 120 121
/* Convenience wrapper: allocate exactly one megablock. */
void *
getMBlock(void)
{
    return getMBlocks(1);
}

/* -----------------------------------------------------------------------------
   The mmap() method

   On Unix-like systems, we use mmap() to allocate our memory.  We
   want memory in chunks of MBLOCK_SIZE, and aligned on an MBLOCK_SIZE
   boundary.  The mmap() interface doesn't give us this level of
   control, so we have to use some heuristics.

   In the general case, if we want a block of n megablocks, then we
   allocate n+1 and trim off the slop from either side (using
   munmap()) to get an aligned chunk of size n.  However, the next
   time we'll try to allocate directly after the previously allocated
   chunk, on the grounds that this is aligned and likely to be free.
   If it turns out that we were wrong, we have to munmap() and try
   again using the general method.

   Note on posix_memalign(): this interface is available on recent
   systems and appears to provide exactly what we want.  However, it
   turns out not to be as good as our mmap() implementation, because
   it wastes extra space (using double the address space, in a test on
   x86_64/Linux).  The problem seems to be that posix_memalign()
   returns memory that can be free()'d, so the library must store
   extra information along with the allocated block, thus messing up
   the alignment.  Hence, we don't use posix_memalign() for now.

   -------------------------------------------------------------------------- */

149
#if !defined(mingw32_HOST_OS) && !defined(cygwin32_HOST_OS)
150 151 152 153 154

// A wrapper around mmap(), to abstract away from OS differences in
// the mmap() interface.

static void *
155
my_mmap (void *addr, lnat size)
156 157 158
{
    void *ret;

159
#if defined(solaris2_HOST_OS) || defined(irix_HOST_OS)
160 161
    { 
	int fd = open("/dev/zero",O_RDONLY);
162
	ret = mmap(addr, size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
163 164
	close(fd);
    }
165
#elif hpux_HOST_OS
166 167
    ret = mmap(addr, size, PROT_READ | PROT_WRITE, 
	       MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
168
#elif darwin_HOST_OS
169 170 171 172 173 174 175 176 177 178 179 180 181 182 183
    // Without MAP_FIXED, Apple's mmap ignores addr.
    // With MAP_FIXED, it overwrites already mapped regions, whic
    // mmap(0, ... MAP_FIXED ...) is worst of all: It unmaps the program text
    // and replaces it with zeroes, causing instant death.
    // This behaviour seems to be conformant with IEEE Std 1003.1-2001.
    // Let's just use the underlying Mach Microkernel calls directly,
    // they're much nicer.
    
    kern_return_t err;
    ret = addr;
    if(addr)	// try to allocate at adress
	err = vm_allocate(mach_task_self(),(vm_address_t*) &ret, size, FALSE);
    if(!addr || err)	// try to allocate anywhere
	err = vm_allocate(mach_task_self(),(vm_address_t*) &ret, size, TRUE);
	
184 185 186 187 188 189
    if(err) {
	// don't know what the error codes mean exactly, assume it's
	// not our problem though.
	errorBelch("memory allocation failed (requested %lu bytes)", size);
	stg_exit(EXIT_FAILURE);
    } else {
190
	vm_protect(mach_task_self(),ret,size,FALSE,VM_PROT_READ|VM_PROT_WRITE);
191
    }
192
#else
193
    ret = mmap(addr, size, PROT_READ | PROT_WRITE | PROT_EXEC, 
194 195 196
	       MAP_ANON | MAP_PRIVATE, -1, 0);
#endif

197
    if (ret == (void *)-1) {
198 199 200 201
	if (errno == ENOMEM || 
	    (errno == EINVAL && sizeof(void*)==4 && size >= 0xc0000000)) {
	    // If we request more than 3Gig, then we get EINVAL
	    // instead of ENOMEM (at least on Linux).
202
	    errorBelch("out of memory (requested %lu bytes)", size);
203
	    stg_exit(EXIT_FAILURE);
204
	} else {
205
	    barf("getMBlock: mmap: %s", strerror(errno));
206 207 208
	}
    }

209
    return ret;
210
}
211 212 213 214 215

// Implements the general case: allocate a chunk of memory of 'size'
// mblocks.

static void *
216
gen_map_mblocks (lnat size)
217 218 219 220 221 222 223 224 225 226 227
{
    int slop;
    void *ret;

    // Try to map a larger block, and take the aligned portion from
    // it (unmap the rest).
    size += MBLOCK_SIZE;
    ret = my_mmap(0, size);
    
    // unmap the slop bits around the chunk we allocated
    slop = (W_)ret & MBLOCK_MASK;
sof's avatar
sof committed
228
    
229
    if (munmap(ret, MBLOCK_SIZE - slop) == -1) {
sof's avatar
sof committed
230
      barf("gen_map_mblocks: munmap failed");
231 232
    }
    if (slop > 0 && munmap(ret+size-slop, slop) == -1) {
sof's avatar
sof committed
233
      barf("gen_map_mblocks: munmap failed");
234
    }
sof's avatar
sof committed
235

236
    // ToDo: if we happened to get an aligned block, then don't
sof's avatar
sof committed
237 238 239 240 241 242 243 244 245 246
    // unmap the excess, just use it. For this to work, you
    // need to keep in mind the following:
    //     * Calling my_mmap() with an 'addr' arg pointing to
    //       already my_mmap()ed space is OK and won't fail.
    //     * If my_mmap() can't satisfy the request at the
    //       given 'next_request' address in getMBlocks(), that
    //       you unmap the extra mblock mmap()ed here (or simply
    //       satisfy yourself that the slop introduced isn't worth
    //       salvaging.)
    // 
247

248 249 250 251 252 253 254 255 256
    // next time, try after the block we just got.
    ret += MBLOCK_SIZE - slop;
    return ret;
}


// The external interface: allocate 'n' mblocks, and return the
// address.

257 258 259
void *
getMBlocks(nat n)
{
260
  static caddr_t next_request = (caddr_t)HEAP_BASE;
261 262
  caddr_t ret;
  lnat size = MBLOCK_SIZE * n;
263
  nat i;
264
 
265 266 267 268 269
  if (next_request == 0) {
      // use gen_map_mblocks the first time.
      ret = gen_map_mblocks(size);
  } else {
      ret = my_mmap(next_request, size);
270

271 272
      if (((W_)ret & MBLOCK_MASK) != 0) {
	  // misaligned block!
273
#if 0 // defined(DEBUG)
274
	  errorBelch("warning: getMBlock: misaligned block %p returned when allocating %d megablock(s) at %p", ret, n, next_request);
275
#endif
276

277 278 279 280 281 282 283
	  // unmap this block...
	  if (munmap(ret, size) == -1) {
	      barf("getMBlock: munmap failed");
	  }
	  // and do it the hard way
	  ret = gen_map_mblocks(size);
      }
284 285
  }

286
  // Next time, we'll try to allocate right after the block we just got.
287
  // ToDo: check that we haven't already grabbed the memory at next_request
288 289
  next_request = ret + size;

290
  IF_DEBUG(gc,debugBelch("Allocated %d megablock(s) at %p\n",n,ret));
291

292 293
  // fill in the table
  for (i = 0; i < n; i++) {
294
      markHeapAlloced( ret + i * MBLOCK_SIZE );
295
  }
296

297
  mblocks_allocated += n;
298

299 300
  return ret;
}
sof's avatar
sof committed
301

302
#else /* defined(mingw32_HOST_OS) || defined(cygwin32_HOST_OS) */
sof's avatar
sof committed
303 304 305 306 307

/*
 On Win32 platforms we make use of the two-phased virtual memory API
 to allocate mega blocks. We proceed as follows:

 Reserve a large chunk of VM (256M at the time, or what the user asked
 for via the -M option), but don't supply a base address that's aligned on
 a MB boundary. Instead we round up to the nearest mblock from the chunk of
 VM we're handed back from the OS (at the moment we just leave the 'slop' at
 the beginning of the reserved chunk unused - ToDo: reuse it .)

 Reserving memory doesn't allocate physical storage (not even in the
 page file), this is done later on by committing pages (or mega-blocks in
 our case).
*/

sof's avatar
sof committed
319
/* Bounds of the large region reserved (but not yet committed) with
 * VirtualAlloc(MEM_RESERVE); megablocks are committed out of it. */
char* base_non_committed = (char*)0;
char* end_non_committed = (char*)0;

/* Default is to reserve 256M of VM to minimise the slop cost. */
#define SIZE_RESERVED_POOL  ( 256 * 1024 * 1024 )

/* Number of bytes reserved */
static unsigned long size_reserved_pool = SIZE_RESERVED_POOL;

sof's avatar
sof committed
328 329 330 331 332 333
/* Allocate 'n' megablocks and return their (MBLOCK_SIZE-aligned)
 * address.  Phase 1: on first call (or when the reserved pool is
 * exhausted) reserve a large chunk of VM and round up to the next
 * megablock boundary.  Phase 2: commit the requested megablocks out
 * of the reserved region.  barf()s on any failure. */
void *
getMBlocks(nat n)
{
  static char* base_mblocks       = (char*)0;
  static char* next_request       = (char*)0;
  void* ret                       = (void*)0;
  nat i;

  lnat size = MBLOCK_SIZE * n;

  if ( (base_non_committed == 0) || (next_request + size > end_non_committed) ) {
    if (base_non_committed) {
        /* Tacky, but if no user-provided -M option is in effect,
         * set it to the default (==256M) in time for the heap overflow PSA.
         */
        if (RtsFlags.GcFlags.maxHeapSize == 0) {
            RtsFlags.GcFlags.maxHeapSize = size_reserved_pool / BLOCK_SIZE;
        }
        heapOverflow();
    }
    if (RtsFlags.GcFlags.maxHeapSize != 0) {
      size_reserved_pool = BLOCK_SIZE * RtsFlags.GcFlags.maxHeapSize;
      if (size_reserved_pool < MBLOCK_SIZE) {
        size_reserved_pool = 2*MBLOCK_SIZE;
      }
    }
    base_non_committed = VirtualAlloc ( NULL
                                      , size_reserved_pool
                                      , MEM_RESERVE
                                      , PAGE_READWRITE
                                      );
    if ( base_non_committed == 0 ) {
         errorBelch("getMBlocks: VirtualAlloc failed with: %ld\n", GetLastError());
         ret=(void*)-1;
    } else {
      end_non_committed = (char*)base_non_committed + (unsigned long)size_reserved_pool;
      /* The returned pointer is not aligned on a mega-block boundary. Make it. */
      base_mblocks = (char*)((unsigned long)base_non_committed & (unsigned long)~MBLOCK_MASK) + MBLOCK_SIZE;
#      if 0
       debugBelch("getMBlocks: Dropping %d bytes off of 256M chunk\n",
                  (unsigned)base_mblocks - (unsigned)base_non_committed);
#      endif

       if ( ((char*)base_mblocks + size) > end_non_committed ) {
          debugBelch("getMBlocks: oops, committed too small a region to start with.");
          ret=(void*)-1;
       } else {
          next_request = base_mblocks;
       }
    }
  }
  /* Commit the mega block(s) to phys mem */
  if ( ret != (void*)-1 ) {
     ret = VirtualAlloc(next_request, size, MEM_COMMIT, PAGE_READWRITE);
     if (ret == NULL) {
        debugBelch("getMBlocks: VirtualAlloc failed with: %ld\n", GetLastError());
        ret=(void*)-1;
     }
  }

  if (((W_)ret & MBLOCK_MASK) != 0) {
    barf("getMBlocks: misaligned block returned");
  }

  if (ret == (void*)-1) {
     barf("getMBlocks: unknown memory allocation failure on Win32.");
  }

  IF_DEBUG(gc,debugBelch("Allocated %d megablock(s) at 0x%x\n",n,(nat)ret));
  next_request = (char*)next_request + size;

  mblocks_allocated += n;

  // fill in the table
  for (i = 0; i < n; i++) {
      markHeapAlloced( ret + i * MBLOCK_SIZE );
  }

  return ret;
}

/* Hand back the physical memory that is allocated to a mega-block. 
   ToDo: chain the released mega block onto some list so that
         getMBlocks() can get at it.

   Currently unused.
*/
#if 0
/* Decommit the physical memory backing a megablock (see comment above). */
void
freeMBlock(void* p, nat n)
{
  BOOL rc;

  rc = VirtualFree(p, n * MBLOCK_SIZE , MEM_DECOMMIT );

  if (rc == FALSE) {
#    ifdef DEBUG
     /* GetLastError() returns a DWORD; use %ld to match the other
      * call sites in this file (was %d). */
     debugBelch("freeMBlocks: VirtualFree failed with: %ld\n", GetLastError());
#    endif
  }

}
#endif

#endif