/* -----------------------------------------------------------------------------
 *
 * (c) The GHC Team 1998-1999
 *
 * MegaBlock Allocator Interface.  This file contains all the dirty
 * architecture-dependent hackery required to get a chunk of aligned
 * memory from the operating system.
 *
 * ---------------------------------------------------------------------------*/

/* This is non-POSIX compliant. */
/* #include "PosixSource.h" */

#include "Rts.h"
#include "RtsUtils.h"
#include "RtsFlags.h"
#include "MBlock.h"
#include "BlockAlloc.h"

#ifdef HAVE_STDLIB_H
#include <stdlib.h>
#endif
#ifdef HAVE_STRING_H
#include <string.h>
#endif
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#ifdef HAVE_SYS_TYPES_H
#include <sys/types.h>
#endif
#ifndef mingw32_HOST_OS
# ifdef HAVE_SYS_MMAN_H
# include <sys/mman.h>
# endif
#endif
#ifdef HAVE_FCNTL_H
#include <fcntl.h>
#endif
#if HAVE_WINDOWS_H
#include <windows.h>
#endif
#if darwin_HOST_OS
#include <mach/vm_map.h>
#endif

#include <errno.h>

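// Running total of megablocks allocated from the OS.  It is never
// decremented: megablocks are not returned to the OS by this code.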
lnat mblocks_allocated = 0;

/* -----------------------------------------------------------------------------
   The MBlock Map: provides our implementation of HEAP_ALLOCED()
   -------------------------------------------------------------------------- */

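// How the map is organised: every address is classified by the
// megablock containing it.  On 32-bit machines a flat byte array
// indexed by MBLOCK_MAP_ENTRY(p) (see MBlock.h) is small enough to
// cover the whole address space.  On 64-bit machines a flat array
// would be far too large, so instead we keep one map per 4Gb region
// of the address space (keyed on the high 32 bits of the address),
// plus a one-entry cache, mblock_cache, for the common case.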
#if SIZEOF_VOID_P == 4
StgWord8 mblock_map[MBLOCK_MAP_SIZE]; // initially all zeros
#elif SIZEOF_VOID_P == 8
static MBlockMap dummy_mblock_map;
MBlockMap *mblock_cache = &dummy_mblock_map;
int mblock_map_count = 0;
MBlockMap **mblock_maps = NULL;

static MBlockMap *
findMBlockMap(void *p)
{
    int i;
    StgWord32 hi = (StgWord32) (((StgWord)p) >> 32);
    for( i = 0; i < mblock_map_count; i++ )
    {
        if(mblock_maps[i]->addrHigh32 == hi)
        {
	    return mblock_maps[i];
	}
    }
    return NULL;
}

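// The slow path behind HEAP_ALLOCED() (the fast path, in MBlock.h,
// consults mblock_cache first); a hit here also refills the cache.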
StgBool
slowIsHeapAlloced(void *p)
{
    MBlockMap *map = findMBlockMap(p);
    if(map)
    {
    	mblock_cache = map;
	return map->mblocks[MBLOCK_MAP_ENTRY(p)];
    }
    else
    	return 0;
}
#endif

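// Mark the megablock containing p as allocated, growing the array of
// maps on demand in the 64-bit case.  Note that the realloc()/calloc()
// results are not checked, so allocation failure here crashes rather
// than reporting out-of-memory cleanly.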
static void
markHeapAlloced(void *p)
{
#if SIZEOF_VOID_P == 4
    mblock_map[MBLOCK_MAP_ENTRY(p)] = 1;
#elif SIZEOF_VOID_P == 8
    MBlockMap *map = findMBlockMap(p);
    if(map == NULL)
    {
    	mblock_map_count++;
    	mblock_maps = realloc(mblock_maps,
			      sizeof(MBlockMap*) * mblock_map_count);
	map = mblock_maps[mblock_map_count-1] = calloc(1,sizeof(MBlockMap));
	map->addrHigh32 = (StgWord32) (((StgWord)p) >> 32);
    }
    map->mblocks[MBLOCK_MAP_ENTRY(p)] = 1;
    mblock_cache = map;
#endif
}

/* -----------------------------------------------------------------------------
   Allocate new mblock(s)
   -------------------------------------------------------------------------- */

void *
getMBlock(void)
{
  return getMBlocks(1);
}

/* -----------------------------------------------------------------------------
   The mmap() method

   On Unix-like systems, we use mmap() to allocate our memory.  We
   want memory in chunks of MBLOCK_SIZE, and aligned on an MBLOCK_SIZE
   boundary.  The mmap() interface doesn't give us this level of
   control, so we have to use some heuristics.

   In the general case, if we want a block of n megablocks, then we
   allocate n+1 and trim off the slop from either side (using
   munmap()) to get an aligned chunk of size n.  However, the next
   time we'll try to allocate directly after the previously allocated
   chunk, on the grounds that this is aligned and likely to be free.
   If it turns out that we were wrong, we have to munmap() and try
   again using the general method.
   -------------------------------------------------------------------------- */
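// A worked example of the general case, assuming MBLOCK_SIZE is 1Mb
// (0x100000): to get 2 aligned megablocks we mmap() 3Mb.  If the OS
// hands back ret = 0x503400, the slop is 0x3400, so we munmap() the
// 0xfcc00 bytes below the first aligned address (0x600000) and the
// 0x3400 bytes beyond the 2Mb we keep (0x600000 - 0x7fffff).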

#if !defined(mingw32_HOST_OS) && !defined(cygwin32_HOST_OS)

// A wrapper around mmap(), to abstract away from OS differences in
// the mmap() interface.

static void *
my_mmap (void *addr, lnat size)
{
    void *ret;

#if defined(solaris2_HOST_OS) || defined(irix_HOST_OS)
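    // These OSes lack MAP_ANON(YMOUS); mapping /dev/zero MAP_PRIVATE
    // is the traditional way to get zero-filled anonymous memory.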
    { 
	int fd = open("/dev/zero",O_RDONLY);
	ret = mmap(addr, size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
	close(fd);
    }
#elif hpux_HOST_OS
    ret = mmap(addr, size, PROT_READ | PROT_WRITE, 
	       MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
#elif darwin_HOST_OS
    // Without MAP_FIXED, Apple's mmap ignores addr.
    // With MAP_FIXED, it overwrites already mapped regions, which is dangerous;
    // mmap(0, ... MAP_FIXED ...) is worst of all: It unmaps the program text
    // and replaces it with zeroes, causing instant death.
    // This behaviour seems to be conformant with IEEE Std 1003.1-2001.
    // Let's just use the underlying Mach Microkernel calls directly,
    // they're much nicer.
    
    kern_return_t err;
    ret = addr;
    if(addr)    // try to allocate at the given address
	err = vm_allocate(mach_task_self(),(vm_address_t*) &ret, size, FALSE);
    if(!addr || err)	// try to allocate anywhere
	err = vm_allocate(mach_task_self(),(vm_address_t*) &ret, size, TRUE);
	
    if(err) // don't know what the error codes mean exactly
	barf("memory allocation failed (requested %lu bytes)", size);
    else
	vm_protect(mach_task_self(),ret,size,FALSE,VM_PROT_READ|VM_PROT_WRITE);
#else
    ret = mmap(addr, size, PROT_READ | PROT_WRITE | PROT_EXEC, 
	       MAP_ANON | MAP_PRIVATE, -1, 0);
#endif

    if (ret == (void *)-1) {
	if (errno == ENOMEM || 
	    (errno == EINVAL && sizeof(void*)==4 && size >= 0xc0000000)) {
	    // If we request more than 3Gig, then we get EINVAL
	    // instead of ENOMEM (at least on Linux).
	    errorBelch("out of memory (requested %lu bytes)", size);
	    stg_exit(EXIT_FAILURE);
	} else {
	    barf("getMBlock: mmap: %s", strerror(errno));
	}
    }

    return ret;
}

// Implements the general case: allocate a chunk of memory of 'size'
// mblocks.

static void *
gen_map_mblocks (lnat size)
{
    int slop;
    void *ret;

    // Try to map a larger block, and take the aligned portion from
    // it (unmap the rest).
    size += MBLOCK_SIZE;
    ret = my_mmap(0, size);
    
    // unmap the slop bits around the chunk we allocated
    slop = (W_)ret & MBLOCK_MASK;
    
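    // If ret happened to be aligned (slop == 0), the first munmap()
    // below discards the whole extra megablock from the front;
    // otherwise it discards the MBLOCK_SIZE - slop bytes up to the
    // next megablock boundary, and the second munmap() discards the
    // remaining slop bytes at the end of the mapping.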
    if (munmap(ret, MBLOCK_SIZE - slop) == -1) {
      barf("gen_map_mblocks: munmap failed");
    }
    if (slop > 0 && munmap(ret+size-slop, slop) == -1) {
      barf("gen_map_mblocks: munmap failed");
    }

    // ToDo: if we happened to get an aligned block, then don't
    // unmap the excess, just use it.  For this to work, you
    // need to keep in mind the following:
    //     * Calling my_mmap() with an 'addr' arg pointing to
    //       already my_mmap()ed space is OK and won't fail.
    //     * If my_mmap() can't satisfy the request at the
    //       given 'next_request' address in getMBlocks(), then
    //       you must unmap the extra mblock mmap()ed here (or
    //       simply satisfy yourself that the slop introduced
    //       isn't worth salvaging).
    //

    // next time, try after the block we just got.
    ret += MBLOCK_SIZE - slop;
    return ret;
}


// The external interface: allocate 'n' mblocks, and return the
// address.

void *
getMBlocks(nat n)
{
  static caddr_t next_request = (caddr_t)HEAP_BASE;
  caddr_t ret;
  lnat size = MBLOCK_SIZE * n;
  nat i;
 
  if (next_request == 0) {
      // use gen_map_mblocks the first time.
      ret = gen_map_mblocks(size);
  } else {
      ret = my_mmap(next_request, size);

      if (((W_)ret & MBLOCK_MASK) != 0) {
	  // misaligned block!
#if 0 // defined(DEBUG)
	  errorBelch("warning: getMBlock: misaligned block %p returned when allocating %d megablock(s) at %p", ret, n, next_request);
#endif

	  // unmap this block...
	  if (munmap(ret, size) == -1) {
	      barf("getMBlock: munmap failed");
	  }
	  // and do it the hard way
	  ret = gen_map_mblocks(size);
      }
  }

  // Next time, we'll try to allocate right after the block we just got.
  // ToDo: check that we haven't already grabbed the memory at next_request
  next_request = ret + size;

  IF_DEBUG(gc,debugBelch("Allocated %d megablock(s) at %p\n",n,ret));

  // fill in the table
  for (i = 0; i < n; i++) {
      markHeapAlloced( ret + i * MBLOCK_SIZE );
  }

  mblocks_allocated += n;

  return ret;
}

#else /* defined(mingw32_HOST_OS) || defined(cygwin32_HOST_OS) */

/*
 On Win32 platforms we make use of the two-phase virtual memory API
 to allocate mega blocks. We proceed as follows:

 Reserve a large chunk of VM (256M by default, or whatever the user asked
 for via the -M option), but don't supply a base address that's aligned on
 an MB boundary.  Instead we round up to the nearest mblock from the chunk
 of VM we're handed back from the OS (at the moment we just leave the
 'slop' at the beginning of the reserved chunk unused - ToDo: reuse it).

 Reserving memory doesn't allocate physical storage (not even in the
 page file); this is done later on by committing pages (or mega-blocks in
 our case).
*/
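/* In outline (a simplified sketch; the real code below also handles
   allocation failure and the -M heap-size limit):

     // phase 1: reserve address space only, no physical storage yet
     base = VirtualAlloc(NULL, size_reserved_pool, MEM_RESERVE, PAGE_READWRITE);
     // phase 2: commit megablocks out of the reserved range on demand
     ret  = VirtualAlloc(next_request, size, MEM_COMMIT, PAGE_READWRITE);
*/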

char* base_non_committed = (char*)0;
char* end_non_committed = (char*)0;

/* Default is to reserve 256M of VM to minimise the slop cost. */
#define SIZE_RESERVED_POOL  ( 256 * 1024 * 1024 )

/* Number of bytes reserved */
static unsigned long size_reserved_pool = SIZE_RESERVED_POOL;

void *
getMBlocks(nat n)
{
  static char* base_mblocks       = (char*)0;
  static char* next_request       = (char*)0;
  void* ret                       = (void*)0;
  nat i;

  lnat size = MBLOCK_SIZE * n;
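  /* Reserve the pool the first time through; a request that would run
     past the end of the reserved pool is treated as heap overflow. */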
  
  if ( (base_non_committed == 0) || (next_request + size > end_non_committed) ) {
    if (base_non_committed) {
	/* Tacky, but if no user-provided -M option is in effect,
	 * set it to the default (==256M) in time for the heap overflow PSA.
	 */
	if (RtsFlags.GcFlags.maxHeapSize == 0) {
	    RtsFlags.GcFlags.maxHeapSize = size_reserved_pool / BLOCK_SIZE;
	}
	heapOverflow();
    }
    if (RtsFlags.GcFlags.maxHeapSize != 0) {
      size_reserved_pool = BLOCK_SIZE * RtsFlags.GcFlags.maxHeapSize;
      if (size_reserved_pool < MBLOCK_SIZE) {
	size_reserved_pool = 2*MBLOCK_SIZE;
      }
    }
    base_non_committed = VirtualAlloc ( NULL
                                      , size_reserved_pool
				      , MEM_RESERVE
				      , PAGE_READWRITE
				      );
    if ( base_non_committed == 0 ) {
         errorBelch("getMBlocks: VirtualAlloc failed with: %ld\n", GetLastError());
         ret=(void*)-1;
    } else {
      end_non_committed = (char*)base_non_committed + (unsigned long)size_reserved_pool;
      /* The returned pointer is not aligned on a mega-block boundary. Make it. */
      base_mblocks = (char*)((unsigned long)base_non_committed & (unsigned long)~MBLOCK_MASK) + MBLOCK_SIZE;
#      if 0
       debugBelch("getMBlocks: Dropping %d bytes off of 256M chunk\n", 
		  (unsigned)base_mblocks - (unsigned)base_non_committed);
#      endif

       if ( ((char*)base_mblocks + size) > end_non_committed ) {
          debugBelch("getMBlocks: oops, committed too small a region to start with.");
	  ret=(void*)-1;
       } else {
          next_request = base_mblocks;
       }
    }
  }
  /* Commit the mega block(s) to phys mem */
  if ( ret != (void*)-1 ) {
     ret = VirtualAlloc(next_request, size, MEM_COMMIT, PAGE_READWRITE);
     if (ret == NULL) {
        debugBelch("getMBlocks: VirtualAlloc failed with: %ld\n", GetLastError());
        ret=(void*)-1;
     }
  }

  if (((W_)ret & MBLOCK_MASK) != 0) {
    barf("getMBlocks: misaligned block returned");
  }

  if (ret == (void*)-1) {
     barf("getMBlocks: unknown memory allocation failure on Win32.");
  }

  IF_DEBUG(gc,debugBelch("Allocated %d megablock(s) at 0x%x\n",n,(nat)ret));
  next_request = (char*)next_request + size;

  mblocks_allocated += n;
  
  // fill in the table
  for (i = 0; i < n; i++) {
      markHeapAlloced( ret + i * MBLOCK_SIZE );
  }

  return ret;
}

/* Hand back the physical memory that is allocated to a mega-block. 
   ToDo: chain the released mega block onto some list so that
         getMBlocks() can get at it.

   Currently unused.
*/
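/* Note: MEM_DECOMMIT releases the physical storage but keeps the
   address range reserved, so a decommitted megablock could later be
   re-committed by getMBlocks(). */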
#if 0
void
freeMBlock(void* p, nat n)
{
  BOOL rc;

  rc = VirtualFree(p, n * MBLOCK_SIZE , MEM_DECOMMIT );
  
  if (rc == FALSE) {
#    ifdef DEBUG
     debugBelch("freeMBlocks: VirtualFree failed with: %d\n", GetLastError());
#    endif
  }

}
#endif

#endif