GCTDecl.h 5.14 KB
Newer Older
Simon Marlow's avatar
Simon Marlow committed
1 2
/* -----------------------------------------------------------------------------
 *
 * (c) The GHC Team 1998-2014
 *
 * Documentation on the architecture of the Garbage Collector can be
 * found in the online commentary:
 *
 *   http://ghc.haskell.org/trac/ghc/wiki/Commentary/Rts/Storage/GC
 *
 * ---------------------------------------------------------------------------*/

#ifndef SM_GCTDECL_H
#define SM_GCTDECL_H

#include "BeginPrivate.h"

17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36
/* The gct variable is thread-local and points to the current thread's
   gc_thread structure. It is heavily accessed, and thus high
   performance access is crucial to parallel (-threaded) workloads.

   First, we try to use a 'global register variable' which is a GCC
   extension. This reserves the register globally.

   If that's not possible, then we need to use __thread, which is a
   compiler/OS specific TLS storage mechanism (assumed to be Fast
   Enough.)

   BUT, some older versions of OS X compilers (llvm-gcc, older Clangs)
   do not support __thread at all. Modern Clang, however, does - but on
   OS X it's not as fast as on Linux (which can write directly into a
   segment register - see #7602.)

   If we don't support __thread then we do the absolute worst thing:
   we just use pthread_getspecific and pthread_setspecific (which are
   horribly slow.)
*/
Simon Marlow's avatar
Simon Marlow committed
37

38
/* Declare `var` as a `register` variable of type `ty`, with REG(hwreg)
   appended to the declarator.  REG is not defined in this header.
   NOTE(review): the expansion already ends in ';', so a call site that
   writes its own ';' leaves an empty declaration behind - harmless, but
   reported by pedantic compilers. */
#define GCT_REG_DECL(ty,var,hwreg) register ty var REG(hwreg);
Simon Marlow's avatar
Simon Marlow committed
39 40


41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58
/* -------------------------------------------------------------------------- */

/* First: if we're not using the threaded RTS, it's easy: just fake it. */
#if !defined(THREADED_RTS)
extern StgWord8 the_gc_thread[];
#define gct ((gc_thread*)&the_gc_thread)
#define SET_GCT(to) /*nothing*/
#define DECLARE_GCT /*nothing*/

#else /* defined(THREADED_RTS) */

/* -------------------------------------------------------------------------- */

/* Now, llvm-gcc and some older Clang compilers do not support
   __thread. So we have to fall back to the extremely slow case,
   unfortunately. Note: clang_CC_FLAVOR implies llvm_CC_FLAVOR */
#if defined(llvm_CC_FLAVOR) && (CC_SUPPORTS_TLS == 0)
/* Every use of gct is a pthread_getspecific() lookup on gctKey, and
   SET_GCT stores the new value through pthread_setspecific(). */
#define gct ((gc_thread *)(pthread_getspecific(gctKey)))
#define SET_GCT(val) (pthread_setspecific(gctKey, (val)))
/* Expands to the definition of the key object itself. */
#define DECLARE_GCT ThreadLocalKey gctKey;
Simon Marlow's avatar
Simon Marlow committed
61

62
/* -------------------------------------------------------------------------- */
Simon Marlow's avatar
Simon Marlow committed
63

64 65 66 67 68 69 70
/* However, if we *are* using an LLVM based compiler with __thread
   support, then use that (since LLVM doesn't support global register
   variables.) */
#elif defined(llvm_CC_FLAVOR) && (CC_SUPPORTS_TLS == 1)
/* gct is a __thread variable: declared here, defined wherever
   DECLARE_GCT is expanded. */
extern __thread gc_thread *gct;
/* The expansion is parenthesized so that the assignment remains a
   single expression in whatever context the macro is used. */
#define SET_GCT(to) (gct = (to))
#define DECLARE_GCT __thread gc_thread *gct;
Simon Marlow's avatar
Simon Marlow committed
71

72
/* -------------------------------------------------------------------------- */
Simon Marlow's avatar
Simon Marlow committed
73

74 75 76 77 78
/* Next up: Using __thread is better than stealing a register on
   x86/Linux, because we have too few registers available. In my
   tests it was worth about 5% in GC performance, but of course that
   might change as gcc improves. -- SDM 2009/04/03 */
#elif (defined(i386_HOST_ARCH) && defined(linux_HOST_OS))
Simon Marlow's avatar
Simon Marlow committed
79
extern __thread gc_thread* gct;
80
#define SET_GCT(to) gct = (to)
Simon Marlow's avatar
Simon Marlow committed
81 82
#define DECLARE_GCT __thread gc_thread* gct;

83
/* -------------------------------------------------------------------------- */
Simon Marlow's avatar
Simon Marlow committed
84

85 86 87
/* Next up: On SPARC we can't pin gct to a register. Names like %l1
   are just offsets into the register window, which change on each
   function call.

   There are eight global (non-window) registers, but they're used for other
   purposes:

    %g0     -- always zero
    %g1     -- volatile over function calls, used by the linker
    %g2-%g3 -- used as scratch regs by the C compiler (caller saves)
    %g4     -- volatile over function calls, used by the linker
    %g5-%g7 -- reserved by the OS
*/
#elif defined(sparc_HOST_ARCH)
Simon Marlow's avatar
Simon Marlow committed
99
extern __thread gc_thread* gct;
100
#define SET_GCT(to) gct = (to)
Simon Marlow's avatar
Simon Marlow committed
101 102
#define DECLARE_GCT __thread gc_thread* gct;

103
/* -------------------------------------------------------------------------- */
Simon Marlow's avatar
Simon Marlow committed
104

105 106 107 108
/* Next up: generally, if REG_Base is defined and we're *not* using
   i386, then actually declare the needed register. The catch for i386
   here is that REG_Base is %ebx, but that is also used for -fPIC, so
   it can't be stolen */
Simon Marlow's avatar
Simon Marlow committed
109
#elif defined(REG_Base) && !defined(i386_HOST_ARCH)
110 111
GCT_REG_DECL(gc_thread*, gct, REG_Base);
#define SET_GCT(to) gct = (to)
Simon Marlow's avatar
Simon Marlow committed
112 113
#define DECLARE_GCT /* nothing */

114
/* -------------------------------------------------------------------------- */
Simon Marlow's avatar
Simon Marlow committed
115

116 117
/* Next up: if REG_R1 is available after checking REG_Base, we're
   going to steal it in every case we can. */
Simon Marlow's avatar
Simon Marlow committed
118
#elif defined(REG_R1)
119 120
GCT_REG_DECL(gc_thread*, gct, REG_R1);
#define SET_GCT(to) gct = (to)
Simon Marlow's avatar
Simon Marlow committed
121 122
#define DECLARE_GCT /* nothing */

123
/* -------------------------------------------------------------------------- */
Simon Marlow's avatar
Simon Marlow committed
124

125 126 127
/* Finally, as an absolute fallback, if none of the above tests check
   out but we *do* have __thread support, then use that. */
#elif CC_SUPPORTS_TLS == 1
Simon Marlow's avatar
Simon Marlow committed
128
extern __thread gc_thread* gct;
129
#define SET_GCT(to) gct = (to)
Simon Marlow's avatar
Simon Marlow committed
130 131
#define DECLARE_GCT __thread gc_thread* gct;

132
/* -------------------------------------------------------------------------- */
Simon Marlow's avatar
Simon Marlow committed
133

134 135 136
/* Impossible! */
#else
/* No branch above matched: stop the build rather than leave gct
   undeclared. */
#error Cannot find a way to declare the thread-local gc variable!
#endif /* choice of gct implementation */

#endif /* defined(THREADED_RTS) */

#include "EndPrivate.h"

#endif // SM_GCTDECL_H