/* ----------------------------------------------------------------------------
 *
 * (c) The GHC Team, 2005
 *
 * Macros for THREADED_RTS support
 *
 * -------------------------------------------------------------------------- */

#ifndef SMP_H
#define SMP_H

/* THREADED_RTS is currently not compatible with the following options:
 *
 *      PROFILING (but only 1 CPU supported)
 *      TICKY_TICKY
 *      Unregisterised builds are ok, but only 1 CPU supported.
 */
#ifdef CMINUSMINUS

// C-- version of unlockClosure(): issue a write barrier, then publish
// the real info pointer.  The barrier keeps the info-pointer write
// from being reordered before earlier writes to the closure.
#define unlockClosure(ptr,info)                 \
    prim %write_barrier() [];                   \
    StgHeader_info(ptr) = info;    

#else

#if defined(THREADED_RTS)

/* THREADED_RTS + TICKY_TICKY is not a supported combination. */
#if defined(TICKY_TICKY)
#error Build options incompatible with THREADED_RTS.
#endif
/* ----------------------------------------------------------------------------
   Atomic operations
   ------------------------------------------------------------------------- */
   
/* 
 * The atomic exchange operation: xchg(p,w) exchanges the value
 * pointed to by p with the value w, returning the old value.
 *
 * Used for locking closures during updates (see lockClosure() below)
 * and the MVar primops.
 */
INLINE_HEADER StgWord xchg(StgPtr p, StgWord w);

/* 
 * Compare-and-swap.  Atomically does this:
 *
 * cas(p,o,n) { 
 *    r = *p; 
 *    if (r == o) { *p = n }; 
 *    return r;
 * }
 */
INLINE_HEADER StgWord cas(StgVolatilePtr p, StgWord o, StgWord n);

/*
 * Prevents write operations from moving across this call in either
 * direction.
 */ 
INLINE_HEADER void write_barrier(void);

/* ----------------------------------------------------------------------------
   Implementations
   ------------------------------------------------------------------------- */
/* 
 * NB: the xchg instruction is implicitly locked, so we do not need
 * a lock prefix here. 
 */
/*
 * Atomically exchange *p with w, returning the previous value of *p.
 * Used for locking closures (see lockClosure() below) and the MVar
 * primops.
 */
INLINE_HEADER StgWord
xchg(StgPtr p, StgWord w)
{
    StgWord result;
#if i386_HOST_ARCH || x86_64_HOST_ARCH
    // xchg with a memory operand is implicitly locked on x86; no lock
    // prefix required.
    result = w;
    __asm__ __volatile__ (
          "xchg %1,%0"
          :"+r" (result), "+m" (*p)
          : /* no input-only operands */
        );
#elif powerpc_HOST_ARCH
    // load-reserve / store-conditional loop: retry until the store
    // succeeds without an intervening write to *p.
    __asm__ __volatile__ (
        "1:     lwarx     %0, 0, %2\n"
        "       stwcx.    %1, 0, %2\n"
        "       bne-      1b"
        :"=&r" (result)
        :"r" (w), "r" (p)
    );
#elif sparc_HOST_ARCH
    result = w;
    __asm__ __volatile__ (
        "swap %1,%0"
        : "+r" (result), "+m" (*p)
        : /* no input-only operands */
      );
#elif !defined(WITHSMP)
    // Single-CPU build: a plain read-then-write is sufficient.
    result = *p;
    *p = w;
#else
#error xchg() unimplemented on this architecture
#endif
    return result;
}

105
106
107
108
109
110
111
/* 
 * CMPXCHG - the single-word atomic compare-and-exchange instruction.  Used 
 * in the STM implementation.
 */
INLINE_HEADER StgWord
cas(StgVolatilePtr p, StgWord o, StgWord n)
{
112
#if i386_HOST_ARCH || x86_64_HOST_ARCH
113
    __asm__ __volatile__ (
114
 	  "lock\ncmpxchg %3,%1"
115
116
117
          :"=a"(o), "=m" (*(volatile unsigned int *)p) 
          :"0" (o), "r" (n));
    return o;
118
119
120
121
122
123
124
125
126
#elif powerpc_HOST_ARCH
    StgWord result;
    __asm__ __volatile__ (
        "1:     lwarx     %0, 0, %3\n"
        "       cmpw      %0, %1\n"
        "       bne       2f\n"
        "       stwcx.    %2, 0, %3\n"
        "       bne-      1b\n"
        "2:"
127
        :"=&r" (result)
128
        :"r" (o), "r" (n), "r" (p)
129
        :"cc", "memory"
130
131
    );
    return result;
132
133
134
135
136
137
138
139
#elif sparc_HOST_ARCH
    __asm__ __volatile__ (
	"cas [%1], %2, %0"
	: "+r" (n)
	: "r" (p), "r" (o)
	: "memory"
    );
    return n;
140
141
142
143
144
145
146
#elif !defined(WITHSMP)
    StgWord result;
    result = *p;
    if (result == o) {
        *p = n;
    }
    return result;
147
148
149
#else
#error cas() unimplemented on this architecture
#endif
150
151
}

152
153
154
155
156
157
158
159
160
161
162
/*
 * Write barrier - ensure that all preceding writes have happened
 * before all following writes.  
 *
 * We need to tell both the compiler AND the CPU about the barrier.
 * This is a brute force solution; better results might be obtained by
 * using volatile type declarations to get fine-grained ordering
 * control in C, and optionally a memory barrier instruction on CPUs
 * that require it (not x86 or x86_64).
 */
INLINE_HEADER void
163
write_barrier(void) {
164
165
#if i386_HOST_ARCH || x86_64_HOST_ARCH
    __asm__ __volatile__ ("" : : : "memory");
166
167
#elif powerpc_HOST_ARCH
    __asm__ __volatile__ ("lwsync" : : : "memory");
168
169
170
#elif sparc_HOST_ARCH
    /* Sparc in TSO mode does not require write/write barriers. */
    __asm__ __volatile__ ("" : : : "memory");
171
172
#elif !defined(WITHSMP)
    return;
173
174
175
176
177
#else
#error memory barriers unimplemented on this architecture
#endif
}

/* -----------------------------------------------------------------------------
 * Locking/unlocking closures
 *
 * This is used primarily in the implementation of MVars.
 * -------------------------------------------------------------------------- */

184
185
#define SPIN_COUNT 4000

186
187
188
189
190
191
192
193
194
#ifdef KEEP_LOCKCLOSURE
// We want a callable copy of lockClosure() so that we can refer to it
// from .cmm files compiled using the native codegen.
extern StgInfoTable *lockClosure(StgClosure *p);
INLINE_ME
#else
INLINE_HEADER
#endif
StgInfoTable *
195
196
197
198
lockClosure(StgClosure *p)
{
    StgWord info;
    do {
199
200
	nat i = 0;
	do {
Simon Marlow's avatar
Simon Marlow committed
201
	    info = xchg((P_)(void *)&p->header.info, (W_)&stg_WHITEHOLE_info);
202
203
	    if (info != (W_)&stg_WHITEHOLE_info) return (StgInfoTable *)info;
	} while (++i < SPIN_COUNT);
204
205
	yieldThread();
    } while (1);
206
207
208
209
210
}

INLINE_HEADER void
unlockClosure(StgClosure *p, StgInfoTable *info)
{
    // Restore the real info pointer, releasing the lock taken by
    // lockClosure().  This is a strictly ordered write, so we need a
    // write_barrier(): earlier writes to the closure must be visible
    // before the info pointer that publishes it.
    write_barrier();
    p->header.info = info;
}

Simon Marlow's avatar
Simon Marlow committed
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
/* -----------------------------------------------------------------------------
 * Spin locks
 *
 * These are simple spin-only locks as opposed to Mutexes which
 * probably spin for a while before blocking in the kernel.  We use
 * these when we are sure that all our threads are actively running on
 * a CPU, eg. in the GC.
 *
 * TODO: measure whether we really need these, or whether Mutexes
 * would do (and be a bit safer if a CPU becomes loaded).
 * -------------------------------------------------------------------------- */

#if defined(DEBUG)
typedef struct StgSync_
{
    StgWord32 lock;
    StgWord64 spin; // DEBUG version counts how much it spins
} StgSync;
#else
typedef StgWord StgSync;
#endif

typedef lnat StgSyncCount;


#if defined(DEBUG)

// Debug versions of spin locks maintain a spin count

// How to use: 
//  To use the debug veriosn of the spin locks, a debug version of the program 
//  can be run under a deugger with a break point on stat_exit. At exit time 
//  of the program one can examine the state the spin count counts of various
//  spin locks to check for contention. 

// acquire spin lock
252
INLINE_HEADER void ACQUIRE_SPIN_LOCK(StgSync * p)
Simon Marlow's avatar
Simon Marlow committed
253
254
255
256
257
258
259
260
261
262
{
    StgWord32 r = 0;
    do {
        p->spin++;
        r = cas((StgVolatilePtr)&(p->lock), 1, 0);
    } while(r == 0);
    p->spin--;
}

// release spin lock
263
INLINE_HEADER void RELEASE_SPIN_LOCK(StgSync * p)
Simon Marlow's avatar
Simon Marlow committed
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
{
    write_barrier();
    p->lock = 1;
}

// initialise spin lock
// Sets the lock word to the unlocked state and zeroes the DEBUG spin
// counter.
INLINE_HEADER void initSpinLock(StgSync * p)
{
    write_barrier();
    p->lock = 1;  // 1 == unlocked (ACQUIRE_SPIN_LOCK cas's 1 -> 0)
    p->spin = 0;  // no contention observed yet
}

#else

// acquire spin lock
280
INLINE_HEADER void ACQUIRE_SPIN_LOCK(StgSync * p)
Simon Marlow's avatar
Simon Marlow committed
281
282
283
284
285
286
287
288
{
    StgWord32 r = 0;
    do {
        r = cas((StgVolatilePtr)p, 1, 0);
    } while(r == 0);
}

// release spin lock
289
INLINE_HEADER void RELEASE_SPIN_LOCK(StgSync * p)
Simon Marlow's avatar
Simon Marlow committed
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
{
    write_barrier();
    (*p) = 1;
}

// Initialise a spin lock to the unlocked state (1).
INLINE_HEADER void initSpinLock(StgSync * p)
{
    write_barrier();
    *p = 1;
}

#endif /* DEBUG */

/* ---------------------------------------------------------------------- */
#else /* !THREADED_RTS */

/* Non-threaded RTS: no other CPU to order writes against. */
#define write_barrier() /* nothing */

// Non-atomic exchange: the single-threaded RTS needs no
// synchronisation, so a plain read-then-write does the job.
INLINE_HEADER StgWord
xchg(StgPtr p, StgWord w)
{
    StgWord prev;
    prev = *p;
    *p = w;
    return prev;
}

317
318
319
320
321
322
323
324
INLINE_HEADER StgInfoTable *
lockClosure(StgClosure *p)
{ return (StgInfoTable *)p->header.info; }

// Non-threaded RTS counterpart of unlockClosure(): nothing to release,
// since lockClosure() above takes no lock.
INLINE_HEADER void
unlockClosure(StgClosure *p STG_UNUSED, StgInfoTable *info STG_UNUSED)
{ /* nothing */ }

325
326
327
// Using macros here means we don't have to ensure the argument is in scope
#define ACQUIRE_SPIN_LOCK(p) /* nothing */
#define RELEASE_SPIN_LOCK(p) /* nothing */
Simon Marlow's avatar
Simon Marlow committed
328
329
330
331

INLINE_HEADER void initSpinLock(void * p STG_UNUSED)
{ /* nothing */ }

#endif /* !THREADED_RTS */

// Handy specialised versions of lockClosure()/unlockClosure()
INLINE_HEADER void lockTSO(StgTSO *tso)
{ lockClosure((StgClosure *)tso); }

INLINE_HEADER void unlockTSO(StgTSO *tso)
{ unlockClosure((StgClosure*)tso, (StgInfoTable*)&stg_TSO_info); }

// The closing #endif comments were swapped in the original: the
// innermost open conditional here is CMINUSMINUS (opened after the
// SMP_H include guard), so it must close first.
#endif /* CMINUSMINUS */

#endif /* SMP_H */