/* ----------------------------------------------------------------------------
 *
 * (c) The GHC Team, 2005
 *
 * Macros for THREADED_RTS support
 *
 * -------------------------------------------------------------------------- */

#ifndef SMP_H
#define SMP_H

/* THREADED_RTS is currently not compatible with the following options:
 *
 *      PROFILING    (only 1 CPU supported)
 *      TICKY_TICKY
 *
 * Unregisterised builds are ok, but only 1 CPU is supported.
 */

#if defined(THREADED_RTS)

#if defined(TICKY_TICKY)
#error Build options incompatible with THREADED_RTS.
#endif

/* 
 * XCHG - the atomic exchange instruction.  Used for locking closures
 * during updates (see lockClosure() below) and the MVar primops.
 *
 * NB: the xchg instruction is implicitly locked, so we do not need
 * a lock prefix here. 
 */
INLINE_HEADER StgWord
xchg(StgPtr p, StgWord w)
{
    StgWord result;
#if i386_HOST_ARCH || x86_64_HOST_ARCH
    result = w;
    __asm__ __volatile__ (
 	  "xchg %1,%0"
          :"+r" (result), "+m" (*p)
          : /* no input-only operands */
	);
#elif powerpc_HOST_ARCH
    __asm__ __volatile__ (
        "1:     lwarx     %0, 0, %2\n"
        "       stwcx.    %1, 0, %2\n"
        "       bne-      1b"
        :"=r" (result)
        :"r" (w), "r" (p)
    );
#elif sparc_HOST_ARCH
    result = w;
    __asm__ __volatile__ (
        "swap %1,%0"
	: "+r" (result), "+m" (*p)
	: /* no input-only operands */
      );
#elif !defined(WITHSMP)
    result = *p;
    *p = w;
#else
#error xchg() unimplemented on this architecture
#endif
    return result;
}
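
/*
 * Usage sketch (illustrative only; 'flag' is a hypothetical StgWord):
 *
 *     StgWord old = xchg(&flag, 1);
 *     if (old == 0) {
 *         // we were the first to set the flag
 *     }
 *
 * xchg() returns the previous contents of *p; lockClosure() below uses
 * exactly this pattern on a closure's info pointer.
 */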

/* 
 * CMPXCHG - the single-word atomic compare-and-exchange instruction.  Used 
 * in the STM implementation.
 */
INLINE_HEADER StgWord
cas(StgVolatilePtr p, StgWord o, StgWord n)
{
#if i386_HOST_ARCH || x86_64_HOST_ARCH
    __asm__ __volatile__ (
 	  "lock/cmpxchg %3,%1"
          :"=a"(o), "=m" (*(volatile unsigned int *)p) 
          :"0" (o), "r" (n));
    return o;
#elif powerpc_HOST_ARCH
    StgWord result;
    __asm__ __volatile__ (
        "1:     lwarx     %0, 0, %3\n"
        "       cmpw      %0, %1\n"
        "       bne       2f\n"
        "       stwcx.    %2, 0, %3\n"
        "       bne-      1b\n"
        "2:"
        :"=&r" (result)
        :"r" (o), "r" (n), "r" (p)
        :"cc", "memory"
    );
    return result;
#elif sparc_HOST_ARCH
    __asm__ __volatile__ (
	"cas [%1], %2, %0"
	: "+r" (n)
	: "r" (p), "r" (o)
	: "memory"
    );
    return n;
#elif !defined(WITHSMP)
    StgWord result;
    result = *p;
    if (result == o) {
        *p = n;
    }
    return result;
#else
#error cas() unimplemented on this architecture
#endif
}
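
/*
 * Usage sketch (illustrative only): cas() stores n into *p only if *p
 * still contains the expected value o, and returns the value it found
 * there.  A typical lock-free update loop, using a hypothetical
 * 'counter' declared as volatile StgWord:
 *
 *     StgWord old, new;
 *     do {
 *         old = counter;
 *         new = old + 1;
 *     } while (cas(&counter, old, new) != old);
 */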

/*
 * Write barrier - ensure that all preceding writes have happened
 * before all following writes.  
 *
 * We need to tell both the compiler AND the CPU about the barrier.
 * This is a brute force solution; better results might be obtained by
 * using volatile type declarations to get fine-grained ordering
 * control in C, and optionally a memory barrier instruction on CPUs
 * that require it (not x86 or x86_64).
 */
INLINE_HEADER void
write_barrier(void) {
#if i386_HOST_ARCH || x86_64_HOST_ARCH
    __asm__ __volatile__ ("" : : : "memory");
#elif powerpc_HOST_ARCH
    __asm__ __volatile__ ("lwsync" : : : "memory");
#elif sparc_HOST_ARCH
    /* Sparc in TSO mode does not require write/write barriers. */
    __asm__ __volatile__ ("" : : : "memory");
#elif !defined(WITHSMP)
    return;
#else
#error memory barriers unimplemented on this architecture
#endif
}
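
/*
 * Usage sketch (illustrative only): the usual pattern is to initialise
 * an object completely, issue write_barrier(), and only then publish a
 * pointer to it, so that no other CPU can see the pointer before the
 * object's contents (the field names here are hypothetical):
 *
 *     q->field1 = x;
 *     q->field2 = y;
 *     write_barrier();
 *     shared_ptr = q;      // publish
 *
 * unlockClosure() below relies on the same idea: the closure's payload
 * must be visible before its info pointer is restored.
 */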

/*
 * Locking/unlocking closures
 *
 * This is used primarily in the implementation of MVars.
 */
#define SPIN_COUNT 4000

INLINE_HEADER StgInfoTable *
lockClosure(StgClosure *p)
{
    StgWord info;
    do {
	nat i = 0;
	do {
	    info = xchg((P_)(void *)&p->header.info, (W_)&stg_WHITEHOLE_info);
	    if (info != (W_)&stg_WHITEHOLE_info) return (StgInfoTable *)info;
	} while (++i < SPIN_COUNT);
	yieldThread();
    } while (1);
}

INLINE_HEADER void
unlockClosure(StgClosure *p, StgInfoTable *info)
{
    // This is a strictly ordered write, so we need a wb():
    write_barrier();
    p->header.info = info;
}
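
/*
 * Usage sketch (illustrative only): callers such as the MVar primops
 * lock a closure, inspect or update it, and unlock it again by putting
 * back the saved info pointer ('mvar' here is hypothetical):
 *
 *     StgInfoTable *info = lockClosure((StgClosure *)mvar);
 *     // ... read or modify the closure's fields ...
 *     unlockClosure((StgClosure *)mvar, info);
 *
 * While locked, the closure's info pointer is stg_WHITEHOLE_info, so
 * other threads calling lockClosure() spin until it is released.
 */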

#else /* !THREADED_RTS */

#define write_barrier() /* nothing */

INLINE_HEADER StgWord
xchg(StgPtr p, StgWord w)
{
    StgWord old = *p;
    *p = w;
    return old;
}

INLINE_HEADER StgInfoTable *
lockClosure(StgClosure *p)
{ return (StgInfoTable *)p->header.info; }

INLINE_HEADER void
unlockClosure(StgClosure *p STG_UNUSED, StgInfoTable *info STG_UNUSED)
{ /* nothing */ }

#endif /* !THREADED_RTS */

// Handy specialised versions of lockClosure()/unlockClosure()
INLINE_HEADER void lockTSO(StgTSO *tso)
{ lockClosure((StgClosure *)tso); }

INLINE_HEADER void unlockTSO(StgTSO *tso)
{ unlockClosure((StgClosure*)tso, (StgInfoTable*)&stg_TSO_info); }

#endif /* SMP_H */