#include "Rts.h"

// Fallbacks for atomic primops on byte arrays. The builtins used
// below are supported on both GCC and LLVM.
//
// Ideally these functions would take StgWord8, StgWord16, etc., but older
// GCC versions incorrectly assume that the register the argument is passed
// in has been zero-extended; the ABI makes no such guarantee, and GHC does
// not zero-extend when it generates calls to these functions.
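//
// Illustrative sketch (hypothetical declaration, not part of this file): the
// problematic alternative would be to take the narrow type directly,
//
//   StgWord8 bad_atomic_add8(StgWord8 *p, StgWord8 v);  // trusts upper bits
//
// which older GCC versions may compile without masking the incoming register.
// Taking StgWord and narrowing with an explicit cast, as done below, avoids
// relying on the caller having zero-extended the value.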

// FetchAddByteArrayOp_Int

extern StgWord hs_atomic_add8(StgWord x, StgWord val);
StgWord
hs_atomic_add8(StgWord x, StgWord val)
{
  return __sync_fetch_and_add((volatile StgWord8 *) x, (StgWord8) val);
}

extern StgWord hs_atomic_add16(StgWord x, StgWord val);
StgWord
hs_atomic_add16(StgWord x, StgWord val)
{
  return __sync_fetch_and_add((volatile StgWord16 *) x, (StgWord16) val);
}

extern StgWord hs_atomic_add32(StgWord x, StgWord val);
StgWord
hs_atomic_add32(StgWord x, StgWord val)
{
  return __sync_fetch_and_add((volatile StgWord32 *) x, (StgWord32) val);
}

#if WORD_SIZE_IN_BITS == 64
extern StgWord64 hs_atomic_add64(StgWord x, StgWord64 val);
StgWord64
hs_atomic_add64(StgWord x, StgWord64 val)
{
  return __sync_fetch_and_add((volatile StgWord64 *) x, val);
}
#endif

// FetchSubByteArrayOp_Int

extern StgWord hs_atomic_sub8(StgWord x, StgWord val);
StgWord
hs_atomic_sub8(StgWord x, StgWord val)
{
  return __sync_fetch_and_sub((volatile StgWord8 *) x, (StgWord8) val);
}

extern StgWord hs_atomic_sub16(StgWord x, StgWord val);
StgWord
hs_atomic_sub16(StgWord x, StgWord val)
{
  return __sync_fetch_and_sub((volatile StgWord16 *) x, (StgWord16) val);
}

extern StgWord hs_atomic_sub32(StgWord x, StgWord val);
StgWord
hs_atomic_sub32(StgWord x, StgWord val)
{
  return __sync_fetch_and_sub((volatile StgWord32 *) x, (StgWord32) val);
}

#if WORD_SIZE_IN_BITS == 64
extern StgWord64 hs_atomic_sub64(StgWord x, StgWord64 val);
StgWord64
hs_atomic_sub64(StgWord x, StgWord64 val)
{
  return __sync_fetch_and_sub((volatile StgWord64 *) x, val);
}
#endif

// FetchAndByteArrayOp_Int

extern StgWord hs_atomic_and8(StgWord x, StgWord val);
StgWord
hs_atomic_and8(StgWord x, StgWord val)
{
  return __sync_fetch_and_and((volatile StgWord8 *) x, (StgWord8) val);
}

extern StgWord hs_atomic_and16(StgWord x, StgWord val);
StgWord
hs_atomic_and16(StgWord x, StgWord val)
{
  return __sync_fetch_and_and((volatile StgWord16 *) x, (StgWord16) val);
}

extern StgWord hs_atomic_and32(StgWord x, StgWord val);
StgWord
hs_atomic_and32(StgWord x, StgWord val)
{
  return __sync_fetch_and_and((volatile StgWord32 *) x, (StgWord32) val);
}

#if WORD_SIZE_IN_BITS == 64
extern StgWord64 hs_atomic_and64(StgWord x, StgWord64 val);
StgWord64
hs_atomic_and64(StgWord x, StgWord64 val)
{
  return __sync_fetch_and_and((volatile StgWord64 *) x, val);
}
#endif

// FetchNandByteArrayOp_Int

// Note [__sync_fetch_and_nand usage]
// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
//
// The __sync_fetch_and_nand builtin is a bit of a disaster. It was introduced
// in GCC long ago with silly semantics. Specifically, with tmp being the
// value originally read from *ptr:
//
//    *ptr = ~tmp & value
//
// Clang introduced the builtin with the same semantics.
//
// In GCC 4.4 the operation's semantics were rightly changed to a true NAND,
//
//    *ptr = ~(tmp & value)
//
// and the -Wsync-nand warning was added, warning users of the operation about
// the change.
//
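// For concreteness (illustrative values, not part of the original note):
// with an 8-bit cell, tmp = 0x0f and value = 0x03,
//
//    pre-4.4:  *ptr = ~0x0f & 0x03   = 0xf0 & 0x03 = 0x00
//    4.4+:     *ptr = ~(0x0f & 0x03) = ~0x03       = 0xfc   (a true NAND)
//
// In both cases the builtin returns tmp (0x0f); only the stored value
// differs.
//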
// Clang took this change as a reason to remove support for the
// builtin in 2010. Then, in 2014 Clang re-added support with the new
// semantics. However, the warning flag was given a different name
// (-Wsync-fetch-and-nand-semantics-changed) for added fun.
//
// Consequently, we are left with a bit of a mess: GHC requires GCC >4.4
// (enforced by the FP_GCC_VERSION autoconf check), so we thankfully don't need
// to support the operation's older broken semantics. However, we need to take
// care to explicitly disable -Wsync-nand wherever possible, lest the build
// fail with -Werror. Furthermore, we need to emulate the operation when
// building with some Clang versions (shipped by some Mac OS X releases) which
// lack support for the builtin.
//
// In the words of Bob Dylan: everything is broken.
//
// See also:
//
//  * https://bugs.llvm.org/show_bug.cgi?id=8842
//  * https://ghc.haskell.org/trac/ghc/ticket/9678
//

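// Emulate fetch-and-NAND with a compare-and-swap loop: re-read the cell until
// a CAS from the observed value to ~(observed & val) succeeds, then return
// the observed (pre-update) value, matching __sync_fetch_and_nand's result.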
#define CAS_NAND(x, val)                                            \
  {                                                                 \
    __typeof__ (*(x)) tmp = *(x);                                   \
    while (!__sync_bool_compare_and_swap(x, tmp, ~(tmp & (val)))) { \
      tmp = *(x);                                                   \
    }                                                               \
    return tmp;                                                     \
  }

// N.B. __has_builtin is only provided by clang
#if !defined(__has_builtin)
#define __has_builtin(x) 0
#endif

#if defined(__clang__) && !__has_builtin(__sync_fetch_and_nand)
#define USE_SYNC_FETCH_AND_NAND 0
#else
#define USE_SYNC_FETCH_AND_NAND 1
#endif

// Otherwise the build fails with -Werror
#pragma GCC diagnostic push
#if defined(__clang__)
#pragma GCC diagnostic ignored "-Wsync-fetch-and-nand-semantics-changed"
#elif defined(__GNUC__)
// At least when compiling with our LTO-shim gcc, this pragma doesn't seem to
// take effect: a warning is still raised (arbitrarily, on the last occurrence
// of __sync_fetch_and_nand) and marked as an error, yet the build completes
// successfully whether or not this line is present:
#pragma GCC diagnostic ignored "-Wsync-nand"
#endif

extern StgWord hs_atomic_nand8(StgWord x, StgWord val);
StgWord
hs_atomic_nand8(StgWord x, StgWord val)
{
#if USE_SYNC_FETCH_AND_NAND
  return __sync_fetch_and_nand((volatile StgWord8 *) x, (StgWord8) val);
#else
  CAS_NAND((volatile StgWord8 *) x, (StgWord8) val);
#endif
}

extern StgWord hs_atomic_nand16(StgWord x, StgWord val);
StgWord
hs_atomic_nand16(StgWord x, StgWord val)
{
#if USE_SYNC_FETCH_AND_NAND
  return __sync_fetch_and_nand((volatile StgWord16 *) x, (StgWord16) val);
#else
  CAS_NAND((volatile StgWord16 *) x, (StgWord16) val);
#endif
}

extern StgWord hs_atomic_nand32(StgWord x, StgWord val);
StgWord
hs_atomic_nand32(StgWord x, StgWord val)
{
#if USE_SYNC_FETCH_AND_NAND
  return __sync_fetch_and_nand((volatile StgWord32 *) x, (StgWord32) val);
#else
  CAS_NAND((volatile StgWord32 *) x, (StgWord32) val);
#endif
}

#if WORD_SIZE_IN_BITS == 64
extern StgWord64 hs_atomic_nand64(StgWord x, StgWord64 val);
StgWord64
hs_atomic_nand64(StgWord x, StgWord64 val)
{
#if USE_SYNC_FETCH_AND_NAND
  return __sync_fetch_and_nand((volatile StgWord64 *) x, val);
#else
  CAS_NAND((volatile StgWord64 *) x, val);
#endif
}
#endif

#pragma GCC diagnostic pop

// FetchOrByteArrayOp_Int

extern StgWord hs_atomic_or8(StgWord x, StgWord val);
StgWord
hs_atomic_or8(StgWord x, StgWord val)
{
  return __sync_fetch_and_or((volatile StgWord8 *) x, (StgWord8) val);
}

extern StgWord hs_atomic_or16(StgWord x, StgWord val);
StgWord
hs_atomic_or16(StgWord x, StgWord val)
{
  return __sync_fetch_and_or((volatile StgWord16 *) x, (StgWord16) val);
}

extern StgWord hs_atomic_or32(StgWord x, StgWord val);
StgWord
hs_atomic_or32(StgWord x, StgWord val)
{
  return __sync_fetch_and_or((volatile StgWord32 *) x, (StgWord32) val);
}

#if WORD_SIZE_IN_BITS == 64
extern StgWord64 hs_atomic_or64(StgWord x, StgWord64 val);
StgWord64
hs_atomic_or64(StgWord x, StgWord64 val)
{
  return __sync_fetch_and_or((volatile StgWord64 *) x, val);
}
#endif

// FetchXorByteArrayOp_Int

extern StgWord hs_atomic_xor8(StgWord x, StgWord val);
StgWord
hs_atomic_xor8(StgWord x, StgWord val)
{
  return __sync_fetch_and_xor((volatile StgWord8 *) x, (StgWord8) val);
}

extern StgWord hs_atomic_xor16(StgWord x, StgWord val);
StgWord
hs_atomic_xor16(StgWord x, StgWord val)
{
  return __sync_fetch_and_xor((volatile StgWord16 *) x, (StgWord16) val);
}

extern StgWord hs_atomic_xor32(StgWord x, StgWord val);
StgWord
hs_atomic_xor32(StgWord x, StgWord val)
{
  return __sync_fetch_and_xor((volatile StgWord32 *) x, (StgWord32) val);
}

#if WORD_SIZE_IN_BITS == 64
extern StgWord64 hs_atomic_xor64(StgWord x, StgWord64 val);
StgWord64
hs_atomic_xor64(StgWord x, StgWord64 val)
{
  return __sync_fetch_and_xor((volatile StgWord64 *) x, val);
}
#endif

// CasByteArrayOp_Int

extern StgWord hs_cmpxchg8(StgWord x, StgWord old, StgWord new);
StgWord
hs_cmpxchg8(StgWord x, StgWord old, StgWord new)
{
  return __sync_val_compare_and_swap((volatile StgWord8 *) x, (StgWord8) old, (StgWord8) new);
}

extern StgWord hs_cmpxchg16(StgWord x, StgWord old, StgWord new);
StgWord
hs_cmpxchg16(StgWord x, StgWord old, StgWord new)
{
  return __sync_val_compare_and_swap((volatile StgWord16 *) x, (StgWord16) old, (StgWord16) new);
}

extern StgWord hs_cmpxchg32(StgWord x, StgWord old, StgWord new);
StgWord
hs_cmpxchg32(StgWord x, StgWord old, StgWord new)
{
  return __sync_val_compare_and_swap((volatile StgWord32 *) x, (StgWord32) old, (StgWord32) new);
}

#if WORD_SIZE_IN_BITS == 64
extern StgWord hs_cmpxchg64(StgWord x, StgWord64 old, StgWord64 new);
StgWord
hs_cmpxchg64(StgWord x, StgWord64 old, StgWord64 new)
{
  return __sync_val_compare_and_swap((volatile StgWord64 *) x, old, new);
}
#endif

// AtomicReadByteArrayOp_Int
// Implies a full memory barrier (see compiler/prelude/primops.txt.pp)
// __ATOMIC_SEQ_CST: Full barrier in both directions (hoisting and sinking
// of code) and synchronizes with acquire loads and release stores in
// all threads.
//
// When we lack C11 atomics support we emulate these using the old GCC __sync
// primitives, which the GCC documentation claims "usually" imply a full
// barrier.
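//
// The fallback read below relies on a small trick (same calls as in the code
// that follows): atomically adding 0 leaves the cell unchanged but returns
// its contents with the __sync builtins' full-barrier behaviour, e.g.
//
//   StgWord v = __sync_add_and_fetch((StgWord8 *) x, 0);  // barriered read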

extern StgWord hs_atomicread8(StgWord x);
StgWord
hs_atomicread8(StgWord x)
{
#if HAVE_C11_ATOMICS
  return __atomic_load_n((StgWord8 *) x, __ATOMIC_SEQ_CST);
#else
  return __sync_add_and_fetch((StgWord8 *) x, 0);
#endif
}

extern StgWord hs_atomicread16(StgWord x);
StgWord
hs_atomicread16(StgWord x)
{
#if HAVE_C11_ATOMICS
  return __atomic_load_n((StgWord16 *) x, __ATOMIC_SEQ_CST);
#else
  return __sync_add_and_fetch((StgWord16 *) x, 0);
#endif
}

extern StgWord hs_atomicread32(StgWord x);
StgWord
hs_atomicread32(StgWord x)
{
#if HAVE_C11_ATOMICS
  return __atomic_load_n((StgWord32 *) x, __ATOMIC_SEQ_CST);
#else
  return __sync_add_and_fetch((StgWord32 *) x, 0);
#endif
}

extern StgWord64 hs_atomicread64(StgWord x);
StgWord64
hs_atomicread64(StgWord x)
{
#if HAVE_C11_ATOMICS
  return __atomic_load_n((StgWord64 *) x, __ATOMIC_SEQ_CST);
#else
  return __sync_add_and_fetch((StgWord64 *) x, 0);
#endif
}

// AtomicWriteByteArrayOp_Int
// Implies a full memory barrier (see compiler/prelude/primops.txt.pp)
// __ATOMIC_SEQ_CST: Full barrier (see hs_atomicread8 above).
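// Without C11 atomics, the store below is emulated with a CAS loop: keep
// attempting to swap 'val' in against the value just observed until an
// attempt succeeds. The CAS supplies the barrier; the loop only retries if
// another thread wrote to the cell between our read and the swap.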

extern void hs_atomicwrite8(StgWord x, StgWord val);
void
hs_atomicwrite8(StgWord x, StgWord val)
{
#if HAVE_C11_ATOMICS
  __atomic_store_n((StgWord8 *) x, (StgWord8) val, __ATOMIC_SEQ_CST);
#else
  while (!__sync_bool_compare_and_swap((StgWord8 *) x, *(StgWord8 *) x, (StgWord8) val));
#endif
}

extern void hs_atomicwrite16(StgWord x, StgWord val);
void
hs_atomicwrite16(StgWord x, StgWord val)
{
#if HAVE_C11_ATOMICS
  __atomic_store_n((StgWord16 *) x, (StgWord16) val, __ATOMIC_SEQ_CST);
#else
  while (!__sync_bool_compare_and_swap((StgWord16 *) x, *(StgWord16 *) x, (StgWord16) val));
#endif
}

extern void hs_atomicwrite32(StgWord x, StgWord val);
void
hs_atomicwrite32(StgWord x, StgWord val)
{
#if HAVE_C11_ATOMICS
  __atomic_store_n((StgWord32 *) x, (StgWord32) val, __ATOMIC_SEQ_CST);
#else
  while (!__sync_bool_compare_and_swap((StgWord32 *) x, *(StgWord32 *) x, (StgWord32) val));
#endif
}

extern void hs_atomicwrite64(StgWord x, StgWord64 val);
void
hs_atomicwrite64(StgWord x, StgWord64 val)
{
#if HAVE_C11_ATOMICS
  __atomic_store_n((StgWord64 *) x, (StgWord64) val, __ATOMIC_SEQ_CST);
#else
  while (!__sync_bool_compare_and_swap((StgWord64 *) x, *(StgWord64 *) x, (StgWord64) val));
#endif
}