new file mode 100644
@@ -0,0 +1,39 @@
+/* Test the atomic exchange expansion, shared state space. */
+
+/* { dg-do compile } */
+/* { dg-options "-Wno-long-long" } */
+
+enum memmodel
+{
+ MEMMODEL_SEQ_CST = 5 /* Value of __ATOMIC_SEQ_CST; avoids a header dependency.  */
+};
+/* One object per access size; the "shared" attribute places them in the per-CTA .shared state space.  */
+unsigned char u8 __attribute__((shared));
+unsigned short u16 __attribute__((shared));
+unsigned int u32 __attribute__((shared));
+unsigned long long int u64 __attribute__((shared));
+
+int
+main()
+{
+ __atomic_exchange_n (&u8, 0, MEMMODEL_SEQ_CST); /* 1 byte: not ptx-native, expands to a __atomic_exchange_1 call.  */
+ __atomic_exchange_n (&u16, 0, MEMMODEL_SEQ_CST); /* 2 bytes: not ptx-native, expands to a __atomic_exchange_2 call.  */
+ __atomic_exchange_n (&u32, 0, MEMMODEL_SEQ_CST); /* 4 bytes: ptx-native atom.shared.exch.b32 with membar.cta barriers.  */
+ __atomic_exchange_n (&u64, 0, MEMMODEL_SEQ_CST); /* 8 bytes: ptx-native atom.shared.exch.b64 with membar.cta barriers.  */
+
+ return 0;
+}
+
+
+/* Not ptx-native, fallback to libatomic.
+ Libatomic uses generic addressing with a global lock and membar.sys barriers.
+ We could implement these more efficiently by cloning libatomic for .shared,
+ using a per-CTA lock and membar.cta barrier. But we'd expect
+ performance-critical code to use the ptx-native atomic sizes 32 and 64 bit,
+ so that doesn't seem to be worth the trouble. */
+/* { dg-final { scan-assembler-times "(?n)call .* __atomic_exchange_1" 1 } } */
+/* { dg-final { scan-assembler-times "(?n)call .* __atomic_exchange_2" 1 } } */
+
+/* { dg-final { scan-assembler-times "atom.shared.exch.b32" 1 } } */
+/* { dg-final { scan-assembler-times "atom.shared.exch.b64" 1 } } */
+/* { dg-final { scan-assembler-times "membar.cta" 4 } } */
new file mode 100644
@@ -0,0 +1,33 @@
+/* Test the atomic exchange expansion, global state space. */
+
+/* { dg-do compile } */
+/* { dg-options "-Wno-long-long" } */
+
+enum memmodel
+{
+ MEMMODEL_SEQ_CST = 5 /* Value of __ATOMIC_SEQ_CST; avoids a header dependency.  */
+};
+/* One object per access size; plain file-scope variables end up in the .global state space.  */
+unsigned char u8;
+unsigned short u16;
+unsigned int u32;
+unsigned long long int u64;
+
+int
+main()
+{
+ __atomic_exchange_n (&u8, 0, MEMMODEL_SEQ_CST); /* 1 byte: not ptx-native, expands to a __atomic_exchange_1 call.  */
+ __atomic_exchange_n (&u16, 0, MEMMODEL_SEQ_CST); /* 2 bytes: not ptx-native, expands to a __atomic_exchange_2 call.  */
+ __atomic_exchange_n (&u32, 0, MEMMODEL_SEQ_CST); /* 4 bytes: ptx-native atom.global.exch.b32 with membar.sys barriers.  */
+ __atomic_exchange_n (&u64, 0, MEMMODEL_SEQ_CST); /* 8 bytes: ptx-native atom.global.exch.b64 with membar.sys barriers.  */
+
+ return 0;
+}
+
+/* Not ptx-native, fallback to libatomic. */
+/* { dg-final { scan-assembler-times "(?n)call .* __atomic_exchange_1" 1 } } */
+/* { dg-final { scan-assembler-times "(?n)call .* __atomic_exchange_2" 1 } } */
+
+/* { dg-final { scan-assembler-times "atom.global.exch.b32" 1 } } */
+/* { dg-final { scan-assembler-times "atom.global.exch.b64" 1 } } */
+/* { dg-final { scan-assembler-times "membar.sys" 4 } } */
new file mode 100644
@@ -0,0 +1,33 @@
+/* Test the atomic exchange expansion, generic addressing. */
+
+/* { dg-do compile } */
+/* { dg-options "-Wno-long-long" } */
+
+enum memmodel
+{
+ MEMMODEL_SEQ_CST = 5 /* Value of __ATOMIC_SEQ_CST; avoids a header dependency.  */
+};
+/* Pointers, so the pointed-to state space is unknown and generic addressing must be used.  */
+unsigned char *u8;
+unsigned short *u16;
+unsigned int *u32;
+unsigned long long int *u64;
+
+int
+main()
+{
+ __atomic_exchange_n (u8, 0, MEMMODEL_SEQ_CST); /* 1 byte: not ptx-native, expands to a __atomic_exchange_1 call.  */
+ __atomic_exchange_n (u16, 0, MEMMODEL_SEQ_CST); /* 2 bytes: not ptx-native, expands to a __atomic_exchange_2 call.  */
+ __atomic_exchange_n (u32, 0, MEMMODEL_SEQ_CST); /* 4 bytes: generic-addressing atom.exch.b32 with membar.sys barriers.  */
+ __atomic_exchange_n (u64, 0, MEMMODEL_SEQ_CST); /* 8 bytes: generic-addressing atom.exch.b64 with membar.sys barriers.  */
+
+ return 0;
+}
+
+/* Not ptx-native, fallback to libatomic. */
+/* { dg-final { scan-assembler-times "(?n)call .* __atomic_exchange_1" 1 } } */
+/* { dg-final { scan-assembler-times "(?n)call .* __atomic_exchange_2" 1 } } */
+
+/* { dg-final { scan-assembler-times "atom.exch.b32" 1 } } */
+/* { dg-final { scan-assembler-times "atom.exch.b64" 1 } } */
+/* { dg-final { scan-assembler-times "membar.sys" 4 } } */
new file mode 100644
@@ -0,0 +1,74 @@
+/* Test the atomic exchange expansion, execution. */
+
+/* { dg-do run } */
+/* { dg-options "-Wno-long-long" } */
+
+/* We're trying to generate this type of store/exchange/load sequence:
+ st.global.u32 [g32], %r60;
+ atom.global.exch.b32 %r22, [g32], 2;
+ ld.global.u32 %r23, [g32];
+ with no insns in between.
+
+ We compile this at -O0, to keep the compiler from optimizing out the
+ "p = (P)" assignment. If the assignment is optimized out we don't test
+ the generic case; in other words, we generate for instance atom.global.exch.b32 instead
+ of atom.exch.b32.
+
+ Compiling at -O0 however does introduce loads and stores in the
+ store/exchange/load sequence, so we fix that by using the register
+ keyword. */
+
+enum memmodel
+{
+ MEMMODEL_RELAXED = 0, /* Value of __ATOMIC_RELAXED; avoids a header dependency.  */
+};
+/* Test objects in the .global state space.  */
+unsigned int g32;
+unsigned long long int g64;
+/* Test objects in the per-CTA .shared state space.  */
+unsigned int s32 __attribute__((shared));
+unsigned long long int s64 __attribute__((shared));
+
+#define TEST(P, V1, V2) /* Store V1, exchange in V2; verify old and new values.  */ \
+ { \
+ register typeof (*(P)) tmp; /* register: avoid -O0 stack traffic in the sequence */ \
+ register typeof (*(P)) tmp2; \
+ __atomic_store_n ((P), (V1), MEMMODEL_RELAXED); /* seed *P with V1 */ \
+ tmp = __atomic_exchange_n ((P), (V2), MEMMODEL_RELAXED); /* must return V1 */ \
+ tmp2 = __atomic_load_n ((P), MEMMODEL_RELAXED); /* must observe V2 */ \
+ if (tmp != (V1) || tmp2 != (V2)) \
+ __builtin_abort (); \
+ }
+
+#define TEST2(P, V1, V2) /* Like TEST, but with plain (non-atomic) store and load.  */ \
+ { \
+ register typeof (*(P)) tmp; /* register: avoid -O0 stack traffic in the sequence */ \
+ register typeof (*(P)) tmp2; \
+ *(P) = (V1); /* plain store of V1 */ \
+ tmp = __atomic_exchange_n ((P), (V2), MEMMODEL_RELAXED); /* must return V1 */ \
+ tmp2 = *(P); /* plain load, must observe V2 */ \
+ if (tmp != (V1) || tmp2 != (V2)) \
+ __builtin_abort (); \
+ }
+
+#define TESTS(P) /* Exercise P directly, then again through a pointer variable so the generic-addressing case is also covered (see head comment).  */ \
+ { \
+ TEST ((P), 1, 2); \
+ TEST2 ((P), 3, 4); \
+ { \
+ register typeof (*(P)) * p = (P); /* hides the state space from the compiler */ \
+ TEST (p, 1, 2); \
+ TEST2 (p, 3, 4); \
+ } \
+ }
+
+int
+main ()
+{
+ TESTS (&g32); /* 32-bit exchange, .global state space.  */
+ TESTS (&g64); /* 64-bit exchange, .global state space.  */
+ TESTS (&s32); /* 32-bit exchange, .shared state space.  */
+ TESTS (&s64); /* 64-bit exchange, .shared state space.  */
+
+ return 0;
+}